From a18c3d59dc4adf6c6393cda8512714aa11919fbb Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 12:57:23 -0700 Subject: [PATCH 01/12] add int test --- test/WaveOps/WaveReadLaneAt.32.test | 62 +++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 test/WaveOps/WaveReadLaneAt.32.test diff --git a/test/WaveOps/WaveReadLaneAt.32.test b/test/WaveOps/WaveReadLaneAt.32.test new file mode 100644 index 00000000..0ddff6d2 --- /dev/null +++ b/test/WaveOps/WaveReadLaneAt.32.test @@ -0,0 +1,62 @@ +#--- source.hlsl + +StructuredBuffer InInt : register(t0); +RWStructuredBuffer OutInt : register(u1); + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) { + // Int + uint OutIdx = TID.x * 3; + OutInt[OutIdx] = WaveReadLaneAt(InInt[TID.x], TID.x); + uint4 ThreadInInt = {InInt[TID.x].xyz, InInt[TID.x].w}; + OutInt[OutIdx + 1] = WaveReadLaneAt(ThreadInInt, TID.x);; + OutInt[OutIdx + 2].xy = WaveReadLaneAt(InInt[TID.x].xy, TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InInt + Format: Int32 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutInt + Format: Int32 + Stride: 16 + ZeroInitSize: 144 + - Name: ExpectedOutInt # The result we expect + Format: Int32 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] +Results: + - Result: TestInt + Rule: BufferExact + Actual: OutInt + Expected: ExpectedOutInt +DescriptorSets: + - Resources: + - Name: InInt + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From 9bdb1512ed41628956bf80b0caedb0693bec81ff Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 13:01:08 -0700 Subject: [PATCH 02/12] add uint test --- test/WaveOps/WaveReadLaneAt.32.test | 42 ++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/test/WaveOps/WaveReadLaneAt.32.test b/test/WaveOps/WaveReadLaneAt.32.test index 0ddff6d2..6835f6e1 100644 --- a/test/WaveOps/WaveReadLaneAt.32.test +++ b/test/WaveOps/WaveReadLaneAt.32.test @@ -3,14 +3,24 @@ StructuredBuffer InInt : register(t0); RWStructuredBuffer OutInt : register(u1); +StructuredBuffer InUInt : register(t2); +RWStructuredBuffer OutUInt : register(u3); + [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { - // Int uint OutIdx = TID.x * 3; + + // Int OutInt[OutIdx] = WaveReadLaneAt(InInt[TID.x], TID.x); uint4 ThreadInInt = {InInt[TID.x].xyz, InInt[TID.x].w}; OutInt[OutIdx + 1] = WaveReadLaneAt(ThreadInInt, TID.x);; OutInt[OutIdx + 2].xy = WaveReadLaneAt(InInt[TID.x].xy, TID.x); + + // UInt + OutUInt[OutIdx] = WaveReadLaneAt(InUInt[TID.x], TID.x); + uint4 ThreadInUInt = {InUInt[TID.x].xyz, InUInt[TID.x].w}; + OutUInt[OutIdx + 1] = WaveReadLaneAt(ThreadInUInt, TID.x);; + OutUInt[OutIdx + 2].xy = WaveReadLaneAt(InUInt[TID.x].xy, TID.x); } //--- pipeline.yaml @@ -33,11 +43,27 @@ Buffers: Format: Int32 Stride: 16 Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InUInt + Format: UInt32 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutUInt + Format: UInt32 + Stride: 16 + ZeroInitSize: 144 + - Name: ExpectedOutUInt # The result we expect + Format: UInt32 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] Results: - Result: TestInt 
Rule: BufferExact Actual: OutInt Expected: ExpectedOutInt + - Result: TestUInt + Rule: BufferExact + Actual: OutUInt + Expected: ExpectedOutUInt DescriptorSets: - Resources: - Name: InInt @@ -49,11 +75,25 @@ DescriptorSets: Binding: 0 - Name: OutInt Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InUInt + Kind: StructuredBuffer DirectXBinding: Register: 2 Space: 0 VulkanBinding: Binding: 2 + - Name: OutUInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 ... #--- end From 23eee329e37a537b11845b8f98cb4b3513e40ca2 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 13:08:26 -0700 Subject: [PATCH 03/12] add basic floats --- test/WaveOps/WaveReadLaneAt.32.test | 39 +++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/WaveOps/WaveReadLaneAt.32.test b/test/WaveOps/WaveReadLaneAt.32.test index 6835f6e1..4e4c6be5 100644 --- a/test/WaveOps/WaveReadLaneAt.32.test +++ b/test/WaveOps/WaveReadLaneAt.32.test @@ -6,6 +6,9 @@ RWStructuredBuffer OutInt : register(u1); StructuredBuffer InUInt : register(t2); RWStructuredBuffer OutUInt : register(u3); +StructuredBuffer InFloat : register(t4); +RWStructuredBuffer OutFloat : register(u5); + [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { uint OutIdx = TID.x * 3; @@ -21,6 +24,12 @@ void main(uint3 TID : SV_GroupThreadID) { uint4 ThreadInUInt = {InUInt[TID.x].xyz, InUInt[TID.x].w}; OutUInt[OutIdx + 1] = WaveReadLaneAt(ThreadInUInt, TID.x);; OutUInt[OutIdx + 2].xy = WaveReadLaneAt(InUInt[TID.x].xy, TID.x); + + // Float + OutFloat[OutIdx] = WaveReadLaneAt(InFloat[TID.x], TID.x); + uint4 ThreadInFloat = {InFloat[TID.x].xyz, InFloat[TID.x].w}; + OutFloat[OutIdx + 1] = WaveReadLaneAt(ThreadInFloat, TID.x);; + OutFloat[OutIdx + 2].xy = WaveReadLaneAt(InFloat[TID.x].xy, TID.x); } //--- pipeline.yaml @@ -55,6 +64,18 @@ Buffers: Format: UInt32 Stride: 16 Data: [ 0, 1, 2, 3, 0, 1, 
2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InFloat + Format: Float32 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutFloat + Format: Float32 + Stride: 16 + ZeroInitSize: 144 + - Name: ExpectedOutFloat # The result we expect + Format: Float32 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] Results: - Result: TestInt Rule: BufferExact @@ -64,6 +85,10 @@ Results: Rule: BufferExact Actual: OutUInt Expected: ExpectedOutUInt + - Result: TestFloat + Rule: BufferExact + Actual: OutFloat + Expected: ExpectedOutFloat DescriptorSets: - Resources: - Name: InInt @@ -94,6 +119,20 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 3 + - Name: InFloat + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: OutFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 ... 
#--- end From ac2c8f53ea31ae683a3799cec9c7b5aa23f1a425 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 13:18:34 -0700 Subject: [PATCH 04/12] add basic 16 bit support --- test/WaveOps/WaveReadLaneAt.16.test | 143 ++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 test/WaveOps/WaveReadLaneAt.16.test diff --git a/test/WaveOps/WaveReadLaneAt.16.test b/test/WaveOps/WaveReadLaneAt.16.test new file mode 100644 index 00000000..3432a029 --- /dev/null +++ b/test/WaveOps/WaveReadLaneAt.16.test @@ -0,0 +1,143 @@ +#--- source.hlsl + +StructuredBuffer InInt : register(t0); +RWStructuredBuffer OutInt : register(u1); + +StructuredBuffer InUInt : register(t2); +RWStructuredBuffer OutUInt : register(u3); + +StructuredBuffer InFloat : register(t4); +RWStructuredBuffer OutFloat : register(u5); + +[numthreads(4,1,1)] +void main(uint16_t3 TID : SV_GroupThreadID) { + uint OutIdx = TID.x * 3; + + // Int (the rebuilt local vector is signed to match the Int16 InInt buffer) + OutInt[OutIdx] = WaveReadLaneAt(InInt[TID.x], TID.x); + int16_t4 ThreadInInt = {InInt[TID.x].xyz, InInt[TID.x].w}; + OutInt[OutIdx + 1] = WaveReadLaneAt(ThreadInInt, TID.x); + OutInt[OutIdx + 2].xy = WaveReadLaneAt(InInt[TID.x].xy, TID.x); + + // UInt (the rebuilt local vector is unsigned to match the UInt16 InUInt buffer) + OutUInt[OutIdx] = WaveReadLaneAt(InUInt[TID.x], TID.x); + uint16_t4 ThreadInUInt = {InUInt[TID.x].xyz, InUInt[TID.x].w}; + OutUInt[OutIdx + 1] = WaveReadLaneAt(ThreadInUInt, TID.x); + OutUInt[OutIdx + 2].xy = WaveReadLaneAt(InUInt[TID.x].xy, TID.x); + + // Float (the rebuilt local vector is half precision to match the Float16 InFloat buffer) + OutFloat[OutIdx] = WaveReadLaneAt(InFloat[TID.x], TID.x); + float16_t4 ThreadInFloat = {InFloat[TID.x].xyz, InFloat[TID.x].w}; + OutFloat[OutIdx + 1] = WaveReadLaneAt(ThreadInFloat, TID.x); + OutFloat[OutIdx + 2].xy = WaveReadLaneAt(InFloat[TID.x].xy, TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InInt + Format: Int16 + Stride: 8 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutInt + Format: Int16 + Stride: 8 + ZeroInitSize: 72 + - 
Name: ExpectedOutInt # The result we expect + Format: Int16 + Stride: 8 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InUInt + Format: UInt16 + Stride: 8 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutUInt + Format: UInt16 + Stride: 8 + ZeroInitSize: 72 + - Name: ExpectedOutUInt # The result we expect + Format: UInt16 + Stride: 8 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InFloat + Format: Float16 + Stride: 8 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutFloat + Format: Float16 + Stride: 8 + ZeroInitSize: 72 + - Name: ExpectedOutFloat # The result we expect + Format: Float16 + Stride: 8 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] +Results: + - Result: TestInt + Rule: BufferExact + Actual: OutInt + Expected: ExpectedOutInt + - Result: TestUInt + Rule: BufferExact + Actual: OutUInt + Expected: ExpectedOutUInt + - Result: TestFloat + Rule: BufferExact + Actual: OutFloat + Expected: ExpectedOutFloat +DescriptorSets: + - Resources: + - Name: InInt + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InUInt + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: OutUInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: InFloat + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: OutFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 +... 
+#--- end + +# REQUIRES: Half, Int16 + +# RUN: split-file %s %t +# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From 22ff9ababb8bbfda5ff6f5ea3e14edf0a10a4816 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 14:04:15 -0700 Subject: [PATCH 05/12] add 64 bit tests --- test/WaveOps/WaveReadLaneAt.64.test | 143 ++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 test/WaveOps/WaveReadLaneAt.64.test diff --git a/test/WaveOps/WaveReadLaneAt.64.test b/test/WaveOps/WaveReadLaneAt.64.test new file mode 100644 index 00000000..e23cb4e1 --- /dev/null +++ b/test/WaveOps/WaveReadLaneAt.64.test @@ -0,0 +1,143 @@ +#--- source.hlsl + +StructuredBuffer InInt : register(t0); +RWStructuredBuffer OutInt : register(u1); + +StructuredBuffer InUInt : register(t2); +RWStructuredBuffer OutUInt : register(u3); + +StructuredBuffer InFloat : register(t4); +RWStructuredBuffer OutFloat : register(u5); + +[numthreads(4,1,1)] +void main(uint64_t3 TID : SV_GroupThreadID) { + uint OutIdx = TID.x * 3; + + // Int (the rebuilt local vector is signed to match the Int64 InInt buffer) + OutInt[OutIdx] = WaveReadLaneAt(InInt[TID.x], TID.x); + int64_t4 ThreadInInt = {InInt[TID.x].xyz, InInt[TID.x].w}; + OutInt[OutIdx + 1] = WaveReadLaneAt(ThreadInInt, TID.x); + OutInt[OutIdx + 2].xy = WaveReadLaneAt(InInt[TID.x].xy, TID.x); + + // UInt (the rebuilt local vector is unsigned to match the UInt64 InUInt buffer) + OutUInt[OutIdx] = WaveReadLaneAt(InUInt[TID.x], TID.x); + uint64_t4 ThreadInUInt = {InUInt[TID.x].xyz, InUInt[TID.x].w}; + OutUInt[OutIdx + 1] = WaveReadLaneAt(ThreadInUInt, TID.x); + OutUInt[OutIdx + 2].xy = WaveReadLaneAt(InUInt[TID.x].xy, TID.x); + + // Float (the rebuilt local vector is double precision to match the Float64 InFloat buffer) + OutFloat[OutIdx] = WaveReadLaneAt(InFloat[TID.x], TID.x); + float64_t4 ThreadInFloat = {InFloat[TID.x].xyz, InFloat[TID.x].w}; + OutFloat[OutIdx + 1] = WaveReadLaneAt(ThreadInFloat, TID.x); + OutFloat[OutIdx + 2].xy = WaveReadLaneAt(InFloat[TID.x].xy, TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] 
+Buffers: + - Name: InInt + Format: Int64 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutInt + Format: Int64 + Stride: 16 + ZeroInitSize: 288 + - Name: ExpectedOutInt # The result we expect + Format: Int64 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InUInt + Format: UInt64 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutUInt + Format: UInt64 + Stride: 16 + ZeroInitSize: 288 + - Name: ExpectedOutUInt # The result we expect + Format: UInt64 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InFloat + Format: Float64 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutFloat + Format: Float64 + Stride: 16 + ZeroInitSize: 288 + - Name: ExpectedOutFloat # The result we expect + Format: Float64 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] +Results: + - Result: TestInt + Rule: BufferExact + Actual: OutInt + Expected: ExpectedOutInt + - Result: TestUInt + Rule: BufferExact + Actual: OutUInt + Expected: ExpectedOutUInt + - Result: TestFloat + Rule: BufferExact + Actual: OutFloat + Expected: ExpectedOutFloat +DescriptorSets: + - Resources: + - Name: InInt + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InUInt + Kind: StructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: OutUInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: InFloat + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: 
OutFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 +... +#--- end + +# REQUIRES: Half, Int64 + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From 0ffc20d09c09c4cd98dbb43ad2ba2b12fdd4e839 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 14:30:13 -0700 Subject: [PATCH 06/12] fix typo --- test/WaveOps/WaveReadLaneAt.64.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/WaveOps/WaveReadLaneAt.64.test b/test/WaveOps/WaveReadLaneAt.64.test index e23cb4e1..6c9e19a9 100644 --- a/test/WaveOps/WaveReadLaneAt.64.test +++ b/test/WaveOps/WaveReadLaneAt.64.test @@ -136,7 +136,7 @@ DescriptorSets: ... #--- end -# REQUIRES: Half, Int64 +# REQUIRES: Double, Int64 # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl From f5683f6782bd246a54df7c6e6211511bbcbd7d46 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 14:30:47 -0700 Subject: [PATCH 07/12] add basic user defined tests --- test/WaveOps/WaveReadLaneAt.udt.test | 123 +++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 test/WaveOps/WaveReadLaneAt.udt.test diff --git a/test/WaveOps/WaveReadLaneAt.udt.test b/test/WaveOps/WaveReadLaneAt.udt.test new file mode 100644 index 00000000..d021ba9c --- /dev/null +++ b/test/WaveOps/WaveReadLaneAt.udt.test @@ -0,0 +1,123 @@ +#--- source.hlsl + +struct UserDefinedStruct { + int intData; + float floatData; +}; + +StructuredBuffer InUDS : register(t0); +RWStructuredBuffer OutInt : register(u1); +RWStructuredBuffer OutFloat : register(u2); + +// Note: stored in column-major format +struct MatrixStruct { + uint4x4 matrixData; +}; + +StructuredBuffer InMatrix : register(t3); +RWStructuredBuffer OutMatrix : register(u4); + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) { + OutInt[TID.x] = WaveReadLaneAt(InUDS[TID.x].intData, TID.x); + 
OutFloat[TID.x] = WaveReadLaneAt(InUDS[TID.x].floatData, TID.x); + + OutMatrix[TID.x] = WaveReadLaneAt(InMatrix[0].matrixData[TID.x], TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InUDS + Format: Hex32 + Stride: 16 + Data: [0x0, 0x0, 0x1, 0x3f800000, 0x2, 0x40000000, 0x3, 0x40400000 ] + - Name: OutInt + Format: Int32 + Stride: 16 + ZeroInitSize: 16 + - Name: OutFloat + Format: Float32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOutInt # The result we expect + Format: Int32 + Stride: 16 + Data: [ 0, 1, 2, 3] + - Name: ExpectedOutFloat # The result we expect + Format: Float32 + Stride: 16 + Data: [ 0, 1, 2, 3] + - Name: InMatrix + Format: UInt32 + Stride: 16 + Data: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] + - Name: OutMatrix + Format: UInt32 + Stride: 16 + ZeroInitSize: 64 + - Name: ExpectedOutMatrix # The result we expect + Format: UInt32 + Stride: 16 + Data: [ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 ] +Results: + - Result: TestInt + Rule: BufferExact + Actual: OutInt + Expected: ExpectedOutInt + - Result: TestFloat + Rule: BufferExact + Actual: OutFloat + Expected: ExpectedOutFloat + - Result: TestMatrix + Rule: BufferExact + Actual: OutMatrix + Expected: ExpectedOutMatrix +DescriptorSets: + - Resources: + - Name: InUDS + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: OutFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: InMatrix + Kind: StructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: OutMatrix + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From a0abca767c36fc2bc824884cf457b0500ae8ef1c Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 14:51:19 -0700 Subject: [PATCH 08/12] add tests for index behaviour --- test/WaveOps/WaveReadLaneAt.index.test | 120 +++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 test/WaveOps/WaveReadLaneAt.index.test diff --git a/test/WaveOps/WaveReadLaneAt.index.test b/test/WaveOps/WaveReadLaneAt.index.test new file mode 100644 index 00000000..1da10bdf --- /dev/null +++ b/test/WaveOps/WaveReadLaneAt.index.test @@ -0,0 +1,120 @@ +#--- source.hlsl + +StructuredBuffer InInt : register(t0); +RWStructuredBuffer OutBroadcast : register(u1); +RWStructuredBuffer OutShift : register(u2); +RWStructuredBuffer OutMix : register(u3); + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) { + OutBroadcast[TID.x] = WaveReadLaneAt(InInt[TID.x], 2); + + uint PosShiftIndex = (TID.x + 1) % 4; + int PosValue = WaveReadLaneAt(InInt[TID.x], PosShiftIndex); + uint NegShiftIndex = (TID.x - 1) % 4; + int NegValue = WaveReadLaneAt(InInt[TID.x], NegShiftIndex); + OutShift[TID.x] = PosValue + NegValue; + + uint MixIndex = 0; + switch (TID.x) { + case 0: + MixIndex = 2; + break; + case 1: + MixIndex = 3; + break; + case 2: + MixIndex = 1; + break; + default: + break; + } + + OutMix[TID.x] = WaveReadLaneAt(InInt[TID.x], MixIndex); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InInt + Format: Int32 + Stride: 16 + Data: [0, 1, 2, 3 ] + - Name: OutBroadcast + Format: Int32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOutBroadcast # The result we expect + Format: Int32 + Stride: 16 + Data: [ 2, 2, 2, 2 ] + - Name: OutShift + Format: Int32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOutShift # The result we expect + Format: Int32 + Stride: 16 + 
Data: [ 4, 2, 4, 2 ] + - Name: OutMix + Format: Int32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOutMix # The result we expect + Format: Int32 + Stride: 16 + Data: [ 2, 3, 1, 0 ] +Results: + - Result: TestBroadcast + Rule: BufferExact + Actual: OutBroadcast + Expected: ExpectedOutBroadcast + - Result: TestShift + Rule: BufferExact + Actual: OutShift + Expected: ExpectedOutShift + - Result: TestMix + Rule: BufferExact + Actual: OutMix + Expected: ExpectedOutMix +DescriptorSets: + - Resources: + - Name: InInt + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutBroadcast + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: OutShift + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: OutMix + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From b8ff5b337d02b06136a9fedd96f0f41ef6c6bc68 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Mon, 21 Jul 2025 14:58:55 -0700 Subject: [PATCH 09/12] add float edge cases --- test/WaveOps/WaveReadLaneAt.udt.test | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/WaveOps/WaveReadLaneAt.udt.test b/test/WaveOps/WaveReadLaneAt.udt.test index d021ba9c..cfe09610 100644 --- a/test/WaveOps/WaveReadLaneAt.udt.test +++ b/test/WaveOps/WaveReadLaneAt.udt.test @@ -17,12 +17,18 @@ struct MatrixStruct { StructuredBuffer InMatrix : register(t3); RWStructuredBuffer OutMatrix : register(u4); +// Checks for edge-case floats +StructuredBuffer InEdgeFloat : register(t5); +RWStructuredBuffer OutEdgeFloat : register(u6); + [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { OutInt[TID.x] = WaveReadLaneAt(InUDS[TID.x].intData, TID.x); OutFloat[TID.x] = 
WaveReadLaneAt(InUDS[TID.x].floatData, TID.x); OutMatrix[TID.x] = WaveReadLaneAt(InMatrix[0].matrixData[TID.x], TID.x); + + OutEdgeFloat[TID.x] = WaveReadLaneAt(InEdgeFloat[TID.x], TID.x); } //--- pipeline.yaml @@ -65,6 +71,18 @@ Buffers: Format: UInt32 Stride: 16 Data: [ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 ] + - Name: InEdgeFloat + Format: Float32 + Stride: 16 + Data: [inf, -inf, nan, 0 ] + - Name: OutEdgeFloat + Format: Float32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOutEdgeFloat # The result we expect + Format: Float32 + Stride: 16 + Data: [ inf, -inf, nan, 0] Results: - Result: TestInt Rule: BufferExact @@ -78,6 +96,10 @@ Results: Rule: BufferExact Actual: OutMatrix Expected: ExpectedOutMatrix + - Result: TestEdgeFloat + Rule: BufferExact + Actual: OutEdgeFloat + Expected: ExpectedOutEdgeFloat DescriptorSets: - Resources: - Name: InUDS @@ -115,6 +137,20 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 4 + - Name: InEdgeFloat + Kind: StructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: OutEdgeFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 ... 
#--- end From 02c0816f76b9bef7a3202934397a0b6d88d34728 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Tue, 22 Jul 2025 09:17:54 -0700 Subject: [PATCH 10/12] fix vulkan bindings --- test/WaveOps/WaveReadLaneAt.16.test | 2 +- test/WaveOps/WaveReadLaneAt.32.test | 2 +- test/WaveOps/WaveReadLaneAt.64.test | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/WaveOps/WaveReadLaneAt.16.test b/test/WaveOps/WaveReadLaneAt.16.test index 3432a029..ec79db51 100644 --- a/test/WaveOps/WaveReadLaneAt.16.test +++ b/test/WaveOps/WaveReadLaneAt.16.test @@ -104,7 +104,7 @@ DescriptorSets: Register: 1 Space: 0 VulkanBinding: - Binding: 2 + Binding: 1 - Name: InUInt Kind: StructuredBuffer DirectXBinding: diff --git a/test/WaveOps/WaveReadLaneAt.32.test b/test/WaveOps/WaveReadLaneAt.32.test index 4e4c6be5..319e0d62 100644 --- a/test/WaveOps/WaveReadLaneAt.32.test +++ b/test/WaveOps/WaveReadLaneAt.32.test @@ -104,7 +104,7 @@ DescriptorSets: Register: 1 Space: 0 VulkanBinding: - Binding: 2 + Binding: 1 - Name: InUInt Kind: StructuredBuffer DirectXBinding: diff --git a/test/WaveOps/WaveReadLaneAt.64.test b/test/WaveOps/WaveReadLaneAt.64.test index 6c9e19a9..e54a781c 100644 --- a/test/WaveOps/WaveReadLaneAt.64.test +++ b/test/WaveOps/WaveReadLaneAt.64.test @@ -104,7 +104,7 @@ DescriptorSets: Register: 1 Space: 0 VulkanBinding: - Binding: 2 + Binding: 1 - Name: InUInt Kind: StructuredBuffer DirectXBinding: From 28d18e403602f644a815ef6cf579e4fa9cd6c641 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Tue, 22 Jul 2025 09:20:17 -0700 Subject: [PATCH 11/12] move float edge case tests --- test/WaveOps/WaveReadLaneAt.32.test | 37 ++++++++++++++++++++++++++++ test/WaveOps/WaveReadLaneAt.udt.test | 36 --------------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/test/WaveOps/WaveReadLaneAt.32.test b/test/WaveOps/WaveReadLaneAt.32.test index 319e0d62..a3b68b05 100644 --- a/test/WaveOps/WaveReadLaneAt.32.test +++ 
b/test/WaveOps/WaveReadLaneAt.32.test @@ -9,6 +9,10 @@ RWStructuredBuffer OutUInt : register(u3); StructuredBuffer InFloat : register(t4); RWStructuredBuffer OutFloat : register(u5); +// Checks for edge-case floats +StructuredBuffer InEdgeFloat : register(t6); +RWStructuredBuffer OutEdgeFloat : register(u7); + [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { uint OutIdx = TID.x * 3; @@ -30,6 +34,9 @@ void main(uint3 TID : SV_GroupThreadID) { uint4 ThreadInFloat = {InFloat[TID.x].xyz, InFloat[TID.x].w}; OutFloat[OutIdx + 1] = WaveReadLaneAt(ThreadInFloat, TID.x);; OutFloat[OutIdx + 2].xy = WaveReadLaneAt(InFloat[TID.x].xy, TID.x); + + // Edge-Cases + OutEdgeFloat[TID.x] = WaveReadLaneAt(InEdgeFloat[TID.x], TID.x); } //--- pipeline.yaml @@ -76,6 +83,18 @@ Buffers: Format: Float32 Stride: 16 Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] + - Name: InEdgeFloat + Format: Float32 + Stride: 16 + Data: [inf, -inf, nan, 0 ] + - Name: OutEdgeFloat + Format: Float32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOutEdgeFloat # The result we expect + Format: Float32 + Stride: 16 + Data: [ inf, -inf, nan, 0] Results: - Result: TestInt Rule: BufferExact @@ -89,6 +108,10 @@ Results: Rule: BufferExact Actual: OutFloat Expected: ExpectedOutFloat + - Result: TestEdgeFloat + Rule: BufferExact + Actual: OutEdgeFloat + Expected: ExpectedOutEdgeFloat DescriptorSets: - Resources: - Name: InInt @@ -133,6 +156,20 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 5 + - Name: InEdgeFloat + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: OutEdgeFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 ... 
#--- end diff --git a/test/WaveOps/WaveReadLaneAt.udt.test b/test/WaveOps/WaveReadLaneAt.udt.test index cfe09610..d021ba9c 100644 --- a/test/WaveOps/WaveReadLaneAt.udt.test +++ b/test/WaveOps/WaveReadLaneAt.udt.test @@ -17,18 +17,12 @@ struct MatrixStruct { StructuredBuffer InMatrix : register(t3); RWStructuredBuffer OutMatrix : register(u4); -// Checks for edge-case floats -StructuredBuffer InEdgeFloat : register(t5); -RWStructuredBuffer OutEdgeFloat : register(u6); - [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { OutInt[TID.x] = WaveReadLaneAt(InUDS[TID.x].intData, TID.x); OutFloat[TID.x] = WaveReadLaneAt(InUDS[TID.x].floatData, TID.x); OutMatrix[TID.x] = WaveReadLaneAt(InMatrix[0].matrixData[TID.x], TID.x); - - OutEdgeFloat[TID.x] = WaveReadLaneAt(InEdgeFloat[TID.x], TID.x); } //--- pipeline.yaml @@ -71,18 +65,6 @@ Buffers: Format: UInt32 Stride: 16 Data: [ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 ] - - Name: InEdgeFloat - Format: Float32 - Stride: 16 - Data: [inf, -inf, nan, 0 ] - - Name: OutEdgeFloat - Format: Float32 - Stride: 16 - ZeroInitSize: 16 - - Name: ExpectedOutEdgeFloat # The result we expect - Format: Float32 - Stride: 16 - Data: [ inf, -inf, nan, 0] Results: - Result: TestInt Rule: BufferExact @@ -96,10 +78,6 @@ Results: Rule: BufferExact Actual: OutMatrix Expected: ExpectedOutMatrix - - Result: TestEdgeFloat - Rule: BufferExact - Actual: OutEdgeFloat - Expected: ExpectedOutEdgeFloat DescriptorSets: - Resources: - Name: InUDS @@ -137,20 +115,6 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 4 - - Name: InEdgeFloat - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: OutEdgeFloat - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 ... 
#--- end From 9bc944f7ee1bebd143f0fb129496d36cf412c1e6 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Tue, 22 Jul 2025 09:26:51 -0700 Subject: [PATCH 12/12] move matrix to own test - we only want to disable the minimal set of tests --- test/WaveOps/WaveReadLaneAt.mtx.test | 62 ++++++++++++++++++++++++++++ test/WaveOps/WaveReadLaneAt.udt.test | 40 ------------------ 2 files changed, 62 insertions(+), 40 deletions(-) create mode 100644 test/WaveOps/WaveReadLaneAt.mtx.test diff --git a/test/WaveOps/WaveReadLaneAt.mtx.test b/test/WaveOps/WaveReadLaneAt.mtx.test new file mode 100644 index 00000000..b169dc9a --- /dev/null +++ b/test/WaveOps/WaveReadLaneAt.mtx.test @@ -0,0 +1,62 @@ +#--- source.hlsl + +// Note: matrixData is stored in column-major format, so each row read in main gathers every fourth element of the InMatrix Data (row 0 is 0, 4, 8, 12); ExpectedOutMatrix is therefore the transpose of the input +struct MatrixStruct { + uint4x4 matrixData; +}; + +StructuredBuffer InMatrix : register(t0); +RWStructuredBuffer OutMatrix : register(u1); + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) { + OutMatrix[TID.x] = WaveReadLaneAt(InMatrix[0].matrixData[TID.x], TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InMatrix + Format: UInt32 + Stride: 16 + Data: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] + - Name: OutMatrix + Format: UInt32 + Stride: 16 + ZeroInitSize: 64 + - Name: ExpectedOutMatrix # The result we expect + Format: UInt32 + Stride: 16 + Data: [ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 ] +Results: + - Result: TestMatrix + Rule: BufferExact + Actual: OutMatrix + Expected: ExpectedOutMatrix +DescriptorSets: + - Resources: + - Name: InMatrix + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutMatrix + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneAt.udt.test b/test/WaveOps/WaveReadLaneAt.udt.test index d021ba9c..f850be94 100644 --- a/test/WaveOps/WaveReadLaneAt.udt.test +++ b/test/WaveOps/WaveReadLaneAt.udt.test @@ -9,20 +9,10 @@ StructuredBuffer InUDS : register(t0); RWStructuredBuffer OutInt : register(u1); RWStructuredBuffer OutFloat : register(u2); -// Note: stored in column-major format -struct MatrixStruct { - uint4x4 matrixData; -}; - -StructuredBuffer InMatrix : register(t3); -RWStructuredBuffer OutMatrix : register(u4); - [numthreads(4,1,1)] void main(uint3 TID : SV_GroupThreadID) { OutInt[TID.x] = WaveReadLaneAt(InUDS[TID.x].intData, TID.x); OutFloat[TID.x] = WaveReadLaneAt(InUDS[TID.x].floatData, TID.x); - - OutMatrix[TID.x] = WaveReadLaneAt(InMatrix[0].matrixData[TID.x], TID.x); } //--- pipeline.yaml @@ -53,18 +43,6 @@ Buffers: Format: Float32 Stride: 16 Data: [ 0, 1, 2, 3] - - Name: InMatrix - Format: UInt32 - Stride: 16 - Data: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] - - Name: OutMatrix - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOutMatrix # The result we expect - Format: UInt32 - Stride: 16 - Data: [ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 ] Results: - Result: TestInt Rule: BufferExact @@ -74,10 +52,6 @@ Results: Rule: BufferExact Actual: OutFloat Expected: ExpectedOutFloat - - Result: TestMatrix - Rule: BufferExact - Actual: OutMatrix - Expected: ExpectedOutMatrix DescriptorSets: - Resources: - Name: InUDS @@ -101,20 +75,6 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 2 - - Name: InMatrix - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: OutMatrix - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 ... #--- end