From cb28257be069975a90b207ed9bee11ba6fc0d464 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 8 Sep 2025 15:41:26 -0700 Subject: [PATCH 1/4] write wavereadlanefirst tests --- test/WaveOps/WaveReadLaneFirst.fp16.test | 185 ++++++++++++ test/WaveOps/WaveReadLaneFirst.fp32.test | 179 ++++++++++++ test/WaveOps/WaveReadLaneFirst.fp64.test | 179 ++++++++++++ test/WaveOps/WaveReadLaneFirst.int16.test | 331 ++++++++++++++++++++++ test/WaveOps/WaveReadLaneFirst.int32.test | 331 ++++++++++++++++++++++ test/WaveOps/WaveReadLaneFirst.int64.test | 331 ++++++++++++++++++++++ 6 files changed, 1536 insertions(+) create mode 100644 test/WaveOps/WaveReadLaneFirst.fp16.test create mode 100644 test/WaveOps/WaveReadLaneFirst.fp32.test create mode 100644 test/WaveOps/WaveReadLaneFirst.fp64.test create mode 100644 test/WaveOps/WaveReadLaneFirst.int16.test create mode 100644 test/WaveOps/WaveReadLaneFirst.int32.test create mode 100644 test/WaveOps/WaveReadLaneFirst.int64.test diff --git a/test/WaveOps/WaveReadLaneFirst.fp16.test b/test/WaveOps/WaveReadLaneFirst.fp16.test new file mode 100644 index 00000000..be4148ab --- /dev/null +++ b/test/WaveOps/WaveReadLaneFirst.fp16.test @@ -0,0 +1,185 @@ +#--- source.hlsl +StructuredBuffer<half4> In : register(t0); +RWStructuredBuffer<half4> Out1 : register(u1); // test scalar +RWStructuredBuffer<half4> Out2 : register(u2); // test half2 +RWStructuredBuffer<half4> Out3 : register(u3); // test half3 +RWStructuredBuffer<half4> Out4 : register(u4); // test half4 +RWStructuredBuffer<half4> Out5 : register(u5); // constant folding + +[numthreads(4,1,1)] +void main(uint3 tid : SV_GroupThreadID) +{ + half4 v = In[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + half s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0; + half s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0; + half s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0; + half s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0; + + half2 v2_1 = tid.x >= 3 ? 
WaveReadLaneFirst( v.xy ) : half2(0,0); + half2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : half2(0,0); + half2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : half2(0,0); + half2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : half2(0,0); + + half3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0); + half3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0); + half3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0); + half3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0); + + half4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : half4(0,0,0,0); + half4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : half4(0,0,0,0); + half4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : half4(0,0,0,0); + half4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : half4(0,0,0,0); + + half scalars[4] = { s4, s3, s2, s1 }; + half2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 }; + half3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 }; + half4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 }; + + Out1[tid.x].x = scalars[tid.x]; + Out2[tid.x].xy = vec2s[tid.x]; + Out3[tid.x].xyz = vec3s[tid.x]; + Out4[tid.x] = vec4s[tid.x]; + + // constant folding case + Out5[0] = WaveReadLaneFirst(half4(1,2,3,4)); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Float16 + Stride: 8 + # 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 + Data: [ 0x3c00, 0x4900, 0x5640, 0x63d0, 0x4000, 0x4d00, 0x5a40, 0x67d0, 0x4200, 0x4f80, 0x5cb0, 0x69dc, 0x4400, 0x5100, 0x5e40, 0x6bd0 ] + - Name: Out1 + Format: Float16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out2 + Format: Float16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out3 + Format: Float16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out4 + Format: Float16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out5 + Format: Float16 + Stride: 8 + ZeroInitSize: 8 + - Name: ExpectedOut1 + Format: Float16 + Stride: 8 + # 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 + 
Data: [ 0x3c00, 0x0, 0x0, 0x0, 0x4000, 0x0, 0x0, 0x0, 0x4200, 0x0, 0x0, 0x0, 0x4400, 0x0, 0x0, 0x0 ] + - Name: ExpectedOut2 + Format: Float16 + Stride: 8 + # 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 + Data: [ 0x3c00, 0x4900, 0x0, 0x0, 0x4000, 0x4d00, 0x0, 0x0, 0x4200, 0x4f80, 0x0, 0x0, 0x4400, 0x5100, 0x0, 0x0 ] + - Name: ExpectedOut3 + Format: Float16 + Stride: 8 + # 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 + Data: [ 0x3c00, 0x4900, 0x5640, 0x0, 0x4000, 0x4d00, 0x5a40, 0x0, 0x4200, 0x4f80, 0x5cb0, 0x0, 0x4400, 0x5100, 0x5e40, 0x0 ] + - Name: ExpectedOut4 + Format: Float16 + Stride: 8 + # 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 + Data: [ 0x3c00, 0x4900, 0x5640, 0x63d0, 0x4000, 0x4d00, 0x5a40, 0x67d0, 0x4200, 0x4f80, 0x5cb0, 0x69dc, 0x4400, 0x5100, 0x5e40, 0x6bd0 ] + - Name: ExpectedOut5 + Format: Float16 + Stride: 8 + # 1, 2, 3, 4 + Data: [ 0x3C00, 0x4000, 0x4200, 0x4400 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: ExpectedOut5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out5 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 +... +#--- end + + +# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# XFAIL: Metal + +# Bug https://github.com/llvm/llvm-project/issues/156775 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.fp32.test b/test/WaveOps/WaveReadLaneFirst.fp32.test new file mode 100644 index 00000000..304802a5 --- /dev/null +++ b/test/WaveOps/WaveReadLaneFirst.fp32.test @@ -0,0 +1,179 @@ +#--- source.hlsl +StructuredBuffer<float4> In : register(t0); +RWStructuredBuffer<float4> Out1 : register(u1); // test scalar +RWStructuredBuffer<float4> Out2 : register(u2); // test float2 +RWStructuredBuffer<float4> Out3 : register(u3); // test float3 +RWStructuredBuffer<float4> Out4 : register(u4); // test float4 +RWStructuredBuffer<float4> Out5 : register(u5); // constant folding + +[numthreads(4,1,1)] +void main(uint3 tid : SV_GroupThreadID) +{ + float4 v = In[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + float s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0; + float s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0; + float s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0; + float s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0; + + float2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : float2(0,0); + float2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : float2(0,0); + float2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : float2(0,0); + float2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : float2(0,0); + + float3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0); + float3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0); + float3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0); + float3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0); + + float4 v4_1 = tid.x >= 3 ? 
WaveReadLaneFirst( v ) : float4(0,0,0,0); + float4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : float4(0,0,0,0); + float4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : float4(0,0,0,0); + float4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : float4(0,0,0,0); + + float scalars[4] = { s4, s3, s2, s1 }; + float2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 }; + float3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 }; + float4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 }; + + Out1[tid.x].x = scalars[tid.x]; + Out2[tid.x].xy = vec2s[tid.x]; + Out3[tid.x].xyz = vec3s[tid.x]; + Out4[tid.x] = vec4s[tid.x]; + + // constant folding case + Out5[0] = WaveReadLaneFirst(float4(1,2,3,4)); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Float32 + Stride: 16 + Data: [ 1.0, 10.0, 100.0, 1000.0, 2.0, 20.0, 200.0, 2000.0, 3.0, 30.0, 300.0, 3000.0, 4.0, 40.0, 400.0, 4000.0 ] + - Name: Out1 + Format: Float32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out2 + Format: Float32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out3 + Format: Float32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out4 + Format: Float32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out5 + Format: Float32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOut1 + Format: Float32 + Stride: 16 + Data: [ 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0 ] + - Name: ExpectedOut2 + Format: Float32 + Stride: 16 + Data: [ 1.0, 10.0, 0.0, 0.0, 2.0, 20.0, 0.0, 0.0, 3.0, 30.0, 0.0, 0.0, 4.0, 40.0, 0.0, 0.0 ] + - Name: ExpectedOut3 + Format: Float32 + Stride: 16 + Data: [ 1.0, 10.0, 100.0, 0.0, 2.0, 20.0, 200.0, 0.0, 3.0, 30.0, 300.0, 0.0, 4.0, 40.0, 400.0, 0.0 ] + - Name: ExpectedOut4 + Format: Float32 + Stride: 16 + Data: [ 1.0, 10.0, 100.0, 1000.0, 2.0, 20.0, 200.0, 2000.0, 3.0, 30.0, 300.0, 3000.0, 4.0, 40.0, 400.0, 4000.0 ] + - Name: ExpectedOut5 + Format: Float32 + Stride: 16 + Data: [ 1, 2, 3, 4 ] +Results: + - Result: ExpectedOut1 + Rule: 
BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: ExpectedOut5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 +... 
+#--- end + + +# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# XFAIL: Metal + +# Bug https://github.com/llvm/llvm-project/issues/156775 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.fp64.test b/test/WaveOps/WaveReadLaneFirst.fp64.test new file mode 100644 index 00000000..6d33f931 --- /dev/null +++ b/test/WaveOps/WaveReadLaneFirst.fp64.test @@ -0,0 +1,179 @@ +#--- source.hlsl +StructuredBuffer<double4> In : register(t0); +RWStructuredBuffer<double4> Out1 : register(u1); // test scalar +RWStructuredBuffer<double4> Out2 : register(u2); // test double2 +RWStructuredBuffer<double4> Out3 : register(u3); // test double3 +RWStructuredBuffer<double4> Out4 : register(u4); // test double4 +RWStructuredBuffer<double4> Out5 : register(u5); // constant folding + +[numthreads(4,1,1)] +void main(uint3 tid : SV_GroupThreadID) +{ + double4 v = In[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + double s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0; + double s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0; + double s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0; + double s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0; + + double2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : double2(0,0); + double2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : double2(0,0); + double2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : double2(0,0); + double2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : double2(0,0); + + double3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : double3(0,0,0); + double3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : double3(0,0,0); + double3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : double3(0,0,0); + double3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : double3(0,0,0); + + double4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : double4(0,0,0,0); + double4 v4_2 = tid.x >= 2 ? 
WaveReadLaneFirst( v ) : double4(0,0,0,0); + double4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : double4(0,0,0,0); + double4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : double4(0,0,0,0); + + double scalars[4] = { s4, s3, s2, s1 }; + double2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 }; + double3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 }; + double4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 }; + + Out1[tid.x].x = scalars[tid.x]; + Out2[tid.x].xy = vec2s[tid.x]; + Out3[tid.x].xyz = vec3s[tid.x]; + Out4[tid.x] = vec4s[tid.x]; + + // constant folding case + Out5[0] = WaveReadLaneFirst(double4(1,2,3,4)); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Float64 + Stride: 32 + Data: [ 1.0, 10.0, 100.0, 1000.0, 2.0, 20.0, 200.0, 2000.0, 3.0, 30.0, 300.0, 3000.0, 4.0, 40.0, 400.0, 4000.0 ] + - Name: Out1 + Format: Float64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out2 + Format: Float64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out3 + Format: Float64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out4 + Format: Float64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out5 + Format: Float64 + Stride: 32 + ZeroInitSize: 32 + - Name: ExpectedOut1 + Format: Float64 + Stride: 32 + Data: [ 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0 ] + - Name: ExpectedOut2 + Format: Float64 + Stride: 32 + Data: [ 1.0, 10.0, 0.0, 0.0, 2.0, 20.0, 0.0, 0.0, 3.0, 30.0, 0.0, 0.0, 4.0, 40.0, 0.0, 0.0 ] + - Name: ExpectedOut3 + Format: Float64 + Stride: 32 + Data: [ 1.0, 10.0, 100.0, 0.0, 2.0, 20.0, 200.0, 0.0, 3.0, 30.0, 300.0, 0.0, 4.0, 40.0, 400.0, 0.0 ] + - Name: ExpectedOut4 + Format: Float64 + Stride: 32 + Data: [ 1.0, 10.0, 100.0, 1000.0, 2.0, 20.0, 200.0, 2000.0, 3.0, 30.0, 300.0, 3000.0, 4.0, 40.0, 400.0, 4000.0 ] + - Name: ExpectedOut5 + Format: Float64 + Stride: 32 + Data: [ 1, 2, 3, 4 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: 
ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: ExpectedOut5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 +... 
+#--- end + + +# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# XFAIL: Metal + +# Bug https://github.com/llvm/llvm-project/issues/156775 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.int16.test b/test/WaveOps/WaveReadLaneFirst.int16.test new file mode 100644 index 00000000..ccf57ed1 --- /dev/null +++ b/test/WaveOps/WaveReadLaneFirst.int16.test @@ -0,0 +1,331 @@ +#--- source.hlsl +StructuredBuffer<int16_t4> In : register(t0); +RWStructuredBuffer<int16_t4> Out1 : register(u1); // test scalar +RWStructuredBuffer<int16_t4> Out2 : register(u2); // test int16_t2 +RWStructuredBuffer<int16_t4> Out3 : register(u3); // test int16_t3 +RWStructuredBuffer<int16_t4> Out4 : register(u4); // test int16_t4 +RWStructuredBuffer<int16_t4> Out5 : register(u5); // constant folding + +// uint16_ts +StructuredBuffer<uint16_t4> UIn : register(t6); +RWStructuredBuffer<uint16_t4> UOut1 : register(u7); +RWStructuredBuffer<uint16_t4> UOut2 : register(u8); +RWStructuredBuffer<uint16_t4> UOut3 : register(u9); +RWStructuredBuffer<uint16_t4> UOut4 : register(u10); +RWStructuredBuffer<uint16_t4> UOut5 : register(u11); + +[numthreads(4,1,1)] +void main(uint3 tid : SV_GroupThreadID) +{ + int16_t4 v = In[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + int16_t s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0; + int16_t s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0; + int16_t s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0; + int16_t s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0; + + int16_t2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : int16_t2(0,0); + int16_t2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : int16_t2(0,0); + int16_t2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : int16_t2(0,0); + int16_t2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : int16_t2(0,0); + + int16_t3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : int16_t3(0,0,0); + int16_t3 v3_2 = tid.x >= 2 ? 
WaveReadLaneFirst( v.xyz ) : int16_t3(0,0,0); + int16_t3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : int16_t3(0,0,0); + int16_t3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : int16_t3(0,0,0); + + int16_t4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : int16_t4(0,0,0,0); + int16_t4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : int16_t4(0,0,0,0); + int16_t4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : int16_t4(0,0,0,0); + int16_t4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : int16_t4(0,0,0,0); + + int16_t scalars[4] = { s4, s3, s2, s1 }; + int16_t2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 }; + int16_t3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 }; + int16_t4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 }; + + Out1[tid.x].x = scalars[tid.x]; + Out2[tid.x].xy = vec2s[tid.x]; + Out3[tid.x].xyz = vec3s[tid.x]; + Out4[tid.x] = vec4s[tid.x]; + + // constant folding case + Out5[0] = WaveReadLaneFirst(int16_t4(1,2,3,4)); + + // UINT16_t case + + uint16_t4 uv = UIn[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + uint16_t us1 = tid.x >= 3 ? WaveReadLaneFirst( uv.x ) : 0; + uint16_t us2 = tid.x >= 2 ? WaveReadLaneFirst( uv.x ) : 0; + uint16_t us3 = tid.x >= 1 ? WaveReadLaneFirst( uv.x ) : 0; + uint16_t us4 = tid.x >= 0 ? WaveReadLaneFirst( uv.x ) : 0; + + uint16_t2 uv2_1 = tid.x >= 3 ? WaveReadLaneFirst( uv.xy ) : uint16_t2(0,0); + uint16_t2 uv2_2 = tid.x >= 2 ? WaveReadLaneFirst( uv.xy ) : uint16_t2(0,0); + uint16_t2 uv2_3 = tid.x >= 1 ? WaveReadLaneFirst( uv.xy ) : uint16_t2(0,0); + uint16_t2 uv2_4 = tid.x >= 0 ? WaveReadLaneFirst( uv.xy ) : uint16_t2(0,0); + + uint16_t3 uv3_1 = tid.x >= 3 ? WaveReadLaneFirst( uv.xyz ) : uint16_t3(0,0,0); + uint16_t3 uv3_2 = tid.x >= 2 ? WaveReadLaneFirst( uv.xyz ) : uint16_t3(0,0,0); + uint16_t3 uv3_3 = tid.x >= 1 ? WaveReadLaneFirst( uv.xyz ) : uint16_t3(0,0,0); + uint16_t3 uv3_4 = tid.x >= 0 ? WaveReadLaneFirst( uv.xyz ) : uint16_t3(0,0,0); + + uint16_t4 uv4_1 = tid.x >= 3 ? 
WaveReadLaneFirst( uv ) : uint16_t4(0,0,0,0); + uint16_t4 uv4_2 = tid.x >= 2 ? WaveReadLaneFirst( uv ) : uint16_t4(0,0,0,0); + uint16_t4 uv4_3 = tid.x >= 1 ? WaveReadLaneFirst( uv ) : uint16_t4(0,0,0,0); + uint16_t4 uv4_4 = tid.x >= 0 ? WaveReadLaneFirst( uv ) : uint16_t4(0,0,0,0); + + uint16_t uscalars[4] = { us4, us3, us2, us1 }; + uint16_t2 uvec2s [4] = { uv2_4, uv2_3, uv2_2, uv2_1 }; + uint16_t3 uvec3s [4] = { uv3_4, uv3_3, uv3_2, uv3_1 }; + uint16_t4 uvec4s [4] = { uv4_4, uv4_3, uv4_2, uv4_1 }; + + UOut1[tid.x].x = uscalars[tid.x]; + UOut2[tid.x].xy = uvec2s[tid.x]; + UOut3[tid.x].xyz = uvec3s[tid.x]; + UOut4[tid.x] = uvec4s[tid.x]; + + // constant folding case + UOut5[0] = WaveReadLaneFirst(uint16_t4(1,2,3,4)); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Int16 + Stride: 8 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: Out1 + Format: Int16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out2 + Format: Int16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out3 + Format: Int16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out4 + Format: Int16 + Stride: 8 + ZeroInitSize: 32 + - Name: Out5 + Format: Int16 + Stride: 8 + ZeroInitSize: 8 + - Name: ExpectedOut1 + Format: Int16 + Stride: 8 + Data: [ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 ] + - Name: ExpectedOut2 + Format: Int16 + Stride: 8 + Data: [ 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 ] + - Name: ExpectedOut3 + Format: Int16 + Stride: 8 + Data: [ 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 ] + - Name: ExpectedOut4 + Format: Int16 + Stride: 8 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: ExpectedOut5 + Format: Int16 + Stride: 8 + Data: [ 1, 2, 3, 4 ] + - Name: UIn + Format: UInt16 + Stride: 8 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: UOut1 + Format: UInt16 + Stride: 8 + 
ZeroInitSize: 32 + - Name: UOut2 + Format: UInt16 + Stride: 8 + ZeroInitSize: 32 + - Name: UOut3 + Format: UInt16 + Stride: 8 + ZeroInitSize: 32 + - Name: UOut4 + Format: UInt16 + Stride: 8 + ZeroInitSize: 32 + - Name: UOut5 + Format: UInt16 + Stride: 8 + ZeroInitSize: 8 + - Name: UExpectedOut1 + Format: UInt16 + Stride: 8 + Data: [ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 ] + - Name: UExpectedOut2 + Format: UInt16 + Stride: 8 + Data: [ 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 ] + - Name: UExpectedOut3 + Format: UInt16 + Stride: 8 + Data: [ 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 ] + - Name: UExpectedOut4 + Format: UInt16 + Stride: 8 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: UExpectedOut5 + Format: UInt16 + Stride: 8 + Data: [ 1, 2, 3, 4 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: ExpectedOut5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + - Result: UExpectedOut1 + Rule: BufferExact + Actual: UOut1 + Expected: UExpectedOut1 + - Result: UExpectedOut2 + Rule: BufferExact + Actual: UOut2 + Expected: UExpectedOut2 + - Result: UExpectedOut3 + Rule: BufferExact + Actual: UOut3 + Expected: UExpectedOut3 + - Result: UExpectedOut4 + Rule: BufferExact + Actual: UOut4 + Expected: UExpectedOut4 + - Result: UExpectedOut5 + Rule: BufferExact + Actual: UOut5 + Expected: UExpectedOut5 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: UOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + - Name: UOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 10 + Space: 0 + VulkanBinding: + Binding: 10 + - Name: UOut5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 11 + Space: 0 + VulkanBinding: + Binding: 11 + +... 
+#--- end + + +# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# XFAIL: Metal + +# Bug https://github.com/llvm/llvm-project/issues/156775 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.int32.test b/test/WaveOps/WaveReadLaneFirst.int32.test new file mode 100644 index 00000000..ba587668 --- /dev/null +++ b/test/WaveOps/WaveReadLaneFirst.int32.test @@ -0,0 +1,331 @@ +#--- source.hlsl +StructuredBuffer<int4> In : register(t0); +RWStructuredBuffer<int4> Out1 : register(u1); // test scalar +RWStructuredBuffer<int4> Out2 : register(u2); // test int2 +RWStructuredBuffer<int4> Out3 : register(u3); // test int3 +RWStructuredBuffer<int4> Out4 : register(u4); // test int4 +RWStructuredBuffer<int4> Out5 : register(u5); // constant folding + +// uints +StructuredBuffer<uint4> UIn : register(t6); +RWStructuredBuffer<uint4> UOut1 : register(u7); +RWStructuredBuffer<uint4> UOut2 : register(u8); +RWStructuredBuffer<uint4> UOut3 : register(u9); +RWStructuredBuffer<uint4> UOut4 : register(u10); +RWStructuredBuffer<uint4> UOut5 : register(u11); + +[numthreads(4,1,1)] +void main(uint3 tid : SV_GroupThreadID) +{ + int4 v = In[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + int s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0; + int s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0; + int s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0; + int s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0; + + int2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : int2(0,0); + int2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : int2(0,0); + int2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : int2(0,0); + int2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : int2(0,0); + + int3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : int3(0,0,0); + int3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : int3(0,0,0); + int3 v3_3 = tid.x >= 1 ? 
WaveReadLaneFirst( v.xyz ) : int3(0,0,0); + int3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : int3(0,0,0); + + int4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : int4(0,0,0,0); + int4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : int4(0,0,0,0); + int4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : int4(0,0,0,0); + int4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : int4(0,0,0,0); + + int scalars[4] = { s4, s3, s2, s1 }; + int2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 }; + int3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 }; + int4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 }; + + Out1[tid.x].x = scalars[tid.x]; + Out2[tid.x].xy = vec2s[tid.x]; + Out3[tid.x].xyz = vec3s[tid.x]; + Out4[tid.x] = vec4s[tid.x]; + + // constant folding case + Out5[0] = WaveReadLaneFirst(int4(1,2,3,4)); + + // UINT case + + uint4 uv = UIn[tid.x]; + + // Mask per "active lane set": only >= N lanes contribute + uint us1 = tid.x >= 3 ? WaveReadLaneFirst( uv.x ) : 0; + uint us2 = tid.x >= 2 ? WaveReadLaneFirst( uv.x ) : 0; + uint us3 = tid.x >= 1 ? WaveReadLaneFirst( uv.x ) : 0; + uint us4 = tid.x >= 0 ? WaveReadLaneFirst( uv.x ) : 0; + + uint2 uv2_1 = tid.x >= 3 ? WaveReadLaneFirst( uv.xy ) : uint2(0,0); + uint2 uv2_2 = tid.x >= 2 ? WaveReadLaneFirst( uv.xy ) : uint2(0,0); + uint2 uv2_3 = tid.x >= 1 ? WaveReadLaneFirst( uv.xy ) : uint2(0,0); + uint2 uv2_4 = tid.x >= 0 ? WaveReadLaneFirst( uv.xy ) : uint2(0,0); + + uint3 uv3_1 = tid.x >= 3 ? WaveReadLaneFirst( uv.xyz ) : uint3(0,0,0); + uint3 uv3_2 = tid.x >= 2 ? WaveReadLaneFirst( uv.xyz ) : uint3(0,0,0); + uint3 uv3_3 = tid.x >= 1 ? WaveReadLaneFirst( uv.xyz ) : uint3(0,0,0); + uint3 uv3_4 = tid.x >= 0 ? WaveReadLaneFirst( uv.xyz ) : uint3(0,0,0); + + uint4 uv4_1 = tid.x >= 3 ? WaveReadLaneFirst( uv ) : uint4(0,0,0,0); + uint4 uv4_2 = tid.x >= 2 ? WaveReadLaneFirst( uv ) : uint4(0,0,0,0); + uint4 uv4_3 = tid.x >= 1 ? WaveReadLaneFirst( uv ) : uint4(0,0,0,0); + uint4 uv4_4 = tid.x >= 0 ? 
WaveReadLaneFirst( uv ) : uint4(0,0,0,0); + + uint uscalars[4] = { us4, us3, us2, us1 }; + uint2 uvec2s [4] = { uv2_4, uv2_3, uv2_2, uv2_1 }; + uint3 uvec3s [4] = { uv3_4, uv3_3, uv3_2, uv3_1 }; + uint4 uvec4s [4] = { uv4_4, uv4_3, uv4_2, uv4_1 }; + + UOut1[tid.x].x = uscalars[tid.x]; + UOut2[tid.x].xy = uvec2s[tid.x]; + UOut3[tid.x].xyz = uvec3s[tid.x]; + UOut4[tid.x] = uvec4s[tid.x]; + + // constant folding case + UOut5[0] = WaveReadLaneFirst(uint4(1,2,3,4)); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Int32 + Stride: 16 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: Out1 + Format: Int32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out2 + Format: Int32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out3 + Format: Int32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out4 + Format: Int32 + Stride: 16 + ZeroInitSize: 64 + - Name: Out5 + Format: Int32 + Stride: 16 + ZeroInitSize: 16 + - Name: ExpectedOut1 + Format: Int32 + Stride: 16 + Data: [ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 ] + - Name: ExpectedOut2 + Format: Int32 + Stride: 16 + Data: [ 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 ] + - Name: ExpectedOut3 + Format: Int32 + Stride: 16 + Data: [ 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 ] + - Name: ExpectedOut4 + Format: Int32 + Stride: 16 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: ExpectedOut5 + Format: Int32 + Stride: 16 + Data: [ 1, 2, 3, 4 ] + - Name: UIn + Format: UInt32 + Stride: 16 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: UOut1 + Format: UInt32 + Stride: 16 + ZeroInitSize: 64 + - Name: UOut2 + Format: UInt32 + Stride: 16 + ZeroInitSize: 64 + - Name: UOut3 + Format: UInt32 + Stride: 16 + ZeroInitSize: 64 + - Name: UOut4 + Format: UInt32 + Stride: 16 + ZeroInitSize: 64 + - Name: UOut5 + Format: UInt32 + 
Stride: 16 + ZeroInitSize: 16 + - Name: UExpectedOut1 + Format: UInt32 + Stride: 16 + Data: [ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 ] + - Name: UExpectedOut2 + Format: UInt32 + Stride: 16 + Data: [ 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 ] + - Name: UExpectedOut3 + Format: UInt32 + Stride: 16 + Data: [ 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 ] + - Name: UExpectedOut4 + Format: UInt32 + Stride: 16 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: UExpectedOut5 + Format: UInt32 + Stride: 16 + Data: [ 1, 2, 3, 4 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: ExpectedOut5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + - Result: UExpectedOut1 + Rule: BufferExact + Actual: UOut1 + Expected: UExpectedOut1 + - Result: UExpectedOut2 + Rule: BufferExact + Actual: UOut2 + Expected: UExpectedOut2 + - Result: UExpectedOut3 + Rule: BufferExact + Actual: UOut3 + Expected: UExpectedOut3 + - Result: UExpectedOut4 + Rule: BufferExact + Actual: UOut4 + Expected: UExpectedOut4 + - Result: UExpectedOut5 + Rule: BufferExact + Actual: UOut5 + Expected: UExpectedOut5 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - Name: Out4 + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: UOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + - Name: UOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 10 + Space: 0 + VulkanBinding: + Binding: 10 + - Name: UOut5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 11 + Space: 0 + VulkanBinding: + Binding: 11 + +... +#--- end + + +# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# XFAIL: Metal + +# Bug https://github.com/llvm/llvm-project/issues/156775 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.int64.test b/test/WaveOps/WaveReadLaneFirst.int64.test new file mode 100644 index 00000000..b2a3770b --- /dev/null +++ b/test/WaveOps/WaveReadLaneFirst.int64.test @@ -0,0 +1,331 @@ +#--- source.hlsl +StructuredBuffer In : register(t0); +RWStructuredBuffer Out1 : register(u1); // test scalar +RWStructuredBuffer Out2 : register(u2); // test int64_t2 +RWStructuredBuffer Out3 : register(u3); // test int64_t3 +RWStructuredBuffer Out4 : register(u4); // test int64_t4 +RWStructuredBuffer Out5 : register(u5); // constant folding + +// uint64_ts +StructuredBuffer UIn : register(t6); +RWStructuredBuffer UOut1 : register(u7); +RWStructuredBuffer UOut2 : register(u8); +RWStructuredBuffer UOut3 : register(u9); +RWStructuredBuffer UOut4 : register(u10); 
+RWStructuredBuffer UOut5 : register(u11); + +[numthreads(4,1,1)] +void main(uint3 tid : SV_GroupThreadID) +{ + int64_t4 v = In[tid.x]; + + // Mask per "active lane set": each ternary is meant to leave only lanes with tid.x >= N active for the call, so the expected result is lane N's value (see ExpectedOut1-4) + int64_t s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0; + int64_t s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0; + int64_t s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0; + int64_t s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0; // tid.x is unsigned, so >= 0 keeps all four lanes active + + int64_t2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : int64_t2(0,0); + int64_t2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : int64_t2(0,0); + int64_t2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : int64_t2(0,0); + int64_t2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : int64_t2(0,0); + + int64_t3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : int64_t3(0,0,0); + int64_t3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : int64_t3(0,0,0); + int64_t3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : int64_t3(0,0,0); + int64_t3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : int64_t3(0,0,0); + + int64_t4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : int64_t4(0,0,0,0); + int64_t4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : int64_t4(0,0,0,0); + int64_t4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : int64_t4(0,0,0,0); + int64_t4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : int64_t4(0,0,0,0); + + int64_t scalars[4] = { s4, s3, s2, s1 }; // element i = result for the tid.x >= i mask; lane i stores element i below + int64_t2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 }; + int64_t3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 }; + int64_t4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 }; + + Out1[tid.x].x = scalars[tid.x]; + Out2[tid.x].xy = vec2s[tid.x]; + Out3[tid.x].xyz = vec3s[tid.x]; + Out4[tid.x] = vec4s[tid.x]; + + // constant folding case: literal argument, expected output is (1,2,3,4) + Out5[0] = WaveReadLaneFirst(int64_t4(1,2,3,4)); + + // uint64_t case: same checks as above on the unsigned inputs/outputs + + uint64_t4 uv = UIn[tid.x]; + + // Mask per "active lane set": only lanes with tid.x >= N contribute (same scheme as the signed case) + uint64_t us1 = tid.x >= 3 ? WaveReadLaneFirst( uv.x ) : 0; + uint64_t us2 = tid.x >= 2 ? 
WaveReadLaneFirst( uv.x ) : 0; + uint64_t us3 = tid.x >= 1 ? WaveReadLaneFirst( uv.x ) : 0; + uint64_t us4 = tid.x >= 0 ? WaveReadLaneFirst( uv.x ) : 0; + + uint64_t2 uv2_1 = tid.x >= 3 ? WaveReadLaneFirst( uv.xy ) : uint64_t2(0,0); + uint64_t2 uv2_2 = tid.x >= 2 ? WaveReadLaneFirst( uv.xy ) : uint64_t2(0,0); + uint64_t2 uv2_3 = tid.x >= 1 ? WaveReadLaneFirst( uv.xy ) : uint64_t2(0,0); + uint64_t2 uv2_4 = tid.x >= 0 ? WaveReadLaneFirst( uv.xy ) : uint64_t2(0,0); + + uint64_t3 uv3_1 = tid.x >= 3 ? WaveReadLaneFirst( uv.xyz ) : uint64_t3(0,0,0); + uint64_t3 uv3_2 = tid.x >= 2 ? WaveReadLaneFirst( uv.xyz ) : uint64_t3(0,0,0); + uint64_t3 uv3_3 = tid.x >= 1 ? WaveReadLaneFirst( uv.xyz ) : uint64_t3(0,0,0); + uint64_t3 uv3_4 = tid.x >= 0 ? WaveReadLaneFirst( uv.xyz ) : uint64_t3(0,0,0); + + uint64_t4 uv4_1 = tid.x >= 3 ? WaveReadLaneFirst( uv ) : uint64_t4(0,0,0,0); + uint64_t4 uv4_2 = tid.x >= 2 ? WaveReadLaneFirst( uv ) : uint64_t4(0,0,0,0); + uint64_t4 uv4_3 = tid.x >= 1 ? WaveReadLaneFirst( uv ) : uint64_t4(0,0,0,0); + uint64_t4 uv4_4 = tid.x >= 0 ? 
WaveReadLaneFirst( uv ) : uint64_t4(0,0,0,0); + + uint64_t uscalars[4] = { us4, us3, us2, us1 }; + uint64_t2 uvec2s [4] = { uv2_4, uv2_3, uv2_2, uv2_1 }; + uint64_t3 uvec3s [4] = { uv3_4, uv3_3, uv3_2, uv3_1 }; + uint64_t4 uvec4s [4] = { uv4_4, uv4_3, uv4_2, uv4_1 }; + + UOut1[tid.x].x = uscalars[tid.x]; + UOut2[tid.x].xy = uvec2s[tid.x]; + UOut3[tid.x].xyz = uvec3s[tid.x]; + UOut4[tid.x] = uvec4s[tid.x]; + + // constant folding case: literal argument, expected output is (1,2,3,4) + UOut5[0] = WaveReadLaneFirst(uint64_t4(1,2,3,4)); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: In + Format: Int64 + Stride: 32 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: Out1 + Format: Int64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out2 + Format: Int64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out3 + Format: Int64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out4 + Format: Int64 + Stride: 32 + ZeroInitSize: 128 + - Name: Out5 + Format: Int64 + Stride: 32 + ZeroInitSize: 32 + - Name: ExpectedOut1 + Format: Int64 + Stride: 32 + Data: [ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 ] + - Name: ExpectedOut2 + Format: Int64 + Stride: 32 + Data: [ 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 ] + - Name: ExpectedOut3 + Format: Int64 + Stride: 32 + Data: [ 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 ] + - Name: ExpectedOut4 + Format: Int64 + Stride: 32 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: ExpectedOut5 + Format: Int64 + Stride: 32 + Data: [ 1, 2, 3, 4 ] + - Name: UIn + Format: UInt64 + Stride: 32 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: UOut1 + Format: UInt64 + Stride: 32 + ZeroInitSize: 128 + - Name: UOut2 + Format: UInt64 + Stride: 32 + ZeroInitSize: 128 + - Name: UOut3 + Format: UInt64 + Stride: 32 + ZeroInitSize: 128 + - Name: UOut4 + Format: UInt64 + Stride: 32 + ZeroInitSize: 128 + - 
Name: UOut5 + Format: UInt64 + Stride: 32 + ZeroInitSize: 32 + - Name: UExpectedOut1 + Format: UInt64 + Stride: 32 + Data: [ 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 ] + - Name: UExpectedOut2 + Format: UInt64 + Stride: 32 + Data: [ 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0 ] + - Name: UExpectedOut3 + Format: UInt64 + Stride: 32 + Data: [ 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0 ] + - Name: UExpectedOut4 + Format: UInt64 + Stride: 32 + Data: [ 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000 ] + - Name: UExpectedOut5 + Format: UInt64 + Stride: 32 + Data: [ 1, 2, 3, 4 ] +Results: + - Result: ExpectedOut1 + Rule: BufferExact + Actual: Out1 + Expected: ExpectedOut1 + - Result: ExpectedOut2 + Rule: BufferExact + Actual: Out2 + Expected: ExpectedOut2 + - Result: ExpectedOut3 + Rule: BufferExact + Actual: Out3 + Expected: ExpectedOut3 + - Result: ExpectedOut4 + Rule: BufferExact + Actual: Out4 + Expected: ExpectedOut4 + - Result: ExpectedOut5 + Rule: BufferExact + Actual: Out5 + Expected: ExpectedOut5 + - Result: UExpectedOut1 + Rule: BufferExact + Actual: UOut1 + Expected: UExpectedOut1 + - Result: UExpectedOut2 + Rule: BufferExact + Actual: UOut2 + Expected: UExpectedOut2 + - Result: UExpectedOut3 + Rule: BufferExact + Actual: UOut3 + Expected: UExpectedOut3 + - Result: UExpectedOut4 + Rule: BufferExact + Actual: UOut4 + Expected: UExpectedOut4 + - Result: UExpectedOut5 + Rule: BufferExact + Actual: UOut5 + Expected: UExpectedOut5 +DescriptorSets: + - Resources: + - Name: In + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Out1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Out3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 + - 
Name: Out4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: Out5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 + - Name: UIn + Kind: StructuredBuffer + DirectXBinding: + Register: 6 + Space: 0 + VulkanBinding: + Binding: 6 + - Name: UOut1 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 7 + Space: 0 + VulkanBinding: + Binding: 7 + - Name: UOut2 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 8 + Space: 0 + VulkanBinding: + Binding: 8 + - Name: UOut3 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 9 + Space: 0 + VulkanBinding: + Binding: 9 + - Name: UOut4 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 10 + Space: 0 + VulkanBinding: + Binding: 10 + - Name: UOut5 + Kind: RWStructuredBuffer + DirectXBinding: + Register: 11 + Space: 0 + VulkanBinding: + Binding: 11 + +... +#--- end + + +# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# XFAIL: Metal + +# Bug https://github.com/llvm/llvm-project/issues/156775 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From 7811e7abbde856fa9fae670da93488e22fdd9a71 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 8 Sep 2025 16:48:00 -0700 Subject: [PATCH 2/4] add warp bug to fp16/64, and int64. 
Use bug verbiage --- test/WaveOps/WaveReadLaneFirst.fp16.test | 5 ++++- test/WaveOps/WaveReadLaneFirst.fp32.test | 2 +- test/WaveOps/WaveReadLaneFirst.fp64.test | 5 ++++- test/WaveOps/WaveReadLaneFirst.int16.test | 2 +- test/WaveOps/WaveReadLaneFirst.int32.test | 2 +- test/WaveOps/WaveReadLaneFirst.int64.test | 6 +++++- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/test/WaveOps/WaveReadLaneFirst.fp16.test b/test/WaveOps/WaveReadLaneFirst.fp16.test index be4148ab..2492b5ef 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp16.test +++ b/test/WaveOps/WaveReadLaneFirst.fp16.test @@ -174,12 +174,15 @@ DescriptorSets: #--- end -# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang +# Bug https://github.com/llvm/offload-test-suite/issues/433 +# XFAIL: DirectX-WARP + # RUN: split-file %s %t # RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.fp32.test b/test/WaveOps/WaveReadLaneFirst.fp32.test index 304802a5..8d2c774a 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp32.test +++ b/test/WaveOps/WaveReadLaneFirst.fp32.test @@ -168,7 +168,7 @@ DescriptorSets: #--- end -# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal # Bug https://github.com/llvm/llvm-project/issues/156775 diff --git a/test/WaveOps/WaveReadLaneFirst.fp64.test b/test/WaveOps/WaveReadLaneFirst.fp64.test index 6d33f931..786efaf7 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp64.test +++ b/test/WaveOps/WaveReadLaneFirst.fp64.test @@ -168,12 +168,15 @@ DescriptorSets: #--- end -# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal # Bug 
https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang +# Bug https://github.com/llvm/offload-test-suite/issues/433 +# XFAIL: DirectX-WARP + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.int16.test b/test/WaveOps/WaveReadLaneFirst.int16.test index ccf57ed1..e6f4725b 100644 --- a/test/WaveOps/WaveReadLaneFirst.int16.test +++ b/test/WaveOps/WaveReadLaneFirst.int16.test @@ -320,7 +320,7 @@ DescriptorSets: #--- end -# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal # Bug https://github.com/llvm/llvm-project/issues/156775 diff --git a/test/WaveOps/WaveReadLaneFirst.int32.test b/test/WaveOps/WaveReadLaneFirst.int32.test index ba587668..284ff82c 100644 --- a/test/WaveOps/WaveReadLaneFirst.int32.test +++ b/test/WaveOps/WaveReadLaneFirst.int32.test @@ -320,7 +320,7 @@ DescriptorSets: #--- end -# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal # Bug https://github.com/llvm/llvm-project/issues/156775 diff --git a/test/WaveOps/WaveReadLaneFirst.int64.test b/test/WaveOps/WaveReadLaneFirst.int64.test index b2a3770b..b9e101ab 100644 --- a/test/WaveOps/WaveReadLaneFirst.int64.test +++ b/test/WaveOps/WaveReadLaneFirst.int64.test @@ -320,12 +320,16 @@ DescriptorSets: #--- end -# Tracked by https://github.com/llvm/offload-test-suite/issues/393 +# Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang +# Bug https://github.com/llvm/offload-test-suite/issues/433 +# XFAIL: DirectX-WARP + + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o From c0f681d943b7f08ec3347296a067b7f2ad8eb5b4 Mon Sep 17 00:00:00 2001 From: Joshua 
Batista Date: Mon, 8 Sep 2025 16:51:20 -0700 Subject: [PATCH 3/4] remove warp bug, add requires statements --- test/WaveOps/WaveReadLaneFirst.fp16.test | 4 +--- test/WaveOps/WaveReadLaneFirst.fp64.test | 4 +--- test/WaveOps/WaveReadLaneFirst.int64.test | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/test/WaveOps/WaveReadLaneFirst.fp16.test b/test/WaveOps/WaveReadLaneFirst.fp16.test index 2492b5ef..e56fc723 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp16.test +++ b/test/WaveOps/WaveReadLaneFirst.fp16.test @@ -173,6 +173,7 @@ DescriptorSets: ... #--- end +# REQUIRES: Half # Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal @@ -180,9 +181,6 @@ DescriptorSets: # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang -# Bug https://github.com/llvm/offload-test-suite/issues/433 -# XFAIL: DirectX-WARP - # RUN: split-file %s %t # RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.fp64.test b/test/WaveOps/WaveReadLaneFirst.fp64.test index 786efaf7..d7e8a1dc 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp64.test +++ b/test/WaveOps/WaveReadLaneFirst.fp64.test @@ -167,6 +167,7 @@ DescriptorSets: ... #--- end +# REQUIRES: Double # Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal @@ -174,9 +175,6 @@ DescriptorSets: # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang -# Bug https://github.com/llvm/offload-test-suite/issues/433 -# XFAIL: DirectX-WARP - # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.int64.test b/test/WaveOps/WaveReadLaneFirst.int64.test index b9e101ab..dca5271f 100644 --- a/test/WaveOps/WaveReadLaneFirst.int64.test +++ b/test/WaveOps/WaveReadLaneFirst.int64.test @@ -319,6 +319,7 @@ DescriptorSets: ... 
#--- end +# REQUIRES: Int64 # Bug https://github.com/llvm/offload-test-suite/issues/393 # XFAIL: Metal @@ -326,9 +327,6 @@ DescriptorSets: # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang -# Bug https://github.com/llvm/offload-test-suite/issues/433 -# XFAIL: DirectX-WARP - # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl From 920d7d8b46f48b6f67dad69469eccae97284bd8c Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 8 Sep 2025 20:12:51 -0700 Subject: [PATCH 4/4] add back the xfail for warp --- test/WaveOps/WaveReadLaneFirst.fp16.test | 3 +++ test/WaveOps/WaveReadLaneFirst.fp64.test | 3 +++ test/WaveOps/WaveReadLaneFirst.int64.test | 3 +++ 3 files changed, 9 insertions(+) diff --git a/test/WaveOps/WaveReadLaneFirst.fp16.test b/test/WaveOps/WaveReadLaneFirst.fp16.test index e56fc723..98559968 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp16.test +++ b/test/WaveOps/WaveReadLaneFirst.fp16.test @@ -181,6 +181,9 @@ DescriptorSets: # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang +# Bug https://github.com/llvm/offload-test-suite/issues/433 +# XFAIL: DirectX-WARP + # RUN: split-file %s %t # RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.fp64.test b/test/WaveOps/WaveReadLaneFirst.fp64.test index d7e8a1dc..1129646d 100644 --- a/test/WaveOps/WaveReadLaneFirst.fp64.test +++ b/test/WaveOps/WaveReadLaneFirst.fp64.test @@ -175,6 +175,9 @@ DescriptorSets: # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang +# Bug https://github.com/llvm/offload-test-suite/issues/433 +# XFAIL: DirectX-WARP + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneFirst.int64.test b/test/WaveOps/WaveReadLaneFirst.int64.test index dca5271f..09c8bce5 100644 --- 
a/test/WaveOps/WaveReadLaneFirst.int64.test +++ b/test/WaveOps/WaveReadLaneFirst.int64.test @@ -327,6 +327,9 @@ DescriptorSets: # Bug https://github.com/llvm/llvm-project/issues/156775 # XFAIL: Clang +# Bug https://github.com/llvm/offload-test-suite/issues/433 +# XFAIL: DirectX-WARP + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl