Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 189 additions & 0 deletions test/WaveOps/WaveReadLaneFirst.fp16.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#--- source.hlsl
// Input: main reads one half4 per thread as In[tid.x].
StructuredBuffer<half4> In : register(t0);
// Outputs: all five buffers hold half4 elements, but main only writes the
// components named in each trailing comment; the rest are left untouched.
RWStructuredBuffer<half4> Out1 : register(u1); // test scalar (writes .x)
RWStructuredBuffer<half4> Out2 : register(u2); // test half2 (writes .xy)
RWStructuredBuffer<half4> Out3 : register(u3); // test half3 (writes .xyz)
RWStructuredBuffer<half4> Out4 : register(u4); // test half4 (writes all four components)
RWStructuredBuffer<half4> Out5 : register(u5); // constant folding (only element 0 is written)

// Exercises WaveReadLaneFirst on half, half2, half3 and half4 operands.
// Each of the four threads loads its own half4 from In, evaluates the
// intrinsic under four different thread-index conditions, and stores the
// result picked by its own thread index into Out1..Out4.
[numthreads(4,1,1)]
void main(uint3 tid : SV_GroupThreadID)
{
half4 v = In[tid.x];

// Each call sits in the true arm of a ternary on tid.x; threads that fail the
// condition take the zero literal instead. The suffix _1.._4 / 1..4 counts
// how many threads satisfy the condition (>= 3 -> one thread, >= 0 -> all
// four, since tid.x is unsigned).
// NOTE(review): the expected data in pipeline.yaml assumes threads failing
// the condition are inactive at the call, so the value read comes from thread
// N for the ">= N" variant - confirm the ternary yields that divergence for
// the targeted compiler / language version.
half s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0;
half s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0;
half s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0;
half s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0;

// Same four conditions, two-component operand.
half2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : half2(0,0);
half2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : half2(0,0);
half2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : half2(0,0);
half2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : half2(0,0);

// Same four conditions, three-component operand.
half3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0);
half3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0);
half3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0);
half3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : half3(0,0,0);

// Same four conditions, full four-component operand.
half4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : half4(0,0,0,0);
half4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : half4(0,0,0,0);
half4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : half4(0,0,0,0);
half4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : half4(0,0,0,0);

// The tables are listed in reverse order so that index i holds the result of
// the "tid.x >= i" variant; thread i therefore picks the variant whose
// condition starts at its own index.
half scalars[4] = { s4, s3, s2, s1 };
half2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 };
half3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 };
half4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 };

// Only the components under test are stored; the other components of each
// output element are not written here.
Out1[tid.x].x = scalars[tid.x];
Out2[tid.x].xy = vec2s[tid.x];
Out3[tid.x].xyz = vec3s[tid.x];
Out4[tid.x] = vec4s[tid.x];

// constant folding case: the operand is a literal, and every thread stores
// the same result to element 0.
Out5[0] = WaveReadLaneFirst(half4(1,2,3,4));
}

//--- pipeline.yaml

---
# A single compute shader whose entry point is `main` in source.hlsl.
# The shader itself declares numthreads(4,1,1).
# NOTE(review): DispatchSize is presumably the thread-group count per axis - confirm.
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1, 1, 1]
# Buffer contents for the fp16 test. Data entries are 16-bit hexadecimal
# values; the comment above each Data line gives the decimal values intended.
# Stride 8 matches one half4 element (4 components x 2 bytes).
Buffers:
# Input: four half4 elements, one per thread.
- Name: In
Format: Float16
Stride: 8
# 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000
Data: [ 0x3c00, 0x4900, 0x5640, 0x63d0, 0x4000, 0x4d00, 0x5a40, 0x67d0, 0x4200, 0x4f80, 0x5cb0, 0x69dc, 0x4400, 0x5100, 0x5e40, 0x6bd0 ]
# Out1..Out4: zero-initialised outputs; 32 is presumably a byte count, i.e.
# four elements of stride 8 (one per thread).
- Name: Out1
Format: Float16
Stride: 8
ZeroInitSize: 32
- Name: Out2
Format: Float16
Stride: 8
ZeroInitSize: 32
- Name: Out3
Format: Float16
Stride: 8
ZeroInitSize: 32
- Name: Out4
Format: Float16
Stride: 8
ZeroInitSize: 32
# Out5: a single element, matching the shader's one store to Out5[0].
- Name: Out5
Format: Float16
Stride: 8
ZeroInitSize: 8
# Expected results: element i repeats In[i], limited to the components the
# shader writes for that output; the remaining components stay zero.
- Name: ExpectedOut1
Format: Float16
Stride: 8
# 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0
Data: [ 0x3c00, 0x0, 0x0, 0x0, 0x4000, 0x0, 0x0, 0x0, 0x4200, 0x0, 0x0, 0x0, 0x4400, 0x0, 0x0, 0x0 ]
- Name: ExpectedOut2
Format: Float16
Stride: 8
# 1, 10, 0, 0, 2, 20, 0, 0, 3, 30, 0, 0, 4, 40, 0, 0
Data: [ 0x3c00, 0x4900, 0x0, 0x0, 0x4000, 0x4d00, 0x0, 0x0, 0x4200, 0x4f80, 0x0, 0x0, 0x4400, 0x5100, 0x0, 0x0 ]
- Name: ExpectedOut3
Format: Float16
Stride: 8
# 1, 10, 100, 0, 2, 20, 200, 0, 3, 30, 300, 0, 4, 40, 400, 0
Data: [ 0x3c00, 0x4900, 0x5640, 0x0, 0x4000, 0x4d00, 0x5a40, 0x0, 0x4200, 0x4f80, 0x5cb0, 0x0, 0x4400, 0x5100, 0x5e40, 0x0 ]
- Name: ExpectedOut4
Format: Float16
Stride: 8
# 1, 10, 100, 1000, 2, 20, 200, 2000, 3, 30, 300, 3000, 4, 40, 400, 4000
Data: [ 0x3c00, 0x4900, 0x5640, 0x63d0, 0x4000, 0x4d00, 0x5a40, 0x67d0, 0x4200, 0x4f80, 0x5cb0, 0x69dc, 0x4400, 0x5100, 0x5e40, 0x6bd0 ]
# Expected result of the constant-operand call stored to Out5[0].
- Name: ExpectedOut5
Format: Float16
Stride: 8
# 1, 2, 3, 4
Data: [ 0x3C00, 0x4000, 0x4200, 0x4400 ]
# One check per output buffer: OutN is compared against ExpectedOutN.
# NOTE(review): BufferExact presumably demands an exact match of the buffer
# contents (no tolerance) - confirm against the test-suite documentation.
Results:
- Result: ExpectedOut1
Rule: BufferExact
Actual: Out1
Expected: ExpectedOut1
- Result: ExpectedOut2
Rule: BufferExact
Actual: Out2
Expected: ExpectedOut2
- Result: ExpectedOut3
Rule: BufferExact
Actual: Out3
Expected: ExpectedOut3
- Result: ExpectedOut4
Rule: BufferExact
Actual: Out4
Expected: ExpectedOut4
- Result: ExpectedOut5
Rule: BufferExact
Actual: Out5
Expected: ExpectedOut5

# Resource bindings. The DirectX register numbers mirror the register(t0) /
# register(u1..u5) annotations in source.hlsl, and each Vulkan binding index
# uses the same number as the corresponding DirectX register.
DescriptorSets:
- Resources:
# Read-only input (t0).
- Name: In
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
# Writable outputs (u1..u5).
- Name: Out1
Kind: RWStructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: Out2
Kind: RWStructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out3
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
- Name: Out4
Kind: RWStructuredBuffer
DirectXBinding:
Register: 4
Space: 0
VulkanBinding:
Binding: 4
- Name: Out5
Kind: RWStructuredBuffer
DirectXBinding:
Register: 5
Space: 0
VulkanBinding:
Binding: 5
...
#--- end

# REQUIRES: Half

# Bug https://github.com/llvm/offload-test-suite/issues/393
# XFAIL: Metal

# Bug https://github.com/llvm/llvm-project/issues/156775
# XFAIL: Clang

# Bug https://github.com/llvm/offload-test-suite/issues/433
# XFAIL: DirectX-WARP

# RUN: split-file %s %t
# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
179 changes: 179 additions & 0 deletions test/WaveOps/WaveReadLaneFirst.fp32.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#--- source.hlsl
// Input: main reads one float4 per thread as In[tid.x].
StructuredBuffer<float4> In : register(t0);
// Outputs: all five buffers hold float4 elements, but main only writes the
// components named in each trailing comment; the rest are left untouched.
RWStructuredBuffer<float4> Out1 : register(u1); // test scalar (writes .x)
RWStructuredBuffer<float4> Out2 : register(u2); // test float2 (writes .xy)
RWStructuredBuffer<float4> Out3 : register(u3); // test float3 (writes .xyz)
RWStructuredBuffer<float4> Out4 : register(u4); // test float4 (writes all four components)
RWStructuredBuffer<float4> Out5 : register(u5); // constant folding (only element 0 is written)

// Exercises WaveReadLaneFirst on float, float2, float3 and float4 operands.
// Each of the four threads loads its own float4 from In, evaluates the
// intrinsic under four different thread-index conditions, and stores the
// result picked by its own thread index into Out1..Out4.
[numthreads(4,1,1)]
void main(uint3 tid : SV_GroupThreadID)
{
float4 v = In[tid.x];

// Each call sits in the true arm of a ternary on tid.x; threads that fail the
// condition take the zero literal instead. The suffix _1.._4 / 1..4 counts
// how many threads satisfy the condition (>= 3 -> one thread, >= 0 -> all
// four, since tid.x is unsigned).
// NOTE(review): the expected data in pipeline.yaml assumes threads failing
// the condition are inactive at the call, so the value read comes from thread
// N for the ">= N" variant - confirm the ternary yields that divergence for
// the targeted compiler / language version.
float s1 = tid.x >= 3 ? WaveReadLaneFirst( v.x ) : 0;
float s2 = tid.x >= 2 ? WaveReadLaneFirst( v.x ) : 0;
float s3 = tid.x >= 1 ? WaveReadLaneFirst( v.x ) : 0;
float s4 = tid.x >= 0 ? WaveReadLaneFirst( v.x ) : 0;

// Same four conditions, two-component operand.
float2 v2_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xy ) : float2(0,0);
float2 v2_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xy ) : float2(0,0);
float2 v2_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xy ) : float2(0,0);
float2 v2_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xy ) : float2(0,0);

// Same four conditions, three-component operand.
float3 v3_1 = tid.x >= 3 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0);
float3 v3_2 = tid.x >= 2 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0);
float3 v3_3 = tid.x >= 1 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0);
float3 v3_4 = tid.x >= 0 ? WaveReadLaneFirst( v.xyz ) : float3(0,0,0);

// Same four conditions, full four-component operand.
float4 v4_1 = tid.x >= 3 ? WaveReadLaneFirst( v ) : float4(0,0,0,0);
float4 v4_2 = tid.x >= 2 ? WaveReadLaneFirst( v ) : float4(0,0,0,0);
float4 v4_3 = tid.x >= 1 ? WaveReadLaneFirst( v ) : float4(0,0,0,0);
float4 v4_4 = tid.x >= 0 ? WaveReadLaneFirst( v ) : float4(0,0,0,0);

// The tables are listed in reverse order so that index i holds the result of
// the "tid.x >= i" variant; thread i therefore picks the variant whose
// condition starts at its own index.
float scalars[4] = { s4, s3, s2, s1 };
float2 vec2s [4] = { v2_4, v2_3, v2_2, v2_1 };
float3 vec3s [4] = { v3_4, v3_3, v3_2, v3_1 };
float4 vec4s [4] = { v4_4, v4_3, v4_2, v4_1 };

// Only the components under test are stored; the other components of each
// output element are not written here.
Out1[tid.x].x = scalars[tid.x];
Out2[tid.x].xy = vec2s[tid.x];
Out3[tid.x].xyz = vec3s[tid.x];
Out4[tid.x] = vec4s[tid.x];

// constant folding case: the operand is a literal, and every thread stores
// the same result to element 0.
Out5[0] = WaveReadLaneFirst(float4(1,2,3,4));
}

//--- pipeline.yaml

---
# A single compute shader whose entry point is `main` in source.hlsl.
# The shader itself declares numthreads(4,1,1).
# NOTE(review): DispatchSize is presumably the thread-group count per axis - confirm.
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1, 1, 1]
# Buffer contents for the fp32 test. Stride 16 matches one float4 element
# (4 components x 4 bytes).
Buffers:
# Input: four float4 elements, one per thread.
- Name: In
Format: Float32
Stride: 16
Data: [ 1.0, 10.0, 100.0, 1000.0, 2.0, 20.0, 200.0, 2000.0, 3.0, 30.0, 300.0, 3000.0, 4.0, 40.0, 400.0, 4000.0 ]
# Out1..Out4: zero-initialised outputs; 64 is presumably a byte count, i.e.
# four elements of stride 16 (one per thread).
- Name: Out1
Format: Float32
Stride: 16
ZeroInitSize: 64
- Name: Out2
Format: Float32
Stride: 16
ZeroInitSize: 64
- Name: Out3
Format: Float32
Stride: 16
ZeroInitSize: 64
- Name: Out4
Format: Float32
Stride: 16
ZeroInitSize: 64
# Out5: a single element, matching the shader's one store to Out5[0].
- Name: Out5
Format: Float32
Stride: 16
ZeroInitSize: 16
# Expected results: element i repeats In[i], limited to the components the
# shader writes for that output; the remaining components stay zero.
- Name: ExpectedOut1
Format: Float32
Stride: 16
Data: [ 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0 ]
- Name: ExpectedOut2
Format: Float32
Stride: 16
Data: [ 1.0, 10.0, 0.0, 0.0, 2.0, 20.0, 0.0, 0.0, 3.0, 30.0, 0.0, 0.0, 4.0, 40.0, 0.0, 0.0 ]
- Name: ExpectedOut3
Format: Float32
Stride: 16
Data: [ 1.0, 10.0, 100.0, 0.0, 2.0, 20.0, 200.0, 0.0, 3.0, 30.0, 300.0, 0.0, 4.0, 40.0, 400.0, 0.0 ]
- Name: ExpectedOut4
Format: Float32
Stride: 16
Data: [ 1.0, 10.0, 100.0, 1000.0, 2.0, 20.0, 200.0, 2000.0, 3.0, 30.0, 300.0, 3000.0, 4.0, 40.0, 400.0, 4000.0 ]
# Expected result of the constant-operand call stored to Out5[0].
- Name: ExpectedOut5
Format: Float32
Stride: 16
Data: [ 1, 2, 3, 4 ]
# One check per output buffer: OutN is compared against ExpectedOutN.
# NOTE(review): BufferExact presumably demands an exact match of the buffer
# contents (no tolerance) - confirm against the test-suite documentation.
Results:
- Result: ExpectedOut1
Rule: BufferExact
Actual: Out1
Expected: ExpectedOut1
- Result: ExpectedOut2
Rule: BufferExact
Actual: Out2
Expected: ExpectedOut2
- Result: ExpectedOut3
Rule: BufferExact
Actual: Out3
Expected: ExpectedOut3
- Result: ExpectedOut4
Rule: BufferExact
Actual: Out4
Expected: ExpectedOut4
- Result: ExpectedOut5
Rule: BufferExact
Actual: Out5
Expected: ExpectedOut5

# Resource bindings. The DirectX register numbers mirror the register(t0) /
# register(u1..u5) annotations in source.hlsl, and each Vulkan binding index
# uses the same number as the corresponding DirectX register.
DescriptorSets:
- Resources:
# Read-only input (t0).
- Name: In
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
# Writable outputs (u1..u5).
- Name: Out1
Kind: RWStructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: Out2
Kind: RWStructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out3
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
- Name: Out4
Kind: RWStructuredBuffer
DirectXBinding:
Register: 4
Space: 0
VulkanBinding:
Binding: 4
- Name: Out5
Kind: RWStructuredBuffer
DirectXBinding:
Register: 5
Space: 0
VulkanBinding:
Binding: 5
...
#--- end


# Bug https://github.com/llvm/offload-test-suite/issues/393
# XFAIL: Metal

# Bug https://github.com/llvm/llvm-project/issues/156775
# XFAIL: Clang

# RUN: split-file %s %t
# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
Loading
Loading