Skip to content

Commit c7c037f

Browse files
add more wave size 4 threads 4 tests
1 parent df5d12f commit c7c037f

File tree

1,773 files changed

+185185
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,773 files changed

+185185
-0
lines changed
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
#--- source.hlsl
2+
// Record log written by main(): each recorded wave operation occupies three
// consecutive uints (a tag word, then ballot.x and ballot.y).
RWStructuredBuffer<uint> _participant_bit : register(u2);
3+
// Element 0 is used by main() as the write cursor into _participant_bit; it
// is advanced by 3 with InterlockedAdd for every record.
RWStructuredBuffer<uint> _wave_op_index : register(u3);
4+
5+
// Compute entry point, launched with 4 threads per group.
//
// Every guarded region below follows the same recording pattern:
//   1. run a wave intrinsic (WaveActiveMax / WaveActiveMin / WaveActiveSum)
//      and add its value to `result`;
//   2. reserve three consecutive slots in _participant_bit by atomically
//      adding 3 to _wave_op_index[0]; the pre-add value is returned in `temp`;
//   3. store a tag word at [temp], then ballot.x and ballot.y of
//      WaveActiveBallot(1) at [temp + 1] and [temp + 2].
// The tag word is a per-site constant shifted left by 6, OR'd with the
// enclosing loop counters shifted left by 4, 2 and 0 (outermost loop first).
//
// `tid` is never read; all branching is keyed on WaveGetLaneIndex().
// `result` is only accumulated locally and is never stored to a buffer here.
//
// NOTE(review): several guards below can never be true and one switch case
// has no break. The expected_bit_patterns data in pipeline.yaml only contains
// tags for sites 24, 39, 205 and 219, which is consistent with that, so this
// looks like intentional generated divergence-test input — confirm before
// "fixing" any of it.
[numthreads(4, 1, 1)]
6+
void main(uint3 tid : SV_DispatchThreadID) {
7+
uint result = 0;
8+
uint counter0 = 0;
9+
// counter0 is incremented at the top of the body, so it is 1..3 when used.
while ((counter0 < 3)) {
10+
counter0 = (counter0 + 1);
11+
uint counter1 = 0;
12+
// Likewise counter1 is 1..2 inside the body.
while ((counter1 < 2)) {
13+
counter1 = (counter1 + 1);
14+
// Site 24: lanes 0 and 2 only.
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
15+
result = (result + WaveActiveMax(result));
16+
uint temp = 0;
17+
InterlockedAdd(_wave_op_index[0], 3, temp);
18+
_participant_bit[temp] = (((24 << 6) | (counter0 << 4)) | (counter1 << 2));
19+
uint4 ballot = WaveActiveBallot(1);
20+
_participant_bit[(temp + 1)] = ballot.x;
21+
_participant_bit[(temp + 2)] = ballot.y;
22+
}
23+
// Site 39: the same lane >= 3 test is repeated three levels deep; the two
// inner tests are redundant once the outer one has passed.
if ((WaveGetLaneIndex() >= 3)) {
24+
if ((WaveGetLaneIndex() >= 3)) {
25+
if ((WaveGetLaneIndex() >= 3)) {
26+
result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
27+
uint temp = 0;
28+
InterlockedAdd(_wave_op_index[0], 3, temp);
29+
_participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (counter1 << 2));
30+
uint4 ballot = WaveActiveBallot(1);
31+
_participant_bit[(temp + 1)] = ballot.x;
32+
_participant_bit[(temp + 2)] = ballot.y;
33+
}
34+
}
35+
}
36+
}
37+
}
38+
// Everything up to the matching closing brace runs only for lane index 1.
if ((WaveGetLaneIndex() == 1)) {
39+
// Site 49: requires lane 3 inside a lane-1-only region, so this body cannot
// execute.
if ((WaveGetLaneIndex() == 3)) {
40+
result = (result + WaveActiveSum(3));
41+
uint temp = 0;
42+
InterlockedAdd(_wave_op_index[0], 3, temp);
43+
_participant_bit[temp] = (49 << 6);
44+
uint4 ballot = WaveActiveBallot(1);
45+
_participant_bit[(temp + 1)] = ballot.x;
46+
_participant_bit[(temp + 2)] = ballot.y;
47+
}
48+
// The enclosing guard fixes the lane index at 1, so the selector here is 1.
switch ((WaveGetLaneIndex() % 3)) {
49+
// case 0 has no break: control that entered it would fall through to case 1.
case 0: {
50+
if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
51+
// Site 78 (lane 2 is listed twice in the condition).
if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) {
52+
result = (result + WaveActiveMin(result));
53+
uint temp = 0;
54+
InterlockedAdd(_wave_op_index[0], 3, temp);
55+
_participant_bit[temp] = (78 << 6);
56+
uint4 ballot = WaveActiveBallot(1);
57+
_participant_bit[(temp + 1)] = ballot.x;
58+
_participant_bit[(temp + 2)] = ballot.y;
59+
}
60+
// Site 95.
if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
61+
result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
62+
uint temp = 0;
63+
InterlockedAdd(_wave_op_index[0], 3, temp);
64+
_participant_bit[temp] = (95 << 6);
65+
uint4 ballot = WaveActiveBallot(1);
66+
_participant_bit[(temp + 1)] = ballot.x;
67+
_participant_bit[(temp + 2)] = ballot.y;
68+
}
69+
}
70+
}
71+
case 1: {
72+
// Site 104: needs an even lane, but the only lane that can be inside the
// enclosing lane == 1 region is odd, so this guard fails there.
if (((WaveGetLaneIndex() % 2) == 0)) {
73+
result = (result + WaveActiveSum(2));
74+
uint temp = 0;
75+
InterlockedAdd(_wave_op_index[0], 3, temp);
76+
_participant_bit[temp] = (104 << 6);
77+
uint4 ballot = WaveActiveBallot(1);
78+
_participant_bit[(temp + 1)] = ballot.x;
79+
_participant_bit[(temp + 2)] = ballot.y;
80+
}
81+
break;
82+
}
83+
case 2: {
84+
uint counter2 = 0;
85+
// counter2 is 1..3 inside the body (incremented before use).
while ((counter2 < 3)) {
86+
counter2 = (counter2 + 1);
87+
// Site 120: even lanes.
if (((WaveGetLaneIndex() & 1) == 0)) {
88+
result = (result + WaveActiveSum(WaveGetLaneIndex()));
89+
uint temp = 0;
90+
InterlockedAdd(_wave_op_index[0], 3, temp);
91+
_participant_bit[temp] = ((120 << 6) | (counter2 << 4));
92+
uint4 ballot = WaveActiveBallot(1);
93+
_participant_bit[(temp + 1)] = ballot.x;
94+
_participant_bit[(temp + 2)] = ballot.y;
95+
}
96+
// Site 131: same even-lane guard as site 120, evaluated a second time.
if (((WaveGetLaneIndex() & 1) == 0)) {
97+
result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
98+
uint temp = 0;
99+
InterlockedAdd(_wave_op_index[0], 3, temp);
100+
_participant_bit[temp] = ((131 << 6) | (counter2 << 4));
101+
uint4 ballot = WaveActiveBallot(1);
102+
_participant_bit[(temp + 1)] = ballot.x;
103+
_participant_bit[(temp + 2)] = ballot.y;
104+
}
105+
}
106+
break;
107+
}
108+
}
109+
// Site 138: lane 3 is required, but this is still inside the lane == 1
// region, so the body cannot execute.
if ((WaveGetLaneIndex() == 3)) {
110+
result = (result + WaveActiveMin(WaveGetLaneIndex()));
111+
uint temp = 0;
112+
InterlockedAdd(_wave_op_index[0], 3, temp);
113+
_participant_bit[temp] = (138 << 6);
114+
uint4 ballot = WaveActiveBallot(1);
115+
_participant_bit[(temp + 1)] = ballot.x;
116+
_participant_bit[(temp + 2)] = ballot.y;
117+
}
118+
}
119+
// Two passes (i3 = 0, 1); odd lanes take the first branch, even lanes the else.
for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
120+
if (((WaveGetLaneIndex() & 1) == 1)) {
121+
// Site 160: asks for an even lane inside the odd-lane branch, so it cannot
// execute.
if (((WaveGetLaneIndex() & 1) == 0)) {
122+
result = (result + WaveActiveMax(result));
123+
uint temp = 0;
124+
InterlockedAdd(_wave_op_index[0], 3, temp);
125+
_participant_bit[temp] = ((160 << 6) | (i3 << 4));
126+
uint4 ballot = WaveActiveBallot(1);
127+
_participant_bit[(temp + 1)] = ballot.x;
128+
_participant_bit[(temp + 2)] = ballot.y;
129+
}
130+
uint counter4 = 0;
131+
// counter4 is 1..3 inside the body.
while ((counter4 < 3)) {
132+
counter4 = (counter4 + 1);
133+
// Site 184: lanes 0 and 2 (0 listed twice) are even, so this cannot pass in
// the odd-lane branch.
if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) {
134+
result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
135+
uint temp = 0;
136+
InterlockedAdd(_wave_op_index[0], 3, temp);
137+
_participant_bit[temp] = (((184 << 6) | (i3 << 4)) | (counter4 << 2));
138+
uint4 ballot = WaveActiveBallot(1);
139+
_participant_bit[(temp + 1)] = ballot.x;
140+
_participant_bit[(temp + 2)] = ballot.y;
141+
}
142+
uint counter5 = 0;
143+
// The break at the bottom fires when counter5 == 2, so the body only runs
// with counter5 = 1 and counter5 = 2 even though the bound is 3.
while ((counter5 < 3)) {
144+
counter5 = (counter5 + 1);
145+
// Site 198: lane 2 is even, so this cannot pass in the odd-lane branch.
if ((WaveGetLaneIndex() == 2)) {
146+
result = (result + WaveActiveMin(result));
147+
uint temp = 0;
148+
InterlockedAdd(_wave_op_index[0], 3, temp);
149+
_participant_bit[temp] = ((((198 << 6) | (i3 << 4)) | (counter4 << 2)) | counter5);
150+
uint4 ballot = WaveActiveBallot(1);
151+
_participant_bit[(temp + 1)] = ballot.x;
152+
_participant_bit[(temp + 2)] = ballot.y;
153+
}
154+
// Site 205: lane 1 only.
if ((WaveGetLaneIndex() == 1)) {
155+
result = (result + WaveActiveSum(result));
156+
uint temp = 0;
157+
InterlockedAdd(_wave_op_index[0], 3, temp);
158+
_participant_bit[temp] = ((((205 << 6) | (i3 << 4)) | (counter4 << 2)) | counter5);
159+
uint4 ballot = WaveActiveBallot(1);
160+
_participant_bit[(temp + 1)] = ballot.x;
161+
_participant_bit[(temp + 2)] = ballot.y;
162+
}
163+
if ((counter5 == 2)) {
164+
break;
165+
}
166+
}
167+
}
168+
} else {
169+
// Site 219: within the even-lane branch only lane 0 can satisfy this
// (lane 3 is odd).
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
170+
result = (result + WaveActiveMin(3));
171+
uint temp = 0;
172+
InterlockedAdd(_wave_op_index[0], 3, temp);
173+
_participant_bit[temp] = ((219 << 6) | (i3 << 4));
174+
uint4 ballot = WaveActiveBallot(1);
175+
_participant_bit[(temp + 1)] = ballot.x;
176+
_participant_bit[(temp + 2)] = ballot.y;
177+
}
178+
// Site 229: duplicated lane >= 3 guard inside the even-lane branch.
// NOTE(review): assuming the 4 threads map to lanes 0..3, no lane is both
// even and >= 3, so this would never run — confirm.
if ((WaveGetLaneIndex() >= 3)) {
179+
if ((WaveGetLaneIndex() >= 3)) {
180+
result = (result + WaveActiveSum(WaveGetLaneIndex()));
181+
uint temp = 0;
182+
InterlockedAdd(_wave_op_index[0], 3, temp);
183+
_participant_bit[temp] = ((229 << 6) | (i3 << 4));
184+
uint4 ballot = WaveActiveBallot(1);
185+
_participant_bit[(temp + 1)] = ballot.x;
186+
_participant_bit[(temp + 2)] = ballot.y;
187+
}
188+
}
189+
}
190+
}
191+
}
192+
193+
#--- pipeline.yaml
194+
# Pipeline description for the compute shader in source.hlsl (entry `main`).
---
195+
Shaders:
196+
- Stage: Compute
197+
Entry: main
198+
DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
199+
Buffers:
200+
# Output log the shader writes into; starts zero-filled with 96 elements.
- Name: _participant_bit
201+
Format: UInt32
202+
Stride: 4
203+
Fill: 0
204+
Size: 96
205+
# 32 records of 3 values each (tag word, ballot.x, ballot.y) = 96 values,
# the same element count as _participant_bit above.
- Name: expected_bit_patterns
206+
Format: UInt32
207+
Stride: 4
208+
Data: [1556, 5, 0, 1556, 5, 0, 1560, 5, 0, 1560, 5, 0, 1572, 5, 0, 1572, 5, 0, 1576, 5, 0, 1576, 5, 0, 1588, 5, 0, 1588, 5, 0, 1592, 5, 0, 1592, 5, 0, 2516, 8, 0, 2520, 8, 0, 2532, 8, 0, 2536, 8, 0, 2548, 8, 0, 2552, 8, 0, 13125, 2, 0, 13126, 2, 0, 13129, 2, 0, 13130, 2, 0, 13133, 2, 0, 13134, 2, 0, 13141, 2, 0, 13142, 2, 0, 13145, 2, 0, 13146, 2, 0, 13149, 2, 0, 13150, 2, 0, 14016, 1, 0, 14032, 1, 0]
209+
# Single-element counter the shader advances with InterlockedAdd; starts at 0.
- Name: _wave_op_index
210+
Format: UInt32
211+
Stride: 4
212+
Data: [0]
213+
Results:
214+
# GroupSize 3 matches the three-value records written by the shader.
# NOTE(review): the exact comparison semantics of BufferParticipantPattern
# are defined by the offloader tool, not visible here — confirm there.
- Result: BitTrackingValidation
215+
Rule: BufferParticipantPattern
216+
GroupSize: 3
217+
Actual: _participant_bit
218+
Expected: expected_bit_patterns
219+
DescriptorSets:
220+
# Bindings mirror register(u2) and register(u3) declared in source.hlsl.
- Resources:
221+
- Name: _participant_bit
222+
Kind: RWStructuredBuffer
223+
DirectXBinding:
224+
Register: 2
225+
Space: 0
226+
VulkanBinding:
227+
Binding: 2
228+
- Name: _wave_op_index
229+
Kind: RWStructuredBuffer
230+
DirectXBinding:
231+
Register: 3
232+
Space: 0
233+
VulkanBinding:
234+
Binding: 3
235+
...
236+
#--- end
237+
238+
# RUN: split-file %s %t
239+
# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
240+
# RUN: %offloader %t/pipeline.yaml %t.o

0 commit comments

Comments
 (0)