Skip to content

Commit 7855a68

Browse files
more tests
1 parent 3402373 commit 7855a68

File tree

38 files changed

+6225
-570
lines changed

38 files changed

+6225
-570
lines changed

test/WaveSize4BitTracking/program_1756267512374475441_28_increment_0_WaveParticipantBitTracking.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ Buffers:
235235
- Name: expected_bit_patterns
236236
Format: UInt32
237237
Stride: 4
238-
Data: [768, 1, 0, 7616, 8, 0, 8768, 5, 0, 8768, 5, 0, 12608, 4, 0, 12608, 1, 0]
238+
Data: [768, 1, 0, 7616, 8, 0, 8768, 5, 0, 8768, 5, 0, 12608, 5, 0, 12608, 5, 0]
239239
- Name: _wave_op_index
240240
Format: UInt32
241241
Stride: 4
Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,344 @@
1+
#--- source.hlsl
2+
// Output log written by main(): three uints per record
// (tag word, ballot.x, ballot.y).
RWStructuredBuffer<uint> _participant_bit : register(u0);
3+
// Element 0 is the write cursor into _participant_bit; main() advances it
// by 3 with InterlockedAdd to reserve the slots for one record.
RWStructuredBuffer<uint> _wave_op_index : register(u1);
4+
5+
// Compute entry point, 4 threads per group.
//
// Every guarded region below follows the same pattern:
//   1. fold a wave reduction (WaveActiveSum/Max/Min) into `result`;
//   2. reserve three consecutive slots in _participant_bit by atomically
//      adding 3 to _wave_op_index[0] (the previous value lands in `temp`);
//   3. store a tag word at [temp], then ballot.x and ballot.y of
//      WaveActiveBallot(1) at [temp + 1] and [temp + 2].
//
// Tag word = (constant << 6), OR'd with the enclosing loop counters at
// << 4 and << 2 where the region sits inside loops.
// NOTE(review): the constant looks like a per-site id emitted by the test
// generator - confirm against the generator.
//
// `result` only feeds later reductions; it is never stored to a buffer here.
[numthreads(4, 1, 1)]
6+
void main(uint3 tid : SV_DispatchThreadID) {
7+
uint result = 0;
8+
switch ((WaveGetLaneIndex() % 4)) {
9+
case 0: {
10+
if ((WaveGetLaneIndex() < 8)) {
11+
result = (result + WaveActiveSum(1));
12+
uint temp = 0;
13+
InterlockedAdd(_wave_op_index[0], 3, temp);
14+
_participant_bit[temp] = (9 << 6);
15+
uint4 ballot = WaveActiveBallot(1);
16+
_participant_bit[(temp + 1)] = ballot.x;
17+
_participant_bit[(temp + 2)] = ballot.y;
18+
}
19+
}
20+
// NOTE(review): case 0 above has no break, so it runs on into case 1.
// expected_bit_patterns (576 then 1152, both with mask 1) suggests this
// fall-through is intended.
case 1: {
21+
// Lanes dispatched directly to case 1 have an odd index and fail this test;
// only lanes arriving from case 0 can pass it.
if (((WaveGetLaneIndex() % 2) == 0)) {
22+
result = (result + WaveActiveSum(2));
23+
uint temp = 0;
24+
InterlockedAdd(_wave_op_index[0], 3, temp);
25+
_participant_bit[temp] = (18 << 6);
26+
uint4 ballot = WaveActiveBallot(1);
27+
_participant_bit[(temp + 1)] = ballot.x;
28+
_participant_bit[(temp + 2)] = ballot.y;
29+
}
30+
break;
31+
}
32+
case 2: {
33+
for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
34+
uint counter1 = 0;
35+
while ((counter1 < 3)) {
36+
// Incremented before use, so tags inside this loop carry counter1 = 1..3.
counter1 = (counter1 + 1);
37+
switch ((WaveGetLaneIndex() % 3)) {
38+
case 0: {
39+
if ((WaveGetLaneIndex() < 8)) {
40+
result = (result + WaveActiveSum(1));
41+
uint temp = 0;
42+
InterlockedAdd(_wave_op_index[0], 3, temp);
43+
_participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2));
44+
uint4 ballot = WaveActiveBallot(1);
45+
_participant_bit[(temp + 1)] = ballot.x;
46+
_participant_bit[(temp + 2)] = ballot.y;
47+
}
48+
break;
49+
}
50+
case 1: {
51+
if (((WaveGetLaneIndex() % 2) == 0)) {
52+
result = (result + WaveActiveSum(2));
53+
uint temp = 0;
54+
InterlockedAdd(_wave_op_index[0], 3, temp);
55+
_participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2));
56+
uint4 ballot = WaveActiveBallot(1);
57+
_participant_bit[(temp + 1)] = ballot.x;
58+
_participant_bit[(temp + 2)] = ballot.y;
59+
}
60+
break;
61+
}
62+
case 2: {
63+
if (true) {
64+
result = (result + WaveActiveSum(3));
65+
uint temp = 0;
66+
InterlockedAdd(_wave_op_index[0], 3, temp);
67+
_participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (counter1 << 2));
68+
uint4 ballot = WaveActiveBallot(1);
69+
_participant_bit[(temp + 1)] = ballot.x;
70+
_participant_bit[(temp + 2)] = ballot.y;
71+
}
72+
break;
73+
}
74+
}
75+
if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) {
76+
result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
77+
uint temp = 0;
78+
InterlockedAdd(_wave_op_index[0], 3, temp);
79+
_participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2));
80+
uint4 ballot = WaveActiveBallot(1);
81+
_participant_bit[(temp + 1)] = ballot.x;
82+
_participant_bit[(temp + 2)] = ballot.y;
83+
}
84+
}
85+
if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) {
86+
result = (result + WaveActiveMin(result));
87+
uint temp = 0;
88+
InterlockedAdd(_wave_op_index[0], 3, temp);
89+
_participant_bit[temp] = ((89 << 6) | (i0 << 4));
90+
uint4 ballot = WaveActiveBallot(1);
91+
_participant_bit[(temp + 1)] = ballot.x;
92+
_participant_bit[(temp + 2)] = ballot.y;
93+
}
// Leaves the for loop at the end of its last iteration (i0 == 2).
94+
if ((i0 == 2)) {
95+
break;
96+
}
97+
}
98+
}
99+
// NOTE(review): case 2 above has no break either, so lanes that ran case 2
// continue into case 3 (expected_bit_patterns has 84xx records with mask 4).
case 3: {
100+
uint counter2 = 0;
101+
while ((counter2 < 2)) {
102+
counter2 = (counter2 + 1);
103+
for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) {
104+
switch ((WaveGetLaneIndex() % 4)) {
105+
case 0: {
106+
if ((WaveGetLaneIndex() < 8)) {
107+
result = (result + WaveActiveSum(1));
108+
uint temp = 0;
109+
InterlockedAdd(_wave_op_index[0], 3, temp);
110+
_participant_bit[temp] = (((117 << 6) | (counter2 << 4)) | (i3 << 2));
111+
uint4 ballot = WaveActiveBallot(1);
112+
_participant_bit[(temp + 1)] = ballot.x;
113+
_participant_bit[(temp + 2)] = ballot.y;
114+
}
115+
break;
116+
}
117+
case 1: {
118+
if (((WaveGetLaneIndex() % 2) == 0)) {
119+
result = (result + WaveActiveSum(2));
120+
uint temp = 0;
121+
InterlockedAdd(_wave_op_index[0], 3, temp);
122+
_participant_bit[temp] = (((126 << 6) | (counter2 << 4)) | (i3 << 2));
123+
uint4 ballot = WaveActiveBallot(1);
124+
_participant_bit[(temp + 1)] = ballot.x;
125+
_participant_bit[(temp + 2)] = ballot.y;
126+
}
127+
break;
128+
}
129+
case 2: {
130+
if (true) {
131+
result = (result + WaveActiveSum(3));
132+
uint temp = 0;
133+
InterlockedAdd(_wave_op_index[0], 3, temp);
134+
_participant_bit[temp] = (((131 << 6) | (counter2 << 4)) | (i3 << 2));
135+
uint4 ballot = WaveActiveBallot(1);
136+
_participant_bit[(temp + 1)] = ballot.x;
137+
_participant_bit[(temp + 2)] = ballot.y;
138+
}
139+
break;
140+
}
141+
case 3: {
142+
if ((WaveGetLaneIndex() < 20)) {
143+
result = (result + WaveActiveSum(4));
144+
uint temp = 0;
145+
InterlockedAdd(_wave_op_index[0], 3, temp);
146+
_participant_bit[temp] = (((138 << 6) | (counter2 << 4)) | (i3 << 2));
147+
uint4 ballot = WaveActiveBallot(1);
148+
_participant_bit[(temp + 1)] = ballot.x;
149+
_participant_bit[(temp + 2)] = ballot.y;
150+
}
151+
break;
152+
}
153+
// The selector is (lane index % 4), which cases 0-3 already cover, so this
// default is not expected to execute.
default: {
154+
result = (result + WaveActiveSum(99));
155+
uint temp = 0;
156+
InterlockedAdd(_wave_op_index[0], 3, temp);
157+
_participant_bit[temp] = (((142 << 6) | (counter2 << 4)) | (i3 << 2));
158+
uint4 ballot = WaveActiveBallot(1);
159+
_participant_bit[(temp + 1)] = ballot.x;
160+
_participant_bit[(temp + 2)] = ballot.y;
161+
break;
162+
}
163+
}
164+
if ((WaveGetLaneIndex() < 2)) {
165+
result = (result + WaveActiveMax(WaveGetLaneIndex()));
166+
uint temp = 0;
167+
InterlockedAdd(_wave_op_index[0], 3, temp);
168+
_participant_bit[temp] = (((149 << 6) | (counter2 << 4)) | (i3 << 2));
169+
uint4 ballot = WaveActiveBallot(1);
170+
_participant_bit[(temp + 1)] = ballot.x;
171+
_participant_bit[(temp + 2)] = ballot.y;
172+
}
// This continue only skips the (i3 == 2) check below, which is false anyway
// when i3 == 1.
173+
if ((i3 == 1)) {
174+
continue;
175+
}
176+
if ((i3 == 2)) {
177+
break;
178+
}
179+
}
180+
if ((WaveGetLaneIndex() < 2)) {
181+
result = (result + WaveActiveMin(result));
182+
uint temp = 0;
183+
InterlockedAdd(_wave_op_index[0], 3, temp);
184+
_participant_bit[temp] = ((162 << 6) | (counter2 << 4));
185+
uint4 ballot = WaveActiveBallot(1);
186+
_participant_bit[(temp + 1)] = ballot.x;
187+
_participant_bit[(temp + 2)] = ballot.y;
188+
}
// counter2 is already 1 on the first pass, so the while body runs only once.
189+
if ((counter2 == 1)) {
190+
break;
191+
}
192+
}
193+
break;
194+
}
195+
}
196+
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
197+
// No lane can satisfy both the outer test (0 or 3) and this one (1 or 2),
// so this body never runs.
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
198+
result = (result + WaveActiveMax(result));
199+
uint temp = 0;
200+
InterlockedAdd(_wave_op_index[0], 3, temp);
201+
_participant_bit[temp] = (183 << 6);
202+
uint4 ballot = WaveActiveBallot(1);
203+
_participant_bit[(temp + 1)] = ballot.x;
204+
_participant_bit[(temp + 2)] = ballot.y;
205+
}
206+
// Within the outer branch only lane 3 can pass this test.
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
207+
result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
208+
uint temp = 0;
209+
InterlockedAdd(_wave_op_index[0], 3, temp);
210+
_participant_bit[temp] = (196 << 6);
211+
uint4 ballot = WaveActiveBallot(1);
212+
_participant_bit[(temp + 1)] = ballot.x;
213+
_participant_bit[(temp + 2)] = ballot.y;
214+
}
215+
} else {
216+
if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) {
217+
result = (result + WaveActiveMin(result));
218+
uint temp = 0;
219+
InterlockedAdd(_wave_op_index[0], 3, temp);
220+
_participant_bit[temp] = (211 << 6);
221+
uint4 ballot = WaveActiveBallot(1);
222+
_participant_bit[(temp + 1)] = ballot.x;
223+
_participant_bit[(temp + 2)] = ballot.y;
224+
}
225+
for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) {
226+
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
227+
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
228+
result = (result + WaveActiveMax(result));
229+
uint temp = 0;
230+
InterlockedAdd(_wave_op_index[0], 3, temp);
231+
_participant_bit[temp] = ((237 << 6) | (i4 << 4));
232+
uint4 ballot = WaveActiveBallot(1);
233+
_participant_bit[(temp + 1)] = ballot.x;
234+
_participant_bit[(temp + 2)] = ballot.y;
235+
}
236+
// Same condition as the enclosing if, so it is always true at this point.
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
237+
result = (result + WaveActiveMax(result));
238+
uint temp = 0;
239+
InterlockedAdd(_wave_op_index[0], 3, temp);
240+
_participant_bit[temp] = ((248 << 6) | (i4 << 4));
241+
uint4 ballot = WaveActiveBallot(1);
242+
_participant_bit[(temp + 1)] = ballot.x;
243+
_participant_bit[(temp + 2)] = ballot.y;
244+
}
245+
}
246+
if ((WaveGetLaneIndex() == 1)) {
247+
result = (result + WaveActiveMax(5));
248+
uint temp = 0;
249+
InterlockedAdd(_wave_op_index[0], 3, temp);
250+
_participant_bit[temp] = ((255 << 6) | (i4 << 4));
251+
uint4 ballot = WaveActiveBallot(1);
252+
_participant_bit[(temp + 1)] = ballot.x;
253+
_participant_bit[(temp + 2)] = ballot.y;
254+
}
// Last statement of the loop body, so this continue changes nothing.
255+
if ((i4 == 1)) {
256+
continue;
257+
}
258+
}
259+
if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
260+
result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
261+
uint temp = 0;
262+
InterlockedAdd(_wave_op_index[0], 3, temp);
263+
_participant_bit[temp] = (275 << 6);
264+
uint4 ballot = WaveActiveBallot(1);
265+
_participant_bit[(temp + 1)] = ballot.x;
266+
_participant_bit[(temp + 2)] = ballot.y;
267+
}
268+
}
269+
switch ((WaveGetLaneIndex() % 2)) {
270+
case 0: {
271+
if ((WaveGetLaneIndex() < 8)) {
272+
result = (result + WaveActiveSum(1));
273+
uint temp = 0;
274+
InterlockedAdd(_wave_op_index[0], 3, temp);
275+
_participant_bit[temp] = (285 << 6);
276+
uint4 ballot = WaveActiveBallot(1);
277+
_participant_bit[(temp + 1)] = ballot.x;
278+
_participant_bit[(temp + 2)] = ballot.y;
279+
}
280+
break;
281+
}
282+
case 1: {
283+
// Lanes in this case have (lane index % 2) == 1, so this test never passes.
if (((WaveGetLaneIndex() % 2) == 0)) {
284+
result = (result + WaveActiveSum(2));
285+
uint temp = 0;
286+
InterlockedAdd(_wave_op_index[0], 3, temp);
287+
_participant_bit[temp] = (294 << 6);
288+
uint4 ballot = WaveActiveBallot(1);
289+
_participant_bit[(temp + 1)] = ballot.x;
290+
_participant_bit[(temp + 2)] = ballot.y;
291+
}
292+
break;
293+
}
294+
}
295+
}
296+
297+
#--- pipeline.yaml
298+
---
299+
Shaders:
300+
- Stage: Compute
301+
Entry: main
302+
DispatchSize: [1, 1, 1] # One thread group; the shader's numthreads(4, 1, 1) makes that 4 threads in total
303+
Buffers:
304+
- Name: _participant_bit
305+
Format: UInt32
306+
Stride: 4
307+
Fill: 0
308+
Size: 123
309+
- Name: expected_bit_patterns
310+
Format: UInt32
311+
Stride: 4
312+
Data: [576, 1, 0, 1152, 1, 0, 3652, 4, 0, 3656, 4, 0, 3660, 4, 0, 3668, 4, 0, 3672, 4, 0, 3676, 4, 0, 3684, 4, 0, 3688, 4, 0, 3692, 4, 0, 4740, 4, 0, 4744, 4, 0, 4748, 4, 0, 4756, 4, 0, 4760, 4, 0, 4764, 4, 0, 4772, 4, 0, 4776, 4, 0, 4780, 4, 0, 5696, 4, 0, 5712, 4, 0, 5728, 4, 0, 8400, 4, 0, 8404, 4, 0, 8408, 4, 0, 8848, 8, 0, 8852, 8, 0, 8856, 8, 0, 12544, 8, 0, 13504, 6, 0, 13504, 6, 0, 15872, 4, 0, 15888, 4, 0, 15904, 4, 0, 16320, 2, 0, 16336, 2, 0, 16352, 2, 0, 17600, 2, 0, 18240, 5, 0, 18240, 5, 0]
313+
- Name: _wave_op_index
314+
Format: UInt32
315+
Stride: 4
316+
Data: [0]
317+
Results:
318+
- Result: BitTrackingValidation
319+
Rule: BufferParticipantPattern
320+
GroupSize: 3
321+
Actual: _participant_bit
322+
Expected: expected_bit_patterns
323+
DescriptorSets:
324+
- Resources:
325+
- Name: _participant_bit
326+
Kind: RWStructuredBuffer
327+
DirectXBinding:
328+
Register: 0
329+
Space: 0
330+
VulkanBinding:
331+
Binding: 0
332+
- Name: _wave_op_index
333+
Kind: RWStructuredBuffer
334+
DirectXBinding:
335+
Register: 1
336+
Space: 0
337+
VulkanBinding:
338+
Binding: 1
339+
...
340+
#--- end
341+
342+
# RUN: split-file %s %t
343+
# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
344+
# RUN: %offloader %t/pipeline.yaml %t.o

0 commit comments

Comments
 (0)