Skip to content

Commit f8dbefb

Browse files
update to use the correct binding index
1 parent 204cf8e commit f8dbefb

File tree

2 files changed

+210
-0
lines changed

2 files changed

+210
-0
lines changed
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#--- source.hlsl
2+
// Output log: main() writes three consecutive uints per instrumented site here
// (a per-site constant shifted left by 6, then ballot.x, then ballot.y).
RWStructuredBuffer<uint> _participant_bit : register(u2);
3+
// Element 0 is the shared write cursor into _participant_bit; main() advances
// it by 3 with InterlockedAdd at every instrumented site.
RWStructuredBuffer<uint> _wave_op_index : register(u3);
4+
5+
// Compute entry point with a thread-group size of 4x1x1.
//
// Every guarded site below follows the same pattern:
//   1. fold a wave intrinsic (WaveActiveSum / WaveActiveMax / WaveActiveMin)
//      into `result`;
//   2. reserve three consecutive slots of _participant_bit by atomically
//      adding 3 to _wave_op_index[0] (the previous value lands in `temp`);
//   3. store a per-site constant shifted left by 6, followed by the x and y
//      words of WaveActiveBallot(1) as observed at that site.
//
// `result` is never written to a buffer; it only feeds later wave intrinsics.
// The `tid` parameter is unused: all branching keys off WaveGetLaneIndex().
[numthreads(4, 1, 1)]
6+
void main(uint3 tid : SV_DispatchThreadID) {
7+
uint result = 0;
8+
switch ((WaveGetLaneIndex() % 2)) {
9+
case 0: {
10+
if ((WaveGetLaneIndex() < 8)) {
11+
result = (result + WaveActiveSum(1));
12+
uint temp = 0;
13+
// Atomically claim slots temp, temp + 1 and temp + 2 for this site's record.
InterlockedAdd(_wave_op_index[0], 3, temp);
14+
_participant_bit[temp] = (9 << 6);
15+
uint4 ballot = WaveActiveBallot(1);
16+
_participant_bit[(temp + 1)] = ballot.x;
17+
_participant_bit[(temp + 2)] = ballot.y;
18+
}
19+
// No break here: case 0 falls through into case 1.
}
20+
case 1: {
21+
if (((WaveGetLaneIndex() % 2) == 0)) {
22+
result = (result + WaveActiveSum(2));
23+
uint temp = 0;
24+
InterlockedAdd(_wave_op_index[0], 3, temp);
25+
_participant_bit[temp] = (18 << 6);
26+
uint4 ballot = WaveActiveBallot(1);
27+
_participant_bit[(temp + 1)] = ballot.x;
28+
_participant_bit[(temp + 2)] = ballot.y;
29+
}
30+
break;
31+
}
32+
}
33+
// Outer split: lanes with index < 1 or >= 3 take this branch, the rest take the else.
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
34+
// Same condition as the enclosing if, so it does not narrow the lane set further.
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
35+
result = (result + WaveActiveMax(5));
36+
uint temp = 0;
37+
InterlockedAdd(_wave_op_index[0], 3, temp);
38+
_participant_bit[temp] = (36 << 6);
39+
uint4 ballot = WaveActiveBallot(1);
40+
_participant_bit[(temp + 1)] = ballot.x;
41+
_participant_bit[(temp + 2)] = ballot.y;
42+
}
43+
if ((WaveGetLaneIndex() == 3)) {
44+
if ((WaveGetLaneIndex() == 3)) {
45+
result = (result + WaveActiveMin(result));
46+
uint temp = 0;
47+
InterlockedAdd(_wave_op_index[0], 3, temp);
48+
_participant_bit[temp] = (46 << 6);
49+
uint4 ballot = WaveActiveBallot(1);
50+
_participant_bit[(temp + 1)] = ballot.x;
51+
_participant_bit[(temp + 2)] = ballot.y;
52+
}
53+
// Unlike the first switch, every case of this switch ends with a break.
switch ((WaveGetLaneIndex() % 3)) {
54+
case 0: {
55+
if ((WaveGetLaneIndex() < 8)) {
56+
result = (result + WaveActiveSum(1));
57+
uint temp = 0;
58+
InterlockedAdd(_wave_op_index[0], 3, temp);
59+
_participant_bit[temp] = (56 << 6);
60+
uint4 ballot = WaveActiveBallot(1);
61+
_participant_bit[(temp + 1)] = ballot.x;
62+
_participant_bit[(temp + 2)] = ballot.y;
63+
}
64+
break;
65+
}
66+
case 1: {
67+
if (((WaveGetLaneIndex() % 2) == 0)) {
68+
result = (result + WaveActiveSum(2));
69+
uint temp = 0;
70+
InterlockedAdd(_wave_op_index[0], 3, temp);
71+
_participant_bit[temp] = (65 << 6);
72+
uint4 ballot = WaveActiveBallot(1);
73+
_participant_bit[(temp + 1)] = ballot.x;
74+
_participant_bit[(temp + 2)] = ballot.y;
75+
}
76+
break;
77+
}
78+
case 2: {
79+
if (true) {
80+
result = (result + WaveActiveSum(3));
81+
uint temp = 0;
82+
InterlockedAdd(_wave_op_index[0], 3, temp);
83+
_participant_bit[temp] = (70 << 6);
84+
uint4 ballot = WaveActiveBallot(1);
85+
_participant_bit[(temp + 1)] = ballot.x;
86+
_participant_bit[(temp + 2)] = ballot.y;
87+
}
88+
break;
89+
}
90+
}
91+
// Nested inside the lane == 3 branch, so this lane == 2 condition can never hold.
if ((WaveGetLaneIndex() == 2)) {
92+
result = (result + WaveActiveMin(result));
93+
uint temp = 0;
94+
InterlockedAdd(_wave_op_index[0], 3, temp);
95+
_participant_bit[temp] = (77 << 6);
96+
uint4 ballot = WaveActiveBallot(1);
97+
_participant_bit[(temp + 1)] = ballot.x;
98+
_participant_bit[(temp + 2)] = ballot.y;
99+
}
100+
}
101+
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
102+
result = (result + WaveActiveMax(WaveGetLaneIndex()));
103+
uint temp = 0;
104+
InterlockedAdd(_wave_op_index[0], 3, temp);
105+
_participant_bit[temp] = (88 << 6);
106+
uint4 ballot = WaveActiveBallot(1);
107+
_participant_bit[(temp + 1)] = ballot.x;
108+
_participant_bit[(temp + 2)] = ballot.y;
109+
}
110+
// Else branch of the outer split: lanes with index 1 or 2.
} else {
111+
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
112+
result = (result + WaveActiveMax(result));
113+
uint temp = 0;
114+
InterlockedAdd(_wave_op_index[0], 3, temp);
115+
_participant_bit[temp] = (99 << 6);
116+
uint4 ballot = WaveActiveBallot(1);
117+
_participant_bit[(temp + 1)] = ballot.x;
118+
_participant_bit[(temp + 2)] = ballot.y;
119+
}
120+
// Inside the else branch the lane index is 1 or 2, so lane < 1 can never hold
// and nothing in this if-body is reachable.
if ((WaveGetLaneIndex() < 1)) {
121+
if ((WaveGetLaneIndex() < 2)) {
122+
result = (result + WaveActiveSum(result));
123+
uint temp = 0;
124+
InterlockedAdd(_wave_op_index[0], 3, temp);
125+
_participant_bit[temp] = (109 << 6);
126+
uint4 ballot = WaveActiveBallot(1);
127+
_participant_bit[(temp + 1)] = ballot.x;
128+
_participant_bit[(temp + 2)] = ballot.y;
129+
}
130+
if ((WaveGetLaneIndex() == 3)) {
131+
if ((WaveGetLaneIndex() == 2)) {
132+
result = (result + WaveActiveMin(result));
133+
uint temp = 0;
134+
InterlockedAdd(_wave_op_index[0], 3, temp);
135+
_participant_bit[temp] = (119 << 6);
136+
uint4 ballot = WaveActiveBallot(1);
137+
_participant_bit[(temp + 1)] = ballot.x;
138+
_participant_bit[(temp + 2)] = ballot.y;
139+
}
140+
}
141+
if ((WaveGetLaneIndex() < 2)) {
142+
result = (result + WaveActiveMin(result));
143+
uint temp = 0;
144+
InterlockedAdd(_wave_op_index[0], 3, temp);
145+
_participant_bit[temp] = (126 << 6);
146+
uint4 ballot = WaveActiveBallot(1);
147+
_participant_bit[(temp + 1)] = ballot.x;
148+
_participant_bit[(temp + 2)] = ballot.y;
149+
}
150+
}
151+
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
152+
result = (result + WaveActiveMax(result));
153+
uint temp = 0;
154+
InterlockedAdd(_wave_op_index[0], 3, temp);
155+
_participant_bit[temp] = (137 << 6);
156+
uint4 ballot = WaveActiveBallot(1);
157+
_participant_bit[(temp + 1)] = ballot.x;
158+
_participant_bit[(temp + 2)] = ballot.y;
159+
}
160+
}
161+
}
162+
163+
#--- pipeline.yaml
164+
---
165+
# Pipeline description for source.hlsl: one compute dispatch, three buffers,
# and one result check comparing _participant_bit with expected_bit_patterns.
Shaders:
166+
- Stage: Compute
167+
Entry: main
168+
DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
169+
Buffers:
170+
- Name: _participant_bit
171+
Format: UInt32
172+
Stride: 4
173+
Fill: 0
174+
# 39 = 13 records x 3 uints, the same length as expected_bit_patterns below.
Size: 39
175+
- Name: expected_bit_patterns
176+
Format: UInt32
177+
Stride: 4
178+
# Each triple is (site constant << 6, ballot.x, ballot.y), mirroring what
# main() in source.hlsl stores per instrumented site.
# NOTE(review): the two 5632 (88 << 6) records list ballots 1 and 8 separately;
# confirm that the lanes reaching that site are not expected to share one ballot.
Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0, 2304, 9, 0, 2304, 9, 0, 2944, 8, 0, 3584, 8, 0, 5632, 1, 0, 5632, 8, 0, 6336, 2, 0, 8768, 6, 0, 8768, 6, 0]
179+
- Name: _wave_op_index
180+
Format: UInt32
181+
Stride: 4
182+
# Single element: the write cursor the shader advances with InterlockedAdd; starts at 0.
Data: [0]
183+
Results:
184+
- Result: BitTrackingValidation
185+
Rule: BufferParticipantPattern
186+
# Matches the three uints the shader writes per site.
GroupSize: 3
187+
Actual: _participant_bit
188+
Expected: expected_bit_patterns
189+
DescriptorSets:
190+
- Resources:
191+
- Name: _participant_bit
192+
Kind: RWStructuredBuffer
193+
DirectXBinding:
194+
# Corresponds to register(u2) in source.hlsl.
Register: 2
195+
Space: 0
196+
VulkanBinding:
197+
Binding: 2
198+
- Name: _wave_op_index
199+
Kind: RWStructuredBuffer
200+
DirectXBinding:
201+
# Corresponds to register(u3) in source.hlsl.
Register: 3
202+
Space: 0
203+
VulkanBinding:
204+
Binding: 3
205+
...
206+
#--- end
207+
208+
# RUN: split-file %s %t
209+
# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
210+
# RUN: %offloader %t/pipeline.yaml %t.o

test/tests.zip

-272 KB
Binary file not shown.

0 commit comments

Comments
 (0)