@@ -43,9 +43,9 @@ struct reduce<Config, BinOp, 1, device_capabilities>
 
         subgroup2::reduction<params_t> reduction;
         vector_t value;
-        dataAccessor.get(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value);
+        dataAccessor.get(workgroup::SubgroupContiguousIndex(), value);
         value = reduction(value);
-        dataAccessor.set(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value);    // can be safely merged with top line?
+        dataAccessor.set(workgroup::SubgroupContiguousIndex(), value);
     }
 };
 
@@ -63,7 +63,7 @@ struct scan<Config, BinOp, Exclusive, 1, device_capabilities>
         using params_t = subgroup2::ArithmeticParams<config_t, BinOp, Config::ItemsPerInvocation_0, device_capabilities>;
 
         vector_t value;
-        dataAccessor.get(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value);
+        dataAccessor.get(workgroup::SubgroupContiguousIndex(), value);
         if (Exclusive)
         {
             subgroup2::exclusive_scan<params_t> excl_scan;
@@ -74,7 +74,7 @@ struct scan<Config, BinOp, Exclusive, 1, device_capabilities>
             subgroup2::inclusive_scan<params_t> incl_scan;
             value = incl_scan(value);
         }
-        dataAccessor.set(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value);    // can be safely merged with above lines?
+        dataAccessor.set(workgroup::SubgroupContiguousIndex(), value);    // can be safely merged with above lines?
     }
 };
 
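Note that every change in the two level-1 specializations above is the same one-liner: the `glsl::gl_WorkGroupID().x * Config::SubgroupSize` term is dropped, so `dataAccessor` is now indexed with the subgroup-contiguous index alone and the per-workgroup offset becomes the accessor's responsibility. Below is a minimal sketch of one way a caller could keep the old global addressing, assuming a hypothetical wrapper type (`OffsetDataAccessor` and its members are illustrative names, not part of this PR):

```hlsl
// Hypothetical sketch, not from this PR: a wrapper that re-applies the
// per-workgroup offset that reduce/scan no longer add to the index themselves.
template<typename BaseAccessor, typename T>
struct OffsetDataAccessor
{
    BaseAccessor base;
    uint32_t offset;    // e.g. glsl::gl_WorkGroupID().x * Config::SubgroupSize for level 1

    // forward get/set with the offset folded back into the index
    void get(const uint32_t ix, inout T value) { base.get(ix + offset, value); }
    void set(const uint32_t ix, in T value)    { base.set(ix + offset, value); }
};
```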
@@ -101,7 +101,7 @@ struct reduce<Config, BinOp, 2, device_capabilities>
         [unroll]
         for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
         {
-            dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
+            dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
             scan_local[idx] = reduction0(scan_local[idx]);
             if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
             {
@@ -131,7 +131,7 @@ struct reduce<Config, BinOp, 2, device_capabilities>
         {
             scalar_t reduce_val;
             scratchAccessor.get(glsl::gl_SubgroupInvocationID(),reduce_val);
-            dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
+            dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
         }
     }
 };
@@ -158,7 +158,7 @@ struct scan<Config, BinOp, Exclusive, 2, device_capabilities>
         [unroll]
         for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
         {
-            dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
+            dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
             scan_local[idx] = inclusiveScan0(scan_local[idx]);
             if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
             {
@@ -204,7 +204,7 @@ struct scan<Config, BinOp, Exclusive, 2, device_capabilities>
                 for (uint32_t i = 0; i < Config::ItemsPerInvocation_0; i++)
                     scan_local[idx][i] = binop(left, scan_local[idx][i]);
             }
-            dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
+            dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
         }
     }
 };
@@ -234,7 +234,7 @@ struct reduce<Config, BinOp, 3, device_capabilities>
         [unroll]
         for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
         {
-            dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
+            dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
             scan_local[idx] = reduction0(scan_local[idx]);
             if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
             {
@@ -281,7 +281,7 @@ struct reduce<Config, BinOp, 3, device_capabilities>
         {
             scalar_t reduce_val;
             scratchAccessor.get(glsl::gl_SubgroupInvocationID(),reduce_val);
-            dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
+            dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
         }
     }
 };
@@ -310,7 +310,7 @@ struct scan<Config, BinOp, Exclusive, 3, device_capabilities>
         [unroll]
         for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
         {
-            dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
+            dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
             scan_local[idx] = inclusiveScan0(scan_local[idx]);
             if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
             {
@@ -384,7 +384,7 @@ struct scan<Config, BinOp, Exclusive, 3, device_capabilities>
                 for (uint32_t i = 0; i < Config::ItemsPerInvocation_0; i++)
                     scan_local[idx][i] = binop(left, scan_local[idx][i]);
             }
-            dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
+            dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
         }
     }
 };
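The level-2 and level-3 hunks repeat the same pattern with a larger stride: there the dropped term is `glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize`, so a wrapper like the sketch above would simply be constructed with that offset instead. For example (again purely illustrative; `MyAccessor` is a stand-in for whatever base accessor the dispatch uses):

```hlsl
// Hypothetical usage for the level-2/3 specializations, not from this PR.
OffsetDataAccessor<MyAccessor, vector_t> acc;
acc.base = dataAccessor;
acc.offset = glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize;
```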