@@ -357,17 +357,19 @@ struct scan<Config, BinOp, Exclusive, 3, device_capabilities>
357
357
if (glsl::gl_SubgroupID () < Config::LevelInputCount_2)
358
358
{
359
359
vector_lv1_t lv1_val;
360
- scratchAccessor.template get<scalar_t, uint16_t>(Config::template sharedLoadIndex<1 >(invocationIndex-uint16_t (1u), Config::ItemsPerInvocation_1-uint16_t (1u)), lv1_val[0 ]);
361
360
[unroll]
362
- for (uint16_t i = 1 ; i < Config::ItemsPerInvocation_1; i++)
363
- scratchAccessor.template get<scalar_t, uint16_t>(Config::template sharedLoadIndex<1 >(invocationIndex, i- uint16_t (1u) ), lv1_val[i]);
361
+ for (uint16_t i = 0 ; i < Config::ItemsPerInvocation_1; i++)
362
+ scratchAccessor.template get<scalar_t, uint16_t>(Config::template sharedLoadIndex<1 >(invocationIndex, i), lv1_val[i]);
364
363
365
364
scalar_t lv2_scan;
366
365
const uint16_t bankedIndex = Config::template sharedStoreIndex<2 >(uint16_t (glsl::gl_SubgroupID ()-1u));
367
- scratchAccessor.template get<scalar_t, uint16_t>(bankedIndex, lv2_scan);
366
+ if (glsl::gl_SubgroupID () != 0 )
367
+ scratchAccessor.template get<scalar_t, uint16_t>(bankedIndex, lv2_scan);
368
+ else
369
+ lv2_scan = BinOp::identity;
368
370
369
371
[unroll]
370
- for (uint16_t i = 0 ; i < Config::ItemsPerInvocation_1; i-- )
372
+ for (uint16_t i = 0 ; i < Config::ItemsPerInvocation_1; i++ )
371
373
scratchAccessor.template set<scalar_t, uint16_t>(Config::template sharedLoadIndex<1 >(invocationIndex, i), binop (lv1_val[i],lv2_scan));
372
374
}
373
375
scratchAccessor.workgroupExecutionAndMemoryBarrier ();
0 commit comments