11#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
22#define RecoTracker_LSTCore_src_alpaka_Kernels_h
33
4+ #include " HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
5+
46#include " RecoTracker/LSTCore/interface/alpaka/Common.h"
57#include " RecoTracker/LSTCore/interface/ModulesSoA.h"
68#include " RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
@@ -139,26 +141,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
139141 }
140142
141143 struct RemoveDupQuintupletsAfterBuild {
142- template <typename TAcc>
143- ALPAKA_FN_ACC void operator ()(TAcc const & acc,
144+ ALPAKA_FN_ACC void operator ()(Acc3D const & acc,
144145 ModulesConst modules,
145146 Quintuplets quintuplets,
146147 QuintupletsOccupancyConst quintupletsOccupancy,
147148 ObjectRangesConst ranges) const {
148- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
149- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
150-
151- for (unsigned int lowmod = globalThreadIdx[0 ]; lowmod < modules.nLowerModules (); lowmod += gridThreadExtent[0 ]) {
149+ for (unsigned int lowmod : cms::alpakatools::uniform_elements_z (acc, modules.nLowerModules ())) {
152150 unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets ()[lowmod];
153151 int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices ()[lowmod];
154152
155- for (unsigned int ix1 = globalThreadIdx[ 1 ]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[ 1 ] ) {
153+ for (unsigned int ix1 : cms::alpakatools::uniform_elements_y (acc, nQuintuplets_lowmod) ) {
156154 unsigned int ix = quintupletModuleIndices_lowmod + ix1;
157155 float eta1 = __H2F (quintuplets.eta ()[ix]);
158156 float phi1 = __H2F (quintuplets.phi ()[ix]);
159157 float score_rphisum1 = __H2F (quintuplets.score_rphisum ()[ix]);
160158
161- for (unsigned int jx1 = globalThreadIdx[ 2 ] + ix1 + 1 ; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[ 2 ] ) {
159+ for (unsigned int jx1 : cms::alpakatools::uniform_elements_x (acc, ix1 + 1 , nQuintuplets_lowmod) ) {
162160 unsigned int jx = quintupletModuleIndices_lowmod + jx1;
163161
164162 float eta2 = __H2F (quintuplets.eta ()[jx]);
@@ -189,25 +187,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
189187 };
190188
191189 struct RemoveDupQuintupletsBeforeTC {
192- template <typename TAcc>
193- ALPAKA_FN_ACC void operator ()(TAcc const & acc,
190+ ALPAKA_FN_ACC void operator ()(Acc2D const & acc,
194191 Quintuplets quintuplets,
195192 QuintupletsOccupancyConst quintupletsOccupancy,
196193 ObjectRangesConst ranges) const {
197- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
198- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
199-
200- for (unsigned int lowmodIdx1 = globalThreadIdx[1 ]; lowmodIdx1 < ranges.nEligibleT5Modules ();
201- lowmodIdx1 += gridThreadExtent[1 ]) {
194+ for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y (acc, ranges.nEligibleT5Modules ())) {
202195 uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules ()[lowmodIdx1];
203196 unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets ()[lowmod1];
204197 if (nQuintuplets_lowmod1 == 0 )
205198 continue ;
206199
207200 unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices ()[lowmod1];
208201
209- for (unsigned int lowmodIdx2 = globalThreadIdx[ 2 ] + lowmodIdx1; lowmodIdx2 < ranges. nEligibleT5Modules ();
210- lowmodIdx2 += gridThreadExtent[ 2 ] ) {
202+ for (unsigned int lowmodIdx2 :
203+ cms::alpakatools::uniform_elements_x (acc, lowmodIdx1, ranges. nEligibleT5Modules ()) ) {
211204 uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules ()[lowmodIdx2];
212205 unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets ()[lowmod2];
213206 if (nQuintuplets_lowmod2 == 0 )
@@ -272,13 +265,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
272265 };
273266
274267 struct RemoveDupPixelTripletsFromMap {
275- template <typename TAcc>
276- ALPAKA_FN_ACC void operator ()(TAcc const & acc, PixelTriplets pixelTriplets) const {
277- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
278- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
279-
280- for (unsigned int ix = globalThreadIdx[1 ]; ix < pixelTriplets.nPixelTriplets (); ix += gridThreadExtent[1 ]) {
281- for (unsigned int jx = globalThreadIdx[2 ]; jx < pixelTriplets.nPixelTriplets (); jx += gridThreadExtent[2 ]) {
268+ ALPAKA_FN_ACC void operator ()(Acc2D const & acc, PixelTriplets pixelTriplets) const {
269+ for (unsigned int ix : cms::alpakatools::uniform_elements_y (acc, pixelTriplets.nPixelTriplets ())) {
270+ for (unsigned int jx : cms::alpakatools::uniform_elements_x (acc, pixelTriplets.nPixelTriplets ())) {
282271 if (ix == jx)
283272 continue ;
284273
@@ -306,15 +295,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
306295 };
307296
308297 struct RemoveDupPixelQuintupletsFromMap {
309- template <typename TAcc>
310- ALPAKA_FN_ACC void operator ()(TAcc const & acc, PixelQuintuplets pixelQuintuplets) const {
311- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
312- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
313-
298+ ALPAKA_FN_ACC void operator ()(Acc2D const & acc, PixelQuintuplets pixelQuintuplets) const {
314299 unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets ();
315- for (unsigned int ix = globalThreadIdx[ 1 ]; ix < nPixelQuintuplets; ix += gridThreadExtent[ 1 ] ) {
300+ for (unsigned int ix : cms::alpakatools::uniform_elements_y (acc, nPixelQuintuplets) ) {
316301 float score1 = __H2F (pixelQuintuplets.score ()[ix]);
317- for (unsigned int jx = globalThreadIdx[ 2 ]; jx < nPixelQuintuplets; jx += gridThreadExtent[ 2 ] ) {
302+ for (unsigned int jx : cms::alpakatools::uniform_elements_x (acc, nPixelQuintuplets) ) {
318303 if (ix == jx)
319304 continue ;
320305
@@ -333,22 +318,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
333318 };
334319
335320 struct CheckHitspLS {
336- template <typename TAcc>
337- ALPAKA_FN_ACC void operator ()(TAcc const & acc,
321+ ALPAKA_FN_ACC void operator ()(Acc2D const & acc,
338322 ModulesConst modules,
339323 SegmentsOccupancyConst segmentsOccupancy,
340324 SegmentsPixel segmentsPixel,
341325 bool secondpass) const {
342- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
343- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
344-
345326 int pixelModuleIndex = modules.nLowerModules ();
346327 unsigned int nPixelSegments = segmentsOccupancy.nSegments ()[pixelModuleIndex];
347328
348329 if (nPixelSegments > n_max_pixel_segments_per_module)
349330 nPixelSegments = n_max_pixel_segments_per_module;
350331
351- for (unsigned int ix = globalThreadIdx[ 1 ]; ix < nPixelSegments; ix += gridThreadExtent[ 1 ] ) {
332+ for (unsigned int ix : cms::alpakatools::uniform_elements_y (acc, nPixelSegments) ) {
352333 if (secondpass && (!segmentsPixel.isQuad ()[ix] || (segmentsPixel.isDup ()[ix] & 1 )))
353334 continue ;
354335
@@ -360,7 +341,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
360341 float eta_pix1 = segmentsPixel.eta ()[ix];
361342 float phi_pix1 = segmentsPixel.phi ()[ix];
362343
363- for (unsigned int jx = ix + 1 + globalThreadIdx[ 2 ]; jx < nPixelSegments; jx += gridThreadExtent[ 2 ] ) {
344+ for (unsigned int jx : cms::alpakatools::uniform_elements_x (acc, ix + 1 , nPixelSegments) ) {
364345 float eta_pix2 = segmentsPixel.eta ()[jx];
365346 float phi_pix2 = segmentsPixel.phi ()[jx];
366347
0 commit comments