11#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
22#define RecoTracker_LSTCore_src_alpaka_Kernels_h
33
4+ #include " HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
5+
46#include " RecoTracker/LSTCore/interface/alpaka/Common.h"
57#include " RecoTracker/LSTCore/interface/ModulesSoA.h"
68#include " RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
@@ -145,20 +147,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
145147 Quintuplets quintuplets,
146148 QuintupletsOccupancyConst quintupletsOccupancy,
147149 ObjectRangesConst ranges) const {
148- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
149- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
150-
151- for (unsigned int lowmod = globalThreadIdx[0 ]; lowmod < modules.nLowerModules (); lowmod += gridThreadExtent[0 ]) {
150+ for (unsigned int lowmod : cms::alpakatools::uniform_elements_z (acc, modules.nLowerModules ())) {
152151 unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets ()[lowmod];
153152 int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices ()[lowmod];
154153
155- for (unsigned int ix1 = globalThreadIdx[ 1 ]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[ 1 ] ) {
154+ for (unsigned int ix1 : cms::alpakatools::uniform_elements_y (acc, nQuintuplets_lowmod) ) {
156155 unsigned int ix = quintupletModuleIndices_lowmod + ix1;
157156 float eta1 = __H2F (quintuplets.eta ()[ix]);
158157 float phi1 = __H2F (quintuplets.phi ()[ix]);
159158 float score_rphisum1 = __H2F (quintuplets.score_rphisum ()[ix]);
160159
161- for (unsigned int jx1 = globalThreadIdx[ 2 ] + ix1 + 1 ; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[ 2 ] ) {
160+ for (unsigned int jx1 : cms::alpakatools::uniform_elements_x (acc, ix1 + 1 , nQuintuplets_lowmod) ) {
162161 unsigned int jx = quintupletModuleIndices_lowmod + jx1;
163162
164163 float eta2 = __H2F (quintuplets.eta ()[jx]);
@@ -194,20 +193,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
194193 Quintuplets quintuplets,
195194 QuintupletsOccupancyConst quintupletsOccupancy,
196195 ObjectRangesConst ranges) const {
197- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
198- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
199-
200- for (unsigned int lowmodIdx1 = globalThreadIdx[1 ]; lowmodIdx1 < ranges.nEligibleT5Modules ();
201- lowmodIdx1 += gridThreadExtent[1 ]) {
196+ for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y (acc, ranges.nEligibleT5Modules ())) {
202197 uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules ()[lowmodIdx1];
203198 unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets ()[lowmod1];
204199 if (nQuintuplets_lowmod1 == 0 )
205200 continue ;
206201
207202 unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices ()[lowmod1];
208203
209- for (unsigned int lowmodIdx2 = globalThreadIdx[ 2 ] + lowmodIdx1; lowmodIdx2 < ranges. nEligibleT5Modules ();
210- lowmodIdx2 += gridThreadExtent[ 2 ] ) {
204+ for (unsigned int lowmodIdx2 :
205+ cms::alpakatools::uniform_elements_x (acc, lowmodIdx1, ranges. nEligibleT5Modules ()) ) {
211206 uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules ()[lowmodIdx2];
212207 unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets ()[lowmod2];
213208 if (nQuintuplets_lowmod2 == 0 )
@@ -274,11 +269,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
274269 struct RemoveDupPixelTripletsFromMap {
275270 template <typename TAcc>
276271 ALPAKA_FN_ACC void operator ()(TAcc const & acc, PixelTriplets pixelTriplets) const {
277- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
278- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
279-
280- for (unsigned int ix = globalThreadIdx[1 ]; ix < pixelTriplets.nPixelTriplets (); ix += gridThreadExtent[1 ]) {
281- for (unsigned int jx = globalThreadIdx[2 ]; jx < pixelTriplets.nPixelTriplets (); jx += gridThreadExtent[2 ]) {
272+ for (unsigned int ix : cms::alpakatools::uniform_elements_y (acc, pixelTriplets.nPixelTriplets ())) {
273+ for (unsigned int jx : cms::alpakatools::uniform_elements_x (acc, pixelTriplets.nPixelTriplets ())) {
282274 if (ix == jx)
283275 continue ;
284276
@@ -308,13 +300,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
308300 struct RemoveDupPixelQuintupletsFromMap {
309301 template <typename TAcc>
310302 ALPAKA_FN_ACC void operator ()(TAcc const & acc, PixelQuintuplets pixelQuintuplets) const {
311- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
312- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
313-
314303 unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets ();
315- for (unsigned int ix = globalThreadIdx[ 1 ]; ix < nPixelQuintuplets; ix += gridThreadExtent[ 1 ] ) {
304+ for (unsigned int ix : cms::alpakatools::uniform_elements_y (acc, nPixelQuintuplets) ) {
316305 float score1 = __H2F (pixelQuintuplets.score ()[ix]);
317- for (unsigned int jx = globalThreadIdx[ 2 ]; jx < nPixelQuintuplets; jx += gridThreadExtent[ 2 ] ) {
306+ for (unsigned int jx : cms::alpakatools::uniform_elements_x (acc, nPixelQuintuplets) ) {
318307 if (ix == jx)
319308 continue ;
320309
@@ -339,16 +328,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
339328 SegmentsOccupancyConst segmentsOccupancy,
340329 SegmentsPixel segmentsPixel,
341330 bool secondpass) const {
342- auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
343- auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
344-
345331 int pixelModuleIndex = modules.nLowerModules ();
346332 unsigned int nPixelSegments = segmentsOccupancy.nSegments ()[pixelModuleIndex];
347333
348334 if (nPixelSegments > n_max_pixel_segments_per_module)
349335 nPixelSegments = n_max_pixel_segments_per_module;
350336
351- for (unsigned int ix = globalThreadIdx[ 1 ]; ix < nPixelSegments; ix += gridThreadExtent[ 1 ] ) {
337+ for (unsigned int ix : cms::alpakatools::uniform_elements_y (acc, nPixelSegments) ) {
352338 if (secondpass && (!segmentsPixel.isQuad ()[ix] || (segmentsPixel.isDup ()[ix] & 1 )))
353339 continue ;
354340
@@ -360,7 +346,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
360346 float eta_pix1 = segmentsPixel.eta ()[ix];
361347 float phi_pix1 = segmentsPixel.phi ()[ix];
362348
363- for (unsigned int jx = ix + 1 + globalThreadIdx[ 2 ]; jx < nPixelSegments; jx += gridThreadExtent[ 2 ] ) {
349+ for (unsigned int jx : cms::alpakatools::uniform_elements_x (acc, ix + 1 , nPixelSegments) ) {
364350 float eta_pix2 = segmentsPixel.eta ()[jx];
365351 float phi_pix2 = segmentsPixel.phi ()[jx];
366352
0 commit comments