Skip to content

Commit 7e5703d

Browse files
committed
Use make_workdiv and uniform_elements
1 parent 3950ee4 commit 7e5703d

File tree

11 files changed

+119
-295
lines changed

11 files changed

+119
-295
lines changed

RecoTracker/LSTCore/interface/alpaka/Common.h

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
1010

1111
using namespace ::lst;
1212

13-
Vec3D constexpr elementsPerThread(Vec3D::all(static_cast<Idx>(1)));
14-
15-
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string warning) {
16-
edm::LogWarning("LST") << warning;
17-
return;
18-
}
19-
20-
// Adjust grid and block sizes based on backend configuration
21-
template <typename Vec, typename TAcc = Acc<typename Vec::Dim>>
22-
ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv<typename Vec::Dim> createWorkDiv(const Vec& blocksPerGrid,
23-
const Vec& threadsPerBlock,
24-
const Vec& elementsPerThreadArg) {
25-
Vec adjustedBlocks = blocksPerGrid;
26-
Vec adjustedThreads = threadsPerBlock;
27-
28-
// special overrides for CPU/host cases
29-
if constexpr (std::is_same_v<Platform, alpaka::PlatformCpu>) {
30-
adjustedBlocks = Vec::all(static_cast<Idx>(1));
31-
32-
if constexpr (alpaka::accMatchesTags<TAcc, alpaka::TagCpuSerial>) {
33-
// Serial execution, set threads to 1 as well
34-
adjustedThreads = Vec::all(static_cast<Idx>(1)); // probably redundant
35-
}
36-
}
37-
38-
return WorkDiv<typename Vec::Dim>(adjustedBlocks, adjustedThreads, elementsPerThreadArg);
39-
}
13+
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string_view warning) { edm::LogWarning("LST") << warning; }
4014

4115
// The constants below are usually used in functions like alpaka::math::min(),
4216
// expecting a reference (T const&) in the arguments. Hence,

RecoTracker/LSTCore/src/alpaka/Hit.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h
22
#define RecoTracker_LSTCore_src_alpaka_Hit_h
33

4+
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
5+
46
#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
57
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
68
#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h"
@@ -62,10 +64,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
6264
ModulesConst modules,
6365
HitsRanges hitsRanges,
6466
int nLowerModules) const {
65-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
66-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
67-
68-
for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) {
67+
for (int lowerIndex : cms::alpakatools::uniform_elements(acc, nLowerModules)) {
6968
uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex];
7069
if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) {
7170
hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0];
@@ -94,9 +93,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
9493
{
9594
auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map
9695
auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map
97-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
98-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
99-
for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) {
96+
for (unsigned int ihit : cms::alpakatools::uniform_elements(acc, nHits)) {
10097
float ihit_x = hits.xs()[ihit];
10198
float ihit_y = hits.ys()[ihit];
10299
float ihit_z = hits.zs()[ihit];

RecoTracker/LSTCore/src/alpaka/Kernels.h

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
22
#define RecoTracker_LSTCore_src_alpaka_Kernels_h
33

4+
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
5+
46
#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
57
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
68
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
@@ -145,20 +147,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
145147
Quintuplets quintuplets,
146148
QuintupletsOccupancyConst quintupletsOccupancy,
147149
ObjectRangesConst ranges) const {
148-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
149-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
150-
151-
for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) {
150+
for (unsigned int lowmod : cms::alpakatools::uniform_elements_z(acc, modules.nLowerModules())) {
152151
unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod];
153152
int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod];
154153

155-
for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
154+
for (unsigned int ix1 : cms::alpakatools::uniform_elements_y(acc, nQuintuplets_lowmod)) {
156155
unsigned int ix = quintupletModuleIndices_lowmod + ix1;
157156
float eta1 = __H2F(quintuplets.eta()[ix]);
158157
float phi1 = __H2F(quintuplets.phi()[ix]);
159158
float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
160159

161-
for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
160+
for (unsigned int jx1 : cms::alpakatools::uniform_elements_x(acc, ix1 + 1, nQuintuplets_lowmod)) {
162161
unsigned int jx = quintupletModuleIndices_lowmod + jx1;
163162

164163
float eta2 = __H2F(quintuplets.eta()[jx]);
@@ -194,20 +193,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
194193
Quintuplets quintuplets,
195194
QuintupletsOccupancyConst quintupletsOccupancy,
196195
ObjectRangesConst ranges) const {
197-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
198-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
199-
200-
for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules();
201-
lowmodIdx1 += gridThreadExtent[1]) {
196+
for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y(acc, ranges.nEligibleT5Modules())) {
202197
uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1];
203198
unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1];
204199
if (nQuintuplets_lowmod1 == 0)
205200
continue;
206201

207202
unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1];
208203

209-
for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules();
210-
lowmodIdx2 += gridThreadExtent[2]) {
204+
for (unsigned int lowmodIdx2 :
205+
cms::alpakatools::uniform_elements_x(acc, lowmodIdx1, ranges.nEligibleT5Modules())) {
211206
uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2];
212207
unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2];
213208
if (nQuintuplets_lowmod2 == 0)
@@ -274,11 +269,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
274269
struct RemoveDupPixelTripletsFromMap {
275270
template <typename TAcc>
276271
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const {
277-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
278-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
279-
280-
for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) {
281-
for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) {
272+
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, pixelTriplets.nPixelTriplets())) {
273+
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, pixelTriplets.nPixelTriplets())) {
282274
if (ix == jx)
283275
continue;
284276

@@ -308,13 +300,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
308300
struct RemoveDupPixelQuintupletsFromMap {
309301
template <typename TAcc>
310302
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const {
311-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
312-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
313-
314303
unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
315-
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
304+
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
316305
float score1 = __H2F(pixelQuintuplets.score()[ix]);
317-
for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
306+
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
318307
if (ix == jx)
319308
continue;
320309

@@ -339,16 +328,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
339328
SegmentsOccupancyConst segmentsOccupancy,
340329
SegmentsPixel segmentsPixel,
341330
bool secondpass) const {
342-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
343-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
344-
345331
int pixelModuleIndex = modules.nLowerModules();
346332
unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex];
347333

348334
if (nPixelSegments > n_max_pixel_segments_per_module)
349335
nPixelSegments = n_max_pixel_segments_per_module;
350336

351-
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) {
337+
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) {
352338
if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1)))
353339
continue;
354340

@@ -360,7 +346,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
360346
float eta_pix1 = segmentsPixel.eta()[ix];
361347
float phi_pix1 = segmentsPixel.phi()[ix];
362348

363-
for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) {
349+
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, ix + 1, nPixelSegments)) {
364350
float eta_pix2 = segmentsPixel.eta()[jx];
365351
float phi_pix2 = segmentsPixel.phi()[jx];
366352

0 commit comments

Comments
 (0)