Skip to content

Commit 878e0b4

Browse files
authored
Merge pull request #47084 from SegmentLinking/ariostas/integration_pr_followups
LST followups: better work divisions, concrete kernel dimension, some cleanup and fixes
2 parents dd230c0 + baa91b3 commit 878e0b4

File tree

16 files changed

+255
-442
lines changed

16 files changed

+255
-442
lines changed

HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,28 @@
77

88
#include <alpaka/alpaka.hpp>
99

10-
// reimplementation of std algorithms able to compile with Alpaka,
11-
// mostly by declaring them constexpr (until C++20, which will make it
12-
// constexpr by default. TODO: drop when moving to C++20)
10+
// Reimplementations of std algorithms that can be used in device code
1311

1412
namespace alpaka_std {
1513

14+
template <typename RandomIt, typename T, typename Compare = std::less<T>>
15+
ALPAKA_FN_HOST_ACC constexpr RandomIt lower_bound(RandomIt first, RandomIt last, const T &value, Compare comp = {}) {
16+
auto count = last - first;
17+
18+
while (count > 0) {
19+
auto it = first;
20+
auto step = count / 2;
21+
it += step;
22+
if (comp(*it, value)) {
23+
first = ++it;
24+
count -= step + 1;
25+
} else {
26+
count = step;
27+
}
28+
}
29+
return first;
30+
}
31+
1632
template <typename RandomIt, typename T, typename Compare = std::less<T>>
1733
ALPAKA_FN_HOST_ACC constexpr RandomIt upper_bound(RandomIt first, RandomIt last, const T &value, Compare comp = {}) {
1834
auto count = last - first;
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#ifndef RecoTracker_LSTCore_interface_Circle_h
2+
#define RecoTracker_LSTCore_interface_Circle_h
3+
4+
#include <tuple>
5+
6+
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
7+
8+
namespace lst {
9+
10+
template <typename TAcc>
11+
ALPAKA_FN_ACC ALPAKA_FN_INLINE std::tuple<float, float, float> computeRadiusFromThreeAnchorHits(
12+
TAcc const& acc, float x1, float y1, float x2, float y2, float x3, float y3) {
13+
float radius = 0.f;
14+
15+
//first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3)
16+
17+
float denomInv = 1.0f / ((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3));
18+
19+
float xy1sqr = x1 * x1 + y1 * y1;
20+
21+
float xy2sqr = x2 * x2 + y2 * y2;
22+
23+
float xy3sqr = x3 * x3 + y3 * y3;
24+
25+
float regressionCenterX = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv;
26+
27+
float regressionCenterY = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv;
28+
29+
float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv;
30+
31+
if (((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) ||
32+
(regressionCenterX * regressionCenterX + regressionCenterY * regressionCenterY - c < 0)) {
33+
#ifdef WARNINGS
34+
printf("three collinear points or FATAL! r^2 < 0!\n");
35+
#endif
36+
radius = -1.f;
37+
} else
38+
radius =
39+
alpaka::math::sqrt(acc, regressionCenterX * regressionCenterX + regressionCenterY * regressionCenterY - c);
40+
41+
return std::make_tuple(radius, regressionCenterX, regressionCenterY);
42+
}
43+
44+
} //namespace lst
45+
46+
#endif

RecoTracker/LSTCore/interface/HitsSoA.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ namespace lst {
2727
SOA_COLUMN(ArrayIx2, hitRanges),
2828
SOA_COLUMN(int, hitRangesLower),
2929
SOA_COLUMN(int, hitRangesUpper),
30-
SOA_COLUMN(int8_t, hitRangesnLower),
31-
SOA_COLUMN(int8_t, hitRangesnUpper))
30+
SOA_COLUMN(int16_t, hitRangesnLower),
31+
SOA_COLUMN(int16_t, hitRangesnUpper))
3232

3333
using HitsSoA = HitsSoALayout<>;
3434
using HitsRangesSoA = HitsRangesSoALayout<>;

RecoTracker/LSTCore/interface/alpaka/Common.h

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
1111

1212
using namespace ::lst;
1313

14-
Vec3D constexpr elementsPerThread(Vec3D::all(static_cast<Idx>(1)));
15-
16-
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string warning) {
17-
edm::LogWarning("LST") << warning;
18-
return;
19-
}
20-
21-
// Adjust grid and block sizes based on backend configuration
22-
template <typename Vec, typename TAcc = Acc<typename Vec::Dim>>
23-
ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv<typename Vec::Dim> createWorkDiv(const Vec& blocksPerGrid,
24-
const Vec& threadsPerBlock,
25-
const Vec& elementsPerThreadArg) {
26-
Vec adjustedBlocks = blocksPerGrid;
27-
Vec adjustedThreads = threadsPerBlock;
28-
29-
// special overrides for CPU/host cases
30-
if constexpr (std::is_same_v<Platform, alpaka::PlatformCpu>) {
31-
adjustedBlocks = Vec::all(static_cast<Idx>(1));
32-
33-
if constexpr (alpaka::accMatchesTags<TAcc, alpaka::TagCpuSerial>) {
34-
// Serial execution, set threads to 1 as well
35-
adjustedThreads = Vec::all(static_cast<Idx>(1)); // probably redundant
36-
}
37-
}
38-
39-
return WorkDiv<typename Vec::Dim>(adjustedBlocks, adjustedThreads, elementsPerThreadArg);
40-
}
14+
// Host-only helper: streams `warning` into an edm::LogWarning constructed with "LST".
// The message is taken as a std::string_view, so no copy of the text is made here.
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string_view warning) {
  edm::LogWarning("LST") << warning;
}
4115

4216
// The constants below are usually used in functions like alpaka::math::min(),
4317
// expecting a reference (T const&) in the arguments. Hence,

RecoTracker/LSTCore/src/alpaka/Hit.h

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h
22
#define RecoTracker_LSTCore_src_alpaka_Hit_h
33

4+
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
5+
#include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h"
6+
47
#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
58
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
69
#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h"
@@ -57,15 +60,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
5760
}
5861

5962
struct ModuleRangesKernel {
60-
template <typename TAcc>
61-
ALPAKA_FN_ACC void operator()(TAcc const& acc,
63+
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
6264
ModulesConst modules,
6365
HitsRanges hitsRanges,
6466
int nLowerModules) const {
65-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
66-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
67-
68-
for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) {
67+
for (int lowerIndex : cms::alpakatools::uniform_elements(acc, nLowerModules)) {
6968
uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex];
7069
if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) {
7170
hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0];
@@ -80,8 +79,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
8079
};
8180

8281
struct HitLoopKernel {
83-
template <typename TAcc>
84-
ALPAKA_FN_ACC void operator()(TAcc const& acc,
82+
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
8583
uint16_t Endcap, // Integer corresponding to endcap in module subdets
8684
uint16_t TwoS, // Integer corresponding to TwoS in moduleType
8785
unsigned int nModules, // Number of modules
@@ -94,9 +92,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
9492
{
9593
auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map
9694
auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map
97-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
98-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
99-
for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) {
95+
for (unsigned int ihit : cms::alpakatools::uniform_elements(acc, nHits)) {
10096
float ihit_x = hits.xs()[ihit];
10197
float ihit_y = hits.ys()[ihit];
10298
float ihit_z = hits.zs()[ihit];
@@ -108,7 +104,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
108104
((ihit_z > 0) - (ihit_z < 0)) *
109105
alpaka::math::acosh(
110106
acc, alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hits.rts()[ihit]);
111-
auto found_pointer = std::lower_bound(modules.mapdetId(), modules.mapdetId() + nModules, iDetId);
107+
auto found_pointer = alpaka_std::lower_bound(modules.mapdetId(), modules.mapdetId() + nModules, iDetId);
112108
int found_index = std::distance(modules.mapdetId(), found_pointer);
113109
if (found_pointer == modules.mapdetId() + nModules)
114110
found_index = -1;
@@ -117,7 +113,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
117113
hits.moduleIndices()[ihit] = lastModuleIndex;
118114

119115
if (modules.subdets()[lastModuleIndex] == Endcap && modules.moduleType()[lastModuleIndex] == TwoS) {
120-
found_pointer = std::lower_bound(geoMapDetId, geoMapDetId + nEndCapMap, iDetId);
116+
found_pointer = alpaka_std::lower_bound(geoMapDetId, geoMapDetId + nEndCapMap, iDetId);
121117
found_index = std::distance(geoMapDetId, found_pointer);
122118
if (found_pointer == geoMapDetId + nEndCapMap)
123119
found_index = -1;

RecoTracker/LSTCore/src/alpaka/Kernels.h

Lines changed: 19 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
22
#define RecoTracker_LSTCore_src_alpaka_Kernels_h
33

4+
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
5+
46
#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
57
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
68
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
@@ -139,26 +141,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
139141
}
140142

141143
struct RemoveDupQuintupletsAfterBuild {
142-
template <typename TAcc>
143-
ALPAKA_FN_ACC void operator()(TAcc const& acc,
144+
ALPAKA_FN_ACC void operator()(Acc3D const& acc,
144145
ModulesConst modules,
145146
Quintuplets quintuplets,
146147
QuintupletsOccupancyConst quintupletsOccupancy,
147148
ObjectRangesConst ranges) const {
148-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
149-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
150-
151-
for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) {
149+
for (unsigned int lowmod : cms::alpakatools::uniform_elements_z(acc, modules.nLowerModules())) {
152150
unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod];
153151
int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod];
154152

155-
for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
153+
for (unsigned int ix1 : cms::alpakatools::uniform_elements_y(acc, nQuintuplets_lowmod)) {
156154
unsigned int ix = quintupletModuleIndices_lowmod + ix1;
157155
float eta1 = __H2F(quintuplets.eta()[ix]);
158156
float phi1 = __H2F(quintuplets.phi()[ix]);
159157
float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
160158

161-
for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
159+
for (unsigned int jx1 : cms::alpakatools::uniform_elements_x(acc, ix1 + 1, nQuintuplets_lowmod)) {
162160
unsigned int jx = quintupletModuleIndices_lowmod + jx1;
163161

164162
float eta2 = __H2F(quintuplets.eta()[jx]);
@@ -189,25 +187,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
189187
};
190188

191189
struct RemoveDupQuintupletsBeforeTC {
192-
template <typename TAcc>
193-
ALPAKA_FN_ACC void operator()(TAcc const& acc,
190+
ALPAKA_FN_ACC void operator()(Acc2D const& acc,
194191
Quintuplets quintuplets,
195192
QuintupletsOccupancyConst quintupletsOccupancy,
196193
ObjectRangesConst ranges) const {
197-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
198-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
199-
200-
for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules();
201-
lowmodIdx1 += gridThreadExtent[1]) {
194+
for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y(acc, ranges.nEligibleT5Modules())) {
202195
uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1];
203196
unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1];
204197
if (nQuintuplets_lowmod1 == 0)
205198
continue;
206199

207200
unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1];
208201

209-
for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules();
210-
lowmodIdx2 += gridThreadExtent[2]) {
202+
for (unsigned int lowmodIdx2 :
203+
cms::alpakatools::uniform_elements_x(acc, lowmodIdx1, ranges.nEligibleT5Modules())) {
211204
uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2];
212205
unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2];
213206
if (nQuintuplets_lowmod2 == 0)
@@ -272,13 +265,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
272265
};
273266

274267
struct RemoveDupPixelTripletsFromMap {
275-
template <typename TAcc>
276-
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const {
277-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
278-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
279-
280-
for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) {
281-
for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) {
268+
ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelTriplets pixelTriplets) const {
269+
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, pixelTriplets.nPixelTriplets())) {
270+
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, pixelTriplets.nPixelTriplets())) {
282271
if (ix == jx)
283272
continue;
284273

@@ -306,15 +295,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
306295
};
307296

308297
struct RemoveDupPixelQuintupletsFromMap {
309-
template <typename TAcc>
310-
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const {
311-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
312-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
313-
298+
ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const {
314299
unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
315-
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
300+
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
316301
float score1 = __H2F(pixelQuintuplets.score()[ix]);
317-
for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
302+
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
318303
if (ix == jx)
319304
continue;
320305

@@ -333,22 +318,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
333318
};
334319

335320
struct CheckHitspLS {
336-
template <typename TAcc>
337-
ALPAKA_FN_ACC void operator()(TAcc const& acc,
321+
ALPAKA_FN_ACC void operator()(Acc2D const& acc,
338322
ModulesConst modules,
339323
SegmentsOccupancyConst segmentsOccupancy,
340324
SegmentsPixel segmentsPixel,
341325
bool secondpass) const {
342-
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
343-
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
344-
345326
int pixelModuleIndex = modules.nLowerModules();
346327
unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex];
347328

348329
if (nPixelSegments > n_max_pixel_segments_per_module)
349330
nPixelSegments = n_max_pixel_segments_per_module;
350331

351-
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) {
332+
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) {
352333
if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1)))
353334
continue;
354335

@@ -360,7 +341,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
360341
float eta_pix1 = segmentsPixel.eta()[ix];
361342
float phi_pix1 = segmentsPixel.phi()[ix];
362343

363-
for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) {
344+
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, ix + 1, nPixelSegments)) {
364345
float eta_pix2 = segmentsPixel.eta()[jx];
365346
float phi_pix2 = segmentsPixel.phi()[jx];
366347

0 commit comments

Comments
 (0)