Skip to content

Commit 78cc7ca

Browse files
authored
Merge pull request #48698 from SegmentLinking/full_dyn_t5
Dynamic Memory Caps for LS, T3, T5 Objects in LST
2 parents 89828e6 + 1563fca commit 78cc7ca

File tree

7 files changed: +364 additions, -228 deletions

RecoTracker/LSTCore/interface/MiniDoubletsSoA.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ namespace lst {
3939
SOA_COLUMN(float, outerHighEdgeX),
4040
SOA_COLUMN(float, outerHighEdgeY),
4141
SOA_COLUMN(float, outerLowEdgeX),
42-
SOA_COLUMN(float, outerLowEdgeY))
42+
SOA_COLUMN(float, outerLowEdgeY),
43+
SOA_COLUMN(unsigned int, connectedMax))
4344

4445
GENERATE_SOA_LAYOUT(MiniDoubletsOccupancySoALayout,
4546
SOA_COLUMN(unsigned int, nMDs),

RecoTracker/LSTCore/interface/SegmentsSoA.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ namespace lst {
1919
SOA_COLUMN(uint16_t, outerLowerModuleIndices),
2020
SOA_COLUMN(Params_LS::ArrayUxLayers, mdIndices),
2121
SOA_COLUMN(unsigned int, innerMiniDoubletAnchorHitIndices),
22-
SOA_COLUMN(unsigned int, outerMiniDoubletAnchorHitIndices))
22+
SOA_COLUMN(unsigned int, outerMiniDoubletAnchorHitIndices),
23+
SOA_COLUMN(unsigned int, connectedMax))
2324

2425
GENERATE_SOA_LAYOUT(SegmentsOccupancySoALayout,
2526
SOA_COLUMN(unsigned int, nSegments), //number of segments per inner lower module

RecoTracker/LSTCore/interface/TripletsSoA.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@ namespace lst {
1313
SOA_COLUMN(Params_T3::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer
1414
SOA_COLUMN(Params_T3::ArrayU8xLayers, logicalLayers), // layer ID
1515
SOA_COLUMN(Params_T3::ArrayUxHits, hitIndices), // hit indices
16-
SOA_COLUMN(FPX, betaIn), // beta/chord angle of the inner segment
17-
SOA_COLUMN(float, centerX), // lower/anchor-hit based circle center x
18-
SOA_COLUMN(float, centerY), // lower/anchor-hit based circle center y
19-
SOA_COLUMN(float, radius), // lower/anchor-hit based circle radius
20-
SOA_COLUMN(float, fakeScore), // DNN confidence score for fake t3
21-
SOA_COLUMN(float, promptScore), // DNN confidence score for real (prompt) t3
22-
SOA_COLUMN(float, displacedScore), // DNN confidence score for real (displaced) t3
16+
SOA_COLUMN(FPX, betaIn), // beta/chord angle of the inner segment
17+
SOA_COLUMN(float, centerX), // lower/anchor-hit based circle center x
18+
SOA_COLUMN(float, centerY), // lower/anchor-hit based circle center y
19+
SOA_COLUMN(float, radius), // lower/anchor-hit based circle radius
20+
SOA_COLUMN(float, fakeScore), // DNN confidence score for fake t3
21+
SOA_COLUMN(float, promptScore), // DNN confidence score for real (prompt) t3
22+
SOA_COLUMN(float, displacedScore), // DNN confidence score for real (displaced) t3
23+
SOA_COLUMN(unsigned int, connectedMax), // number of outer-triplets that pass the MD-equality cut
2324
#ifdef CUT_VALUE_DEBUG
2425
SOA_COLUMN(float, zOut),
2526
SOA_COLUMN(float, rtOut),

RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ void LSTEvent::createMiniDoublets() {
209209
alpaka::memset(queue_, totOccupancyMDs_view, 0u);
210210
}
211211

212+
auto mdView = miniDoubletsDC_->view<MiniDoubletsSoA>();
213+
auto connView = cms::alpakatools::make_device_view(queue_, mdView.connectedMax(), mdView.metadata().size());
214+
alpaka::memset(queue_, connView, 0u);
215+
212216
unsigned int mdSize = pixelSize_ * 2;
213217
auto src_view_mdSize = cms::alpakatools::make_host_view(mdSize);
214218

@@ -255,6 +259,17 @@ void LSTEvent::createMiniDoublets() {
255259

256260
void LSTEvent::createSegmentsWithModuleMap() {
257261
if (!segmentsDC_) {
262+
auto const countMDConn_wd = cms::alpakatools::make_workdiv<Acc3D>({nLowerModules_, 1, 1}, {1, 8, 32});
263+
264+
alpaka::exec<Acc3D>(queue_,
265+
countMDConn_wd,
266+
CountMiniDoubletConnections{},
267+
modules_.const_view<ModulesSoA>(),
268+
miniDoubletsDC_->view<MiniDoubletsSoA>(),
269+
miniDoubletsDC_->const_view<MiniDoubletsOccupancySoA>(),
270+
rangesDC_->const_view(),
271+
ptCut_);
272+
258273
auto const createSegmentArrayRanges_workDiv = cms::alpakatools::make_workdiv<Acc1D>(1, 1024);
259274

260275
alpaka::exec<Acc1D>(queue_,
@@ -263,8 +278,7 @@ void LSTEvent::createSegmentsWithModuleMap() {
263278
modules_.const_view<ModulesSoA>(),
264279
rangesDC_->view(),
265280
miniDoubletsDC_->const_view<MiniDoubletsSoA>(),
266-
miniDoubletsDC_->const_view<MiniDoubletsOccupancySoA>(),
267-
ptCut_);
281+
miniDoubletsDC_->const_view<MiniDoubletsOccupancySoA>());
268282

269283
auto rangesOccupancy = rangesDC_->view();
270284
auto nTotalSegments_view_h = cms::alpakatools::make_host_view(nTotalSegments_);
@@ -278,12 +292,15 @@ void LSTEvent::createSegmentsWithModuleMap() {
278292
segmentsDC_.emplace(segments_sizes, queue_);
279293

280294
auto segmentsOccupancy = segmentsDC_->view<SegmentsOccupancySoA>();
295+
auto segments = segmentsDC_->view<SegmentsSoA>();
281296
auto nSegments_view =
282297
cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size());
283298
auto totOccupancySegments_view = cms::alpakatools::make_device_view(
284299
queue_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size());
285300
alpaka::memset(queue_, nSegments_view, 0u);
286301
alpaka::memset(queue_, totOccupancySegments_view, 0u);
302+
auto conn_view = cms::alpakatools::make_device_view(queue_, segments.connectedMax(), segments.metadata().size());
303+
alpaka::memset(queue_, conn_view, 0u);
287304

288305
auto src_view_size = cms::alpakatools::make_host_view(pixelSize_);
289306

@@ -297,7 +314,7 @@ void LSTEvent::createSegmentsWithModuleMap() {
297314
alpaka::wait(queue_);
298315
}
299316

300-
auto const createSegments_workDiv = cms::alpakatools::make_workdiv<Acc3D>({nLowerModules_, 1, 1}, {1, 1, 64});
317+
auto const createSegments_workDiv = cms::alpakatools::make_workdiv<Acc3D>({nLowerModules_, 1, 1}, {1, 8, 32});
301318

302319
alpaka::exec<Acc3D>(queue_,
303320
createSegments_workDiv,
@@ -326,6 +343,16 @@ void LSTEvent::createSegmentsWithModuleMap() {
326343

327344
void LSTEvent::createTriplets() {
328345
if (!tripletsDC_) {
346+
auto const countSegConn_wd = cms::alpakatools::make_workdiv<Acc3D>({nLowerModules_, 1, 1}, {1, 16, 16});
347+
348+
alpaka::exec<Acc3D>(queue_,
349+
countSegConn_wd,
350+
CountSegmentConnections{},
351+
modules_.const_view<ModulesSoA>(),
352+
segmentsDC_->view<SegmentsSoA>(),
353+
segmentsDC_->const_view<SegmentsOccupancySoA>(),
354+
rangesDC_->const_view());
355+
329356
auto const createTripletArrayRanges_workDiv = cms::alpakatools::make_workdiv<Acc1D>(1, 1024);
330357

331358
alpaka::exec<Acc1D>(queue_,
@@ -334,8 +361,7 @@ void LSTEvent::createTriplets() {
334361
modules_.const_view<ModulesSoA>(),
335362
rangesDC_->view(),
336363
segmentsDC_->const_view<SegmentsSoA>(),
337-
segmentsDC_->const_view<SegmentsOccupancySoA>(),
338-
ptCut_);
364+
segmentsDC_->const_view<SegmentsOccupancySoA>());
339365

340366
// TODO: Why are we pulling this back down only to put it back on the device in a new struct?
341367
auto rangesOccupancy = rangesDC_->view();
@@ -362,6 +388,9 @@ void LSTEvent::createTriplets() {
362388
alpaka::memset(queue_, partOfT5_view, 0u);
363389
auto partOfPT3_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT3(), triplets.metadata().size());
364390
alpaka::memset(queue_, partOfPT3_view, 0u);
391+
auto connectedMax_view =
392+
cms::alpakatools::make_device_view(queue_, triplets.connectedMax(), triplets.metadata().size());
393+
alpaka::memset(queue_, connectedMax_view, 0u);
365394
}
366395

367396
uint16_t nonZeroModules = 0;
@@ -715,6 +744,17 @@ void LSTEvent::createPixelTriplets() {
715744
}
716745

717746
void LSTEvent::createQuintuplets() {
747+
auto const countConn_workDiv = cms::alpakatools::make_workdiv<Acc3D>({nLowerModules_, 1, 1}, {1, 8, 32});
748+
749+
alpaka::exec<Acc3D>(queue_,
750+
countConn_workDiv,
751+
CountTripletConnections{},
752+
modules_.const_view<ModulesSoA>(),
753+
segmentsDC_->const_view<SegmentsSoA>(),
754+
tripletsDC_->view<TripletsSoA>(),
755+
tripletsDC_->const_view<TripletsOccupancySoA>(),
756+
rangesDC_->const_view());
757+
718758
auto const createEligibleModulesListForQuintuplets_workDiv = cms::alpakatools::make_workdiv<Acc1D>(1, 1024);
719759

720760
alpaka::exec<Acc1D>(queue_,
@@ -723,8 +763,7 @@ void LSTEvent::createQuintuplets() {
723763
modules_.const_view<ModulesSoA>(),
724764
tripletsDC_->const_view<TripletsOccupancySoA>(),
725765
rangesDC_->view(),
726-
tripletsDC_->view<TripletsSoA>(),
727-
ptCut_);
766+
tripletsDC_->view<TripletsSoA>());
728767

729768
auto nEligibleT5Modules_buf = cms::alpakatools::make_host_buffer<uint16_t>(queue_);
730769
auto nTotalQuintuplets_buf = cms::alpakatools::make_host_buffer<unsigned int>(queue_);

RecoTracker/LSTCore/src/alpaka/Quintuplet.h

Lines changed: 77 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,7 +1697,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
16971697
ObjectRangesConst ranges,
16981698
uint16_t nEligibleT5Modules,
16991699
const float ptCut) const {
1700-
for (int iter : cms::alpakatools::uniform_elements_z(acc, nEligibleT5Modules)) {
1700+
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[1] == 1) &&
1701+
(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[2] == 1));
1702+
for (int iter : cms::alpakatools::uniform_groups_z(acc, nEligibleT5Modules)) {
17011703
uint16_t lowerModule1 = ranges.indicesOfEligibleT5Modules()[iter];
17021704
short layer2_adjustment;
17031705
int layer = modules.layers()[lowerModule1];
@@ -1819,17 +1821,70 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
18191821
}
18201822
};
18211823

1824+
// Tells whether quintuplet building may start from the given lower module.
// Only barrel modules with layer < 3 and endcap modules with layer <= 1 qualify;
// quintuplets starting anywhere else are not built.
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isValidQuintRegion(ModulesConst modules, uint16_t lowerModule) {
  const short layerId = modules.layers()[lowerModule];
  const short subdetId = modules.subdets()[lowerModule];

  const bool inBarrelRegion = (subdetId == Barrel) && (layerId < 3);
  const bool inEndcapRegion = (subdetId == Endcap) && (layerId <= 1);
  return inBarrelRegion || inEndcapRegion;
}
1830+
1831+
struct CountTripletConnections {
1832+
ALPAKA_FN_ACC void operator()(Acc3D const& acc,
1833+
ModulesConst modules,
1834+
SegmentsConst segments,
1835+
Triplets triplets,
1836+
TripletsOccupancyConst tripletsOcc,
1837+
ObjectRangesConst ranges) const {
1838+
// The atomicAdd below with hierarchy::Threads{} requires one block in x, y dimensions.
1839+
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[1] == 1) &&
1840+
(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[2] == 1));
1841+
const auto& mdIndices = segments.mdIndices();
1842+
const auto& segIdx = triplets.segmentIndices();
1843+
const auto& lmIdx = triplets.lowerModuleIndices();
1844+
const auto& tripIdx = ranges.tripletModuleIndices();
1845+
1846+
for (uint16_t lowerModule1 : cms::alpakatools::uniform_groups_z(acc, modules.nLowerModules())) {
1847+
if (!isValidQuintRegion(modules, lowerModule1))
1848+
continue;
1849+
1850+
const unsigned int nInnerTriplets = tripletsOcc.nTriplets()[lowerModule1];
1851+
if (nInnerTriplets == 0)
1852+
continue;
1853+
1854+
for (unsigned int innerTripletArrayIndex : cms::alpakatools::uniform_elements_y(acc, nInnerTriplets)) {
1855+
const unsigned int innerTripletIndex = tripIdx[lowerModule1] + innerTripletArrayIndex;
1856+
1857+
const uint16_t lowerModule3 = lmIdx[innerTripletIndex][2];
1858+
const unsigned int nOuterTriplets = tripletsOcc.nTriplets()[lowerModule3];
1859+
if (nOuterTriplets == 0)
1860+
continue;
1861+
1862+
const unsigned int secondSegIdx = segIdx[innerTripletIndex][1];
1863+
const unsigned int secondMDOuter = mdIndices[secondSegIdx][1];
1864+
1865+
for (unsigned int outerTripletArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterTriplets)) {
1866+
const unsigned int outerTripletIndex = tripIdx[lowerModule3] + outerTripletArrayIndex;
1867+
const unsigned int thirdSegIdx = segIdx[outerTripletIndex][0];
1868+
const unsigned int thirdMDInner = mdIndices[thirdSegIdx][0];
1869+
1870+
if (secondMDOuter == thirdMDInner) {
1871+
alpaka::atomicAdd(acc, &triplets.connectedMax()[innerTripletIndex], 1u, alpaka::hierarchy::Threads{});
1872+
}
1873+
}
1874+
}
1875+
}
1876+
}
1877+
};
1878+
18221879
struct CreateEligibleModulesListForQuintuplets {
18231880
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
18241881
ModulesConst modules,
1825-
TripletsOccupancyConst tripletsOccupancy,
1882+
TripletsOccupancyConst tripletsOcc,
18261883
ObjectRanges ranges,
1827-
Triplets triplets,
1828-
const float ptCut) const {
1829-
// implementation is 1D with a single block
1884+
TripletsConst triplets) const {
1885+
// Single-block kernel
18301886
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0] == 1));
18311887

1832-
// Initialize variables in shared memory and set to 0
18331888
int& nEligibleT5Modulesx = alpaka::declareSharedVar<int, __COUNTER__>(acc);
18341889
int& nTotalQuintupletsx = alpaka::declareSharedVar<int, __COUNTER__>(acc);
18351890
if (cms::alpakatools::once_per_block(acc)) {
@@ -1838,75 +1893,31 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
18381893
}
18391894
alpaka::syncBlockThreads(acc);
18401895

1841-
// Occupancy matrix for 0.8 GeV pT Cut
1842-
constexpr int p08_occupancy_matrix[4][4] = {
1843-
{336, 414, 231, 146}, // category 0
1844-
{0, 0, 0, 0}, // category 1
1845-
{0, 0, 0, 0}, // category 2
1846-
{0, 0, 191, 106} // category 3
1847-
};
1848-
1849-
// Occupancy matrix for 0.6 GeV pT Cut, 99.99%
1850-
constexpr int p06_occupancy_matrix[4][4] = {
1851-
{325, 237, 217, 176}, // category 0
1852-
{0, 0, 0, 0}, // category 1
1853-
{0, 0, 0, 0}, // category 2
1854-
{0, 0, 129, 180} // category 3
1855-
};
1856-
1857-
// Select the appropriate occupancy matrix based on ptCut
1858-
const auto& occupancy_matrix = (ptCut < 0.8f) ? p06_occupancy_matrix : p08_occupancy_matrix;
1859-
1860-
for (int i : cms::alpakatools::uniform_elements(acc, modules.nLowerModules())) {
1861-
// Condition for a quintuple to exist for a module
1862-
// TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap
1863-
short module_rings = modules.rings()[i];
1864-
short module_layers = modules.layers()[i];
1865-
short module_subdets = modules.subdets()[i];
1866-
float module_eta = alpaka::math::abs(acc, modules.eta()[i]);
1867-
1868-
if (tripletsOccupancy.nTriplets()[i] == 0)
1896+
for (uint16_t lowerModule : cms::alpakatools::uniform_elements(acc, modules.nLowerModules())) {
1897+
if (!isValidQuintRegion(modules, lowerModule))
18691898
continue;
1870-
if (module_subdets == Barrel && module_layers >= 3)
1871-
continue;
1872-
if (module_subdets == Endcap && module_layers > 1)
1899+
1900+
unsigned int nInnerTriplets = tripletsOcc.nTriplets()[lowerModule];
1901+
if (nInnerTriplets == 0)
18731902
continue;
18741903

1904+
// Sum the real connectivity for triplets in this module
18751905
int dynamic_count = 0;
1876-
1877-
// How many triplets are in module i?
1878-
int nTriplets_i = tripletsOccupancy.nTriplets()[i];
1879-
int firstTripletIdx = ranges.tripletModuleIndices()[i];
1880-
1881-
// Loop over all triplets that live in module i
1882-
for (int t = 0; t < nTriplets_i; t++) {
1883-
int tripletIndex = firstTripletIdx + t;
1884-
uint16_t outerModule = triplets.lowerModuleIndices()[tripletIndex][2];
1885-
dynamic_count += tripletsOccupancy.nTriplets()[outerModule];
1906+
const unsigned int firstTripletIdx = ranges.tripletModuleIndices()[lowerModule];
1907+
for (unsigned int t = 0; t < nInnerTriplets; ++t) {
1908+
unsigned int tripletIndex = firstTripletIdx + t;
1909+
dynamic_count += triplets.connectedMax()[tripletIndex];
18861910
}
18871911

1888-
int category_number = getCategoryNumber(module_layers, module_subdets, module_rings);
1889-
int eta_number = getEtaBin(module_eta);
1890-
1891-
#ifdef WARNINGS
1892-
if (category_number == -1 || eta_number == -1) {
1893-
printf("Unhandled case in createEligibleModulesListForQuintupletsGPU! Module index = %i\n", i);
1894-
}
1895-
#endif
1896-
1897-
// Get matrix-based cap (use dynamic_count as fallback)
1898-
int matrix_cap =
1899-
(category_number != -1 && eta_number != -1) ? occupancy_matrix[category_number][eta_number] : 0;
1900-
1901-
// Cap occupancy at minimum of dynamic count and matrix value
1902-
int occupancy = alpaka::math::min(acc, dynamic_count, matrix_cap);
1912+
if (dynamic_count == 0)
1913+
continue;
19031914

19041915
int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{});
1905-
int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, occupancy, alpaka::hierarchy::Threads{});
1916+
int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, dynamic_count, alpaka::hierarchy::Threads{});
19061917

1907-
ranges.quintupletModuleIndices()[i] = nTotQ;
1908-
ranges.indicesOfEligibleT5Modules()[nEligibleT5Modules] = i;
1909-
ranges.quintupletModuleOccupancy()[i] = occupancy;
1918+
ranges.quintupletModuleIndices()[lowerModule] = nTotQ;
1919+
ranges.indicesOfEligibleT5Modules()[nEligibleT5Modules] = lowerModule;
1920+
ranges.quintupletModuleOccupancy()[lowerModule] = dynamic_count;
19101921
}
19111922

19121923
// Wait for all threads to finish before reporting final values

0 commit comments

Comments
 (0)