Skip to content

Commit 1d7a156

Browse files
authored
Merge pull request #48079 from SegmentLinking/T3_profiling_study
optimize LST T3 kernel block size and rearrange T3 kernel functions
2 parents 49783f4 + ed66493 commit 1d7a156

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -402,7 +402,7 @@ void LSTEvent::createTriplets() {
402 402
auto index_gpu_buf = cms::alpakatools::make_device_buffer<uint16_t[]>(queue_, nLowerModules_);
403 403
alpaka::memcpy(queue_, index_gpu_buf, index_buf_h, nonZeroModules);
404 404

405-
auto const createTriplets_workDiv = cms::alpakatools::make_workdiv<Acc3D>({max_blocks, 1, 1}, {1, 16, 16});
405+
auto const createTriplets_workDiv = cms::alpakatools::make_workdiv<Acc3D>({nonZeroModules, 1, 1}, {1, 16, 16});
406 406

407 407
alpaka::exec<Acc3D>(queue_,
408 408
createTriplets_workDiv,

RecoTracker/LSTCore/src/alpaka/Triplet.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -648,10 +648,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
648 648
float& circleCenterX,
649 649
float& circleCenterY,
650 650
const float ptCut) {
651-
//this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here!
652-
if (segments.mdIndices()[innerSegmentIndex][1] != segments.mdIndices()[outerSegmentIndex][0])
653-
return false;
654-
655 651
unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0];
656 652
unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0];
657 653
unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1];
@@ -741,6 +737,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
741 737
for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) {
742 738
unsigned int outerSegmentIndex = ranges.segmentRanges()[middleLowerModuleIndex][0] + outerSegmentArrayIndex;
743 739

740+
//this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here!
741+
if (segments.mdIndices()[innerSegmentIndex][1] != segments.mdIndices()[outerSegmentIndex][0])
742+
continue;
743+
744 744
uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex];
745 745

746 746
float zOut, rtOut, betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY;

0 commit comments

Comments
 (0)