@@ -1697,7 +1697,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
16971697 ObjectRangesConst ranges,
16981698 uint16_t nEligibleT5Modules,
16991699 const float ptCut) const {
1700- for (int iter : cms::alpakatools::uniform_elements_z (acc, nEligibleT5Modules)) {
1700+ ALPAKA_ASSERT_ACC ((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[1 ] == 1 ) &&
1701+ (alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[2 ] == 1 ));
1702+ for (int iter : cms::alpakatools::uniform_groups_z (acc, nEligibleT5Modules)) {
17011703 uint16_t lowerModule1 = ranges.indicesOfEligibleT5Modules ()[iter];
17021704 short layer2_adjustment;
17031705 int layer = modules.layers ()[lowerModule1];
@@ -1819,17 +1821,70 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
18191821 }
18201822 };
18211823
1824+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isValidQuintRegion (ModulesConst modules, uint16_t lowerModule) {
1825+ const short layer = modules.layers ()[lowerModule];
1826+ const short subdet = modules.subdets ()[lowerModule];
1827+ // Quintuplets starting outside these regions are not built.
1828+ return (subdet == Barrel && layer < 3 ) || (subdet == Endcap && layer <= 1 );
1829+ }
1830+
1831+ struct CountTripletConnections {
1832+ ALPAKA_FN_ACC void operator ()(Acc3D const & acc,
1833+ ModulesConst modules,
1834+ SegmentsConst segments,
1835+ Triplets triplets,
1836+ TripletsOccupancyConst tripletsOcc,
1837+ ObjectRangesConst ranges) const {
1838+ // The atomicAdd below with hierarchy::Threads{} requires one block in x, y dimensions.
1839+ ALPAKA_ASSERT_ACC ((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[1 ] == 1 ) &&
1840+ (alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[2 ] == 1 ));
1841+ const auto & mdIndices = segments.mdIndices ();
1842+ const auto & segIdx = triplets.segmentIndices ();
1843+ const auto & lmIdx = triplets.lowerModuleIndices ();
1844+ const auto & tripIdx = ranges.tripletModuleIndices ();
1845+
1846+ for (uint16_t lowerModule1 : cms::alpakatools::uniform_groups_z (acc, modules.nLowerModules ())) {
1847+ if (!isValidQuintRegion (modules, lowerModule1))
1848+ continue ;
1849+
1850+ const unsigned int nInnerTriplets = tripletsOcc.nTriplets ()[lowerModule1];
1851+ if (nInnerTriplets == 0 )
1852+ continue ;
1853+
1854+ for (unsigned int innerTripletArrayIndex : cms::alpakatools::uniform_elements_y (acc, nInnerTriplets)) {
1855+ const unsigned int innerTripletIndex = tripIdx[lowerModule1] + innerTripletArrayIndex;
1856+
1857+ const uint16_t lowerModule3 = lmIdx[innerTripletIndex][2 ];
1858+ const unsigned int nOuterTriplets = tripletsOcc.nTriplets ()[lowerModule3];
1859+ if (nOuterTriplets == 0 )
1860+ continue ;
1861+
1862+ const unsigned int secondSegIdx = segIdx[innerTripletIndex][1 ];
1863+ const unsigned int secondMDOuter = mdIndices[secondSegIdx][1 ];
1864+
1865+ for (unsigned int outerTripletArrayIndex : cms::alpakatools::uniform_elements_x (acc, nOuterTriplets)) {
1866+ const unsigned int outerTripletIndex = tripIdx[lowerModule3] + outerTripletArrayIndex;
1867+ const unsigned int thirdSegIdx = segIdx[outerTripletIndex][0 ];
1868+ const unsigned int thirdMDInner = mdIndices[thirdSegIdx][0 ];
1869+
1870+ if (secondMDOuter == thirdMDInner) {
1871+ alpaka::atomicAdd (acc, &triplets.connectedMax ()[innerTripletIndex], 1u , alpaka::hierarchy::Threads{});
1872+ }
1873+ }
1874+ }
1875+ }
1876+ }
1877+ };
1878+
18221879 struct CreateEligibleModulesListForQuintuplets {
18231880 ALPAKA_FN_ACC void operator ()(Acc1D const & acc,
18241881 ModulesConst modules,
1825- TripletsOccupancyConst tripletsOccupancy ,
1882+ TripletsOccupancyConst tripletsOcc ,
18261883 ObjectRanges ranges,
1827- Triplets triplets,
1828- const float ptCut) const {
1829- // implementation is 1D with a single block
1884+ TripletsConst triplets) const {
1885+ // Single-block kernel
18301886 ALPAKA_ASSERT_ACC ((alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0 ] == 1 ));
18311887
1832- // Initialize variables in shared memory and set to 0
18331888 int & nEligibleT5Modulesx = alpaka::declareSharedVar<int , __COUNTER__>(acc);
18341889 int & nTotalQuintupletsx = alpaka::declareSharedVar<int , __COUNTER__>(acc);
18351890 if (cms::alpakatools::once_per_block (acc)) {
@@ -1838,75 +1893,31 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
18381893 }
18391894 alpaka::syncBlockThreads (acc);
18401895
1841- // Occupancy matrix for 0.8 GeV pT Cut
1842- constexpr int p08_occupancy_matrix[4 ][4 ] = {
1843- {336 , 414 , 231 , 146 }, // category 0
1844- {0 , 0 , 0 , 0 }, // category 1
1845- {0 , 0 , 0 , 0 }, // category 2
1846- {0 , 0 , 191 , 106 } // category 3
1847- };
1848-
1849- // Occupancy matrix for 0.6 GeV pT Cut, 99.99%
1850- constexpr int p06_occupancy_matrix[4 ][4 ] = {
1851- {325 , 237 , 217 , 176 }, // category 0
1852- {0 , 0 , 0 , 0 }, // category 1
1853- {0 , 0 , 0 , 0 }, // category 2
1854- {0 , 0 , 129 , 180 } // category 3
1855- };
1856-
1857- // Select the appropriate occupancy matrix based on ptCut
1858- const auto & occupancy_matrix = (ptCut < 0 .8f ) ? p06_occupancy_matrix : p08_occupancy_matrix;
1859-
1860- for (int i : cms::alpakatools::uniform_elements (acc, modules.nLowerModules ())) {
1861- // Condition for a quintuple to exist for a module
1862- // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap
1863- short module_rings = modules.rings ()[i];
1864- short module_layers = modules.layers ()[i];
1865- short module_subdets = modules.subdets ()[i];
1866- float module_eta = alpaka::math::abs (acc, modules.eta ()[i]);
1867-
1868- if (tripletsOccupancy.nTriplets ()[i] == 0 )
1896+ for (uint16_t lowerModule : cms::alpakatools::uniform_elements (acc, modules.nLowerModules ())) {
1897+ if (!isValidQuintRegion (modules, lowerModule))
18691898 continue ;
1870- if (module_subdets == Barrel && module_layers >= 3 )
1871- continue ;
1872- if (module_subdets == Endcap && module_layers > 1 )
1899+
1900+ unsigned int nInnerTriplets = tripletsOcc. nTriplets ()[lowerModule] ;
1901+ if (nInnerTriplets == 0 )
18731902 continue ;
18741903
1904+ // Sum the real connectivity for triplets in this module
18751905 int dynamic_count = 0 ;
1876-
1877- // How many triplets are in module i?
1878- int nTriplets_i = tripletsOccupancy.nTriplets ()[i];
1879- int firstTripletIdx = ranges.tripletModuleIndices ()[i];
1880-
1881- // Loop over all triplets that live in module i
1882- for (int t = 0 ; t < nTriplets_i; t++) {
1883- int tripletIndex = firstTripletIdx + t;
1884- uint16_t outerModule = triplets.lowerModuleIndices ()[tripletIndex][2 ];
1885- dynamic_count += tripletsOccupancy.nTriplets ()[outerModule];
1906+ const unsigned int firstTripletIdx = ranges.tripletModuleIndices ()[lowerModule];
1907+ for (unsigned int t = 0 ; t < nInnerTriplets; ++t) {
1908+ unsigned int tripletIndex = firstTripletIdx + t;
1909+ dynamic_count += triplets.connectedMax ()[tripletIndex];
18861910 }
18871911
1888- int category_number = getCategoryNumber (module_layers, module_subdets, module_rings);
1889- int eta_number = getEtaBin (module_eta);
1890-
1891- #ifdef WARNINGS
1892- if (category_number == -1 || eta_number == -1 ) {
1893- printf (" Unhandled case in createEligibleModulesListForQuintupletsGPU! Module index = %i\n " , i);
1894- }
1895- #endif
1896-
1897- // Get matrix-based cap (use dynamic_count as fallback)
1898- int matrix_cap =
1899- (category_number != -1 && eta_number != -1 ) ? occupancy_matrix[category_number][eta_number] : 0 ;
1900-
1901- // Cap occupancy at minimum of dynamic count and matrix value
1902- int occupancy = alpaka::math::min (acc, dynamic_count, matrix_cap);
1912+ if (dynamic_count == 0 )
1913+ continue ;
19031914
19041915 int nEligibleT5Modules = alpaka::atomicAdd (acc, &nEligibleT5Modulesx, 1 , alpaka::hierarchy::Threads{});
1905- int nTotQ = alpaka::atomicAdd (acc, &nTotalQuintupletsx, occupancy , alpaka::hierarchy::Threads{});
1916+ int nTotQ = alpaka::atomicAdd (acc, &nTotalQuintupletsx, dynamic_count , alpaka::hierarchy::Threads{});
19061917
1907- ranges.quintupletModuleIndices ()[i ] = nTotQ;
1908- ranges.indicesOfEligibleT5Modules ()[nEligibleT5Modules] = i ;
1909- ranges.quintupletModuleOccupancy ()[i ] = occupancy ;
1918+ ranges.quintupletModuleIndices ()[lowerModule ] = nTotQ;
1919+ ranges.indicesOfEligibleT5Modules ()[nEligibleT5Modules] = lowerModule ;
1920+ ranges.quintupletModuleOccupancy ()[lowerModule ] = dynamic_count ;
19101921 }
19111922
19121923 // Wait for all threads to finish before reporting final values
0 commit comments