@@ -39,8 +39,23 @@ void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2
3939 const unsigned int startRofId,
4040 const unsigned int rofSize,
4141 const float phiCut,
42- const size_t maxTrackletsPerCluster)
42+ const unsigned int maxTrackletsPerCluster,
43+ const int nBlocks,
44+ const int nThreads)
4345{
46+ gpu::trackleterKernelMultipleRof<Mode><<<nBlocks, nThreads>>> (
47+ clustersNextLayer, // const Cluster* clustersNextLayer, // 0 2
48+ clustersCurrentLayer, // const Cluster* clustersCurrentLayer, // 1 1
49+ sizeNextLClusters, // const int* sizeNextLClusters,
50+ sizeCurrentLClusters, // const int* sizeCurrentLClusters,
51+ nextIndexTables, // const int* nextIndexTables,
52+ Tracklets, // Tracklet* Tracklets,
53+ foundTracklets, // int* foundTracklets,
54+ utils, // const IndexTableUtils* utils,
55+ startRofId, // const unsigned int startRofId,
56+ rofSize, // const unsigned int rofSize,
57+ phiCut, // const float phiCut,
58+ maxTrackletsPerCluster); // const unsigned int maxTrackletsPerCluster = 1e2
4459}
4560/*
4661GPUd() float smallestAngleDifference(float a, float b)
@@ -96,7 +111,7 @@ GPUd() void printOnBlock(const unsigned int bId, const char* str, Args... args)
96111 }
97112}
98113
99- GPUg() void printBufferOnThread(const int* v, size_t size, const int len = 150, const unsigned int tId = 0)
114+ GPUg() void printBufferOnThread(const int* v, unsigned int size, const int len = 150, const unsigned int tId = 0)
100115{
101116 if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
102117 for (int i{0}; i < size; ++i) {
@@ -109,7 +124,7 @@ GPUg() void printBufferOnThread(const int* v, size_t size, const int len = 150,
109124 }
110125}
111126
112- GPUg() void printBufferOnThreadF(const float* v, size_t size, const unsigned int tId = 0)
127+ GPUg() void printBufferOnThreadF(const float* v, unsigned int size, const unsigned int tId = 0)
113128{
114129 if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
115130 printf("vector :");
@@ -127,7 +142,7 @@ GPUg() void resetTrackletsKernel(Tracklet* tracklets, const int nTracklets)
127142 }
128143}
129144
130- GPUg() void dumpFoundTrackletsKernel(const Tracklet* tracklets, const int* nTracklet, const size_t nClustersMiddleLayer, const int maxTrackletsPerCluster)
145+ GPUg() void dumpFoundTrackletsKernel(const Tracklet* tracklets, const int* nTracklet, const unsigned int nClustersMiddleLayer, const int maxTrackletsPerCluster)
131146{
132147 for (int iCurrentLayerClusterIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentLayerClusterIndex < nClustersMiddleLayer; iCurrentLayerClusterIndex += blockDim.x * gridDim.x) {
133148 const int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
@@ -160,15 +175,15 @@ GPUg() void trackleterKernelSingleRof(
160175 int* foundTracklets,
161176 const IndexTableUtils* utils,
162177 const short rofId,
163- const size_t maxTrackletsPerCluster = 1e2)
178+ const unsigned int maxTrackletsPerCluster = 1e2)
164179{
165180 const int phiBins{utils->getNphiBins()};
166181 const int zBins{utils->getNzBins()};
167182 // loop on layer1 clusters
168183 for (int iCurrentLayerClusterIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentLayerClusterIndex < sizeCurrentLClusters; iCurrentLayerClusterIndex += blockDim.x * gridDim.x) {
169184 if (iCurrentLayerClusterIndex < sizeCurrentLClusters) {
170185 unsigned int storedTracklets{0};
171- const size_t stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
186+ const unsigned int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
172187 const Cluster& currentCluster = clustersCurrentLayer[iCurrentLayerClusterIndex];
173188 const int4 selectedBinsRect{VertexerTraits::getBinsRect(currentCluster, (int)Mode, 0.f, 50.f, phiCut / 2, *utils)};
174189 if (selectedBinsRect.x != 0 || selectedBinsRect.y != 0 || selectedBinsRect.z != 0 || selectedBinsRect.w != 0) {
@@ -218,7 +233,7 @@ GPUg() void trackleterKernelMultipleRof(
218233 const short startRofId,
219234 const short rofSize,
220235 const float phiCut,
221- const size_t maxTrackletsPerCluster = 1e2)
236+ const unsigned int maxTrackletsPerCluster = 1e2)
222237{
223238 const int phiBins{utils->getNphiBins()};
224239 const int zBins{utils->getNzBins()};
@@ -235,7 +250,7 @@ GPUg() void trackleterKernelMultipleRof(
235250 // single rof loop on layer1 clusters
236251 for (int iCurrentLayerClusterIndex = threadIdx.x; iCurrentLayerClusterIndex < nClustersCurrentLayerRof; iCurrentLayerClusterIndex += blockDim.x) {
237252 unsigned int storedTracklets{0};
238- const size_t stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
253+ const unsigned int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
239254 const Cluster& currentCluster = clustersCurrentLayerRof[iCurrentLayerClusterIndex];
240255 const int4 selectedBinsRect{VertexerTraits::getBinsRect(currentCluster, (int)Mode, 0.f, 50.f, phiCut / 2, *utils)};
241256 if (selectedBinsRect.x != 0 || selectedBinsRect.y != 0 || selectedBinsRect.z != 0 || selectedBinsRect.w != 0) {
@@ -276,7 +291,7 @@ template <bool initRun>
276291GPUg() void trackletSelectionKernelSingleRof(
277292 const Cluster* clusters0,
278293 const Cluster* clusters1,
279- const size_t nClustersMiddleLayer,
294+ const unsigned int nClustersMiddleLayer,
280295 Tracklet* tracklets01,
281296 Tracklet* tracklets12,
282297 const int* nFoundTracklet01,
@@ -436,7 +451,7 @@ GPUg() void computeCentroidsKernel(
436451 Line* lines,
437452 int* nFoundLines,
438453 int* nExclusiveFoundLines,
439- const size_t nClustersMiddleLayer,
454+ const unsigned int nClustersMiddleLayer,
440455 float* centroids,
441456 const float lowHistX,
442457 const float highHistX,
@@ -446,7 +461,7 @@ GPUg() void computeCentroidsKernel(
446461{
447462 const int nLines = nExclusiveFoundLines[nClustersMiddleLayer - 1] + nFoundLines[nClustersMiddleLayer - 1];
448463 const int maxIterations{nLines * (nLines - 1) / 2};
449- for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
464+ for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
450465 int iFirstLine = currentThreadIndex / nLines;
451466 int iSecondLine = currentThreadIndex % nLines;
452467 // All unique pairs
@@ -496,7 +511,7 @@ GPUg() void computeZCentroidsKernel(
496511 const int binOpeningX,
497512 const int binOpeningY)
498513{
499- for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < nLines; currentThreadIndex += blockDim.x * gridDim.x) {
514+ for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < nLines; currentThreadIndex += blockDim.x * gridDim.x) {
500515 if (tmpVtX[0].value || tmpVtX[1].value) {
501516 float tmpX{lowHistX + tmpVtX[0].key * binSizeHistX + binSizeHistX / 2};
502517 int sumWX{tmpVtX[0].value};
@@ -543,7 +558,7 @@ GPUg() void computeVertexKernel(
543558 const int minContributors,
544559 const int binOpeningZ)
545560{
546- for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < binOpeningZ; currentThreadIndex += blockDim.x * gridDim.x) {
561+ for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < binOpeningZ; currentThreadIndex += blockDim.x * gridDim.x) {
547562 if (currentThreadIndex == 0) {
548563 if (tmpVertexBins[2].value > 1 && (tmpVertexBins[0].value || tmpVertexBins[1].value)) {
549564 float z{lowHistZ + tmpVertexBins[2].key * binSizeHistZ + binSizeHistZ / 2};
0 commit comments