@@ -29,6 +29,10 @@ void TrackerTraitsGPU<nLayers>::initialiseTimeFrame(const int iteration)
2929{
3030 mTimeFrameGPU ->initialise (iteration, this ->mTrkParams [iteration], nLayers);
3131
32+ // Configuration
33+ // TODO: load the configuration only once and keep these values in permanent storage
34+ mTimeFrameGPU ->loadConfigMinPt (this ->mTrkParams [iteration].MinPt );
35+
3236 // on default stream
3337 mTimeFrameGPU ->loadVertices (iteration);
3438 mTimeFrameGPU ->loadIndexTableUtils (iteration);
@@ -290,56 +294,55 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
290294 auto & conf = o2::its::ITSGpuTrackingParamConfig::Instance ();
291295 for (int startLevel{this ->mTrkParams [iteration].CellsPerRoad ()}; startLevel >= this ->mTrkParams [iteration].CellMinimumLevel (); --startLevel) {
292296 const int minimumLayer{startLevel - 1 };
293- bounded_vector<CellSeed<nLayers>> trackSeeds ( this -> getMemoryPool (). get ()) ;
294- for (int startLayer{this ->mTrkParams [iteration].CellsPerRoad () - 1 }; startLayer >= minimumLayer; --startLayer) {
297+ int totalCellSeeds{ 0 } ;
298+ for (int startLayer{this ->mTrkParams [iteration].NeighboursPerRoad () }; startLayer >= minimumLayer; --startLayer) {
295299 if ((this ->mTrkParams [iteration].StartLayerMask & (1 << (startLayer + 2 ))) == 0 ) {
296300 continue ;
297301 }
298- processNeighboursHandler<nLayers>(startLayer,
299- startLevel,
300- mTimeFrameGPU ->getDeviceArrayCells (),
301- mTimeFrameGPU ->getDeviceCells ()[startLayer],
302- mTimeFrameGPU ->getArrayNCells (),
303- mTimeFrameGPU ->getDeviceArrayUsedClusters (),
304- mTimeFrameGPU ->getDeviceNeighboursAll (),
305- mTimeFrameGPU ->getDeviceNeighboursLUTs (),
306- mTimeFrameGPU ->getDeviceArrayTrackingFrameInfo (),
307- trackSeeds ,
308- this ->mBz ,
309- this ->mTrkParams [0 ].MaxChi2ClusterAttachment ,
310- this ->mTrkParams [0 ].MaxChi2NDF ,
311- mTimeFrameGPU ->getDevicePropagator (),
312- this ->mTrkParams [0 ].CorrType ,
313- mTimeFrameGPU ->getExternalAllocator (),
314- conf.nBlocksProcessNeighbours [iteration],
315- conf.nThreadsProcessNeighbours [iteration]);
302+ totalCellSeeds += processNeighboursHandler<nLayers>(startLayer,
303+ startLevel,
304+ mTimeFrameGPU ->getDeviceArrayCells (),
305+ mTimeFrameGPU ->getDeviceCells ()[startLayer],
306+ mTimeFrameGPU ->getArrayNCells (),
307+ mTimeFrameGPU ->getDeviceArrayUsedClusters (),
308+ mTimeFrameGPU ->getDeviceNeighboursAll (),
309+ mTimeFrameGPU ->getDeviceNeighboursLUTs (),
310+ mTimeFrameGPU ->getDeviceArrayTrackingFrameInfo (),
311+ mTimeFrameGPU -> getDeviceCellSeeds () ,
312+ this ->mBz ,
313+ this ->mTrkParams [0 ].MaxChi2ClusterAttachment ,
314+ this ->mTrkParams [0 ].MaxChi2NDF ,
315+ mTimeFrameGPU ->getDevicePropagator (),
316+ this ->mTrkParams [0 ].CorrType ,
317+ mTimeFrameGPU ->getExternalAllocator (),
318+ conf.nBlocksProcessNeighbours [iteration],
319+ conf.nThreadsProcessNeighbours [iteration]);
316320 }
317- // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory.
318- if (trackSeeds.empty ()) {
319- LOGP (debug, " No track seeds found, skipping track finding" );
321+ if (!totalCellSeeds) {
320322 continue ;
321323 }
322- mTimeFrameGPU ->createTrackITSExtDevice (trackSeeds);
323- mTimeFrameGPU ->loadTrackSeedsDevice (trackSeeds);
324-
325- trackSeedHandler (mTimeFrameGPU ->getDeviceTrackSeeds (), // CellSeed* trackSeeds
326- mTimeFrameGPU ->getDeviceArrayTrackingFrameInfo (), // TrackingFrameInfo** foundTrackingFrameInfo
327- mTimeFrameGPU ->getDeviceTrackITSExt (), // o2::its::TrackITSExt* tracks
328- this ->mTrkParams [iteration].MinPt , // std::vector<float>& minPtsHost,
329- trackSeeds.size (), // const size_t nSeeds
330- this ->mBz , // const float Bz
331- startLevel, // const int startLevel,
332- this ->mTrkParams [0 ].MaxChi2ClusterAttachment , // float maxChi2ClusterAttachment
333- this ->mTrkParams [0 ].MaxChi2NDF , // float maxChi2NDF
334- mTimeFrameGPU ->getDevicePropagator (), // const o2::base::Propagator* propagator
335- this ->mTrkParams [0 ].CorrType , // o2::base::PropagatorImpl<float>::MatCorrType
336- conf.nBlocksTracksSeeds [iteration],
337- conf.nThreadsTracksSeeds [iteration]);
338-
339- mTimeFrameGPU ->downloadTrackITSExtDevice (trackSeeds);
324+ mTimeFrameGPU ->createTrackITSExtDevice (totalCellSeeds);
325+ int offset{0 };
326+ for (auto & p : mTimeFrameGPU ->getDeviceCellSeeds ()) {
327+ trackSeedHandler<nLayers>(p,
328+ mTimeFrameGPU ->getDeviceArrayTrackingFrameInfo (),
329+ mTimeFrameGPU ->getDeviceTrackITSExt (),
330+ mTimeFrameGPU ->getConfigMinPtDevice (),
331+ offset,
332+ this ->mBz ,
333+ startLevel,
334+ this ->mTrkParams [0 ].MaxChi2ClusterAttachment ,
335+ this ->mTrkParams [0 ].MaxChi2NDF ,
336+ mTimeFrameGPU ->getDevicePropagator (),
337+ this ->mTrkParams [0 ].CorrType ,
338+ conf.nBlocksTracksSeeds [iteration],
339+ conf.nThreadsTracksSeeds [iteration]);
340+ offset += p.second ;
341+ }
342+ sortTrackITSExtDevice (mTimeFrameGPU ->getDeviceTrackITSExt (), offset, mTimeFrameGPU ->getExternalAllocator ());
343+ mTimeFrameGPU ->downloadTrackITSExtDevice (offset);
340344
341345 auto & tracks = mTimeFrameGPU ->getTrackITSExt ();
342-
343346 for (auto & track : tracks) {
344347 if (!track.getChi2 ()) {
345348 continue ; // this is to skip the unset tracks that are put at the beginning of the vector by the sorting. To see if this can be optimised.
@@ -382,6 +385,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
382385 }
383386 mTimeFrameGPU ->getTracks (std::min (rofs[0 ], rofs[1 ])).emplace_back (track);
384387 }
388+ mTimeFrameGPU ->clearDeviceCellSeeds ();
385389 mTimeFrameGPU ->loadUsedClustersDevice ();
386390 }
387391};
0 commit comments