Skip to content

Commit b1438a9

Browse files
committed
GPU: More WORKAROUNDs for CUDA bug exposing device code via host symbols
1 parent 5577abf commit b1438a9

File tree

12 files changed

+45
-26
lines changed

12 files changed

+45
-26
lines changed

DataFormats/Detectors/TPC/include/DataFormatsTPC/TrackTPC.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class TrackTPC : public o2::track::TrackParCov
8080

8181
template <class T>
8282
GPUdi() static void getClusterReference(T& clinfo, int nCluster,
83-
uint8_t& sectorIndex, uint8_t& rowIndex, uint32_t& clusterIndex, const ClusRef& ref)
83+
uint8_t& sectorIndex, uint8_t& rowIndex, uint32_t& clusterIndex, const ClusRef& ref)
8484
{
8585
// data for given tracks starts at clinfo[ ref.getFirstEntry() ],
8686
// 1st ref.getEntries() cluster indices are stored as uint32_t
@@ -96,14 +96,14 @@ class TrackTPC : public o2::track::TrackParCov
9696

9797
template <class T>
9898
GPUdi() void getClusterReference(T& clinfo, int nCluster,
99-
uint8_t& sectorIndex, uint8_t& rowIndex, uint32_t& clusterIndex) const
99+
uint8_t& sectorIndex, uint8_t& rowIndex, uint32_t& clusterIndex) const
100100
{
101101
getClusterReference<T>(clinfo, nCluster, sectorIndex, rowIndex, clusterIndex, mClustersReference);
102102
}
103103

104104
template <class T>
105105
GPUdi() static const o2::tpc::ClusterNative& getCluster(T& clinfo, int nCluster,
106-
const o2::tpc::ClusterNativeAccess& clusters, uint8_t& sectorIndex, uint8_t& rowIndex, const ClusRef& ref)
106+
const o2::tpc::ClusterNativeAccess& clusters, uint8_t& sectorIndex, uint8_t& rowIndex, const ClusRef& ref)
107107
{
108108
uint32_t clusterIndex;
109109
getClusterReference<T>(clinfo, nCluster, sectorIndex, rowIndex, clusterIndex, ref);
@@ -112,14 +112,14 @@ class TrackTPC : public o2::track::TrackParCov
112112

113113
template <class T>
114114
GPUdi() const o2::tpc::ClusterNative& getCluster(T& clinfo, int nCluster,
115-
const o2::tpc::ClusterNativeAccess& clusters, uint8_t& sectorIndex, uint8_t& rowIndex) const
115+
const o2::tpc::ClusterNativeAccess& clusters, uint8_t& sectorIndex, uint8_t& rowIndex) const
116116
{
117117
return getCluster<T>(clinfo, nCluster, clusters, sectorIndex, rowIndex, mClustersReference);
118118
}
119119

120120
template <class T>
121121
GPUdi() const o2::tpc::ClusterNative& getCluster(T& clinfo, int nCluster,
122-
const o2::tpc::ClusterNativeAccess& clusters) const
122+
const o2::tpc::ClusterNativeAccess& clusters) const
123123
{
124124
uint8_t sectorIndex, rowIndex;
125125
return (getCluster<T>(clinfo, nCluster, clusters, sectorIndex, rowIndex));

Detectors/Base/src/Propagator.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,8 +788,8 @@ namespace o2::base
788788
{
789789
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
790790
template class PropagatorImpl<float>;
791-
template bool GPUd() PropagatorImpl<float>::propagateToAlphaX<PropagatorImpl<float>::TrackPar_t>(PropagatorImpl<float>::TrackPar_t&, float, float, bool, float, float, int, PropagatorImpl<float>::MatCorrType matCorr, track::TrackLTIntegral*, int) const;
792-
template bool GPUd() PropagatorImpl<float>::propagateToAlphaX<PropagatorImpl<float>::TrackParCov_t>(PropagatorImpl<float>::TrackParCov_t&, float, float, bool, float, float, int, PropagatorImpl<float>::MatCorrType matCorr, track::TrackLTIntegral*, int) const;
791+
template bool GPUdni() PropagatorImpl<float>::propagateToAlphaX<PropagatorImpl<float>::TrackPar_t>(PropagatorImpl<float>::TrackPar_t&, float, float, bool, float, float, int, PropagatorImpl<float>::MatCorrType matCorr, track::TrackLTIntegral*, int) const;
792+
template bool GPUdni() PropagatorImpl<float>::propagateToAlphaX<PropagatorImpl<float>::TrackParCov_t>(PropagatorImpl<float>::TrackParCov_t&, float, float, bool, float, float, int, PropagatorImpl<float>::MatCorrType matCorr, track::TrackLTIntegral*, int) const;
793793
#endif
794794
#ifndef GPUCA_GPUCODE
795795
template class PropagatorImpl<double>;

GPU/Common/GPUCommonDefAPI.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,11 @@
152152
#define GPUbarrierWarp()
153153
#define GPUAtomic(type) type
154154
#elif defined(__CUDACC__) //Defines for CUDA
155-
#define GPUd() __device__
155+
#ifndef GPUCA_GPUCODE_DEVICE
156+
#define GPUd() __device__ inline // FIXME: DR: Workaround: mark device function as inline such that nvcc does not create bogus host symbols
157+
#else
158+
#define GPUd() __device__
159+
#endif
156160
#define GPUdDefault()
157161
#define GPUhdDefault()
158162
#define GPUdi() __device__ inline

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,9 +1007,11 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks(int nBlocks, int nThreads, int iBl
10071007
MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, iSlice, b1, n1, jSlice, b2, n2, mergeMode);
10081008
}
10091009

1010-
template GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int nBlocks, int nThreads, int iBlock, int iThread, int iSlice, char withinSlice, char mergeMode);
1011-
template GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int nBlocks, int nThreads, int iBlock, int iThread, int iSlice, char withinSlice, char mergeMode);
1012-
template GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int nBlocks, int nThreads, int iBlock, int iThread, int iSlice, char withinSlice, char mergeMode);
1010+
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
1011+
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int nBlocks, int nThreads, int iBlock, int iThread, int iSlice, char withinSlice, char mergeMode);
1012+
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int nBlocks, int nThreads, int iBlock, int iThread, int iSlice, char withinSlice, char mergeMode);
1013+
template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int nBlocks, int nThreads, int iBlock, int iThread, int iSlice, char withinSlice, char mergeMode);
1014+
#endif
10131015

10141016
GPUd() void GPUTPCGMMerger::MergeWithinSlicesPrepare(int nBlocks, int nThreads, int iBlock, int iThread)
10151017
{

GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,11 @@ GPUdii() void GPUTPCGMMergerMergeBorders::Thread(int nBlocks, int nThreads, int
120120
{
121121
merger.MergeBorderTracks<I>(nBlocks, nThreads, iBlock, iThread, args...);
122122
}
123-
template GPUd() void GPUTPCGMMergerMergeBorders::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int iSlice, char withinSlice, char mergeMode);
124-
template GPUd() void GPUTPCGMMergerMergeBorders::Thread<2>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int iSlice, char withinSlice, char mergeMode);
125-
template GPUd() void GPUTPCGMMergerMergeBorders::Thread<3>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, gputpcgmmergertypes::GPUTPCGMBorderRange* range, int N, int cmpMax);
123+
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
124+
template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int iSlice, char withinSlice, char mergeMode);
125+
template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<2>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int iSlice, char withinSlice, char mergeMode);
126+
template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<3>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, gputpcgmmergertypes::GPUTPCGMBorderRange* range, int N, int cmpMax);
127+
#endif
126128
template <>
127129
GPUdii() void GPUTPCGMMergerMergeBorders::Thread<1>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int iSlice, char withinSlice, char mergeMode)
128130
{

GPU/GPUTracking/Refit/GPUTrackingRefit.cxx

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -421,12 +421,14 @@ GPUd() int GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov)
421421
return nFitted;
422422
}
423423

424-
template GPUd() int GPUTrackingRefit::RefitTrack<GPUTPCGMMergedTrack, TrackParCov>(GPUTPCGMMergedTrack& trk, bool outward, bool resetCov);
425-
template GPUd() int GPUTrackingRefit::RefitTrack<GPUTPCGMMergedTrack, GPUTPCGMTrackParam>(GPUTPCGMMergedTrack& trk, bool outward, bool resetCov);
426-
template GPUd() int GPUTrackingRefit::RefitTrack<TrackTPC, TrackParCov>(TrackTPC& trk, bool outward, bool resetCov);
427-
template GPUd() int GPUTrackingRefit::RefitTrack<TrackTPC, GPUTPCGMTrackParam>(TrackTPC& trk, bool outward, bool resetCov);
428-
template GPUd() int GPUTrackingRefit::RefitTrack<GPUTrackingRefit::TrackParCovWithArgs, TrackParCov>(GPUTrackingRefit::TrackParCovWithArgs& trk, bool outward, bool resetCov);
429-
template GPUd() int GPUTrackingRefit::RefitTrack<GPUTrackingRefit::TrackParCovWithArgs, GPUTPCGMTrackParam>(GPUTrackingRefit::TrackParCovWithArgs& trk, bool outward, bool resetCov);
424+
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
425+
template GPUdni() int GPUTrackingRefit::RefitTrack<GPUTPCGMMergedTrack, TrackParCov>(GPUTPCGMMergedTrack& trk, bool outward, bool resetCov);
426+
template GPUdni() int GPUTrackingRefit::RefitTrack<GPUTPCGMMergedTrack, GPUTPCGMTrackParam>(GPUTPCGMMergedTrack& trk, bool outward, bool resetCov);
427+
template GPUdni() int GPUTrackingRefit::RefitTrack<TrackTPC, TrackParCov>(TrackTPC& trk, bool outward, bool resetCov);
428+
template GPUdni() int GPUTrackingRefit::RefitTrack<TrackTPC, GPUTPCGMTrackParam>(TrackTPC& trk, bool outward, bool resetCov);
429+
template GPUdni() int GPUTrackingRefit::RefitTrack<GPUTrackingRefit::TrackParCovWithArgs, TrackParCov>(GPUTrackingRefit::TrackParCovWithArgs& trk, bool outward, bool resetCov);
430+
template GPUdni() int GPUTrackingRefit::RefitTrack<GPUTrackingRefit::TrackParCovWithArgs, GPUTPCGMTrackParam>(GPUTrackingRefit::TrackParCovWithArgs& trk, bool outward, bool resetCov);
431+
#endif
430432

431433
#ifndef GPUCA_GPUCODE
432434
void GPUTrackingRefit::SetPtrsFromGPUConstantMem(const GPUConstantMem* v, MEM_CONSTANT(GPUParam) * p)

GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,7 @@ GPUdii() void GPUTrackingRefitKernel::Thread(int nBlocks, int nThreads, int iBlo
4444
}
4545
}
4646
}
47-
template GPUd() void GPUTrackingRefitKernel::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
48-
template GPUd() void GPUTrackingRefitKernel::Thread<1>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
47+
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
48+
template GPUdni() void GPUTrackingRefitKernel::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
49+
template GPUdni() void GPUTrackingRefitKernel::Thread<1>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
50+
#endif

GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ class GPUTPCTrackLinearisation
6767
float mQPt; // QPt
6868
};
6969

70-
MEM_CLASS_PRE2() GPUdi() GPUTPCTrackLinearisation::GPUTPCTrackLinearisation(const MEM_LG2(GPUTPCTrackParam) & GPUrestrict() t) : mSinPhi(t.SinPhi()), mCosPhi(0), mDzDs(t.DzDs()), mQPt(t.QPt())
70+
MEM_CLASS_PRE2()
71+
GPUdi() GPUTPCTrackLinearisation::GPUTPCTrackLinearisation(const MEM_LG2(GPUTPCTrackParam) & GPUrestrict() t) : mSinPhi(t.SinPhi()), mCosPhi(0), mDzDs(t.DzDs()), mQPt(t.QPt())
7172
{
7273
if (mSinPhi > GPUCA_MAX_SIN_PHI) {
7374
mSinPhi = GPUCA_MAX_SIN_PHI;

GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ class GPUTPCTrackParam
157157
int mNDF; // the Number of Degrees of Freedom
158158
};
159159

160-
MEM_CLASS_PRE() GPUdi() void MEM_LG(GPUTPCTrackParam)::InitParam()
160+
MEM_CLASS_PRE()
161+
GPUdi() void MEM_LG(GPUTPCTrackParam)::InitParam()
161162
{
162163
// Initialize Tracklet Parameters using default values
163164
SetSinPhi(0);

GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,12 +345,14 @@ class GPUTRDTrackerDebug
345345
GPUd() void SetFindable(bool* findable) {}
346346
GPUd() void Output() {}
347347
};
348+
#if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
348349
#ifndef GPUCA_ALIROOT_LIB
349350
template class GPUTRDTrackerDebug<GPUTRDTrackGPU>;
350351
#endif
351352
#if !defined(GPUCA_STANDALONE) && !defined(GPUCA_GPUCODE)
352353
template class GPUTRDTrackerDebug<GPUTRDTrack>;
353354
#endif
355+
#endif
354356
} // namespace gpu
355357
} // namespace GPUCA_NAMESPACE
356358

0 commit comments

Comments
 (0)