@@ -121,15 +121,18 @@ double setTicksToTime() {
121
121
return TicksToTime;
122
122
}
123
123
124
+ /// Conversion factor applied to HSA system clock ticks; stays 1.0 until setHSATicksToTimeConstant() overwrites it.
125
+ double TicksToTime = 1.0 ;
126
+
127
+ /// Compute the system timestamp conversion factor (modeled after ROCclr) via setTicksToTime() and store it in TicksToTime.
128
+ void setHSATicksToTimeConstant () { TicksToTime = setTicksToTime (); }
129
+
124
130
#ifdef OMPT_SUPPORT
125
131
#include " OmptDeviceTracing.h"
126
132
#include < omp-tools.h>
127
133
128
134
extern void ompt::setOmptHostToDeviceRate (double Slope, double Offset);
129
135
130
- // / HSA system clock frequency
131
- double TicksToTime = 1.0 ;
132
-
133
136
// / Forward declare
134
137
namespace llvm {
135
138
namespace omp {
@@ -207,9 +210,6 @@ void setOmptAsyncCopyProfile(bool Enable) {
207
210
DP (" Error enabling async copy profiling\n " );
208
211
}
209
212
210
- // / Compute system timestamp conversion factor, modeled after ROCclr.
211
- void setOmptTicksToTime () { TicksToTime = setTicksToTime (); }
212
-
213
213
// / Get the current HSA-based device timestamp.
214
214
uint64_t getSystemTimestampInNs () {
215
215
uint64_t TimeStamp = 0 ;
@@ -2866,7 +2866,7 @@ struct AMDGPUStreamManagerTy final
2866
2866
}
2867
2867
2868
2868
// / Enable/disable profiling of the HSA queues.
2869
- void setOmptQueueProfile (int Enable) {
2869
+ void setHSAQueueProfiling (int Enable) {
2870
2870
// If queue profiling is enabled with an env-var, it means that
2871
2871
// profiling is already ON and should remain so all the time.
2872
2872
if (OMPX_EnableQueueProfiling)
@@ -3269,7 +3269,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3269
3269
if (auto Err = initMemoryPools ())
3270
3270
return Err;
3271
3271
3272
- OMPT_IF_ENABLED (:: setOmptTicksToTime (); );
3272
+ setHSATicksToTimeConstant ( );
3273
3273
3274
3274
#ifdef OMPT_SUPPORT
3275
3275
// At init we capture two time points for host and device. The two
@@ -4462,7 +4462,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4462
4462
// Implementation sanity checks: either unified_shared_memory or auto
4463
4463
// zero-copy, not both
4464
4464
if (isUnifiedSharedMemory && isAutoZeroCopy)
4465
- return Plugin::error (ErrorCode::UNKNOWN,
4465
+ return Plugin::error (ErrorCode::UNKNOWN,
4466
4466
" Internal runtime error: cannot be both "
4467
4467
" unified_shared_memory and auto zero-copy." );
4468
4468
@@ -4575,8 +4575,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4575
4575
}
4576
4576
4577
4577
/// Propagate the enable/disable queue profiling request (Enable) to this device's AMDGPUStreamManager.
4578
- void setOmptQueueProfile (int Enable) {
4579
- AMDGPUStreamManager.setOmptQueueProfile (Enable);
4578
+ void setHSAQueueProfiling (int Enable) {
4579
+ AMDGPUStreamManager.setHSAQueueProfiling (Enable);
4580
4580
}
4581
4581
4582
4582
// / Get the address of pointer to the preallocated device memory pool.
@@ -5978,6 +5978,11 @@ unsigned AMDGPUKernelTy::computeAchievedOccupancy(GenericDeviceTy &Device,
5978
5978
return Occupancy;
5979
5979
}
5980
5980
5981
+ /// Enable/disable profiling of the HSA queues of Device, which is reinterpreted as an AMDGPUDeviceTy * without any check and must point to one.
5982
+ void setHSAQueueProfiling (void *Device, int Enable) {
5983
+ reinterpret_cast <AMDGPUDeviceTy *>(Device)->setHSAQueueProfiling (Enable);
5984
+ }
5985
+
5981
5986
} // namespace plugin
5982
5987
} // namespace target
5983
5988
} // namespace omp
@@ -5988,15 +5993,14 @@ namespace llvm::omp::target::plugin {
5988
5993
5989
5994
/// Enable/disable queue profiling for the given device; Device is an opaque pointer that is ultimately treated as an AMDGPUDeviceTy *.
5990
5995
void setOmptQueueProfile (void *Device, int Enable) {
5991
- reinterpret_cast <llvm::omp::target::plugin::AMDGPUDeviceTy *>(Device)
5992
- ->setOmptQueueProfile (Enable);
5996
+ setHSAQueueProfiling (Device, Enable);
5993
5997
}
5994
5998
5995
5999
} // namespace llvm::omp::target::plugin
5996
6000
5997
6001
/// Enable/disable kernel profiling for the given device by forwarding Device and Enable to the queue-profiling helper in llvm::omp::target::plugin.
5998
6002
void setGlobalOmptKernelProfile (void *Device, int Enable) {
5999
- llvm::omp::target::plugin::setOmptQueueProfile (Device, Enable);
6003
+ llvm::omp::target::plugin::setHSAQueueProfiling (Device, Enable);
6000
6004
}
6001
6005
6002
6006
#endif
0 commit comments