@@ -121,15 +121,18 @@ double setTicksToTime() {
121
121
return TicksToTime;
122
122
}
123
123
124
+ /// HSA system clock tick-to-time conversion factor; stays 1.0 until setHSATicksToTimeConstant() assigns it.
125
+ double TicksToTime = 1.0 ;
126
+
127
+ /// Store the value returned by setTicksToTime() in the global TicksToTime (conversion factor computation modeled after ROCclr).
128
+ void setHSATicksToTimeConstant () { TicksToTime = setTicksToTime (); }
129
+
124
130
#ifdef OMPT_SUPPORT
125
131
#include " OmptDeviceTracing.h"
126
132
#include < omp-tools.h>
127
133
128
134
extern void ompt::setOmptHostToDeviceRate (double Slope, double Offset);
129
135
130
- // / HSA system clock frequency
131
- double TicksToTime = 1.0 ;
132
-
133
136
// / Forward declare
134
137
namespace llvm {
135
138
namespace omp {
@@ -207,9 +210,6 @@ void setOmptAsyncCopyProfile(bool Enable) {
207
210
DP (" Error enabling async copy profiling\n " );
208
211
}
209
212
210
- // / Compute system timestamp conversion factor, modeled after ROCclr.
211
- void setOmptTicksToTime () { TicksToTime = setTicksToTime (); }
212
-
213
213
// / Get the current HSA-based device timestamp.
214
214
uint64_t getSystemTimestampInNs () {
215
215
uint64_t TimeStamp = 0 ;
@@ -2830,7 +2830,7 @@ struct AMDGPUStreamManagerTy final
2830
2830
}
2831
2831
2832
2832
// / Enable/disable profiling of the HSA queues.
2833
- void setOmptQueueProfile (int Enable) {
2833
+ void setHSAQueueProfiling (int Enable) {
2834
2834
// If queue profiling is enabled with an env-var, it means that
2835
2835
// profiling is already ON and should remain so all the time.
2836
2836
if (OMPX_EnableQueueProfiling)
@@ -3233,7 +3233,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3233
3233
if (auto Err = initMemoryPools ())
3234
3234
return Err;
3235
3235
3236
- OMPT_IF_ENABLED (:: setOmptTicksToTime (); );
3236
+ setHSATicksToTimeConstant ( );
3237
3237
3238
3238
#ifdef OMPT_SUPPORT
3239
3239
// At init we capture two time points for host and device. The two
@@ -4347,7 +4347,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4347
4347
// Implementation sanity checks: either unified_shared_memory or auto
4348
4348
// zero-copy, not both
4349
4349
if (isUnifiedSharedMemory && isAutoZeroCopy)
4350
- return Plugin::error (ErrorCode::UNKNOWN,
4350
+ return Plugin::error (ErrorCode::UNKNOWN,
4351
4351
" Internal runtime error: cannot be both "
4352
4352
" unified_shared_memory and auto zero-copy." );
4353
4353
@@ -4460,8 +4460,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4460
4460
}
4461
4461
4462
4462
/// Propagate the enable/disable profiling request to the StreamManager by passing \p Enable through unchanged.
4463
- void setOmptQueueProfile (int Enable) {
4464
- AMDGPUStreamManager.setOmptQueueProfile (Enable);
4463
+ void setHSAQueueProfiling (int Enable) {
4464
+ AMDGPUStreamManager.setHSAQueueProfiling (Enable);
4465
4465
}
4466
4466
4467
4467
// / Get the address of pointer to the preallocated device memory pool.
@@ -5860,6 +5860,11 @@ unsigned AMDGPUKernelTy::computeAchievedOccupancy(GenericDeviceTy &Device,
5860
5860
return Occupancy;
5861
5861
}
5862
5862
5863
+ /// Forward \p Enable to setHSAQueueProfiling() on \p Device, which is cast to AMDGPUDeviceTy * without any check.
5864
+ void setQueueProfiling (void *Device, int Enable) {
5865
+ reinterpret_cast <AMDGPUDeviceTy *>(Device)->setHSAQueueProfiling (Enable);
5866
+ }
5867
+
5863
5868
} // namespace plugin
5864
5869
} // namespace target
5865
5870
} // namespace omp
@@ -5871,14 +5876,14 @@ namespace llvm::omp::target::plugin {
5871
5876
/// Enable/disable queue profiling for the given device; \p Device is cast to AMDGPUDeviceTy * without any check and \p Enable is forwarded to it.
5872
5877
void setOmptQueueProfile (void *Device, int Enable) {
5873
5878
reinterpret_cast <llvm::omp::target::plugin::AMDGPUDeviceTy *>(Device)
5874
- ->setOmptQueueProfile (Enable);
5879
+ ->setHSAQueueProfiling (Enable);
5875
5880
}
5876
5881
5877
5882
} // namespace llvm::omp::target::plugin
5878
5883
5879
5884
/// Enable/disable kernel profiling for the given device by forwarding \p Device and \p Enable to the queue-profiling helper in llvm::omp::target::plugin.
5880
5885
void setGlobalOmptKernelProfile (void *Device, int Enable) {
5881
- llvm::omp::target::plugin::setOmptQueueProfile (Device, Enable);
5886
+ llvm::omp::target::plugin::setQueueProfiling (Device, Enable);
5882
5887
}
5883
5888
5884
5889
#endif
0 commit comments