Skip to content

Commit 995ef70

Browse files
committed
[NFC][OMPT] Refactor set-queue-profiling
This functionality is not tied to OMPT in general. Rename it to make that clear and move it out of the OMPT_SUPPORT preprocessor guard.
1 parent a53433c commit 995ef70

File tree

1 file changed

+18
-13
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+18
-13
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -121,15 +121,18 @@ double setTicksToTime() {
121121
return TicksToTime;
122122
}
123123

124+
/// HSA system clock frequency
125+
double TicksToTime = 1.0;
126+
127+
/// Compute system timestamp conversion factor, modeled after ROCclr
128+
void setHSATicksToTimeConstant() { TicksToTime = setTicksToTime(); }
129+
124130
#ifdef OMPT_SUPPORT
125131
#include "OmptDeviceTracing.h"
126132
#include <omp-tools.h>
127133

128134
extern void ompt::setOmptHostToDeviceRate(double Slope, double Offset);
129135

130-
/// HSA system clock frequency
131-
double TicksToTime = 1.0;
132-
133136
/// Forward declare
134137
namespace llvm {
135138
namespace omp {
@@ -207,9 +210,6 @@ void setOmptAsyncCopyProfile(bool Enable) {
207210
DP("Error enabling async copy profiling\n");
208211
}
209212

210-
/// Compute system timestamp conversion factor, modeled after ROCclr.
211-
void setOmptTicksToTime() { TicksToTime = setTicksToTime(); }
212-
213213
/// Get the current HSA-based device timestamp.
214214
uint64_t getSystemTimestampInNs() {
215215
uint64_t TimeStamp = 0;
@@ -2830,7 +2830,7 @@ struct AMDGPUStreamManagerTy final
28302830
}
28312831

28322832
/// Enable/disable profiling of the HSA queues.
2833-
void setOmptQueueProfile(int Enable) {
2833+
void setHSAQueueProfiling(int Enable) {
28342834
// If queue profiling is enabled with an env-var, it means that
28352835
// profiling is already ON and should remain so all the time.
28362836
if (OMPX_EnableQueueProfiling)
@@ -3233,7 +3233,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
32333233
if (auto Err = initMemoryPools())
32343234
return Err;
32353235

3236-
OMPT_IF_ENABLED(::setOmptTicksToTime(););
3236+
setHSATicksToTimeConstant();
32373237

32383238
#ifdef OMPT_SUPPORT
32393239
// At init we capture two time points for host and device. The two
@@ -4347,7 +4347,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43474347
// Implementation sanity checks: either unified_shared_memory or auto
43484348
// zero-copy, not both
43494349
if (isUnifiedSharedMemory && isAutoZeroCopy)
4350-
return Plugin::error(ErrorCode::UNKNOWN,
4350+
return Plugin::error(ErrorCode::UNKNOWN,
43514351
"Internal runtime error: cannot be both "
43524352
"unified_shared_memory and auto zero-copy.");
43534353

@@ -4460,8 +4460,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
44604460
}
44614461

44624462
/// Propagate the enable/disable profiling request to the StreamManager.
4463-
void setOmptQueueProfile(int Enable) {
4464-
AMDGPUStreamManager.setOmptQueueProfile(Enable);
4463+
void setHSAQueueProfiling(int Enable) {
4464+
AMDGPUStreamManager.setHSAQueueProfiling(Enable);
44654465
}
44664466

44674467
/// Get the address of pointer to the preallocated device memory pool.
@@ -5860,6 +5860,11 @@ unsigned AMDGPUKernelTy::computeAchievedOccupancy(GenericDeviceTy &Device,
58605860
return Occupancy;
58615861
}
58625862

5863+
/// Enable/disable profiling of the HSA queues for the given device.
5864+
void setQueueProfiling(void *Device, int Enable) {
5865+
reinterpret_cast<AMDGPUDeviceTy *>(Device)->setHSAQueueProfiling(Enable);
5866+
}
5867+
58635868
} // namespace plugin
58645869
} // namespace target
58655870
} // namespace omp
@@ -5871,14 +5876,14 @@ namespace llvm::omp::target::plugin {
58715876
/// Enable/disable kernel profiling for the given device.
58725877
void setOmptQueueProfile(void *Device, int Enable) {
58735878
reinterpret_cast<llvm::omp::target::plugin::AMDGPUDeviceTy *>(Device)
5874-
->setOmptQueueProfile(Enable);
5879+
->setHSAQueueProfiling(Enable);
58755880
}
58765881

58775882
} // namespace llvm::omp::target::plugin
58785883

58795884
/// Enable/disable kernel profiling for the given device.
58805885
void setGlobalOmptKernelProfile(void *Device, int Enable) {
5881-
llvm::omp::target::plugin::setOmptQueueProfile(Device, Enable);
5886+
llvm::omp::target::plugin::setQueueProfiling(Device, Enable);
58825887
}
58835888

58845889
#endif

0 commit comments

Comments
 (0)