Skip to content

Commit 54e3700

Browse files
authored
[NFC][OMPT] Refactor set-queue-profiling (llvm#3744)
2 parents 44e5533 + a6830fb commit 54e3700

File tree

1 file changed

+18
-14
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+18
-14
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,15 +121,18 @@ double setTicksToTime() {
121121
return TicksToTime;
122122
}
123123

124+
/// Conversion factor from HSA system clock ticks to time (see setTicksToTime).
125+
double TicksToTime = 1.0;
126+
127+
/// Compute the system timestamp conversion factor (modeled after ROCclr) and
/// store it in TicksToTime.
128+
void setHSATicksToTimeConstant() {
  // Store the factor returned by setTicksToTime() in the global TicksToTime.
  TicksToTime = setTicksToTime();
}
129+
124130
#ifdef OMPT_SUPPORT
125131
#include "OmptDeviceTracing.h"
126132
#include <omp-tools.h>
127133

128134
extern void ompt::setOmptHostToDeviceRate(double Slope, double Offset);
129135

130-
/// HSA system clock frequency
131-
double TicksToTime = 1.0;
132-
133136
/// Forward declare
134137
namespace llvm {
135138
namespace omp {
@@ -207,9 +210,6 @@ void setOmptAsyncCopyProfile(bool Enable) {
207210
DP("Error enabling async copy profiling\n");
208211
}
209212

210-
/// Compute system timestamp conversion factor, modeled after ROCclr.
211-
void setOmptTicksToTime() { TicksToTime = setTicksToTime(); }
212-
213213
/// Get the current HSA-based device timestamp.
214214
uint64_t getSystemTimestampInNs() {
215215
uint64_t TimeStamp = 0;
@@ -2866,7 +2866,7 @@ struct AMDGPUStreamManagerTy final
28662866
}
28672867

28682868
/// Enable/disable profiling of the HSA queues.
2869-
void setOmptQueueProfile(int Enable) {
2869+
void setHSAQueueProfiling(int Enable) {
28702870
// If queue profiling is enabled with an env-var, it means that
28712871
// profiling is already ON and should remain so all the time.
28722872
if (OMPX_EnableQueueProfiling)
@@ -3269,7 +3269,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
32693269
if (auto Err = initMemoryPools())
32703270
return Err;
32713271

3272-
OMPT_IF_ENABLED(::setOmptTicksToTime(););
3272+
setHSATicksToTimeConstant();
32733273

32743274
#ifdef OMPT_SUPPORT
32753275
// At init we capture two time points for host and device. The two
@@ -4462,7 +4462,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
44624462
// Implementation sanity checks: either unified_shared_memory or auto
44634463
// zero-copy, not both
44644464
if (isUnifiedSharedMemory && isAutoZeroCopy)
4465-
return Plugin::error(ErrorCode::UNKNOWN,
4465+
return Plugin::error(ErrorCode::UNKNOWN,
44664466
"Internal runtime error: cannot be both "
44674467
"unified_shared_memory and auto zero-copy.");
44684468

@@ -4575,8 +4575,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
45754575
}
45764576

45774577
/// Propagate the enable/disable profiling request to the StreamManager.
4578-
void setOmptQueueProfile(int Enable) {
4579-
AMDGPUStreamManager.setOmptQueueProfile(Enable);
4578+
void setHSAQueueProfiling(int Enable) {
4579+
AMDGPUStreamManager.setHSAQueueProfiling(Enable);
45804580
}
45814581

45824582
/// Get the address of pointer to the preallocated device memory pool.
@@ -5978,6 +5978,11 @@ unsigned AMDGPUKernelTy::computeAchievedOccupancy(GenericDeviceTy &Device,
59785978
return Occupancy;
59795979
}
59805980

5981+
/// Enable/disable profiling of the HSA queues for the given device.
5982+
void setHSAQueueProfiling(void *Device, int Enable) {
  // Device arrives as an opaque pointer; it is reinterpreted as the AMDGPU
  // device object and the request is forwarded to its member function.
  auto *AMDGPUDevice = reinterpret_cast<AMDGPUDeviceTy *>(Device);
  AMDGPUDevice->setHSAQueueProfiling(Enable);
}
5985+
59815986
} // namespace plugin
59825987
} // namespace target
59835988
} // namespace omp
@@ -5988,15 +5993,14 @@ namespace llvm::omp::target::plugin {
59885993

59895994
/// Enable/disable kernel profiling for the given device.
59905995
void setOmptQueueProfile(void *Device, int Enable) {
5991-
reinterpret_cast<llvm::omp::target::plugin::AMDGPUDeviceTy *>(Device)
5992-
->setOmptQueueProfile(Enable);
5996+
setHSAQueueProfiling(Device, Enable);
59935997
}
59945998

59955999
} // namespace llvm::omp::target::plugin
59966000

59976001
/// Enable/disable kernel profiling for the given device.
59986002
void setGlobalOmptKernelProfile(void *Device, int Enable) {
5999-
llvm::omp::target::plugin::setOmptQueueProfile(Device, Enable);
6003+
llvm::omp::target::plugin::setHSAQueueProfiling(Device, Enable);
60006004
}
60016005

60026006
#endif

0 commit comments

Comments
 (0)