Skip to content

Commit c6a6e89

Browse files
[profiler] filter runtime ops tracing to reduce the overhead (#4645) (#4668)
Filter runtime ops tracing to reduce the overhead. Signed-off-by: Chen, Zejun <[email protected]> Co-authored-by: Jinghui <[email protected]>
1 parent f1f288c commit c6a6e89

File tree

2 files changed

+31
-24
lines changed

2 files changed

+31
-24
lines changed

csrc/gpu/profiler/XPUActivityProfiler.cpp

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,21 @@ bool ConfigDerivedState::isCollectionDone(
9090
return false;
9191
}
9292

93+
// Names of the runtime ops that are kept in the trace.
// XPUActivityProfiler::isNeededToTrace() looks a runtime record's name up in
// this list with an exact string comparison, and handleRuntimeActivity()
// returns early for records whose name is not found here.
// NOTE(review): the entries look like SYCL Plugin Interface (PI) entry points
// for USM fill/memcpy/memset, kernel launch, USM allocation and buffer
// read/write -- confirm against the runtime's naming before adding entries.
const std::vector<std::string> XPUActivityProfiler::_traceableRuntimeOps = {
94+
"piextUSMEnqueueFill",
95+
"piextUSMEnqueueFill2D",
96+
"piextUSMEnqueueMemcpy",
97+
"piextUSMEnqueueMemset",
98+
"piextUSMEnqueueMemcpy2D",
99+
"piextUSMEnqueueMemset2D",
100+
"piEnqueueKernelLaunch",
101+
"piextEnqueueKernelLaunchCustom",
102+
"piextEnqueueCooperativeKernelLaunch",
103+
"piextUSMDeviceAlloc",
104+
"piextUSMHostAlloc",
105+
"piEnqueueMemBufferRead",
106+
"piEnqueueMemBufferWrite"};
107+
93108
void XPUActivityProfiler::transferCpuTrace(
94109
std::unique_ptr<libkineto::CpuTraceBuffer> cpuTrace) {
95110
std::lock_guard<std::mutex> guard(mutex_);
@@ -285,33 +300,13 @@ inline bool XPUActivityProfiler::outOfRange(const ITraceActivity& act) {
285300
return out_of_range;
286301
}
287302

288-
// inline static bool isBlockListedRuntimeCbid(Onepti_CallbackId cbid) {
289-
// // Some CUDA calls that are very frequent and also not very interesting.
290-
// // Filter these out to reduce trace size.
291-
// if (cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 ||
292-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 ||
293-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 ||
294-
// // Don't care about cudaEvents
295-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 ||
296-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 ||
297-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 ||
298-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 ||
299-
// cbid == ONEPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020) {
300-
// return true;
301-
// }
302-
//
303-
// return false;
304-
// }
305-
306303
void XPUActivityProfiler::handleRuntimeActivity(
307304
const pti_view_record_sycl_runtime* activity,
308305
ActivityLogger* logger) {
309-
// if (isBlockListedRuntimeCbid(activity->cbid)) {
310-
// return;
311-
// }
312-
// VLOG(2) << activity->correlationId
313-
// << ": ONEPTI_ACTIVITY_KIND_RUNTIME, cbid=" << activity->cbid
314-
// << " tid=" << activity->threadId;
306+
if (!isNeededToTrace(activity->_name)) {
307+
return;
308+
}
309+
315310
int32_t tid = activity->_thread_id;
316311
const auto& it = resourceInfo_.find({processId(), tid});
317312
if (it != resourceInfo_.end()) {

csrc/gpu/profiler/XPUActivityProfiler.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,18 @@ class XPUActivityProfiler {
170170
profilers_.push_back(std::move(profiler));
171171
}
172172

173+
// Allow-list of runtime op names that are worth tracing. Runtime records
// whose name is not in this list are skipped by handleRuntimeActivity() to
// reduce post-processing overhead. Defined in XPUActivityProfiler.cpp.
static const std::vector<std::string> _traceableRuntimeOps;

// Returns true when `name` exactly (case-sensitively) matches an entry of
// _traceableRuntimeOps, i.e. the runtime record should be traced.
// A null `name` is reported as "not traceable" instead of being handed to
// the std::string constructor, which has undefined behavior for nullptr.
static bool isNeededToTrace(const char* name) {
  if (name == nullptr) {
    return false;
  }
  // Compare each entry against the C string directly so that no temporary
  // std::string is built for every runtime record.
  return std::any_of(
      _traceableRuntimeOps.begin(),
      _traceableRuntimeOps.end(),
      [name](const std::string& op) { return op == name; });
}
184+
173185
protected:
174186
using CpuGpuSpanPair = std::pair<TraceSpan, TraceSpan>;
175187
static const CpuGpuSpanPair& defaultTraceSpan();

0 commit comments

Comments
 (0)