@@ -75,33 +75,16 @@ processActivityKernel(XpuptiProfiler::CorrIdToExternIdMap &corrIdToExternId,
7575 std::set<Data *> &dataSet,
7676 xpupti::Pti_Activity *activity) {
7777 auto *kernel = reinterpret_cast <pti_view_record_kernel *>(activity);
78- // std::cout << "activity->_name: " << kernel->_name << "\n" << std::flush;
79- // std::cout << "activity->_sycl_queue_id: " << kernel->_sycl_queue_id << "\n"
80- // << std::flush;
8178 auto correlationId = kernel->_correlation_id ;
82- std::cout << " kernel->_correlation_id " << kernel->_correlation_id << " \n "
83- << std::flush;
84- std::cout << " kernel->_kernel_id " << kernel->_kernel_id << " \n " ;
85- // here doesn't work
86- // uint64_t corr_id = 0;
87- // auto res =
88- // ptiViewPopExternalCorrelationId(pti_view_external_kind::PTI_VIEW_EXTERNAL_KIND_CUSTOM_1,
89- // &corr_id); std::cout << "ptiViewPopExternalCorrelationId res: " << res <<
90- // "\n" << std::flush; std::cout << "corr_id: " << corr_id << "\n" <<
91- // std::flush;
9279 if (/* Not a valid context*/ !corrIdToExternId.contain (correlationId)) {
93- // if (false) {
94- std::cout << " MARK#3\n " << std::flush;
9580 return correlationId;
9681 }
9782 auto [parentId, numInstances] = corrIdToExternId.at (correlationId);
98- std::cout << " parentId: " << parentId << std::endl;
9983 if (true ) {
10084 // Non-graph kernels
10185 for (auto *data : dataSet) {
10286 auto scopeId = parentId;
10387 if (apiExternIds.contain (scopeId)) {
104- std::cout << " first branch" << std::endl;
10588 // It's triggered by a CUDA op but not triton op
10689 scopeId = data->addOp (parentId, kernel->_name );
10790 }
@@ -116,10 +99,8 @@ processActivityKernel(XpuptiProfiler::CorrIdToExternIdMap &corrIdToExternId,
11699 // 2. graphExecId -> graphId
117100 // --- CUPTI thread ---
118101 // 3. corrId -> numKernels
119- std::cout << " MARK#1\n " << std::flush;
120102 for (auto *data : dataSet) {
121103 auto externId = data->addOp (parentId, kernel->_name );
122- std::cout << " MARK#2\n " << std::flush;
123104 data->addMetric (externId, convertActivityToMetric (activity));
124105 }
125106 }
@@ -133,21 +114,6 @@ processActivityKernel(XpuptiProfiler::CorrIdToExternIdMap &corrIdToExternId,
133114 return correlationId;
134115}
135116
136- uint32_t processActivityExternalCorrelation (
137- XpuptiProfiler::CorrIdToExternIdMap &corrIdToExternId,
138- xpupti::Pti_Activity *activity) {
139- auto *externalActivity =
140- reinterpret_cast <pti_view_record_external_correlation *>(activity);
141- std::cout << " processActivityExternalCorrelation: _correlation_id: "
142- << externalActivity->_correlation_id << " \n " ;
143- std::cout << " processActivityExternalCorrelation: _external_id: "
144- << externalActivity->_external_id << " \n " ;
145-
146- // corrIdToExternId[externalActivity->_correlation_id] =
147- // {externalActivity->_external_id, 1};
148- return externalActivity->_correlation_id ;
149- }
150-
151117uint32_t processActivity (XpuptiProfiler::CorrIdToExternIdMap &corrIdToExternId,
152118 XpuptiProfiler::ApiExternIdSet &apiExternIds,
153119 std::set<Data *> &dataSet,
@@ -159,11 +125,6 @@ uint32_t processActivity(XpuptiProfiler::CorrIdToExternIdMap &corrIdToExternId,
159125 dataSet, activity);
160126 break ;
161127 }
162- case PTI_VIEW_EXTERNAL_CORRELATION: {
163- // correlationId = processActivityExternalCorrelation(corrIdToExternId,
164- // activity);
165- break ;
166- }
167128 default :
168129 break ;
169130 }
@@ -253,8 +214,6 @@ void XpuptiProfiler::XpuptiProfilerPimpl::completeBuffer(uint8_t *buffer,
253214 do {
254215 status = xpupti::viewGetNextRecord<true >(buffer, validSize, &activity);
255216 if (status == pti_result::PTI_SUCCESS) {
256- std::cout << " activity->_view_kind: " << activity->_view_kind << " \n "
257- << std::flush;
258217 auto correlationId =
259218 processActivity (profiler.correlation .corrIdToExternId ,
260219 profiler.correlation .apiExternIds , dataSet, activity);
@@ -278,7 +237,6 @@ void XpuptiProfiler::XpuptiProfilerPimpl::callbackFn(
278237 pti_callback_domain domain, pti_api_group_id driver_api_group_id,
279238 uint32_t driver_api_id, pti_backend_ctx_t backend_context, void *cb_data,
280239 void *global_user_data, void **instance_user_data) {
281- std::cout << " callback\n " << std::flush;
282240 pti_callback_gpu_op_data *callback_data =
283241 static_cast <pti_callback_gpu_op_data *>(cb_data);
284242 if (callback_data == nullptr ) {
@@ -377,25 +335,13 @@ void XpuptiProfiler::XpuptiProfilerPimpl::doStart() {
377335 if (profiler.utils_cache_path != " " ) {
378336 callEnumDeviceUUIDs (profiler.utils_cache_path );
379337 }
380- // auto res = ptiViewPushExternalCorrelationId(
381- // pti_view_external_kind::PTI_VIEW_EXTERNAL_KIND_CUSTOM_1, 42);
382- // std::cout << "res: " << res << "\n" << std::flush;
383- /*
384- ze_result_t status = ZE_RESULT_SUCCESS;
385- // status = zeInit(ZE_INIT_FLAG_GPU_ONLY);
386- // assert(status == ZE_RESULT_SUCCESS);
387- */
388338
389339 xpupti::viewSetCallbacks<true >(allocBuffer, completeBuffer);
390340 xpupti::viewEnable<true >(PTI_VIEW_DEVICE_GPU_KERNEL);
391341 xpupti::viewEnable<true >(PTI_VIEW_DEVICE_GPU_MEM_FILL);
392342 xpupti::viewEnable<true >(PTI_VIEW_DEVICE_GPU_MEM_COPY);
393343 xpupti::subscribe<true >(&subscriber, callbackFn, &subscriber);
394- // xpupti::viewEnable<true>(PTI_VIEW_DEVICE_GPU_MEM_COPY);
395- // xpupti::viewEnable<true>(PTI_VIEW_DEVICE_GPU_MEM_FILL);
396344 // xpupti::viewEnable<true>(PTI_VIEW_SYCL_RUNTIME_CALLS);
397- // xpupti::viewEnable<true>(PTI_VIEW_COLLECTION_OVERHEAD);
398- // xpupti::viewEnable<true>(PTI_VIEW_EXTERNAL_CORRELATION);
399345 // xpupti::viewEnable<true>(PTI_VIEW_LEVEL_ZERO_CALLS);
400346 // setGraphCallbacks(subscriber, /*enable=*/true);
401347 // setRuntimeCallbacks(subscriber, /*enable=*/true);
@@ -405,7 +351,6 @@ void XpuptiProfiler::XpuptiProfilerPimpl::doStart() {
405351}
406352
407353void XpuptiProfiler::XpuptiProfilerPimpl::doFlush () {
408- std::cout << " flush\n " << std::flush;
409354 XpuptiProfiler &profiler = threadState.profiler ;
410355 if (profiler.syclQueue != nullptr ) {
411356 callWaitOnSyclQueue (profiler.utils_cache_path , profiler.syclQueue );
@@ -420,11 +365,7 @@ void XpuptiProfiler::XpuptiProfilerPimpl::doStop() {
420365 xpupti::viewDisable<true >(PTI_VIEW_DEVICE_GPU_KERNEL);
421366 xpupti::viewDisable<true >(PTI_VIEW_DEVICE_GPU_MEM_FILL);
422367 xpupti::viewDisable<true >(PTI_VIEW_DEVICE_GPU_MEM_COPY);
423- // xpupti::viewDisable<true>(PTI_VIEW_DEVICE_GPU_MEM_COPY);
424- // xpupti::viewDisable<true>(PTI_VIEW_DEVICE_GPU_MEM_FILL);
425368 // xpupti::viewDisable<true>(PTI_VIEW_SYCL_RUNTIME_CALLS);
426- // xpupti::viewDisable<true>(PTI_VIEW_COLLECTION_OVERHEAD);
427- // xpupti::viewDisable<true>(PTI_VIEW_EXTERNAL_CORRELATION);
428369 // xpupti::viewDisable<true>(PTI_VIEW_LEVEL_ZERO_CALLS);
429370 // setGraphCallbacks(subscriber, /*enable=*/false);
430371 // setRuntimeCallbacks(subscriber, /*enable=*/false);
0 commit comments