@@ -42,6 +42,13 @@ namespace rocprofiler
4242{
4343namespace aql
4444{
45+ struct AQLProfileMetric
46+ {
47+ counters::Metric metric;
48+ std::vector<aqlprofile_pmc_event_t > instances;
49+ std::vector<aqlprofile_pmc_event_t > events;
50+ };
51+
4552CounterPacketConstruct::CounterPacketConstruct (rocprofiler_agent_id_t agent,
4653 const std::vector<counters::Metric>& metrics)
4754: _agent(agent)
@@ -50,7 +57,7 @@ CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t
5057 // for the counter.
5158 for (const auto & x : metrics)
5259 {
53- auto query_info = get_query_info (_agent, x);
60+ auto query_info = get_query_info (_agent, x. block (), x. name () );
5461 _metrics.emplace_back ().metric = x;
5562 uint64_t event_id = 0 ;
5663 if (!x.event ().empty ()) event_id = std::stoul (x.event (), nullptr );
@@ -278,5 +285,150 @@ CounterPacketConstruct::can_collect()
278285 }
279286 return ROCPROFILER_STATUS_SUCCESS;
280287}
288+
289+ /* * @brief Constructs the packet using the contained input parameters.
290+ * Writes into ID map and spm descriptor used to decode SPM data
291+ */
292+ std::unique_ptr<hsa::SPMPacket>
293+ spm_construct_packet (const rocprofiler_agent_id_t agent_id,
294+ const std::vector<counters::Metric>& metrics,
295+ double sample_freq,
296+ uint64_t buffer_size,
297+ uint64_t timeout)
298+ {
299+ auto events = std::vector<aqlprofile_pmc_event_t >{};
300+ auto params = std::vector<aqlprofile_spm_parameter_t >{};
301+ auto id_map = std::vector<spm::spm_counter_instance_t >{};
302+
303+ const auto * agent = CHECK_NOTNULL (rocprofiler::agent::get_agent (agent_id));
304+ const auto * aql_cache = CHECK_NOTNULL (rocprofiler::agent::get_agent_cache (agent));
305+ auto pool = std::make_shared<hsa::SPMMemoryPool>(
306+ *aql_cache, *hsa::get_amd_ext_table (), hsa::get_core_table ()->hsa_memory_copy_fn );
307+ const auto * aql_agent = rocprofiler::agent::get_aql_agent (agent->id );
308+
309+ const double sclk_freq = agent->max_engine_clk_fcompute * 1E9 ; // GHz
310+ const size_t sclk_period = static_cast <size_t >(std::roundf (sclk_freq / ((sample_freq) *1E9 )));
311+
312+ params.push_back ({AQLPROFILE_SPM_PARAMETER_TYPE_BUFFER_SIZE, buffer_size * 1024 });
313+ params.push_back ({AQLPROFILE_SPM_PARAMETER_TYPE_SAMPLE_INTERVAL, sclk_period});
314+ params.push_back ({AQLPROFILE_SPM_PARAMETER_TYPE_TIMEOUT, timeout});
315+
316+ for (const auto & metric : metrics)
317+ {
318+ auto query_info = get_query_info (agent_id, metric.block (), metric.name ());
319+
320+ for (unsigned block_index = 0 ; block_index < query_info.instance_count ; ++block_index)
321+ {
322+ auto event = aqlprofile_pmc_event_t {
323+ .block_index = block_index,
324+ .event_id =
325+ static_cast <uint32_t >(std::stoul (metric.event ().c_str (), nullptr ) & 0xFFFFFFFF ),
326+ .flags = aqlprofile_pmc_event_flags_t {metric.flags ()},
327+ .block_name = static_cast <hsa_ven_amd_aqlprofile_block_name_t >(query_info.id )};
328+
329+ events.push_back (event);
330+ id_map.push_back ({rocprofiler_counter_id_t {.handle = metric.id ()}, block_index});
331+ }
332+ }
333+
334+ aqlprofile_spm_profile_t profile{.aql_agent = *aql_agent,
335+ .hsa_agent = pool->gpu_agent ,
336+ .events = events.data (),
337+ .event_count = events.size (),
338+ .parameters = params.data (),
339+ .parameter_count = params.size (),
340+ .reserved = 0 ,
341+ .alloc_cb = &(hsa::SPMMemoryPool::Alloc),
342+ .dealloc_cb = &(hsa::SPMMemoryPool::Free),
343+ .memcpy_cb = &(hsa::SPMMemoryPool::Copy),
344+ .userdata = pool.get ()};
345+
346+ auto pkt = std::make_unique<hsa::SPMPacket>(*aql_agent, profile);
347+ ROCP_FATAL_IF (!pkt->valid ()) << " SPM Packet creation failed" ;
348+
349+ pool->delete_packets_fn = pkt->sym .delete_packets_fn ;
350+ pool->handle = pkt->handle ;
351+ pkt->pool = std::move (pool);
352+
353+ pkt->spm_desc .size =
354+ sizeof (spm::spm_desc_v0_t ) + id_map.size () * sizeof (id_map[0 ]) + pkt->aql_desc .size ;
355+
356+ pkt->container_desc_data = std::make_shared<std::vector<char >>(pkt->spm_desc .size );
357+ pkt->spm_desc .data = pkt->container_desc_data ->data ();
358+
359+ auto * desc = static_cast <spm::spm_desc_v0_t *>(pkt->spm_desc .data );
360+
361+ *desc = spm::spm_desc_v0_t {};
362+ desc->aql_desc_size = pkt->aql_desc .size ;
363+ desc->num_events = id_map.size ();
364+
365+ std::memcpy (desc->aqlprofile_desc (), pkt->aql_desc .data , pkt->aql_desc .size );
366+ std::memcpy (desc->events (), id_map.data (), id_map.size () * sizeof (id_map[0 ]));
367+
368+ pkt->clear ();
369+ return pkt;
370+ }
371+
372+ // Following the PMC check for now
373+ // ToDO: change this to SPM
374+ rocprofiler_status_t
375+ spm_can_collect (const rocprofiler_agent_id_t agent_id, const std::vector<counters::Metric>& metrics)
376+ {
377+ // Verify that the counters fit within harrdware limits
378+ auto counter_count =
379+ std::map<std::pair<hsa_ven_amd_aqlprofile_block_name_t , uint32_t >, int64_t >{};
380+ auto max_allowed =
381+ std::map<std::pair<hsa_ven_amd_aqlprofile_block_name_t , uint32_t >, int64_t >{};
382+ auto _metrics = std::vector<AQLProfileMetric>{};
383+
384+ for (const auto & metric : metrics)
385+ {
386+ auto query_info = get_query_info (agent_id, metric.block (), metric.name ());
387+ _metrics.emplace_back ().metric = metric;
388+
389+ auto event_id =
390+ static_cast <uint32_t >(std::stoul (metric.event ().c_str (), nullptr ) & 0xFFFFFFFF );
391+
392+ for (unsigned block_index = 0 ; block_index < query_info.instance_count ; ++block_index)
393+ {
394+ _metrics.back ().instances .push_back (
395+ {.block_index = block_index,
396+ .event_id = event_id,
397+ .flags = aqlprofile_pmc_event_flags_t {metric.flags ()},
398+ .block_name = static_cast <hsa_ven_amd_aqlprofile_block_name_t >(query_info.id )});
399+
400+ _metrics.back ().events .push_back (
401+ {.block_index = block_index,
402+ .event_id = event_id,
403+ .flags = aqlprofile_pmc_event_flags_t {metric.flags ()},
404+ .block_name = static_cast <hsa_ven_amd_aqlprofile_block_name_t >(query_info.id )});
405+ }
406+ }
407+
408+ for (auto & metric : _metrics)
409+ {
410+ for (auto & instance : metric.events )
411+ {
412+ auto block_pair = std::make_pair (instance.block_name , instance.block_index );
413+ auto [iter, inserted] = counter_count.emplace (block_pair, 0 );
414+ iter->second ++;
415+ if (inserted)
416+ {
417+ max_allowed.emplace (block_pair, get_block_counters (agent_id, instance));
418+ }
419+ }
420+ }
421+
422+ // Check if the block count > max count
423+ for (auto & [block_name, count] : counter_count)
424+ {
425+ if (auto * max = CHECK_NOTNULL (common::get_val (max_allowed, block_name)); count > *max)
426+ {
427+ return ROCPROFILER_STATUS_ERROR_EXCEEDS_HW_LIMIT;
428+ }
429+ }
430+ return ROCPROFILER_STATUS_SUCCESS;
431+ }
432+
281433} // namespace aql
282434} // namespace rocprofiler
0 commit comments