@@ -38,8 +38,9 @@ class CmdBuffer;
3838class CmdBuilder ;
3939
// Value written to the `opcode` field of the decoder packet header by
// InsertCodeobjMarker.
4040constexpr size_t ATT_CODEOBJ_OPCODE = 4 ;
// Value written to the `opcode` field of the decoder packet header by
// InsertTimestampMarker.
41+ constexpr size_t ATT_TIMESTAMP_OPCODE = 5 ;
4142
42- union att_decoder_codeobj_header_t {
43+ union att_decoder_packet_header_t {
4344 struct {
4445 unsigned int opcode : 8 ;
4546 unsigned int type : 4 ;
@@ -102,11 +103,14 @@ class XCC_Packet_Lock {
102103// Thread traces status register indices to determine
103104// status of thread trace run
104105
// One control record per XCC. The command builders reinterpret the trace
// control buffer as an array of TraceControl and index it by XCC, so the size
// of this struct is the stride of that array and must not change silently.
struct TraceControl {
  uint32_t status{0};
  uint32_t cntr{0};
  uint32_t wptr{0};
  uint32_t _reserved{0};
  // Destination handed to InsertTimestampMarker before the trace-disable
  // sequence in End() is gpu_clock_cnt_end; gpu_clock_cnt_start is the
  // destination used by the earlier marker (presumably at trace start).
  uint64_t gpu_clock_cnt_start{0};
  uint64_t gpu_clock_cnt_end{0};
};

// The record is overlaid on raw buffer memory via reinterpret_cast; pin the
// packed size so that adding a field forces a review of the buffer sizing.
static_assert(sizeof(TraceControl) == 4 * sizeof(uint32_t) + 2 * sizeof(uint64_t),
              "TraceControl layout changed: update control buffer sizing/indexing");
111115
112116// Encapsulates the various Api and structures that are used to enable
@@ -126,7 +130,9 @@ class SqttBuilder {
126130 virtual void End (CmdBuffer* cmd_buffer, TraceConfig* config) = 0;
127131 // Builds Pm4 command stream to program hardware registers that
128132 // inserts "data" into the SQTT buffer as USERDATA_2 (data_lo) and USERDATA_3 (data_hi)
129- virtual hsa_status_t InsertMarker (CmdBuffer* cmd_buffer, uint32_t data, unsigned channel) = 0;
133+ virtual hsa_status_t InsertCodeobjMarker (CmdBuffer* cmd_buffer, uint32_t data, unsigned channel) = 0;
134+
135+ virtual void InsertTimestampMarker (CmdBuffer* cmd_buffer, uint64_t * addr) {};
130136
131137 // Returns TT_CONTROL_UTC_ERR_MASK
132138 virtual size_t GetUTCErrorMask () const = 0;
@@ -326,8 +332,6 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
326332 Primitives::sqtt_mode_on_value ());
327333 base_addr += base_step;
328334 }
329- // Reset the GRBM to broadcast mode
330- SetGRBMToBroadcast (cmd_buffer);
331335 } else {
332336 SetGRBMToBroadcast (cmd_buffer);
333337 builder.BuildWritePConfigRegPacket (cmd_buffer, Primitives::SQ_THREAD_TRACE_STATUS_ADDR, 0 );
@@ -401,16 +405,46 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
401405
402406 builder.BuildWriteUConfigRegPacket (cmd_buffer, userdata_channel, header.u32All );
403407 builder.BuildWriteUConfigRegPacket (cmd_buffer, userdata_channel, 524801 );
408+
409+ if (Primitives::GFXIP_LEVEL == 9 && config->enable_rt_timestamp )
410+ {
411+ for (size_t xcc = 0 ; xcc < GetXCCNumber (); xcc++)
412+ {
413+ bool some_se_enabled = false ;
414+ for (int se = 0 ; se < se_number_xcc; se++) some_se_enabled |=config->target_cu_per_se .at (se + xcc*se_number_xcc) >= 0 ;
415+ if (!some_se_enabled) continue ;
416+
417+ XCC_Packet_Lock<Builder> lock (builder, cmd_buffer, GetXCCNumber (), xcc);
418+ auto & control = reinterpret_cast <TraceControl*>(config->control_buffer_ptr )[xcc];
419+ InsertTimestampMarker (cmd_buffer, &control.gpu_clock_cnt_start );
420+ }
421+ }
404422 }
405423
406424 void End (CmdBuffer* cmd_buffer, TraceConfig* config) override {
407425 SetGRBMToBroadcast (cmd_buffer);
408426 // Issue a CSPartialFlush cmd including cache flush
409427 builder.BuildWriteWaitIdlePacket (cmd_buffer);
410428
411- if (Primitives::GFXIP_LEVEL == 9 ) {
429+ if (Primitives::GFXIP_LEVEL == 9 )
430+ {
412431 const uint32_t se_number_xcc = se_number_total / std::max (1u , GetXCCNumber ());
413432
433+ if (config->enable_rt_timestamp )
434+ {
435+ for (size_t xcc = 0 ; xcc < GetXCCNumber (); xcc++)
436+ {
437+ bool some_se_enabled = false ;
438+ for (int se = 0 ; se < se_number_xcc; se++) some_se_enabled |=config->target_cu_per_se .at (se + xcc*se_number_xcc) >= 0 ;
439+ if (!some_se_enabled) continue ;
440+
441+ XCC_Packet_Lock<Builder> lock (builder, cmd_buffer, GetXCCNumber (), xcc);
442+ auto & control = reinterpret_cast <TraceControl*>(config->control_buffer_ptr )[xcc];
443+ InsertTimestampMarker (cmd_buffer, &control.gpu_clock_cnt_end );
444+ }
445+ builder.BuildWriteWaitIdlePacket (cmd_buffer);
446+ }
447+
414448 // Program the thread trace mode register to disable thread trace
415449 builder.BuildWriteUConfigRegPacket (cmd_buffer, Primitives::SQ_THREAD_TRACE_MODE_ADDR,
416450 Primitives::sqtt_mode_off_value ());
@@ -527,9 +561,9 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
527561 return uint64_t (buffer_per_se) & ~((1 << Primitives::TT_BUFF_ALIGN_SHIFT) - 1 );
528562 }
529563
530- virtual hsa_status_t InsertMarker (CmdBuffer* cmd_buffer, uint32_t data,
564+ virtual hsa_status_t InsertCodeobjMarker (CmdBuffer* cmd_buffer, uint32_t data,
531565 unsigned channel) override {
532- att_decoder_codeobj_header_t header{};
566+ att_decoder_packet_header_t header{};
533567 header.opcode = ATT_CODEOBJ_OPCODE;
534568 header.type = channel;
535569 header.reserved = 0 ;
@@ -540,6 +574,17 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
540574 builder.BuildWriteUConfigRegPacket (cmd_buffer, userdata_channel, data);
541575 return HSA_STATUS_SUCCESS;
542576 }
577+
578+ virtual void InsertTimestampMarker (CmdBuffer* cmd_buffer, uint64_t * addr) override
579+ {
580+ att_decoder_packet_header_t header{};
581+ header.opcode = ATT_TIMESTAMP_OPCODE;
582+ header.type = 0 ;
583+ header.reserved = 0 ;
584+
585+ SetGRBMToBroadcast (cmd_buffer);
586+ builder.BuildGPUClockPacket (cmd_buffer, addr, Primitives::SQ_THREAD_TRACE_USERDATA_3, header.u32All );
587+ }
543588
544589 template <typename T>
545590 void WriteConfigPacket (CmdBuffer* cmdbuf, const T& reg, uint32_t value) {
0 commit comments