Skip to content

Commit 0bb1a61

Browse files
Baraldi, Giovanni and ApoKalipse-V authored
Adding high bits for ATT buffer size (#171)
* Adding high bits for ATT buffer size
* Copilot review comments
* Add buffer limits
* Update src/pm4/sqtt_builder.h

---------

Co-authored-by: Giovanni <gbaraldi@amd.com>
1 parent 6f236ff commit 0bb1a61

File tree

4 files changed

+29
-32
lines changed

4 files changed

+29
-32
lines changed

src/core/include/aqlprofile-sdk/aql_profile_v2.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,9 +248,17 @@ typedef enum {
248248
hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
249249
aqlprofile_pmc_info_type_t attribute, void* value);
250250

251+
// Extension parameter names for ATT (thread trace) profiles. The profile
// parameter object's `parameter_name` field may carry one of these values in
// place of a hsa_ven_amd_aqlprofile_parameter_name_t value.
typedef enum aqlprofile_att_parameter_name_ext_t
252+
{
253+
/**
254+
* High 32 bits of the ATT buffer size. The low 32 bits are supplied through
* HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE; the ATT packet
* creation code shifts this value left by 32 and combines it with the low
* half to form a 64-bit buffer size.
*
* Numeric value is HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE + 1
255+
*/
256+
AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH = 11,
257+
} aqlprofile_att_parameter_name_ext_t;
258+
251259
// Profile parameter object
252260
typedef struct {
253-
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name;
261+
hsa_ven_amd_aqlprofile_parameter_name_t parameter_name; // Or aqlprofile_att_parameter_name_ext_t
254262
union {
255263
uint32_t value;
256264
struct {

src/core/threadtrace.cpp

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
190190
trace_config.se_mask = 0x11111111;
191191

192192
const size_t se_number_total = pm4_factory->GetShaderEnginesNumber();
193-
size_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
193+
uint64_t buffer_size = DEFAULT_TRACE_BUFFER_SIZE;
194194

195195
if (profile.parameters)
196196
for (const auto* p = profile.parameters; p < profile.parameters + profile.parameter_count; p++)
@@ -204,40 +204,17 @@ hsa_status_t _internal_aqlprofile_att_create_packets(
204204
"ThreadTraceConfig: CuId must be between 0 and 15, TargetCu", p->value);
205205
trace_config.targetCu = p->value;
206206
break;
207-
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
208-
trace_config.vmIdMask = p->value;
209-
break;
210-
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
211-
if ((p->value & 0x50) != 0)
212-
throw aql_profile::aql_profile_exc_val<uint32_t>(
213-
"ThreadTraceConfig: Mask should have bits [4,6] set to Zero, Mask", p->value);
214-
trace_config.deprecated_mask = p->value;
215-
trace_config.targetCu = p->value & 0xF;
216-
break;
217-
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
218-
if ((p->value & 0xFF000000) != 0)
219-
throw aql_profile::aql_profile_exc_val<uint32_t>(
220-
"ThreadTraceConfig: TokenMask should have bits [31:25] set to Zero, TokenMask",
221-
p->value);
222-
trace_config.deprecated_tokenMask = p->value;
223-
break;
224-
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
225-
trace_config.deprecated_tokenMask2 = p->value;
226-
break;
227-
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SAMPLE_RATE:
228-
trace_config.sampleRate = p->value;
229-
break;
230-
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_K_CONCURRENT:
231-
trace_config.concurrent = p->value;
232-
break;
233207
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION:
234208
trace_config.simd_sel = p->value & 0xF;
235209
break;
236210
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_OCCUPANCY_MODE:
237211
trace_config.occupancy_mode = p->value ? 1 : 0;
238212
break;
239213
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE:
240-
buffer_size = p->value;
214+
buffer_size = (buffer_size & ~static_cast<uint64_t>(UINT32_MAX)) | p->value;
215+
break;
216+
case AQLPROFILE_ATT_PARAMETER_NAME_BUFFER_SIZE_HIGH:
217+
buffer_size = (buffer_size & UINT32_MAX) | (uint64_t(p->value) << 32); // High 32 bits
241218
break;
242219
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_MASK:
243220
trace_config.perfMASK = p->value;

src/pm4/sqtt_builder.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,18 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
222222
const uint64_t se_number_xcc = se_number_total / GetXCCNumber();
223223
uint64_t base_addr = reinterpret_cast<uint64_t>(config->data_buffer_ptr);
224224
const uint64_t base_step = GetBaseStep(config->data_buffer_size, config->se_mask);
225+
226+
// Old v1 API calls this with buffer == 0 first
227+
if (config->data_buffer_size > 0)
228+
{
229+
// Max 16GB for gfx{9, 10, 12} and 512MB for gfx11. Min of 32 pages per SE.
230+
if (base_step >= (1ul<<34) || (Primitives::GFXIP_LEVEL == 11 && base_step >= (1ul<<29)))
231+
throw std::runtime_error("SQTT Buffer size too high");
232+
else if (base_step < (1ul<<17))
233+
throw std::runtime_error("SQTT Buffer size too low");
234+
}
235+
236+
225237
config->capacity_per_se = base_step;
226238
config->capacity_per_disabled_se = 1 << Primitives::TT_BUFF_ALIGN_SHIFT;
227239

@@ -331,7 +343,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
331343

332344
const unsigned baddr_lo = Low32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
333345
const unsigned baddr_hi = High32(base_addr >> Primitives::TT_BUFF_ALIGN_SHIFT);
334-
const uint32_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
346+
const uint64_t sqtt_size = bMaskedIn ? base_step : config->capacity_per_disabled_se;
335347
const uint32_t ctrl_val = Primitives::sqtt_ctrl_value(true);
336348

337349
Select_GRBM_SE_SH0(cmd_buffer, index);
@@ -432,7 +444,7 @@ class GpuSqttBuilder : public SqttBuilder, protected Primitives {
432444
// Initialize cache flush request object
433445
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->control_buffer_ptr),
434446
config->control_buffer_size);
435-
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_size),
447+
builder.BuildCacheFlushPacket(cmd_buffer, size_t(config->data_buffer_ptr),
436448
config->data_buffer_size);
437449
// Program zero size of thread trace buffer
438450
builder.BuildWriteUConfigRegPacket(cmd_buffer, Primitives::SQ_THREAD_TRACE_SIZE_ADDR,

src/pm4/trace_config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ struct TraceConfig {
5454
void* control_buffer_ptr = nullptr;
5555
uint32_t control_buffer_size = 0;
5656
void* data_buffer_ptr = nullptr;
57-
uint32_t data_buffer_size = 0;
57+
uint64_t data_buffer_size = 0;
5858

5959
// concurrent kernels mode
6060
uint32_t concurrent = 0;

0 commit comments

Comments
 (0)