Skip to content

Commit a765dd7

Browse files
committed
rocr: add specific flag for blit kernel object
so that the AQL-to-PM4 conversion can verify the validity of the kernel object.
Signed-off-by: Flora Cui <flora.cui@amd.com>
1 parent 6c87f5b commit a765dd7

File tree

4 files changed

+10
-2
lines changed

4 files changed

+10
-2
lines changed

libhsakmt/include/hsakmt/hsakmttypes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,8 @@ typedef struct _HsaMemFlags
586586
unsigned int GTTAccess: 1; // default = 0; If 1: The caller indicates this memory will be mapped to GART for MES
587587
// KFD will allocate GTT memory with the Preferred_node set as gpu_id for GART mapping
588588
unsigned int Contiguous: 1; // Allocate contiguous VRAM
589-
unsigned int Reserved: 9;
589+
unsigned int ExecuteBlit: 1; // default = 0; If 1: The caller indicates that the memory is for blit kernel object.
590+
unsigned int Reserved: 8;
590591

591592
} ui32;
592593
HSAuint32 Value;

runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region,
241241
? 1
242242
: kmt_alloc_flags.ui32.Uncached);
243243

244+
kmt_alloc_flags.ui32.ExecuteBlit =
245+
!!(alloc_flags & core::MemoryRegion::AllocateExecutableBlitKernelObject);
246+
244247
if (m_region.IsLocalMemory()) {
245248
// Allocate physically contiguous memory. AllocateKfdMemory function call
246249
// will fail if this flag is not supported in KFD.

runtime/hsa-runtime/core/inc/memory_region.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> {
106106
AllocateGTTAccess = (1 << 9),
107107
AllocateContiguous = (1 << 10), // Physically contiguous memory
108108
AllocateUncached = (1 << 11), // Uncached memory
109+
// this flag is ignored by Thunk and only used for emulator/dxg to track code-object
110+
// allocations in AQL to PM4 conversion.
111+
AllocateExecutableBlitKernelObject = (1 << 12),
109112
};
110113

111114
typedef uint32_t AllocateFlags;

runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,8 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
378378
(assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0);
379379
code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000);
380380

381-
code_buf = system_allocator()(code_buf_size, 0x1000, core::MemoryRegion::AllocateExecutable);
381+
code_buf = system_allocator()(code_buf_size, 0x1000,
382+
core::MemoryRegion::AllocateExecutable | core::MemoryRegion::AllocateExecutableBlitKernelObject);
382383
assert(code_buf != NULL && "Code buffer allocation failed");
383384

384385
memset(code_buf, 0, code_buf_size);

0 commit comments

Comments
 (0)