Skip to content

Commit 2a64fa5

Browse files
hkasivis authored and dayatsin-amd committed
libhsakmt: gfx950: Add option to enable HIGH_PRECISION
Environment variable HSA_HIGH_PRECISION_MODE can be used to control MFMA precision.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Change-Id: Ib78dd9dd8867025e090a3cca96ab6db4f65dea12
1 parent 3be9c49 commit 2a64fa5

File tree

2 files changed

+15
-5
lines changed

2 files changed

+15
-5
lines changed

libhsakmt/include/hsakmt/linux/kfd_ioctl.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ struct kfd_ioctl_set_memory_policy_args {
163163
__u32 gpu_id; /* to KFD */
164164
__u32 default_policy; /* to KFD */
165165
__u32 alternate_policy; /* to KFD */
166-
__u32 pad;
166+
__u32 misc_process_flag; /* to KFD */
167167
};
168168

169169
/*
@@ -363,6 +363,9 @@ enum kfd_dbg_trap_exception_code {
363363
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
364364
(!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
365365

366+
/* Misc. per process flags */
367+
#define ENABLE_MFMA_HIGH_PRECISION (1 << 0)
368+
366369
enum kfd_dbg_runtime_state {
367370
DEBUG_RUNTIME_STATE_DISABLED = 0,
368371
DEBUG_RUNTIME_STATE_ENABLED = 1,

libhsakmt/src/fmm.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2004,7 +2004,8 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address)
20042004
}
20052005

20062006
static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_policy,
2007-
uintptr_t alt_base, uint64_t alt_size)
2007+
uintptr_t alt_base, uint64_t alt_size,
2008+
uint32_t misc_process_flags)
20082009
{
20092010
struct kfd_ioctl_set_memory_policy_args args = {0};
20102011

@@ -2013,6 +2014,7 @@ static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_po
20132014
args.alternate_policy = alt_policy;
20142015
args.alternate_aperture_base = alt_base;
20152016
args.alternate_aperture_size = alt_size;
2017+
args.misc_process_flag = misc_process_flags;
20162018

20172019
return hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
20182020
}
@@ -2517,10 +2519,10 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
25172519
uint32_t num_of_sysfs_nodes;
25182520
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
25192521
char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr, *reserveSvm;
2520-
char *maxVaAlignStr;
2522+
char *maxVaAlignStr, *mfmaHighPrecisionModeStr;
25212523
unsigned int guardPages = 1;
25222524
uint64_t svm_base = 0, svm_limit = 0;
2523-
uint32_t svm_alignment = 0;
2525+
uint32_t svm_alignment = 0, mfma_high_precision_mode = 0;
25242526

25252527
/* If HSA_DISABLE_CACHE is set to a non-0 value, disable caching */
25262528
disableCache = getenv("HSA_DISABLE_CACHE");
@@ -2549,6 +2551,9 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
25492551
if (!guardPagesStr || sscanf(guardPagesStr, "%u", &guardPages) != 1)
25502552
guardPages = 1;
25512553

2554+
mfmaHighPrecisionModeStr = getenv("HSA_HIGH_PRECISION_MODE");
2555+
mfma_high_precision_mode = (mfmaHighPrecisionModeStr &&
2556+
strcmp(mfmaHighPrecisionModeStr, "0"));
25522557
/* Sets the max VA alignment order size during mapping. By default the order
25532558
* size is set to 18(1G) for GFX950 to reduce TLB hits. If any non-gfx950
25542559
* ASIC is found in the system, set back to 9(2MB).
@@ -2800,7 +2805,9 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
28002805
KFD_IOC_CACHE_POLICY_COHERENT :
28012806
KFD_IOC_CACHE_POLICY_NONCOHERENT,
28022807
KFD_IOC_CACHE_POLICY_COHERENT,
2803-
alt_base, alt_size);
2808+
alt_base, alt_size,
2809+
hsakmt_get_gfxv_by_node_id(i) == GFX_VERSION_GFX950 ?
2810+
mfma_high_precision_mode : 0);
28042811
if (err) {
28052812
pr_err("Failed to set mem policy for GPU [0x%x]\n",
28062813
process_apertures[i].gpu_id);

0 commit comments

Comments
 (0)