Skip to content

Commit 72061a9

Browse files
libhsakmt: modify is scratch memory helper
- Refactored scratch memory handling by introducing fmm_is_scratch_aperture to replace repeated for-loops.
- Simplified code paths in hsakmt_fmm_release, hsakmt_fmm_map_to_gpu, and hsakmt_fmm_unmap_from_gpu by using the new helper.

Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>
1 parent a765dd7 commit 72061a9

File tree

1 file changed

+25
-29
lines changed

1 file changed

+25
-29
lines changed

libhsakmt/src/fmm.c

Lines changed: 25 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -957,7 +957,7 @@ static manageable_aperture_t *fmm_get_aperture(HsaApertureInfo info)
957957
}
958958
}
959959

960-
static manageable_aperture_t *fmm_is_scratch_aperture(const void *address)
960+
static gpu_mem_t *fmm_is_scratch_aperture(const void *address)
961961
{
962962
uint32_t i;
963963

@@ -967,7 +967,7 @@ static manageable_aperture_t *fmm_is_scratch_aperture(const void *address)
967967

968968
if ((address >= gpu_mem[i].scratch_physical.base) &&
969969
(address <= gpu_mem[i].scratch_physical.limit))
970-
return &gpu_mem[i].scratch_physical;
970+
return &gpu_mem[i];
971971

972972
}
973973
return NULL;
@@ -979,6 +979,7 @@ static manageable_aperture_t *fmm_find_aperture(const void *address,
979979
manageable_aperture_t *aperture = NULL;
980980
uint32_t i;
981981
HsaApertureInfo _info = { .type = HSA_APERTURE_UNSUPPORTED, .idx = 0};
982+
gpu_mem_t *gpu_mem_ptr = NULL;
982983

983984
if ((address >= mem_handle_aperture.base) &&
984985
(address <= mem_handle_aperture.limit)){
@@ -990,8 +991,10 @@ static manageable_aperture_t *fmm_find_aperture(const void *address,
990991
if (address >= svm.dgpu_aperture->base &&
991992
address <= svm.dgpu_aperture->limit) {
992993

993-
aperture = fmm_is_scratch_aperture(address);
994-
if (!aperture) {
994+
gpu_mem_ptr = fmm_is_scratch_aperture(address);
995+
if (gpu_mem_ptr) {
996+
aperture = &gpu_mem_ptr->scratch_physical;
997+
} else {
995998
aperture = svm.dgpu_aperture;
996999
_info.type = HSA_APERTURE_DGPU;
9971000
}
@@ -2030,16 +2033,14 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address)
20302033
{
20312034
manageable_aperture_t *aperture = NULL;
20322035
vm_object_t *object = NULL;
2033-
uint32_t i;
2036+
gpu_mem_t *gpu_mem_ptr = NULL;
20342037

20352038
/* Special handling for scratch memory */
2036-
for (i = 0; i < gpu_mem_count; i++)
2037-
if (gpu_mem[i].gpu_id != NON_VALID_GPU_ID &&
2038-
address >= gpu_mem[i].scratch_physical.base &&
2039-
address <= gpu_mem[i].scratch_physical.limit) {
2040-
fmm_release_scratch(gpu_mem[i].gpu_id);
2041-
return HSAKMT_STATUS_SUCCESS;
2042-
}
2039+
gpu_mem_ptr = fmm_is_scratch_aperture(address);
2040+
if (gpu_mem_ptr) {
2041+
fmm_release_scratch(gpu_mem_ptr->gpu_id);
2042+
return HSAKMT_STATUS_SUCCESS;
2043+
}
20432044

20442045
object = vm_find_object(address, 0, &aperture);
20452046

@@ -2061,9 +2062,6 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address)
20612062

20622063
if (__fmm_release(object, aperture))
20632064
return HSAKMT_STATUS_ERROR;
2064-
2065-
if (!aperture->is_cpu_accessible)
2066-
hsakmt_fmm_print(gpu_mem[i].gpu_id);
20672065
}
20682066

20692067
return HSAKMT_STATUS_SUCCESS;
@@ -3286,17 +3284,16 @@ HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuv
32863284
{
32873285
manageable_aperture_t *aperture = NULL;
32883286
vm_object_t *object;
3289-
uint32_t i;
32903287
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
3288+
gpu_mem_t *gpu_mem_ptr = NULL;
32913289

32923290
/* Special handling for scratch memory */
3293-
for (i = 0; i < gpu_mem_count; i++)
3294-
if (gpu_mem[i].gpu_id != NON_VALID_GPU_ID &&
3295-
address >= gpu_mem[i].scratch_physical.base &&
3296-
address <= gpu_mem[i].scratch_physical.limit)
3297-
return _fmm_map_to_gpu_scratch(gpu_mem[i].gpu_id,
3298-
&gpu_mem[i].scratch_physical,
3291+
gpu_mem_ptr = fmm_is_scratch_aperture(address);
3292+
if (gpu_mem_ptr) {
3293+
return _fmm_map_to_gpu_scratch(gpu_mem_ptr->gpu_id,
3294+
&gpu_mem_ptr->scratch_physical,
32993295
address, size);
3296+
}
33003297

33013298
object = vm_find_object(address, size, &aperture);
33023299
if (!object && !hsakmt_is_svm_api_supported) {
@@ -3497,17 +3494,16 @@ int hsakmt_fmm_unmap_from_gpu(void *address)
34973494
{
34983495
manageable_aperture_t *aperture;
34993496
vm_object_t *object;
3500-
uint32_t i;
35013497
int ret;
3498+
gpu_mem_t *gpu_mem_ptr = NULL;
35023499

35033500
/* Special handling for scratch memory */
3504-
for (i = 0; i < gpu_mem_count; i++)
3505-
if (gpu_mem[i].gpu_id != NON_VALID_GPU_ID &&
3506-
address >= gpu_mem[i].scratch_physical.base &&
3507-
address <= gpu_mem[i].scratch_physical.limit)
3508-
return _fmm_unmap_from_gpu_scratch(gpu_mem[i].gpu_id,
3509-
&gpu_mem[i].scratch_physical,
3501+
gpu_mem_ptr = fmm_is_scratch_aperture(address);
3502+
if (gpu_mem_ptr) {
3503+
return _fmm_unmap_from_gpu_scratch(gpu_mem_ptr->gpu_id,
3504+
&gpu_mem_ptr->scratch_physical,
35103505
address);
3506+
}
35113507

35123508
object = vm_find_object(address, 0, &aperture);
35133509
if (!object)

0 commit comments

Comments
 (0)