@@ -957,7 +957,7 @@ static manageable_aperture_t *fmm_get_aperture(HsaApertureInfo info)
957957 }
958958}
959959
960- static manageable_aperture_t * fmm_is_scratch_aperture (const void * address )
960+ static gpu_mem_t * fmm_is_scratch_aperture (const void * address )
961961{
962962 uint32_t i ;
963963
@@ -967,7 +967,7 @@ static manageable_aperture_t *fmm_is_scratch_aperture(const void *address)
967967
968968 if ((address >= gpu_mem [i ].scratch_physical .base ) &&
969969 (address <= gpu_mem [i ].scratch_physical .limit ))
970- return & gpu_mem [i ]. scratch_physical ;
970+ return & gpu_mem [i ];
971971
972972 }
973973 return NULL ;
@@ -979,6 +979,7 @@ static manageable_aperture_t *fmm_find_aperture(const void *address,
979979 manageable_aperture_t * aperture = NULL ;
980980 uint32_t i ;
981981 HsaApertureInfo _info = { .type = HSA_APERTURE_UNSUPPORTED , .idx = 0 };
982+ gpu_mem_t * gpu_mem_ptr = NULL ;
982983
983984 if ((address >= mem_handle_aperture .base ) &&
984985 (address <= mem_handle_aperture .limit )){
@@ -990,8 +991,10 @@ static manageable_aperture_t *fmm_find_aperture(const void *address,
990991 if (address >= svm .dgpu_aperture -> base &&
991992 address <= svm .dgpu_aperture -> limit ) {
992993
993- aperture = fmm_is_scratch_aperture (address );
994- if (!aperture ) {
994+ gpu_mem_ptr = fmm_is_scratch_aperture (address );
995+ if (gpu_mem_ptr ) {
996+ aperture = & gpu_mem_ptr -> scratch_physical ;
997+ } else {
995998 aperture = svm .dgpu_aperture ;
996999 _info .type = HSA_APERTURE_DGPU ;
9971000 }
@@ -2030,16 +2033,14 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address)
20302033{
20312034 manageable_aperture_t * aperture = NULL ;
20322035 vm_object_t * object = NULL ;
2033- uint32_t i ;
2036+ gpu_mem_t * gpu_mem_ptr = NULL ;
20342037
20352038 /* Special handling for scratch memory */
2036- for (i = 0 ; i < gpu_mem_count ; i ++ )
2037- if (gpu_mem [i ].gpu_id != NON_VALID_GPU_ID &&
2038- address >= gpu_mem [i ].scratch_physical .base &&
2039- address <= gpu_mem [i ].scratch_physical .limit ) {
2040- fmm_release_scratch (gpu_mem [i ].gpu_id );
2041- return HSAKMT_STATUS_SUCCESS ;
2042- }
2039+ gpu_mem_ptr = fmm_is_scratch_aperture (address );
2040+ if (gpu_mem_ptr ) {
2041+ fmm_release_scratch (gpu_mem_ptr -> gpu_id );
2042+ return HSAKMT_STATUS_SUCCESS ;
2043+ }
20432044
20442045 object = vm_find_object (address , 0 , & aperture );
20452046
@@ -2061,9 +2062,6 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address)
20612062
20622063 if (__fmm_release (object , aperture ))
20632064 return HSAKMT_STATUS_ERROR ;
2064-
2065- if (!aperture -> is_cpu_accessible )
2066- hsakmt_fmm_print (gpu_mem [i ].gpu_id );
20672065 }
20682066
20692067 return HSAKMT_STATUS_SUCCESS ;
@@ -3286,17 +3284,16 @@ HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuv
32863284{
32873285 manageable_aperture_t * aperture = NULL ;
32883286 vm_object_t * object ;
3289- uint32_t i ;
32903287 HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS ;
3288+ gpu_mem_t * gpu_mem_ptr = NULL ;
32913289
32923290 /* Special handling for scratch memory */
3293- for (i = 0 ; i < gpu_mem_count ; i ++ )
3294- if (gpu_mem [i ].gpu_id != NON_VALID_GPU_ID &&
3295- address >= gpu_mem [i ].scratch_physical .base &&
3296- address <= gpu_mem [i ].scratch_physical .limit )
3297- return _fmm_map_to_gpu_scratch (gpu_mem [i ].gpu_id ,
3298- & gpu_mem [i ].scratch_physical ,
3291+ gpu_mem_ptr = fmm_is_scratch_aperture (address );
3292+ if (gpu_mem_ptr ) {
3293+ return _fmm_map_to_gpu_scratch (gpu_mem_ptr -> gpu_id ,
3294+ & gpu_mem_ptr -> scratch_physical ,
32993295 address , size );
3296+ }
33003297
33013298 object = vm_find_object (address , size , & aperture );
33023299 if (!object && !hsakmt_is_svm_api_supported ) {
@@ -3497,17 +3494,16 @@ int hsakmt_fmm_unmap_from_gpu(void *address)
34973494{
34983495 manageable_aperture_t * aperture ;
34993496 vm_object_t * object ;
3500- uint32_t i ;
35013497 int ret ;
3498+ gpu_mem_t * gpu_mem_ptr = NULL ;
35023499
35033500 /* Special handling for scratch memory */
3504- for (i = 0 ; i < gpu_mem_count ; i ++ )
3505- if (gpu_mem [i ].gpu_id != NON_VALID_GPU_ID &&
3506- address >= gpu_mem [i ].scratch_physical .base &&
3507- address <= gpu_mem [i ].scratch_physical .limit )
3508- return _fmm_unmap_from_gpu_scratch (gpu_mem [i ].gpu_id ,
3509- & gpu_mem [i ].scratch_physical ,
3501+ gpu_mem_ptr = fmm_is_scratch_aperture (address );
3502+ if (gpu_mem_ptr ) {
3503+ return _fmm_unmap_from_gpu_scratch (gpu_mem_ptr -> gpu_id ,
3504+ & gpu_mem_ptr -> scratch_physical ,
35103505 address );
3506+ }
35113507
35123508 object = vm_find_object (address , 0 , & aperture );
35133509 if (!object )
0 commit comments