|
12 | 12 | #endif |
13 | 13 | #:enddef |
14 | 14 |
|
#! PREFER_GPU(*args)
#!
#! For every variable named in `args`, emits CUDA Fortran calls that first clear
#! any CPU preferred-location advice on the variable's memory range and then
#! advise the CUDA runtime that the range should preferably live on the GPU the
#! calling process is currently using.
#!
#! The macro expands to nothing unless both MFC_SIMULATION and
#! __NVCOMPILER_GPU_UNIFIED_MEM are defined at C-preprocessing time. At run time
#! the hints are only issued when the environment variable
#! NVIDIA_MANUAL_GPU_HINTS is exactly "1"; any other value (or no value) leaves
#! the memory untouched. Failures are reported on standard output and do not
#! abort the run.
#:def PREFER_GPU(*args)
#ifdef MFC_SIMULATION
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
    block
        use cudafor
        ! NOTE(review): presumably keeps the standard intrinsics visible despite
        ! same-named generics exported by cudafor -- confirm.
        intrinsic :: minval, maxval, sum
        integer :: istat
        integer :: prefer_gpu_mode
        integer :: prefer_gpu_device
        character(len=10) :: prefer_gpu_mode_str

        ! Opt-in switch: only the literal value "1" enables the hints.
        call get_environment_variable("NVIDIA_MANUAL_GPU_HINTS", prefer_gpu_mode_str)
        if (trim(prefer_gpu_mode_str) == "1") then
            prefer_gpu_mode = 1
        else
            prefer_gpu_mode = 0
        end if

        if (prefer_gpu_mode == 1) then
            ! Target the device this process is actually bound to instead of
            ! assuming ordinal 0; fall back to 0 if the query itself fails.
            istat = cudaGetDevice(prefer_gpu_device)
            if (istat /= cudaSuccess) then
                write(*,"('Error code: ',I0, ': ')") istat
                write(*,*) cudaGetErrorString(istat)
                prefer_gpu_device = 0
            end if

            #:for arg in args
            ! Drop any earlier "prefer CPU" advice on this range.
            istat = cudaMemAdvise(c_devloc(${arg}$), SIZEOF(${arg}$), &
                                  cudaMemAdviseUnSetPreferredLocation, cudaCpuDeviceId)
            if (istat /= cudaSuccess) then
                write(*,"('Error code: ',I0, ': ')") istat
                write(*,*) cudaGetErrorString(istat)
            end if
            ! Advise the runtime to keep the range on the active GPU.
            istat = cudaMemAdvise(c_devloc(${arg}$), SIZEOF(${arg}$), &
                                  cudaMemAdviseSetPreferredLocation, prefer_gpu_device)
            if (istat /= cudaSuccess) then
                write(*,"('Error code: ',I0, ': ')") istat
                write(*,*) cudaGetErrorString(istat)
            end if
            #:endfor
        end if
    end block
#endif
#endif
#:enddef
| 56 | + |
| 57 | + |
#! PARSE(s)
#!
#! Expands to `s` with everything from the opening parenthesis of its last
#! balanced parenthesised group onwards removed, e.g. "q(0:m, 0:n)" -> "q" and
#! "a(1)%b(0:n)" -> "a(1)%b". A string without ")" (or without a matching "(")
#! is emitted unchanged.
#:def PARSE(s)
#:set last_close = s.rfind(')')
#:set cut_at = next((k for k in range(last_close, -1, -1) if s[k] == '(' and s.count('(', k, last_close + 1) == s.count(')', k, last_close + 1)), None)
${s if cut_at is None else s[:cut_at]}$
#:enddef
| 61 | + |
#! ALLOCATE(*args)
#!
#! Logs the call, allocates every item in `args` with a single Fortran
#! `allocate` statement and forwards the same list to GPU_ENTER_DATA(create=...).
#!
#! When both MFC_SIMULATION and __NVCOMPILER_GPU_UNIFIED_MEM are defined, the
#! environment variable NVIDIA_ALLOC_MODE additionally selects placement hints
#! for the new allocations:
#!   "1" -> prefetch each allocation to the CPU
#!   "2" -> advise a CPU preferred location
#!   "3" -> both of the above
#!   anything else (including "0" or no value) -> no hints
#! CUDA failures are printed to standard output and otherwise ignored.
#:def ALLOCATE(*args)
    @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
    #:set alloc_list = ', '.join(args)
    allocate (${alloc_list}$)
    $:GPU_ENTER_DATA(create=('[' + alloc_list + ']'))


#ifdef MFC_SIMULATION
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
    block
        use cudafor
        intrinsic :: minval, maxval, sum
        integer :: cuda_status, prefetch_stream
        logical :: cpu_first_touch, cpu_preferred
        character(len=10) :: mode_text

        ! Decode the requested mode into two independent switches.
        call get_environment_variable("NVIDIA_ALLOC_MODE", mode_text)
        select case (trim(mode_text))
        case ("1")
            cpu_first_touch = .true.
            cpu_preferred = .false.
        case ("2")
            cpu_first_touch = .false.
            cpu_preferred = .true.
        case ("3")
            cpu_first_touch = .true.
            cpu_preferred = .true.
        case default
            ! "0", no value, or anything unrecognised: leave placement alone.
            cpu_first_touch = .false.
            cpu_preferred = .false.
        end select

        prefetch_stream = 0

        ! Prefetch the freshly allocated ranges to the CPU.
        if (cpu_first_touch) then
            #:for arg in args
            cuda_status = cudaMemPrefetchAsync(c_devloc(@{PARSE(${arg}$)}@), &
                                               SIZEOF(@{PARSE(${arg}$)}@), &
                                               cudaCpuDeviceId, prefetch_stream)
            if (cuda_status /= cudaSuccess) then
                write(*,"('Error code: ',I0, ': ')") cuda_status
                write(*,*) cudaGetErrorString(cuda_status)
            end if
            #:endfor
        end if

        ! Advise the runtime that the CPU is the preferred location.
        if (cpu_preferred) then
            #:for arg in args
            cuda_status = cudaMemAdvise(c_devloc(@{PARSE(${arg}$)}@), &
                                        SIZEOF(@{PARSE(${arg}$)}@), &
                                        cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId)
            if (cuda_status /= cudaSuccess) then
                write(*,"('Error code: ',I0, ': ')") cuda_status
                write(*,*) cudaGetErrorString(cuda_status)
            end if
            #:endfor
        end if

    end block
#endif
#endif

#:enddef ALLOCATE
21 | 123 |
|
22 | 124 | #:def DEALLOCATE(*args) |
|
0 commit comments