8 | 8 |
9 | 9 | #include "flang/Runtime/CUDA/memory.h" |
10 | 10 | #include "../terminator.h" |
| 11 | +#include "flang/Runtime/CUDA/common.h" |
11 | 12 |
12 | 13 | #include "cuda_runtime.h" |
13 | 14 |
14 | 15 | namespace Fortran::runtime::cuda { |
15 | 16 | extern "C" { |
16 | 17 |
| 18 | +void *RTDEF(CUFMemAlloc)( |
| 19 | + std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { |
| 20 | + void *ptr = nullptr; |
| 21 | + if (bytes != 0) { |
| 22 | + if (type == kMemTypeDevice) { |
| 23 | + CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); |
| 24 | + } else if (type == kMemTypeManaged || type == kMemTypeUnified) { |
| 25 | + CUDA_REPORT_IF_ERROR( |
| 26 | + cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); |
| 27 | + } else if (type == kMemTypePinned) { |
| 28 | + CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); |
| 29 | + } else { |
| 30 | + Terminator terminator{sourceFile, sourceLine}; |
| 31 | + terminator.Crash("unsupported memory type"); |
| 32 | + } |
| 33 | + } |
| 34 | + return ptr; |
| 35 | +} |
| 36 | + |
| 37 | +void RTDEF(CUFMemFree)( |
| 38 | + void *ptr, unsigned type, const char *sourceFile, int sourceLine) { |
| 39 | + if (!ptr) |
| 40 | + return; |
| 41 | + if (type == kMemTypeDevice || type == kMemTypeManaged || |
| 42 | + type == kMemTypeUnified) { |
| 43 | + CUDA_REPORT_IF_ERROR(cudaFree(ptr)); |
| 44 | + } else if (type == kMemTypePinned) { |
| 45 | + CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); |
| 46 | + } else { |
| 47 | + Terminator terminator{sourceFile, sourceLine}; |
| 48 | + terminator.Crash("unsupported memory type"); |
| 49 | + } |
| 50 | +} |
| 51 | + |
17 | 52 | void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, |
18 | 53 | const char *sourceFile, int sourceLine) { |
19 | 54 | Terminator terminator{sourceFile, sourceLine}; |
20 | 55 | terminator.Crash("not yet implemented: CUDA data transfer from a scalar " |
21 | 56 | "value to a descriptor"); |
22 | 57 | } |
23 | 58 |
| 59 | +void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, |
| 60 | + unsigned mode, const char *sourceFile, int sourceLine) { |
| 61 | + cudaMemcpyKind kind; |
| 62 | + if (mode == kHostToDevice) { |
| 63 | + kind = cudaMemcpyHostToDevice; |
| 64 | + } else if (mode == kDeviceToHost) { |
| 65 | + kind = cudaMemcpyDeviceToHost; |
| 66 | + } else if (mode == kDeviceToDevice) { |
| 67 | + kind = cudaMemcpyDeviceToDevice; |
| 68 | + } else { |
| 69 | + Terminator terminator{sourceFile, sourceLine}; |
| 70 | + terminator.Crash("host to host copy not supported"); |
| 71 | + } |
| 72 | + // TODO: Use cudaMemcpyAsync when we have support for stream. |
| 73 | + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); |
| 74 | +} |
| 75 | + |
24 | 76 | void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, |
25 | 77 | std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { |
26 | 78 | Terminator terminator{sourceFile, sourceLine}; |
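For orientation only, and not part of the diff above: a minimal host-side sketch of how the new entry points compose -- allocate device memory, copy a host buffer over and back, then free it. It assumes the declarations from flang/Runtime/CUDA/memory.h, the kMemType*/k*To* constants pulled in via flang/Runtime/CUDA/common.h, and the RTNAME entry-point macro used elsewhere in the flang runtime; treat the exact spellings and namespace as assumptions rather than verified API.

// Illustrative sketch only -- NOT part of this patch. Assumes the headers,
// constants, and RTNAME macro named above.
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/memory.h"

#include <cstddef>
#include <vector>

using namespace Fortran::runtime::cuda;

void roundTrip() {
  constexpr std::size_t n{1024};
  std::vector<float> host(n, 1.0f);
  std::size_t bytes{n * sizeof(float)};

  // Device allocation; a zero-byte request would return nullptr per CUFMemAlloc.
  void *dev{RTNAME(CUFMemAlloc)(bytes, kMemTypeDevice, __FILE__, __LINE__)};

  // Host -> device, then device -> host; both go through the synchronous
  // cudaMemcpy path in CUFDataTransferPtrPtr.
  RTNAME(CUFDataTransferPtrPtr)(
      dev, host.data(), bytes, kHostToDevice, __FILE__, __LINE__);
  RTNAME(CUFDataTransferPtrPtr)(
      host.data(), dev, bytes, kDeviceToHost, __FILE__, __LINE__);

  // Free with the same memory type that was used for the allocation.
  RTNAME(CUFMemFree)(dev, kMemTypeDevice, __FILE__, __LINE__);
}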