.github/workflows/pr_push.yml: 59 changes (0 additions, 59 deletions)
@@ -16,64 +16,5 @@ permissions:
  contents: read

jobs:
  CodeChecks:
    uses: ./.github/workflows/reusable_checks.yml
  DocsBuild:
    uses: ./.github/workflows/reusable_docs_build.yml
  FastBuild:
    name: Fast builds
    needs: [CodeChecks, DocsBuild]
    uses: ./.github/workflows/reusable_fast.yml
  Build:
    name: Basic builds
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_basic.yml
  DevDax:
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_dax.yml
  Sanitizers:
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_sanitizers.yml
  Qemu:
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_qemu.yml
  Benchmarks:
    needs: [Build]
    uses: ./.github/workflows/reusable_benchmarks.yml
  ProxyLib:
    needs: [Build]
    uses: ./.github/workflows/reusable_proxy_lib.yml
  GPU:
    needs: [Build]
    uses: ./.github/workflows/reusable_gpu.yml
  Valgrind:
    needs: [Build]
    uses: ./.github/workflows/reusable_valgrind.yml
  MultiNuma:
    needs: [Build]
    uses: ./.github/workflows/reusable_multi_numa.yml
  Coverage:
    # total coverage (on upstream only)
    if: github.repository == 'oneapi-src/unified-memory-framework'
    needs: [Build, DevDax, GPU, MultiNuma, Qemu, ProxyLib]
    uses: ./.github/workflows/reusable_coverage.yml
    secrets: inherit
    with:
      trigger: "${{github.event_name}}"
  Coverage_partial:
    # partial coverage (on forks)
    if: github.repository != 'oneapi-src/unified-memory-framework'
    needs: [Build, Qemu, ProxyLib]
    uses: ./.github/workflows/reusable_coverage.yml
  CodeQL:
    needs: [Build]
    permissions:
      contents: read
      security-events: write
    uses: ./.github/workflows/reusable_codeql.yml
  Trivy:
    needs: [Build]
    permissions:
      contents: read
      security-events: write
    uses: ./.github/workflows/reusable_trivy.yml
.github/workflows/reusable_gpu.yml: 141 changes (3 additions, 138 deletions)
@@ -15,117 +15,6 @@ env:
  COVERAGE_DIR : "${{github.workspace}}/coverage"

jobs:
  gpu-Level-Zero:
    name: Level-Zero
    env:
      VCPKG_PATH: "${{github.workspace}}/../../../../vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/../../../../vcpkg/packages/tbb_x64-windows;${{github.workspace}}/../../../../vcpkg/packages/jemalloc_x64-windows"
      COVERAGE_NAME : "exports-coverage-gpu"
    # run only on upstream; forks will not have the HW
    if: github.repository == 'oneapi-src/unified-memory-framework'
    strategy:
      matrix:
        shared_library: ['ON', 'OFF']
        os: ['Ubuntu', 'Windows']
        build_type: ['Debug', 'Release']
        include:
          - os: 'Ubuntu'
            compiler: {c: gcc, cxx: g++}
            number_of_processors: '$(nproc)'
          - os: 'Windows'
            compiler: {c: cl, cxx: cl}
            number_of_processors: '$Env:NUMBER_OF_PROCESSORS'
        exclude:
          - os: 'Windows'
            build_type: 'Debug'

    runs-on: ["DSS-LEVEL_ZERO", "DSS-${{matrix.os}}"]
    steps:
      - name: Checkout
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          fetch-depth: 0

      - name: Get information about platform
        if: matrix.os == 'Ubuntu'
        run: .github/scripts/get_system_info.sh

      - name: Configure build for Win
        if: matrix.os == 'Windows'
        run: >
          cmake
          -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}"
          -B ${{env.BUILD_DIR}}
          -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
          -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
          -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
          -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
          -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
          -DUMF_BUILD_BENCHMARKS=ON
          -DUMF_BUILD_TESTS=ON
          -DUMF_BUILD_GPU_TESTS=ON
          -DUMF_BUILD_GPU_EXAMPLES=ON
          -DUMF_FORMAT_CODE_STYLE=OFF
          -DUMF_DEVELOPER_MODE=ON
          -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
          -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
          -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
          -DUMF_BUILD_CUDA_PROVIDER=OFF
          -DUMF_TESTS_FAIL_ON_SKIP=ON

      - name: Configure build for Ubuntu
        if: matrix.os == 'Ubuntu'
        run: >
          cmake
          -B ${{env.BUILD_DIR}}
          -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
          -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
          -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
          -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
          -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
          -DUMF_BUILD_BENCHMARKS=ON
          -DUMF_BUILD_TESTS=ON
          -DUMF_BUILD_GPU_TESTS=ON
          -DUMF_BUILD_GPU_EXAMPLES=ON
          -DUMF_FORMAT_CODE_STYLE=OFF
          -DUMF_DEVELOPER_MODE=ON
          -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
          -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
          -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
          -DUMF_BUILD_CUDA_PROVIDER=OFF
          -DUMF_TESTS_FAIL_ON_SKIP=ON
          ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }}

      - name: Build UMF
        run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}}

      - name: Run tests
        working-directory: ${{env.BUILD_DIR}}
        run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test

      - name: Run examples
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir examples -C ${{matrix.build_type}}

      - name: Run benchmarks
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded

      - name: Check coverage
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        working-directory: ${{env.BUILD_DIR}}
        run: |
          export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME"
          ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME
          mkdir -p ${{env.COVERAGE_DIR}}
          mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}}

      - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        with:
          name: ${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          path: ${{env.COVERAGE_DIR}}

  gpu-CUDA:
    name: CUDA
    env:
@@ -135,7 +24,7 @@ jobs:
    strategy:
      matrix:
        shared_library: ['ON', 'OFF']
        build_type: ['Debug', 'Release']
        build_type: ['Debug']
        # TODO add windows
        os: ['Ubuntu']
        include:
@@ -179,30 +68,6 @@ jobs:
      - name: Build UMF
        run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}}

      - name: Run tests
        working-directory: ${{env.BUILD_DIR}}
        run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test

      - name: Run examples
      - name: Run CUDA tests
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir examples -C ${{matrix.build_type}}

      - name: Run benchmarks
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded

      - name: Check coverage
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        working-directory: ${{env.BUILD_DIR}}
        run: |
          export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME"
          ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME
          mkdir -p ${{env.COVERAGE_DIR}}
          mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}}

      - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        with:
          name: ${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          path: ${{env.COVERAGE_DIR}}
        run: UMF_LOG="level:debug;flush:debug;output:stderr;pid:yes" ctest -C ${{matrix.build_type}} -V -R ipc_cuda
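
Editor's note on the new test step: CUDA IPC handles can only be opened in a process other than the one that created them, which is why the ipc_cuda tests run as a producer/consumer pair (and why the UMF_LOG setting above tags log lines with pid:yes). The sketch below shows that two-process shape with the raw CUDA driver API; it is illustrative only, not the UMF test itself, and assumes Linux, one visible CUDA device, and linking with -lcuda. Error handling is trimmed for brevity.

    /* Illustrative two-process CUDA IPC round trip (not the actual UMF test).
     * Build with: cc ipc_demo.c -lcuda */
    #include <cuda.h>
    #include <stdio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void) {
        int fds[2];
        if (pipe(fds) != 0) {
            return 1;
        }

        if (fork() == 0) { /* consumer: must be a separate process */
            CUipcMemHandle handle;
            read(fds[0], &handle, sizeof(handle)); /* 64 opaque bytes */

            CUdevice dev;
            CUcontext ctx;
            cuInit(0); /* CUDA is initialized only after the fork */
            cuDeviceGet(&dev, 0);
            cuCtxCreate(&ctx, 0, dev);

            /* The same call the new provider hook wraps. */
            CUdeviceptr mapped;
            CUresult res = cuIpcOpenMemHandle(&mapped, handle,
                                              CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
            printf("consumer: cuIpcOpenMemHandle -> %d\n", (int)res);
            if (res == CUDA_SUCCESS) {
                cuIpcCloseMemHandle(mapped);
            }
            _exit(res == CUDA_SUCCESS ? 0 : 1);
        }

        /* producer */
        CUdevice dev;
        CUcontext ctx;
        cuInit(0);
        cuDeviceGet(&dev, 0);
        cuCtxCreate(&ctx, 0, dev);

        CUdeviceptr dptr;
        cuMemAlloc(&dptr, 1 << 20);

        CUipcMemHandle handle;
        cuIpcGetMemHandle(&handle, dptr);       /* export the allocation */
        write(fds[1], &handle, sizeof(handle)); /* ship it to the consumer */

        int status = 0;
        wait(&status); /* keep the allocation alive until the consumer is done */
        cuMemFree(dptr);
        return WEXITSTATUS(status);
    }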
src/provider/provider_cuda.c: 109 changes (107 additions, 2 deletions)
@@ -53,8 +53,14 @@ typedef struct cu_ops_t {
    CUresult (*cuGetErrorString)(CUresult error, const char **pStr);
    CUresult (*cuCtxGetCurrent)(CUcontext *pctx);
    CUresult (*cuCtxSetCurrent)(CUcontext ctx);
    CUresult (*cuIpcGetMemHandle)(CUipcMemHandle *pHandle, CUdeviceptr dptr);
    CUresult (*cuIpcOpenMemHandle)(CUdeviceptr *pdptr, CUipcMemHandle handle,
                                   unsigned int Flags);
    CUresult (*cuIpcCloseMemHandle)(CUdeviceptr dptr);
} cu_ops_t;

// CUipcMemHandle is a fixed-size opaque blob (CU_IPC_HANDLE_SIZE, 64 bytes),
// so it can be copied verbatim between processes.
typedef CUipcMemHandle cu_ipc_data_t;

static cu_ops_t g_cu_ops;
static UTIL_ONCE_FLAG cu_is_initialized = UTIL_ONCE_FLAG_INIT;
static bool Init_cu_global_state_failed;
@@ -123,12 +129,20 @@ static void init_cu_global_state(void) {
utils_get_symbol_addr(0, "cuCtxGetCurrent", lib_name);
*(void **)&g_cu_ops.cuCtxSetCurrent =
utils_get_symbol_addr(0, "cuCtxSetCurrent", lib_name);
*(void **)&g_cu_ops.cuIpcGetMemHandle =
utils_get_symbol_addr(0, "cuIpcGetMemHandle", lib_name);
*(void **)&g_cu_ops.cuIpcOpenMemHandle =
utils_get_symbol_addr(0, "cuIpcOpenMemHandle_v2", lib_name);
*(void **)&g_cu_ops.cuIpcCloseMemHandle =
utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name);

if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc ||
!g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged ||
!g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost ||
!g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString ||
!g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent) {
!g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent ||
!g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle ||
!g_cu_ops.cuIpcCloseMemHandle) {
LOG_ERR("Required CUDA symbols not found.");
Init_cu_global_state_failed = true;
}
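
Aside: a minimal sketch of what such a lookup does on Linux, assuming utils_get_symbol_addr() wraps dlopen()/dlsym() there (an assumption; the Windows path would differ). It shows why the code above can ask for the plain and the _v2 IPC symbols by name:

    /* Hedged sketch, not part of the PR: inspect which IPC symbols the CUDA
     * driver library actually exports. Assumes Linux and libcuda.so.1;
     * link with -ldl on older glibc. */
    #include <dlfcn.h>
    #include <stdio.h>

    int main(void) {
        void *lib = dlopen("libcuda.so.1", RTLD_LAZY | RTLD_LOCAL);
        if (lib == NULL) {
            fprintf(stderr, "dlopen: %s\n", dlerror());
            return 1;
        }
        /* The provider asks for the versioned export explicitly. */
        void *v1 = dlsym(lib, "cuIpcOpenMemHandle");
        void *v2 = dlsym(lib, "cuIpcOpenMemHandle_v2");
        printf("cuIpcOpenMemHandle    = %p\n", v1);
        printf("cuIpcOpenMemHandle_v2 = %p\n", v2);
        dlclose(lib);
        return 0;
    }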
@@ -404,6 +418,97 @@ static const char *cu_memory_provider_get_name(void *provider) {
return "CUDA";
}

static umf_result_t cu_memory_provider_get_ipc_handle_size(void *provider,
size_t *size) {
if (provider == NULL || size == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

*size = sizeof(cu_ipc_data_t);
return UMF_RESULT_SUCCESS;
}

static umf_result_t cu_memory_provider_get_ipc_handle(void *provider,
const void *ptr,
size_t size,
void *providerIpcData) {
(void)size;

if (provider == NULL || ptr == NULL || providerIpcData == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

CUresult cu_result;
cu_ipc_data_t *cu_ipc_data = (cu_ipc_data_t *)providerIpcData;

cu_result = g_cu_ops.cuIpcGetMemHandle(cu_ipc_data, (CUdeviceptr)ptr);
if (cu_result != CUDA_SUCCESS) {
LOG_ERR("cuIpcGetMemHandle() failed.");
return cu2umf_result(cu_result);
}

return UMF_RESULT_SUCCESS;
}

static umf_result_t cu_memory_provider_put_ipc_handle(void *provider,
void *providerIpcData) {
if (provider == NULL || providerIpcData == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

    // CUDA has no producer-side release call for an IPC handle; the handle
    // stays valid until the allocation is freed, so put is a validated no-op.
    return UMF_RESULT_SUCCESS;
}

static umf_result_t cu_memory_provider_open_ipc_handle(void *provider,
                                                       void *providerIpcData,
                                                       void **ptr) {
    if (provider == NULL || ptr == NULL || providerIpcData == NULL) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider;

    CUresult cu_result;
    cu_ipc_data_t *cu_ipc_data = (cu_ipc_data_t *)providerIpcData;

    // Remember current context and set the one from the provider
    CUcontext restore_ctx = NULL;
    umf_result_t umf_result = set_context(cu_provider->context, &restore_ctx);
    if (umf_result != UMF_RESULT_SUCCESS) {
        return umf_result;
    }

    cu_result = g_cu_ops.cuIpcOpenMemHandle((CUdeviceptr *)ptr, *cu_ipc_data,
                                            CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);

    if (cu_result != CUDA_SUCCESS) {
        LOG_ERR("cuIpcOpenMemHandle() failed.");
    }

    set_context(restore_ctx, &restore_ctx);

    return cu2umf_result(cu_result);
}

static umf_result_t
cu_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) {
    (void)size;

    if (provider == NULL || ptr == NULL) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    CUresult cu_result;

    cu_result = g_cu_ops.cuIpcCloseMemHandle((CUdeviceptr)ptr);
    if (cu_result != CUDA_SUCCESS) {
        LOG_ERR("cuIpcCloseMemHandle() failed.");
        return cu2umf_result(cu_result);
    }

    return UMF_RESULT_SUCCESS;
}

static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = {
    .version = UMF_VERSION_CURRENT,
    .initialize = cu_memory_provider_initialize,
@@ -420,12 +525,12 @@ static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = {
    .ext.purge_force = cu_memory_provider_purge_force,
    .ext.allocation_merge = cu_memory_provider_allocation_merge,
    .ext.allocation_split = cu_memory_provider_allocation_split,
    */
    .ipc.get_ipc_handle_size = cu_memory_provider_get_ipc_handle_size,
    .ipc.get_ipc_handle = cu_memory_provider_get_ipc_handle,
    .ipc.put_ipc_handle = cu_memory_provider_put_ipc_handle,
    .ipc.open_ipc_handle = cu_memory_provider_open_ipc_handle,
    .ipc.close_ipc_handle = cu_memory_provider_close_ipc_handle,
    */
};

umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) {
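
Taken together, the new hooks back the generic IPC entry points of umf/memory_provider.h. A hedged sketch of the producer-side flow through that public API; the provider handle is assumed to have been created with umfMemoryProviderCreate() from umfCUDAMemoryProviderOps(), the allocation with umfMemoryProviderAlloc(), and parameter details may differ across UMF versions:

    /* Hedged sketch: export a CUDA-provider allocation via the generic API. */
    #include <stdlib.h>
    #include <umf/memory_provider.h>

    static umf_result_t export_allocation(umf_memory_provider_handle_t provider,
                                          void *ptr, size_t size) {
        /* -> cu_memory_provider_get_ipc_handle_size(): sizeof(CUipcMemHandle) */
        size_t handle_size = 0;
        umf_result_t ret =
            umfMemoryProviderGetIPCHandleSize(provider, &handle_size);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }

        void *ipc_data = malloc(handle_size);
        if (ipc_data == NULL) {
            return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }

        /* -> cu_memory_provider_get_ipc_handle(), i.e. cuIpcGetMemHandle() */
        ret = umfMemoryProviderGetIPCHandle(provider, ptr, size, ipc_data);
        if (ret == UMF_RESULT_SUCCESS) {
            /* Ship ipc_data (handle_size bytes) to the consumer process here.
             * -> cu_memory_provider_put_ipc_handle(): a validated no-op */
            ret = umfMemoryProviderPutIPCHandle(provider, ipc_data);
        }

        free(ipc_data);
        return ret;
    }

The consumer side mirrors this with umfMemoryProviderOpenIPCHandle() and umfMemoryProviderCloseIPCHandle(), which land in cu_memory_provider_open_ipc_handle() and cu_memory_provider_close_ipc_handle() above.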