.github/workflows/pr_push.yml: 59 changes (0 additions, 59 deletions)
@@ -16,64 +16,5 @@ permissions:
  contents: read

jobs:
  CodeChecks:
    uses: ./.github/workflows/reusable_checks.yml
  DocsBuild:
    uses: ./.github/workflows/reusable_docs_build.yml
  FastBuild:
    name: Fast builds
    needs: [CodeChecks, DocsBuild]
    uses: ./.github/workflows/reusable_fast.yml
  Build:
    name: Basic builds
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_basic.yml
  DevDax:
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_dax.yml
  Sanitizers:
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_sanitizers.yml
  Qemu:
    needs: [FastBuild]
    uses: ./.github/workflows/reusable_qemu.yml
  Benchmarks:
    needs: [Build]
    uses: ./.github/workflows/reusable_benchmarks.yml
  ProxyLib:
    needs: [Build]
    uses: ./.github/workflows/reusable_proxy_lib.yml
  GPU:
    needs: [Build]
    uses: ./.github/workflows/reusable_gpu.yml
  Valgrind:
    needs: [Build]
    uses: ./.github/workflows/reusable_valgrind.yml
  MultiNuma:
    needs: [Build]
    uses: ./.github/workflows/reusable_multi_numa.yml
  Coverage:
    # total coverage (on upstream only)
    if: github.repository == 'oneapi-src/unified-memory-framework'
    needs: [Build, DevDax, GPU, MultiNuma, Qemu, ProxyLib]
    uses: ./.github/workflows/reusable_coverage.yml
    secrets: inherit
    with:
      trigger: "${{github.event_name}}"
  Coverage_partial:
    # partial coverage (on forks)
    if: github.repository != 'oneapi-src/unified-memory-framework'
    needs: [Build, Qemu, ProxyLib]
    uses: ./.github/workflows/reusable_coverage.yml
  CodeQL:
    needs: [Build]
    permissions:
      contents: read
      security-events: write
    uses: ./.github/workflows/reusable_codeql.yml
  Trivy:
    needs: [Build]
    permissions:
      contents: read
      security-events: write
    uses: ./.github/workflows/reusable_trivy.yml
.github/workflows/reusable_gpu.yml: 141 changes (3 additions, 138 deletions)
@@ -15,117 +15,6 @@ env:
  COVERAGE_DIR : "${{github.workspace}}/coverage"

jobs:
  gpu-Level-Zero:
    name: Level-Zero
    env:
      VCPKG_PATH: "${{github.workspace}}/../../../../vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/../../../../vcpkg/packages/tbb_x64-windows;${{github.workspace}}/../../../../vcpkg/packages/jemalloc_x64-windows"
      COVERAGE_NAME : "exports-coverage-gpu"
    # run only on upstream; forks will not have the HW
    if: github.repository == 'oneapi-src/unified-memory-framework'
    strategy:
      matrix:
        shared_library: ['ON', 'OFF']
        os: ['Ubuntu', 'Windows']
        build_type: ['Debug', 'Release']
        include:
          - os: 'Ubuntu'
            compiler: {c: gcc, cxx: g++}
            number_of_processors: '$(nproc)'
          - os: 'Windows'
            compiler: {c: cl, cxx: cl}
            number_of_processors: '$Env:NUMBER_OF_PROCESSORS'
        exclude:
          - os: 'Windows'
            build_type: 'Debug'

    runs-on: ["DSS-LEVEL_ZERO", "DSS-${{matrix.os}}"]
    steps:
      - name: Checkout
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          fetch-depth: 0

      - name: Get information about platform
        if: matrix.os == 'Ubuntu'
        run: .github/scripts/get_system_info.sh

      - name: Configure build for Win
        if: matrix.os == 'Windows'
        run: >
          cmake
          -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}"
          -B ${{env.BUILD_DIR}}
          -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
          -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
          -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
          -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
          -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
          -DUMF_BUILD_BENCHMARKS=ON
          -DUMF_BUILD_TESTS=ON
          -DUMF_BUILD_GPU_TESTS=ON
          -DUMF_BUILD_GPU_EXAMPLES=ON
          -DUMF_FORMAT_CODE_STYLE=OFF
          -DUMF_DEVELOPER_MODE=ON
          -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
          -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
          -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
          -DUMF_BUILD_CUDA_PROVIDER=OFF
          -DUMF_TESTS_FAIL_ON_SKIP=ON

      - name: Configure build for Ubuntu
        if: matrix.os == 'Ubuntu'
        run: >
          cmake
          -B ${{env.BUILD_DIR}}
          -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
          -DCMAKE_BUILD_TYPE=${{matrix.build_type}}
          -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
          -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
          -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
          -DUMF_BUILD_BENCHMARKS=ON
          -DUMF_BUILD_TESTS=ON
          -DUMF_BUILD_GPU_TESTS=ON
          -DUMF_BUILD_GPU_EXAMPLES=ON
          -DUMF_FORMAT_CODE_STYLE=OFF
          -DUMF_DEVELOPER_MODE=ON
          -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
          -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
          -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
          -DUMF_BUILD_CUDA_PROVIDER=OFF
          -DUMF_TESTS_FAIL_ON_SKIP=ON
          ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }}

      - name: Build UMF
        run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}}

      - name: Run tests
        working-directory: ${{env.BUILD_DIR}}
        run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test

      - name: Run examples
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir examples -C ${{matrix.build_type}}

      - name: Run benchmarks
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded

      - name: Check coverage
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        working-directory: ${{env.BUILD_DIR}}
        run: |
          export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME"
          ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME
          mkdir -p ${{env.COVERAGE_DIR}}
          mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}}

      - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        with:
          name: ${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          path: ${{env.COVERAGE_DIR}}

  gpu-CUDA:
    name: CUDA
    env:
@@ -135,7 +24,7 @@ jobs:
    strategy:
      matrix:
        shared_library: ['ON', 'OFF']
        build_type: ['Debug', 'Release']
        build_type: ['Debug']
        # TODO add windows
        os: ['Ubuntu']
        include:
@@ -179,30 +68,6 @@ jobs:
      - name: Build UMF
        run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}}

      - name: Run tests
        working-directory: ${{env.BUILD_DIR}}
        run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test

      - name: Run examples
      - name: Run CUDA tests
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir examples -C ${{matrix.build_type}}

      - name: Run benchmarks
        working-directory: ${{env.BUILD_DIR}}
        run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded

      - name: Check coverage
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        working-directory: ${{env.BUILD_DIR}}
        run: |
          export COVERAGE_FILE_NAME=${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          echo "COVERAGE_FILE_NAME: $COVERAGE_FILE_NAME"
          ../scripts/coverage/coverage_capture.sh $COVERAGE_FILE_NAME
          mkdir -p ${{env.COVERAGE_DIR}}
          mv ./$COVERAGE_FILE_NAME ${{env.COVERAGE_DIR}}

      - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
        if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }}
        with:
          name: ${{env.COVERAGE_NAME}}-shared-${{matrix.shared_library}}
          path: ${{env.COVERAGE_DIR}}
        run: UMF_LOG="level:debug;flush:debug;output:stderr;pid:yes" ctest -C ${{matrix.build_type}} -V -R ipc_cuda
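
Editor's note on the new test step: CUDA IPC handles can only be opened in a process other than the one that created them, which is why the ipc_cuda tests run as a producer/consumer pair (and why the UMF_LOG setting above tags log lines with pid:yes). The sketch below shows that two-process shape with the raw CUDA driver API; it is illustrative only, not the UMF test itself, and assumes Linux, one visible CUDA device, and linking with -lcuda. Error handling is trimmed for brevity.

    /* Illustrative two-process CUDA IPC round trip (not the actual UMF test).
     * Build with: cc ipc_demo.c -lcuda */
    #include <cuda.h>
    #include <stdio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void) {
        int fds[2];
        if (pipe(fds) != 0) {
            return 1;
        }

        if (fork() == 0) { /* consumer: must be a separate process */
            CUipcMemHandle handle;
            read(fds[0], &handle, sizeof(handle)); /* 64 opaque bytes */

            CUdevice dev;
            CUcontext ctx;
            cuInit(0); /* CUDA is initialized only after the fork */
            cuDeviceGet(&dev, 0);
            cuCtxCreate(&ctx, 0, dev);

            /* The same call the new provider hook wraps. */
            CUdeviceptr mapped;
            CUresult res = cuIpcOpenMemHandle(&mapped, handle,
                                              CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
            printf("consumer: cuIpcOpenMemHandle -> %d\n", (int)res);
            if (res == CUDA_SUCCESS) {
                cuIpcCloseMemHandle(mapped);
            }
            _exit(res == CUDA_SUCCESS ? 0 : 1);
        }

        /* producer */
        CUdevice dev;
        CUcontext ctx;
        cuInit(0);
        cuDeviceGet(&dev, 0);
        cuCtxCreate(&ctx, 0, dev);

        CUdeviceptr dptr;
        cuMemAlloc(&dptr, 1 << 20);

        CUipcMemHandle handle;
        cuIpcGetMemHandle(&handle, dptr);       /* export the allocation */
        write(fds[1], &handle, sizeof(handle)); /* ship it to the consumer */

        int status = 0;
        wait(&status); /* keep the allocation alive until the consumer is done */
        cuMemFree(dptr);
        return WEXITSTATUS(status);
    }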
src/provider/provider_cuda.c: 109 changes (107 additions, 2 deletions)
@@ -53,8 +53,14 @@ typedef struct cu_ops_t {
    CUresult (*cuGetErrorString)(CUresult error, const char **pStr);
    CUresult (*cuCtxGetCurrent)(CUcontext *pctx);
    CUresult (*cuCtxSetCurrent)(CUcontext ctx);
    CUresult (*cuIpcGetMemHandle)(CUipcMemHandle *pHandle, CUdeviceptr dptr);
    CUresult (*cuIpcOpenMemHandle)(CUdeviceptr *pdptr, CUipcMemHandle handle,
                                   unsigned int Flags);
    CUresult (*cuIpcCloseMemHandle)(CUdeviceptr dptr);
} cu_ops_t;

// CUipcMemHandle is a fixed-size opaque blob (CU_IPC_HANDLE_SIZE, 64 bytes),
// so it can be copied verbatim between processes.
typedef CUipcMemHandle cu_ipc_data_t;

static cu_ops_t g_cu_ops;
static UTIL_ONCE_FLAG cu_is_initialized = UTIL_ONCE_FLAG_INIT;
static bool Init_cu_global_state_failed;
@@ -123,12 +129,20 @@ static void init_cu_global_state(void) {
utils_get_symbol_addr(0, "cuCtxGetCurrent", lib_name);
*(void **)&g_cu_ops.cuCtxSetCurrent =
utils_get_symbol_addr(0, "cuCtxSetCurrent", lib_name);
*(void **)&g_cu_ops.cuIpcGetMemHandle =
utils_get_symbol_addr(0, "cuIpcGetMemHandle", lib_name);
*(void **)&g_cu_ops.cuIpcOpenMemHandle =
utils_get_symbol_addr(0, "cuIpcOpenMemHandle_v2", lib_name);
*(void **)&g_cu_ops.cuIpcCloseMemHandle =
utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name);

if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc ||
!g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged ||
!g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost ||
!g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString ||
!g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent) {
!g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent ||
!g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle ||
!g_cu_ops.cuIpcCloseMemHandle) {
LOG_ERR("Required CUDA symbols not found.");
Init_cu_global_state_failed = true;
}
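
Aside: a minimal sketch of what such a lookup does on Linux, assuming utils_get_symbol_addr() wraps dlopen()/dlsym() there (an assumption; the Windows path would differ). It shows why the code above can ask for the plain and the _v2 IPC symbols by name:

    /* Hedged sketch, not part of the PR: inspect which IPC symbols the CUDA
     * driver library actually exports. Assumes Linux and libcuda.so.1;
     * link with -ldl on older glibc. */
    #include <dlfcn.h>
    #include <stdio.h>

    int main(void) {
        void *lib = dlopen("libcuda.so.1", RTLD_LAZY | RTLD_LOCAL);
        if (lib == NULL) {
            fprintf(stderr, "dlopen: %s\n", dlerror());
            return 1;
        }
        /* The provider asks for the versioned export explicitly. */
        void *v1 = dlsym(lib, "cuIpcOpenMemHandle");
        void *v2 = dlsym(lib, "cuIpcOpenMemHandle_v2");
        printf("cuIpcOpenMemHandle    = %p\n", v1);
        printf("cuIpcOpenMemHandle_v2 = %p\n", v2);
        dlclose(lib);
        return 0;
    }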
@@ -404,6 +418,97 @@ static const char *cu_memory_provider_get_name(void *provider) {
return "CUDA";
}

static umf_result_t cu_memory_provider_get_ipc_handle_size(void *provider,
size_t *size) {
if (provider == NULL || size == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

*size = sizeof(cu_ipc_data_t);
return UMF_RESULT_SUCCESS;
}

static umf_result_t cu_memory_provider_get_ipc_handle(void *provider,
const void *ptr,
size_t size,
void *providerIpcData) {
(void)size;

if (provider == NULL || ptr == NULL || providerIpcData == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

CUresult cu_result;
cu_ipc_data_t *cu_ipc_data = (cu_ipc_data_t *)providerIpcData;

cu_result = g_cu_ops.cuIpcGetMemHandle(cu_ipc_data, (CUdeviceptr)ptr);
if (cu_result != CUDA_SUCCESS) {
LOG_ERR("cuIpcGetMemHandle() failed.");
return cu2umf_result(cu_result);
}

return UMF_RESULT_SUCCESS;
}

static umf_result_t cu_memory_provider_put_ipc_handle(void *provider,
void *providerIpcData) {
if (provider == NULL || providerIpcData == NULL) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

    // CUDA has no producer-side release call for an IPC handle; the handle
    // stays valid until the allocation is freed, so put is a validated no-op.
    return UMF_RESULT_SUCCESS;
}

static umf_result_t cu_memory_provider_open_ipc_handle(void *provider,
                                                       void *providerIpcData,
                                                       void **ptr) {
    if (provider == NULL || ptr == NULL || providerIpcData == NULL) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider;

    CUresult cu_result;
    cu_ipc_data_t *cu_ipc_data = (cu_ipc_data_t *)providerIpcData;

    // Remember current context and set the one from the provider
    CUcontext restore_ctx = NULL;
    umf_result_t umf_result = set_context(cu_provider->context, &restore_ctx);
    if (umf_result != UMF_RESULT_SUCCESS) {
        return umf_result;
    }

    cu_result = g_cu_ops.cuIpcOpenMemHandle((CUdeviceptr *)ptr, *cu_ipc_data,
                                            CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);

    if (cu_result != CUDA_SUCCESS) {
        LOG_ERR("cuIpcOpenMemHandle() failed.");
    }

    set_context(restore_ctx, &restore_ctx);

    return cu2umf_result(cu_result);
}

static umf_result_t
cu_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) {
    (void)size;

    if (provider == NULL || ptr == NULL) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    CUresult cu_result;

    cu_result = g_cu_ops.cuIpcCloseMemHandle((CUdeviceptr)ptr);
    if (cu_result != CUDA_SUCCESS) {
        LOG_ERR("cuIpcCloseMemHandle() failed.");
        return cu2umf_result(cu_result);
    }

    return UMF_RESULT_SUCCESS;
}

static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = {
    .version = UMF_VERSION_CURRENT,
    .initialize = cu_memory_provider_initialize,
@@ -420,12 +525,12 @@ static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = {
    .ext.purge_force = cu_memory_provider_purge_force,
    .ext.allocation_merge = cu_memory_provider_allocation_merge,
    .ext.allocation_split = cu_memory_provider_allocation_split,
    */
    .ipc.get_ipc_handle_size = cu_memory_provider_get_ipc_handle_size,
    .ipc.get_ipc_handle = cu_memory_provider_get_ipc_handle,
    .ipc.put_ipc_handle = cu_memory_provider_put_ipc_handle,
    .ipc.open_ipc_handle = cu_memory_provider_open_ipc_handle,
    .ipc.close_ipc_handle = cu_memory_provider_close_ipc_handle,
    */
};

umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) {
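
Taken together, the new hooks back the generic IPC entry points of umf/memory_provider.h. A hedged sketch of the producer-side flow through that public API; the provider handle is assumed to have been created with umfMemoryProviderCreate() from umfCUDAMemoryProviderOps(), the allocation with umfMemoryProviderAlloc(), and parameter details may differ across UMF versions:

    /* Hedged sketch: export a CUDA-provider allocation via the generic API. */
    #include <stdlib.h>
    #include <umf/memory_provider.h>

    static umf_result_t export_allocation(umf_memory_provider_handle_t provider,
                                          void *ptr, size_t size) {
        /* -> cu_memory_provider_get_ipc_handle_size(): sizeof(CUipcMemHandle) */
        size_t handle_size = 0;
        umf_result_t ret =
            umfMemoryProviderGetIPCHandleSize(provider, &handle_size);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }

        void *ipc_data = malloc(handle_size);
        if (ipc_data == NULL) {
            return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }

        /* -> cu_memory_provider_get_ipc_handle(), i.e. cuIpcGetMemHandle() */
        ret = umfMemoryProviderGetIPCHandle(provider, ptr, size, ipc_data);
        if (ret == UMF_RESULT_SUCCESS) {
            /* Ship ipc_data (handle_size bytes) to the consumer process here.
             * -> cu_memory_provider_put_ipc_handle(): a validated no-op */
            ret = umfMemoryProviderPutIPCHandle(provider, ipc_data);
        }

        free(ipc_data);
        return ret;
    }

The consumer side mirrors this with umfMemoryProviderOpenIPCHandle() and umfMemoryProviderCloseIPCHandle(), which land in cu_memory_provider_open_ipc_handle() and cu_memory_provider_close_ipc_handle() above.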