Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
39659ac
enable multithreading for disjointpool benchmark
lplewa Feb 19, 2025
21628ae
change disjointpool min bucket size in benchmark
lplewa Feb 19, 2025
f36d893
Merge pull request #1112 from lplewa/benchmark_improvements
bratpiorka Feb 20, 2025
31f6b84
[CI] Update benchmark's scripts repo from UR to SYCL
lukaszstolarczuk Feb 19, 2025
4e7f117
[CI] Adjust benchmark scripts params to new scripts' version
lukaszstolarczuk Feb 19, 2025
6c85540
Merge pull request #1108 from lukaszstolarczuk/benchmarks-ur-repo
lukaszstolarczuk Feb 20, 2025
1380620
Fix data race in the umfIpcOpenedCacheDestroy function
vinser52 Feb 19, 2025
c925acb
Suppress Valgrind errors in jemalloc and tbbmalloc
vinser52 Feb 20, 2025
b860ee1
Remove check_if_tracker_is_empty from trackingFinalize
vinser52 Feb 20, 2025
a9ff7a8
Merge pull request #1111 from vinser52/svinogra_tests
lukaszstolarczuk Feb 20, 2025
a406dde
Fix node_list_rm_first() and node_list_rm_with_alignment()
ldorau Feb 20, 2025
d6c5327
Add a test for not aligned fixed memory buffer
ldorau Feb 20, 2025
fc1cbed
Merge pull request #1116 from ldorau/Fix_node_list_rm_first_and_node_…
ldorau Feb 21, 2025
0255017
fix aligned chunk address calc in disjoint pool
bratpiorka Feb 20, 2025
efaf4ac
cleanup includes in Disjoint Pool
bratpiorka Feb 20, 2025
1129e33
Merge pull request #1118 from bratpiorka/rrudnick_fix_dp_asan
bratpiorka Feb 21, 2025
604b870
temporarily disable DP MT benchmark
bratpiorka Feb 21, 2025
d658fae
Merge pull request #1124 from bratpiorka/rrudnick_disable_dp_mt_bench
lukaszstolarczuk Feb 21, 2025
d7b9de0
Add concurrent tests for IPC Get/Put functions
vinser52 Feb 21, 2025
8c841b8
Merge pull request #1126 from vinser52/svinogra_tests
vinser52 Feb 22, 2025
8228ec9
Disable building and installing the jemalloc's documentation
ldorau Feb 24, 2025
fd27836
Bump L0 loader to v1.20.2
lukaszstolarczuk Feb 20, 2025
7e68cc8
Merge pull request #1129 from ldorau/Disable_building_and_installing_…
ldorau Feb 24, 2025
ab731c0
Fix Windows Unix Makefiles generator builds
PatKamin Feb 24, 2025
8fecb89
Merge pull request #1117 from lukaszstolarczuk/bump-l0-loader
lukaszstolarczuk Feb 25, 2025
249c364
implement malloc_usable_size and enable pool tests for disjoint
pbalcer Feb 24, 2025
6b2e8f6
Merge pull request #1130 from PatKamin/fix-hwloc-make-build
lukaszstolarczuk Feb 25, 2025
bf593a0
Bump breathe
dependabot[bot] Feb 25, 2025
08d137c
Merge pull request #1137 from oneapi-src/dependabot/pip/third_party/p…
lukaszstolarczuk Feb 26, 2025
c9134a4
Use LOG_FATAL() in case of critical errors
ldorau Feb 26, 2025
5d67494
Merge pull request #1132 from pbalcer/malloc-usable-size-disjoint-pool
bratpiorka Feb 26, 2025
113f3e0
Merge pull request #1139 from ldorau/Use_LOG_FATAL_in_case_of_critica…
lukaszstolarczuk Feb 26, 2025
258c6f2
explicitly declare single threaded benchmark
lplewa Feb 26, 2025
2d19a61
remove deprecated cmake flag from compat workflow
bratpiorka Feb 26, 2025
1c9c7a1
Merge pull request #1140 from lplewa/single_thread
bratpiorka Feb 26, 2025
4c70f6c
Merge pull request #1141 from bratpiorka/rrudnick_compat_cleanup
vinser52 Feb 26, 2025
c2e758e
coarse: error out on double free instead of assert
ldorau Mar 1, 2025
eae8d63
Merge pull request #1148 from ldorau/coarse_error_out_on_double_free_…
ldorau Mar 3, 2025
789f7c1
test preloads
EuphoricThinking Mar 3, 2025
041322c
modify docs.yml so I can actually test something
EuphoricThinking Mar 3, 2025
579f5fa
Let's look at some errors
EuphoricThinking Mar 3, 2025
d5cbfd4
New error :) set cores
EuphoricThinking Mar 3, 2025
b5dd023
add libraries
EuphoricThinking Mar 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: GitHubPages

on:
push:
branches: ["main"]
branches: ["test_prealoads_llvm"]
workflow_dispatch:

# Cancel previous in-progress workflow, only the latest run is relevant
Expand All @@ -15,8 +15,19 @@ permissions:
contents: read

jobs:
Benchmarks:
uses: ./.github/workflows/reusable_benchmarks.yml
permissions:
contents: read
pull-requests: write
with:
pr_no: '0'
bench_script_params: '--save baseline'
upload_report: true

DocsBuild:
uses: ./.github/workflows/reusable_docs_build.yml
needs: Benchmarks
with:
upload: true

Expand Down
76 changes: 41 additions & 35 deletions .github/workflows/reusable_benchmarks.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Executes benchmarks implemented in this repository
# using scripts for benchmark results visualization,
# which are downloaded from Unified Runtime repository.
# Executes benchmarks implemented in this repository using scripts
# for results visualization from intel/llvm (unified-runtime dir).
name: Benchmarks

on:
Expand Down Expand Up @@ -31,9 +30,10 @@ env:
jobs:
benchmarks:
name: Benchmarks
runs-on: ubuntu-latest
# run only on upstream; forks will not have the HW
if: github.repository == 'oneapi-src/unified-memory-framework'
runs-on: L0_PERF
# if: github.repository == 'oneapi-src/unified-memory-framework'
# runs-on: L0_PERF

steps:
# Workspace on self-hosted runners is not cleaned automatically.
Expand All @@ -44,6 +44,9 @@ jobs:
ls -la ./
rm -rf ./* || true

- name: Install things
run: sudo apt-get install libhwloc-dev libjemalloc-dev libtbb-dev

- name: Add comment to PR
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
if: ${{ always() && inputs.pr_no != 0 }}
Expand Down Expand Up @@ -98,50 +101,53 @@ jobs:
- name: Build UMF
run: cmake --build ${{env.BUILD_DIR}} -j $(nproc)

# We are going to clone Unified Runtime repository in order to run
# the most up-to-date UR scripts for benchmark data visualization
- name: Checkout UR
# Get scripts for benchmark data visualization.
# Use specific tag, as the scripts or files' location may change.
- name: Checkout SYCL
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: oneapi-src/unified-runtime
path: ur-repo
repository: EuphoricThinking/llvm
ref: umf_preloaded_bench
path: sycl-repo
fetch-depth: 1
fetch-tags: false

- name: Install pip packages for benchmarking scripts from UR
run: |
pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt

- name: Set core range and GPU mask
- name: Install benchmarking scripts deps
run: |
# Compute the core range for the second NUMA node; first node is for UR jobs.
# Skip the first 4 cores - the kernel is likely to schedule more work on these.
CORES=$(lscpu | awk '
/NUMA node1 CPU|On-line CPU/ {line=$0}
END {
split(line, a, " ")
split(a[4], b, ",")
sub(/^0/, "4", b[1])
print b[1]
}')
echo "Selected core: $CORES"
echo "CORES=$CORES" >> $GITHUB_ENV

ZE_AFFINITY_MASK=1
echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV

- name: Run UMF benchmarks (using scripts from UR)
pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt

# - name: Set core range and GPU mask
# run: |
# # Compute the core range for the second NUMA node; first node is for SYCL/UR jobs.
# # Skip the first 4 cores - the kernel is likely to schedule more work on these.
# CORES=$(lscpu | awk '
# /NUMA node1 CPU|On-line CPU/ {line=$0}
# END {
# split(line, a, " ")
# split(a[4], b, ",")
# sub(/^0/, "4", b[1])
# print b[1]
# }')
# echo "Selected core: $CORES"
# echo "CORES=$CORES" >> $GITHUB_ENV

# ZE_AFFINITY_MASK=1
# echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV

- name: Run UMF benchmarks
id: benchmarks
working-directory: ${{env.BUILD_DIR}}
run: >
taskset -c ${{ env.CORES }} ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py
${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py
~/bench_workdir_umf
--umf ${{env.BUILD_DIR}}
--compare baseline
${{ inputs.upload_report && '--output-html' || '' }}
${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
${{ inputs.bench_script_params }}

# In case it failed to add a comment, we can still print the results.
- name: Print benchmark results
if: ${{ always() }}
if: ${{ always() && inputs.pr_no != 0 }}
run: cat ${{env.BUILD_DIR}}/benchmark_results.md

- name: Add comment to PR
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/reusable_compatibility.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ jobs:
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON

- name: Build latest UMF
Expand Down Expand Up @@ -197,7 +196,6 @@ jobs:
-DUMF_FORMAT_CODE_STYLE=OFF
-DUMF_DEVELOPER_MODE=ON
-DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
-DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
-DUMF_TESTS_FAIL_ON_SKIP=ON

- name: Build latest UMF
Expand Down
10 changes: 6 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,12 @@ else()
# --disable-initial-exec-tls - Disable the initial-exec TLS model for
# jemalloc's internal thread-local storage (on those platforms that
# support explicit settings). This can allow jemalloc to be dynamically
# loaded after program startup (e.g. using dlopen).
# loaded after program startup (e.g. using dlopen).
#
# --disable-doc - Disable building and installing the documentation.
COMMAND
./configure --prefix=${jemalloc_targ_BINARY_DIR}
--with-jemalloc-prefix=je_ --disable-cxx --disable-initial-exec-tls
CFLAGS=-fPIC
--disable-doc CFLAGS=-fPIC
WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR}
OUTPUT ${jemalloc_targ_SOURCE_DIR}/Makefile
DEPENDS ${jemalloc_targ_SOURCE_DIR}/configure)
Expand Down Expand Up @@ -282,7 +283,8 @@ else()

message(STATUS "hwloc CMAKE_GENERATOR: ${CMAKE_GENERATOR}")

if(CMAKE_GENERATOR STREQUAL "Ninja")
if(CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL
"Unix Makefiles")
add_custom_command(
COMMAND ${CMAKE_COMMAND}
-DCMAKE_INSTALL_PREFIX=${hwloc_targ_BINARY_DIR} -B build
Expand Down Expand Up @@ -407,7 +409,7 @@ if(UMF_BUILD_LEVEL_ZERO_PROVIDER AND (NOT UMF_LEVEL_ZERO_INCLUDE_DIR))
include(FetchContent)

set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git")
set(LEVEL_ZERO_LOADER_TAG v1.19.2)
set(LEVEL_ZERO_LOADER_TAG v1.20.2)

message(
STATUS
Expand Down
15 changes: 12 additions & 3 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ static void multithreaded(benchmark::internal::Benchmark *benchmark) {
benchmark->Threads(1);
}

// Benchmark configuration helper intended to be passed to ->Apply() when
// registering a benchmark: it explicitly requests that the benchmark be run
// with exactly one thread.
static void singlethreaded(benchmark::internal::Benchmark *benchmark) {
// Pin the thread count to 1 so the single-threaded intent is explicit.
benchmark->Threads(1);
}

static void
default_multiple_alloc_fix_size(benchmark::internal::Benchmark *benchmark) {
benchmark->Args({10000, 1, 4096});
Expand Down Expand Up @@ -68,15 +72,17 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool,
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool)
->Apply(&default_multiple_alloc_fix_size)
// reduce iterations, as this benchmark is slower than others
->Iterations(50000);
->Iterations(50000)
->Apply(&singlethreaded);

UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider,
fixed_alloc_size,
provider_allocator<os_provider>);
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider)
->Apply(&default_multiple_alloc_fix_size)
// reduce iterations, as this benchmark is slower than others
->Iterations(50000);
->Iterations(50000)
->Apply(&singlethreaded);

UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix,
fixed_alloc_size,
Expand All @@ -89,7 +95,10 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark,
disjoint_pool_uniform, uniform_alloc_size,
pool_allocator<disjoint_pool<os_provider>>);
UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform)
->Apply(&default_multiple_alloc_uniform_size);
->Apply(&default_multiple_alloc_uniform_size)
->Apply(&singlethreaded);
// TODO: change to multithreaded
//->Apply(&multithreaded);

#ifdef UMF_POOL_JEMALLOC_ENABLED
UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix,
Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_umf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ struct disjoint_pool : public pool_interface<Provider> {
return {nullptr, [](void *) {}};
}

ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 4096);
ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 8);
if (ret != UMF_RESULT_SUCCESS) {
state.SkipWithError("Failed to set min bucket size");
return {nullptr, [](void *) {}};
Expand Down
2 changes: 1 addition & 1 deletion examples/ipc_level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ endif()
include(FetchContent)

set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git")
set(LEVEL_ZERO_LOADER_TAG v1.19.2)
set(LEVEL_ZERO_LOADER_TAG v1.20.2)

message(
STATUS
Expand Down
2 changes: 1 addition & 1 deletion examples/level_zero_shared_memory/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ endif()
include(FetchContent)

set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git")
set(LEVEL_ZERO_LOADER_TAG v1.19.2)
set(LEVEL_ZERO_LOADER_TAG v1.20.2)

message(
STATUS
Expand Down
13 changes: 8 additions & 5 deletions src/coarse/coarse.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Expand Down Expand Up @@ -278,7 +278,7 @@ static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node,
assert(node->prev == NULL);
struct block_t *block = node->block;

if (IS_NOT_ALIGNED(block->size, alignment)) {
if (IS_NOT_ALIGNED(((uintptr_t)block->data), alignment)) {
return NULL;
}

Expand All @@ -303,7 +303,7 @@ static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node,

ravl_free_blocks_elem_t *node;
for (node = head_node->head; node != NULL; node = node->next) {
if (IS_ALIGNED(node->block->size, alignment)) {
if (IS_ALIGNED(((uintptr_t)node->block->data), alignment)) {
return node_list_rm(head_node, node);
}
}
Expand Down Expand Up @@ -1170,10 +1170,13 @@ umf_result_t coarse_free(coarse_t *coarse, void *ptr, size_t bytes) {
}

block_t *block = get_node_block(node);
assert(block->used);
if (!block->used) {
LOG_ERR("double free");
utils_mutex_unlock(&coarse->lock);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

if (bytes > 0 && bytes != block->size) {
// wrong size of allocation
LOG_ERR("wrong size of allocation");
utils_mutex_unlock(&coarse->lock);
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
Expand Down
3 changes: 3 additions & 0 deletions src/ipc_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ umfIpcOpenedCacheCreate(ipc_opened_cache_eviction_cb_t eviction_cb) {

void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache) {
ipc_opened_cache_entry_t *entry, *tmp;

utils_mutex_lock(&(cache->global->cache_lock));
HASH_ITER(hh, cache->hash_table, entry, tmp) {
DL_DELETE(cache->global->lru_list, entry);
HASH_DEL(cache->hash_table, entry);
Expand All @@ -153,6 +155,7 @@ void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache) {
umf_ba_free(cache->global->cache_allocator, entry);
}
HASH_CLEAR(hh, cache->hash_table);
utils_mutex_unlock(&(cache->global->cache_lock));

umf_ba_global_free(cache);
}
Expand Down
Loading
Loading