Skip to content

Commit bdccf8a

Browse files
committed
[libc] Add a config option to disable slab reclaiming
Summary: Without slab reclaiming, this interface is much simpler, and it can speed up cases with a lot of churn. In short, it trades higher memory usage for performance.
1 parent 1a0121c commit bdccf8a

File tree

6 files changed

+35
-4
lines changed

6 files changed

+35
-4
lines changed

libc/config/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,12 @@
119119
"doc": "Force the size of time_t to 64 bits, even on platforms where compatibility considerations would otherwise make it 32-bit."
120120
}
121121
},
122+
"malloc": {
123+
"LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": {
124+
"value": false,
125+
"doc": "Disable returning unused slabs to system memory in the GPU malloc implementation, trading higher memory usage for performance."
126+
}
127+
},
122128
"general": {
123129
"LIBC_ADD_NULL_CHECKS": {
124130
"value": true,

libc/config/gpu/amdgpu/config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,10 @@
3636
"LIBC_CONF_MATH_OPTIMIZATIONS": {
3737
"value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)"
3838
}
39+
},
40+
"malloc": {
41+
"LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": {
42+
"value": false
43+
}
3944
}
4045
}

libc/config/gpu/nvptx/config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,10 @@
3636
"LIBC_CONF_MATH_OPTIMIZATIONS": {
3737
"value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)"
3838
}
39+
},
40+
"malloc": {
41+
"LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING": {
42+
"value": false
43+
}
3944
}
4045
}

libc/docs/configure.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ to learn about the defaults for your platform and target.
3232
- ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, LIBC_ERRNO_MODE_SYSTEM, and LIBC_ERRNO_MODE_SYSTEM_INLINE.
3333
* **"general" options**
3434
- ``LIBC_ADD_NULL_CHECKS``: Add nullptr checks in the library's implementations to some functions for which passing nullptr is undefined behavior.
35+
* **"malloc" options**
36+
- ``LIBC_CONF_GPU_MALLOC_DISABLE_SLAB_RECLAIMING``: Disable returning unused slabs to system memory in the GPU malloc implementation, trading higher memory usage for performance.
3537
* **"math" options**
3638
- ``LIBC_CONF_FREXP_INF_NAN_EXPONENT``: The value written back to the second parameter when calling `frexp/frexpf/frexpl` with `+/-Inf`/`NaN` is unspecified. Configure an explicit exp value for Inf/NaN inputs.
3739
- ``LIBC_CONF_MATH_OPTIMIZATIONS``: Configures optimizations for math functions. Values accepted are LIBC_MATH_SKIP_ACCURATE_PASS, LIBC_MATH_SMALL_TABLES, LIBC_MATH_NO_ERRNO, LIBC_MATH_NO_EXCEPT, and LIBC_MATH_FAST.

libc/src/__support/GPU/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ if(NOT LIBC_TARGET_OS_IS_GPU)
33
return()
44
endif()
55

6+
if(LIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING)
7+
list(APPEND malloc_config_copts "-DLIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING")
8+
endif()
9+
610
add_header_library(
711
utils
812
HDRS
@@ -23,4 +27,5 @@ add_object_library(
2327
libc.src.__support.CPP.bit
2428
libc.src.__support.CPP.new
2529
.utils
30+
${malloc_config_copts}
2631
)

libc/src/__support/GPU/allocator.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
3939
// The number of times to attempt claiming an in-progress slab allocation.
4040
constexpr static uint32_t MAX_TRIES = 1024;
4141

42+
// Configuration for whether or not we will return unused slabs to memory.
43+
#ifdef LIBC_CONF_MALLOC_DISABLE_SLAB_RECLAIMING
44+
constexpr static bool RECLAIM = false;
45+
#else
46+
constexpr static bool RECLAIM = true;
47+
#endif
48+
4249
static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
4350

4451
namespace impl {
@@ -368,7 +375,7 @@ struct GuardPtr {
368375
// and obtain exclusive rights to deconstruct it. If the CAS failed either
369376
// another thread resurrected the counter and we quit, or a parallel read
370377
// helped us invalidating it. For the latter, claim that flag and return.
371-
if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
378+
if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n && RECLAIM) {
372379
uint32_t expected = 0;
373380
if (counter.compare_exchange_strong(expected, INVALID,
374381
cpp::MemoryOrder::RELAXED,
@@ -386,8 +393,9 @@ struct GuardPtr {
386393
// thread.
387394
uint64_t read() {
388395
auto val = counter.load(cpp::MemoryOrder::RELAXED);
389-
if (val == 0 && counter.compare_exchange_strong(
390-
val, INVALID | HELPED, cpp::MemoryOrder::RELAXED))
396+
if (val == 0 && RECLAIM &&
397+
counter.compare_exchange_strong(val, INVALID | HELPED,
398+
cpp::MemoryOrder::RELAXED))
391399
return 0;
392400
return (val & INVALID) ? 0 : val;
393401
}
@@ -421,7 +429,7 @@ struct GuardPtr {
421429
return nullptr;
422430

423431
cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
424-
return ptr.load(cpp::MemoryOrder::RELAXED);
432+
return RECLAIM ? ptr.load(cpp::MemoryOrder::RELAXED) : expected;
425433
}
426434

427435
// Finalize the associated memory and signal that it is ready to use by

0 commit comments

Comments
 (0)