Skip to content

Commit e50b95f

Browse files
committed
Enable USE_PER_THREAD_MSTATE
With this, memory allocation is **much** faster in an SMP environment
1 parent a0d2332 commit e50b95f

File tree

3 files changed

+131
-24
lines changed

3 files changed

+131
-24
lines changed

include/deemon/thread.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,12 @@ struct Dee_thread_object {
404404
* [valid_if(Dee_THREAD_STATE_STARTED && !Dee_THREAD_STATE_TERMINATED)]
405405
* Thread TLS data controller. (Set to NULL during thread creation / clear) */
406406
} t_context; /* Contextual data */
407+
#ifndef CONFIG_NO_THREADS
408+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
409+
void *t_heap; /* [0..1][lock(WRITE_ONCE && PRIVATE(DeeThread_Self()))] Thread-local heap (for faster Dee_Malloc()) */
410+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
411+
#endif /* !CONFIG_NO_THREADS */
412+
407413
/* OS-specific thread data goes here. */
408414
};
409415

src/deemon/runtime/heap.c

Lines changed: 98 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ ClCompile.BasicRuntimeChecks = Default
3939
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
4040
#include <deemon/format.h>
4141
#include <deemon/gc.h>
42+
#include <deemon/thread.h>
4243
#include <deemon/util/atomic.h>
4344
#include <deemon/util/lock.h>
4445

@@ -229,6 +230,8 @@ DECL_BEGIN
229230
#define NO_MSPACE_MAX_FOOTPRINT 1
230231
#define NO_MSPACE_FOOTPRINT_LIMIT 1
231232
#define NO_MSPACE_SET_FOOTPRINT_LIMIT 1
233+
#define NO_MSPACE_TRIM 1 /* TODO: Call from DeeHeap_Trim() */
234+
#define NO_DESTROY_MSPACE 1 /* Per-thread heaps are re-used as needed */
232235

233236
#undef M_TRIM_THRESHOLD
234237
#undef M_GRANULARITY
@@ -449,7 +452,9 @@ static void dlmalloc_stats(void);
449452
#if MSPACES
450453
typedef void *mspace;
451454
static mspace create_mspace(size_t capacity/*, int locked*/);
455+
#if !NO_DESTROY_MSPACE
452456
static size_t destroy_mspace(mspace msp);
457+
#endif /* NO_DESTROY_MSPACE */
453458
#if !NO_CREATE_MSPACE_WITH_BASE
454459
static mspace create_mspace_with_base(void *base, size_t capacity/*, int locked*/);
455460
#endif /* !NO_CREATE_MSPACE_WITH_BASE */
@@ -500,7 +505,9 @@ static size_t mspace_usable_size(void const *mem);
500505
#if !NO_MALLOC_STATS
501506
static void mspace_malloc_stats(mspace msp);
502507
#endif /* NO_MALLOC_STATS */
508+
#if !NO_MSPACE_TRIM
503509
static int mspace_trim(mspace msp, size_t pad);
510+
#endif /* NO_MSPACE_TRIM */
504511
#if !NO_MALLOPT && !EXPOSE_AS_DEEMON_API
505512
static int mspace_mallopt(int param_number, int value);
506513
#endif /* !NO_MALLOPT && !EXPOSE_AS_DEEMON_API */
@@ -1009,6 +1016,9 @@ struct malloc_state {
10091016
struct freelist flist;
10101017
#endif /* USE_PENDING_FREE_LIST */
10111018
msegment seg;
1019+
#if USE_PER_THREAD_MSTATE
1020+
SLIST_ENTRY(malloc_state) ms_link; /* Link for "free_tls_mspace" / "used_tls_mspace" */
1021+
#endif /* USE_PER_THREAD_MSTATE */
10121022
#if 0
10131023
void *extp; /* Unused but available for extensions */
10141024
size_t exts;
@@ -1726,8 +1736,8 @@ static void do_check_memset_free(PARAM_mstate_m_ mchunkptr p) {
17261736
}
17271737
for (i = 0; i < num_words; ++i) {
17281738
ASSERTF(words[i] == DL_DEBUG_MEMSET_FREE,
1729-
"Free pointer %p has bad patter %IX",
1730-
&words[i], words[i]);
1739+
"Free pointer %p at offset %" PRFxSIZ " from %p-%p has bad pattern %#" PRFXSIZ "",
1740+
&words[i], i * sizeof(size_t), p, (char *)p + num_bytes - 1, words[i]);
17311741
}
17321742
}
17331743
#endif /* DL_DEBUG_MEMSET_FREE */
@@ -2545,7 +2555,7 @@ static void add_segment(PARAM_mstate_m_ char *tbase, size_t tsize, flag_t mmappe
25452555
int nfences = 0;
25462556

25472557
/* reset top to new space */
2548-
init_top(ARG_mstate_m_(mchunkptr) tbase, tsize - TOP_FOOT_SIZE);
2558+
init_top(ARG_mstate_m_ (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
25492559

25502560
/* Set up segment record */
25512561
dl_assert(is_aligned(ss));
@@ -2739,11 +2749,11 @@ static void *sys_alloc(PARAM_mstate_m_ size_t nb) {
27392749
mstate_release_checks(m) = MAX_RELEASE_CHECK_RATE;
27402750
init_bins(ARG_mstate_m);
27412751
#if GM_ONLY
2742-
init_top(ARG_mstate_m_(mchunkptr) tbase, tsize - TOP_FOOT_SIZE);
2752+
init_top(ARG_mstate_m_ (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
27432753
#else /* GM_ONLY */
27442754
#if !ONLY_MSPACES
27452755
if (is_global(m)) {
2746-
init_top(ARG_mstate_m_(mchunkptr) tbase, tsize - TOP_FOOT_SIZE);
2756+
init_top(ARG_mstate_m_ (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
27472757
} else
27482758
#endif /* !ONLY_MSPACES */
27492759
{
@@ -4463,10 +4473,14 @@ static int dlmallopt(int param_number, int value) {
44634473
#if MSPACES
44644474

44654475
static mstate init_user_mstate(char *tbase, size_t tsize) {
4466-
size_t msize = pad_request(sizeof(struct malloc_state));
4476+
size_t msize;
44674477
mchunkptr mn;
4468-
mchunkptr msp = align_as_chunk(tbase);
4469-
mstate m = (mstate)(chunk2mem(msp));
4478+
mchunkptr msp;
4479+
mstate m;
4480+
dl_setfree_data(tbase, tsize);
4481+
msize = pad_request(sizeof(struct malloc_state));
4482+
msp = align_as_chunk(tbase);
4483+
m = (mstate)(chunk2mem(msp));
44704484
bzero(m, msize);
44714485
msp->head = (msize | INUSE_BITS);
44724486
mstate_seg(m).base = mstate_least_addr(m) = tbase;
@@ -4505,15 +4519,6 @@ static mspace create_mspace(size_t capacity/*, int locked*/) {
45054519
return (mspace)m;
45064520
}
45074521

4508-
#if USE_PER_THREAD_MSTATE
4509-
/* Return the calling thread's thread-local mspace (or "0" if not available) */
4510-
static mspace tls_mspace(void) {
4511-
/* TODO */
4512-
return 0;
4513-
}
4514-
#endif /* USE_PER_THREAD_MSTATE */
4515-
4516-
45174522
#if !NO_CREATE_MSPACE_WITH_BASE
45184523
static mspace create_mspace_with_base(void *base, size_t capacity/*, int locked*/) {
45194524
mstate m = 0;
@@ -4547,6 +4552,7 @@ static int mspace_track_large_chunks(mspace msp, int enable) {
45474552
}
45484553
#endif /* !NO_MSPACE_TRACK_LARGE_CHUNKS */
45494554

4555+
#if !NO_DESTROY_MSPACE
45504556
static size_t destroy_mspace(mspace msp) {
45514557
size_t freed = 0;
45524558
mstate ms = (mstate)msp;
@@ -4572,6 +4578,64 @@ static size_t destroy_mspace(mspace msp) {
45724578
#endif /* !DL_MUNMAP_ALWAYS_FAILS */
45734579
return freed;
45744580
}
4581+
#endif /* NO_DESTROY_MSPACE */
4582+
4583+
4584+
#if USE_PER_THREAD_MSTATE
4585+
/* Singly-linked list type chaining `struct malloc_state' objects via their `ms_link' field. */
SLIST_HEAD(malloc_state_slist, malloc_state);
4586+
/* Per-thread heaps not currently handed out: `destroy_tls_mspace()' inserts
 * here, and `create_tls_mspace()' takes from here before creating a new one. */
PRIVATE struct malloc_state_slist free_tls_mspace = SLIST_HEAD_INITIALIZER(free_tls_mspace);
4587+
/* Per-thread heaps that were handed out by `create_tls_mspace()' and have
 * not yet been given back through `destroy_tls_mspace()'. */
PRIVATE struct malloc_state_slist used_tls_mspace = SLIST_HEAD_INITIALIZER(used_tls_mspace);
4588+
#ifndef CONFIG_NO_THREADS
4589+
/* Lock held by `create_tls_mspace()' and `destroy_tls_mspace()' while they
 * modify `free_tls_mspace' / `used_tls_mspace'. */
PRIVATE Dee_atomic_lock_t tls_mspace_lock = Dee_ATOMIC_LOCK_INIT;
4590+
#endif /* !CONFIG_NO_THREADS */
4591+
/* Convenience wrappers that apply the `Dee_atomic_lock_*' operations to `tls_mspace_lock'.
 * NOTE(review): these are defined even when CONFIG_NO_THREADS is set (where
 * `tls_mspace_lock' is not declared); presumably the `Dee_atomic_lock_*' macros
 * do not evaluate their argument in that configuration -- TODO confirm. */
#define tls_mspace_lock_available() Dee_atomic_lock_available(&tls_mspace_lock)
4592+
#define tls_mspace_lock_acquired() Dee_atomic_lock_acquired(&tls_mspace_lock)
4593+
#define tls_mspace_lock_tryacquire() Dee_atomic_lock_tryacquire(&tls_mspace_lock)
4594+
#define tls_mspace_lock_acquire() Dee_atomic_lock_acquire(&tls_mspace_lock)
4595+
#define tls_mspace_lock_waitfor() Dee_atomic_lock_waitfor(&tls_mspace_lock)
4596+
#define tls_mspace_lock_release() Dee_atomic_lock_release(&tls_mspace_lock)
4597+
4598+
/* Obtain an mspace for use as a thread-local heap.
 *
 * A heap from `free_tls_mspace' is re-used when one is available; otherwise
 * a new one is created with `create_mspace(0)'. A non-NULL result is linked
 * into `used_tls_mspace' before being returned.
 *
 * @return: * :   The mspace to use.
 * @return: NULL: No heap could be obtained (the result of `create_mspace(0)'
 *                is checked for NULL below, so it can presumably fail). */
PRIVATE WUNUSED mspace DCALL create_tls_mspace(void) {
4599+
struct malloc_state *result;
4600+
tls_mspace_lock_acquire();
4601+
if (!SLIST_EMPTY(&free_tls_mspace)) {
4602+
/* Re-use a heap that was given back by `destroy_tls_mspace()' */
result = SLIST_FIRST(&free_tls_mspace);
4603+
SLIST_REMOVE_HEAD(&free_tls_mspace, ms_link);
4604+
} else {
4605+
/* NOTE(review): `create_mspace()' runs while `tls_mspace_lock' is held;
 * if it maps memory from the OS this may stall other threads that are
 * waiting on the lock -- confirm this is acceptable. */
result = (struct malloc_state *)create_mspace(0);
4606+
}
4607+
/* Only track the heap as in-use if one was actually obtained */
if likely(result)
4608+
SLIST_INSERT(&used_tls_mspace, result, ms_link);
4609+
tls_mspace_lock_release();
4610+
return (mspace)result;
4611+
}
4612+
4613+
/* Give back an mspace previously returned by `create_tls_mspace()'.
 *
 * The heap is not released here: it is unlinked from `used_tls_mspace' and
 * inserted into `free_tls_mspace', from where a later `create_tls_mspace()'
 * call can hand it out again. Both list updates happen under `tls_mspace_lock'.
 *
 * @param ms: The mspace to give back (must be non-NULL). */
PRIVATE NONNULL((1)) void DCALL destroy_tls_mspace(mspace ms) {
4614+
struct malloc_state *state = (struct malloc_state *)ms;
4615+
tls_mspace_lock_acquire();
4616+
SLIST_REMOVE(&used_tls_mspace, state, struct malloc_state, ms_link);
4617+
SLIST_INSERT(&free_tls_mspace, state, ms_link);
4618+
tls_mspace_lock_release();
4619+
}
4620+
4621+
/* Return the calling thread's thread-local mspace (or "0" if not available).
 *
 * The mspace is cached in the `t_heap' field of the thread object returned
 * by `DeeThread_Self()'. When that field is still NULL, an mspace is obtained
 * from `create_tls_mspace()' and stored there; if that call fails, `t_heap'
 * stays NULL and "0" is returned, so a later call tries again. */
4622+
static mspace tls_mspace(void) {
4623+
DeeThreadObject *me = DeeThread_Self();
4624+
mspace result = (mspace)me->t_heap;
4625+
if unlikely(!result) {
4626+
/* First use by this thread (or every earlier attempt failed) */
result = create_tls_mspace();
4627+
me->t_heap = (void *)result;
4628+
}
4629+
return result;
4630+
}
4631+
4632+
#define thread_heap_destroy_DEFINED
4633+
/* Give back a thread's heap (the value of its `t_heap' field).
 * Forwards to `destroy_tls_mspace()'.
 *
 * @param heap: The heap to give back (must be non-NULL). */
INTERN NONNULL((1)) void DCALL
4634+
thread_heap_destroy(void *heap) {
4635+
destroy_tls_mspace((mspace)heap);
4636+
}
4637+
#endif /* USE_PER_THREAD_MSTATE */
4638+
45754639

45764640
/*
45774641
mspace versions of routines are near-clones of the global
@@ -4922,6 +4986,7 @@ static void mspace_inspect_all(mspace msp,
49224986
}
49234987
#endif /* MALLOC_INSPECT_ALL */
49244988

4989+
#if !NO_MSPACE_TRIM
49254990
static int mspace_trim(mspace msp, size_t pad) {
49264991
int result = 0;
49274992
mstate ms = (mstate)msp;
@@ -4931,6 +4996,7 @@ static int mspace_trim(mspace msp, size_t pad) {
49314996
POSTACTION(ms);
49324997
return result;
49334998
}
4999+
#endif /* NO_MSPACE_TRIM */
49345000

49355001
#if !NO_MALLOC_STATS
49365002
static void mspace_malloc_stats(mspace msp) {
@@ -5445,7 +5511,6 @@ PUBLIC ATTR_HOT WUNUSED void *
54455511
if (new_block) {
54465512
node = (struct leaknode *)dlmalloc(sizeof(struct leaknode));
54475513
if (node) {
5448-
struct lfrelist_entry *ent;
54495514
size_t common;
54505515

54515516
/* Insert new block */
@@ -5455,12 +5520,8 @@ PUBLIC ATTR_HOT WUNUSED void *
54555520
common = n_bytes < usable ? n_bytes : usable;
54565521
memcpy(new_block, ptr, common);
54575522

5458-
/* Schedule freeing of old block */
5459-
ent = (struct lfrelist_entry *)ptr;
5460-
ent->lfle_file = file;
5461-
ent->lfle_line = line;
5462-
SLIST_ATOMIC_INSERT(&leaks_pending_remove, ent, lfle_link);
5463-
leak_lock_reap();
5523+
/* Free the old block */
5524+
DeeDbg_Free(ptr, file, line);
54645525
return new_block;
54655526
}
54665527
dlfree(new_block);
@@ -6078,6 +6139,19 @@ DeeHeap_SetAllocBreakpoint(size_t id) {
60786139
#endif /* !DeeHeap_GetAllocBreakpoint_DEFINED */
60796140

60806141

6142+
6143+
#ifndef CONFIG_NO_THREADS
6144+
#ifndef thread_heap_destroy_DEFINED
6145+
#define thread_heap_destroy_DEFINED
6146+
/* Fallback definition of `thread_heap_destroy()', only compiled when no
 * earlier definition set `thread_heap_destroy_DEFINED'. It does nothing
 * with `heap'.
 *
 * @param heap: Ignored. */
INTERN NONNULL((1)) void DCALL
6147+
thread_heap_destroy(void *heap) {
6148+
/* NOTE(review): presumably keeps the compiler from treating this empty
 * function as pure/const -- confirm against the macro's definition. */
COMPILER_IMPURE();
6149+
(void)heap;
6150+
}
6151+
#endif /* !thread_heap_destroy_DEFINED */
6152+
#endif /* !CONFIG_NO_THREADS */
6153+
6154+
60816155
DECL_END
60826156
#else /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
60836157

src/deemon/runtime/thread.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,9 @@ INTERN DeeOSThreadObject DeeThread_Main = {
11741174
/* .t_threadname = */ (DeeStringObject *)&main_thread_name,
11751175
/* .t_inout = */ { NULL },
11761176
/* .t_context = */ { NULL },
1177+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
1178+
/* .t_heap = */ NULL,
1179+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
11771180
#endif /* !CONFIG_NO_THREADS */
11781181
},
11791182
#ifdef Dee_pid_t
@@ -1186,6 +1189,13 @@ INTERN DeeOSThreadObject DeeThread_Main = {
11861189
#endif /* !DeeThread_USE_SINGLE_THREADED */
11871190
};
11881191

1192+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
1193+
#ifdef CONFIG_NO_THREADS
1194+
#define thread_heap_destroy(heap) (void)0
1195+
#else /* CONFIG_NO_THREADS */
1196+
INTDEF NONNULL((1)) void DCALL thread_heap_destroy(void *heap);
1197+
#endif /* !CONFIG_NO_THREADS */
1198+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
11891199

11901200

11911201

@@ -1999,6 +2009,10 @@ DeeThread_Secede(DREF DeeObject *thread_result) {
19992009
/* ==== POINT OF NO RETURN ====
20002010
*
20012011
* from this point forth, no deemon code may be executed by the thread anymore */
2012+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
2013+
if (self->t_heap)
2014+
thread_heap_destroy(self->t_heap);
2015+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
20022016

20032017
/* Must act as though the thread had exited. */
20042018
_DeeThread_AcquireSetup(self);
@@ -2460,6 +2474,9 @@ PRIVATE int DeeThread_Entry_func(void *arg)
24602474
DBG_memset(&self->ot_thread.t_inout.io_main, 0xcc,
24612475
sizeof(self->ot_thread.t_inout.io_main));
24622476
self->ot_thread.t_context.d_tls = NULL;
2477+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
2478+
self->ot_thread.t_heap = NULL;
2479+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
24632480

24642481
/* Set the thread's name if the OS provides a means to do so */
24652482
#ifdef DeeThread_SetName
@@ -2568,6 +2585,10 @@ PRIVATE int DeeThread_Entry_func(void *arg)
25682585
/* ==== POINT OF NO RETURN ====
25692586
*
25702587
* from this point forth, no deemon code may be executed by the thread anymore */
2588+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
2589+
if (self->ot_thread.t_heap)
2590+
thread_heap_destroy(self->ot_thread.t_heap);
2591+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
25712592

25722593
/* Set-up the thread as having exited */
25732594
_DeeThread_AcquireSetup(&self->ot_thread);
@@ -3846,6 +3867,9 @@ thread_init(DeeThreadObject *__restrict self,
38463867
DBG_memset(&me->ot_thread.t_inout, 0xcc, sizeof(me->ot_thread.t_inout));
38473868
#endif /* !CONFIG_NO_THREADS */
38483869
me->ot_thread.t_context.d_tls = NULL;
3870+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
3871+
me->ot_thread.t_heap = NULL;
3872+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
38493873
return DeeInt_AsIntX(argv[0], &me->ot_tid);
38503874
}
38513875
#endif /* Dee_pid_t */
@@ -3914,6 +3938,9 @@ thread_init(DeeThreadObject *__restrict self,
39143938
self->t_deepassoc.da_mask = 0;
39153939
self->t_deepassoc.da_list = empty_deep_assoc;
39163940
self->t_deepassoc.da_recursion = 0;
3941+
#ifdef CONFIG_EXPERIMENTAL_CUSTOM_HEAP
3942+
self->t_heap = NULL;
3943+
#endif /* CONFIG_EXPERIMENTAL_CUSTOM_HEAP */
39173944
return 0;
39183945
err_main:
39193946
Dee_XDecref(self->t_inout.io_main);

0 commit comments

Comments
 (0)