Skip to content

Commit fc9344a

Browse files
committed
HP_MALLOC: Dramatically reduce memory fragmentation
For HP_MALLOC to offer optimal parallel allocation performance, the memory needs to be fragmented: the more available chunks, the better its performance. However, some coalescing is needed as well, otherwise variable-length allocations past the 10K range will quickly deplete (fragment) even SHM pools of 10+ GB. This patch adds the following heuristics to HP_MALLOC and HP_MALLOC_DBG: * shm_malloc(): only split a fragment if the resulting free chunk is at least MIN_SPLIT_SIZE bytes (default: 256 for PKG, 4096 for SHM) * shm_free(): attempt to coalesce with the next fragment (enough to fix all fragmentation issues). Many thanks to 46Labs for supporting this work!
1 parent 1897599 commit fc9344a

File tree

7 files changed

+247
-298
lines changed

7 files changed

+247
-298
lines changed

mem/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ typedef unsigned long (*osips_get_mmstat_f) (void *block);
9999
typedef void (*osips_shm_stats_init_f) (void *block, int core_index);
100100

101101
#define oom_errorf \
102-
"not enough free %s memory (%lu bytes left, need %lu), " \
102+
"not enough free %s memory (%ld bytes left, need %lu), " \
103103
"please increase the \"-%s\" command line parameter!\n"
104104

105105
#define oom_nostats_errorf \

mem/hp_malloc.c

Lines changed: 78 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,15 @@
4141
#endif
4242

4343
#define MIN_FRAG_SIZE ROUNDTO
44-
#define FRAG_NEXT(f) ((struct hp_frag *) \
45-
((char *)(f) + sizeof(struct hp_frag) + ((struct hp_frag *)(f))->size))
4644

47-
#define FRAG_OVERHEAD (sizeof(struct hp_frag))
48-
#define frag_is_free(_f) ((_f)->prev)
45+
/* only perform a split if the resulting free fragment is at least this size */
46+
#define MIN_SHM_SPLIT_SIZE 4096
47+
#define MIN_PKG_SPLIT_SIZE 256
48+
49+
#define FRAG_NEXT(f) ((struct hp_frag *)((char *)((f) + 1) + (f)->size))
50+
51+
#define FRAG_OVERHEAD HP_FRAG_OVERHEAD
52+
#define frag_is_free(_f) ((_f)->prev)
4953

5054
/* used when detaching free fragments */
5155
static unsigned int optimized_get_indexes[HP_HASH_SIZE];
@@ -56,7 +60,7 @@ static unsigned int optimized_put_indexes[HP_HASH_SIZE];
5660
/* finds the hash value for s, s=ROUNDTO multiple */
5761
#define GET_HASH(s) (((unsigned long)(s) <= HP_MALLOC_OPTIMIZE) ? \
5862
(unsigned long)(s) / ROUNDTO : \
59-
HP_LINEAR_HASH_SIZE + big_hash_idx((s)) - HP_MALLOC_OPTIMIZE_FACTOR + 1)
63+
HP_LINEAR_HASH_SIZE + big_hash_idx(s) - HP_MALLOC_OPTIMIZE_FACTOR + 1)
6064

6165
/*
6266
* - for heavily used sizes (which need some optimizing) it returns
@@ -96,10 +100,6 @@ static unsigned int optimized_put_indexes[HP_HASH_SIZE];
96100
}) : \
97101
HP_LINEAR_HASH_SIZE + big_hash_idx((s)) - HP_MALLOC_OPTIMIZE_FACTOR + 1)
98102

99-
100-
101-
102-
103103
extern unsigned long *shm_hash_usage;
104104

105105
/*
@@ -135,12 +135,14 @@ stat_var *shm_frags;
135135

136136
#define MEM_FRAG_AVOIDANCE
137137

138-
#define HP_MALLOC_LARGE_LIMIT HP_MALLOC_OPTIMIZE
139-
#define HP_MALLOC_DEFRAG_LIMIT (HP_MALLOC_LARGE_LIMIT * 5)
140-
#define HP_MALLOC_DEFRAG_PERCENT 5
138+
#define can_split_frag(frag, wanted_size, min_size) \
139+
((frag)->size - wanted_size >= min_size)
141140

142-
#define can_split_frag(frag, wanted_size) \
143-
((frag)->size - wanted_size > (FRAG_OVERHEAD + MIN_FRAG_SIZE))
141+
#define can_split_pkg_frag(frag, wanted_size) \
142+
can_split_frag(frag, wanted_size, MIN_PKG_SPLIT_SIZE)
143+
#define can_split_shm_frag(frag, wanted_size) \
144+
can_split_frag(frag, wanted_size, MIN_SHM_SPLIT_SIZE)
145+
#define can_split_rpm_frag can_split_shm_frag
144146

145147
/* computes hash number for big buckets */
146148
inline static unsigned long big_hash_idx(unsigned long s)
@@ -159,6 +161,36 @@ inline static unsigned long big_hash_idx(unsigned long s)
159161
return idx;
160162
}
161163

164+
static inline void hp_lock(struct hp_block *hpb, unsigned int hash)
165+
{
166+
int i;
167+
168+
if (!hpb->free_hash[hash].is_optimized) {
169+
SHM_LOCK(hash);
170+
return;
171+
}
172+
173+
/* for optimized buckets, we have to lock the entire array */
174+
hash = HP_HASH_SIZE + hash * shm_secondary_hash_size;
175+
for (i = 0; i < shm_secondary_hash_size; i++)
176+
SHM_LOCK(hash + i);
177+
}
178+
179+
static inline void hp_unlock(struct hp_block *hpb, unsigned int hash)
180+
{
181+
int i;
182+
183+
if (!hpb->free_hash[hash].is_optimized) {
184+
SHM_UNLOCK(hash);
185+
return;
186+
}
187+
188+
/* for optimized buckets, we have to unlock the entire array */
189+
hash = HP_HASH_SIZE + hash * shm_secondary_hash_size;
190+
for (i = 0; i < shm_secondary_hash_size; i++)
191+
SHM_UNLOCK(hash + i);
192+
}
193+
162194
#ifdef SHM_EXTRA_STATS
163195
#include "module_info.h"
164196
unsigned long hp_stats_get_index(void *ptr)
@@ -178,26 +210,43 @@ void hp_stats_set_index(void *ptr, unsigned long idx)
178210
}
179211
#endif
180212

213+
#if 0
/* walk through all fragments and write them to the log. Useful for dev */
static void hp_dump(struct hp_block *hpb)
{
	struct hp_frag *it = hpb->first_frag;

	fprintf(stderr, "dumping all fragments...\n");

	/* FRAG_NEXT() jumps over each fragment's header + payload */
	while (it < hpb->last_frag) {
		fprintf(stderr, " | sz: %lu, prev: %p, next: %p |\n", it->size,
			it->prev, it->nxt_free);
		it = FRAG_NEXT(it);
	}
}
#endif
227+
181228
static inline void hp_frag_attach(struct hp_block *hpb, struct hp_frag *frag)
182229
{
183230
struct hp_frag **f;
184231
unsigned int hash;
185232

233+
186234
hash = GET_HASH_RR(hpb, frag->size);
235+
187236
f = &(hpb->free_hash[hash].first);
188237

189238
if (frag->size > HP_MALLOC_OPTIMIZE){ /* because of '<=' in GET_HASH,
190239
purpose --andrei ) */
191-
for(; *f; f=&((*f)->u.nxt_free)){
240+
for(; *f; f=&((*f)->nxt_free)){
192241
if (frag->size <= (*f)->size) break;
193242
}
194243
}
195244

196245
/*insert it here*/
197246
frag->prev = f;
198-
frag->u.nxt_free=*f;
247+
frag->nxt_free=*f;
199248
if (*f)
200-
(*f)->prev = &(frag->u.nxt_free);
249+
(*f)->prev = &(frag->nxt_free);
201250

202251
*f = frag;
203252

@@ -213,10 +262,10 @@ static inline void hp_frag_detach(struct hp_block *hpb, struct hp_frag *frag)
213262
pf = frag->prev;
214263

215264
/* detach */
216-
*pf = frag->u.nxt_free;
265+
*pf = frag->nxt_free;
217266

218-
if (frag->u.nxt_free)
219-
frag->u.nxt_free->prev = pf;
267+
if (frag->nxt_free)
268+
frag->nxt_free->prev = pf;
220269

221270
frag->prev = NULL;
222271

@@ -411,7 +460,7 @@ int hp_mem_warming(struct hp_block *hpb)
411460
while (bucket_mem >= FRAG_OVERHEAD + current_frag_size) {
412461
hp_frag_detach(hpb, big_frag);
413462
if (stats_are_ready()) {
414-
update_stats_shm_frag_detach(big_frag);
463+
update_stats_shm_frag_detach(big_frag->size);
415464
#if defined(DBG_MALLOC) || defined(STATISTICS)
416465
hpb->used += big_frag->size;
417466
hpb->real_used += big_frag->size + FRAG_OVERHEAD;
@@ -438,10 +487,6 @@ int hp_mem_warming(struct hp_block *hpb)
438487
hp_frag_attach(hpb, big_frag);
439488
if (stats_are_ready()) {
440489
update_stats_shm_frag_attach(big_frag);
441-
#if defined(DBG_MALLOC) || defined(STATISTICS)
442-
hpb->used -= big_frag->size;
443-
hpb->real_used -= big_frag->size + FRAG_OVERHEAD;
444-
#endif
445490
} else {
446491
hpb->used -= big_frag->size;
447492
hpb->real_used -= big_frag->size + FRAG_OVERHEAD;
@@ -497,8 +542,7 @@ static struct hp_block *hp_malloc_init(char *address, unsigned long size,
497542

498543
size = ROUNDDOWN(size);
499544

500-
init_overhead = (ROUNDUP(sizeof(struct hp_block)) + 2 * FRAG_OVERHEAD);
501-
545+
init_overhead = ROUNDUP(sizeof(struct hp_block)) + 2 * FRAG_OVERHEAD;
502546
if (size < init_overhead)
503547
{
504548
LM_ERR("not enough memory for the basic structures! "
@@ -516,24 +560,19 @@ static struct hp_block *hp_malloc_init(char *address, unsigned long size,
516560
hpb->used = 0;
517561
hpb->real_used = init_overhead;
518562
hpb->max_real_used = init_overhead;
563+
hpb->total_fragments = 2;
519564
gettimeofday(&hpb->last_updated, NULL);
520565

521566
hpb->first_frag = (struct hp_frag *)(start + ROUNDUP(sizeof(struct hp_block)));
522-
hpb->last_frag = (struct hp_frag *)(end - sizeof(struct hp_frag));
523-
/* init initial fragment*/
524-
hpb->first_frag->size = size - init_overhead;
567+
hpb->last_frag = (struct hp_frag *)(end - sizeof *hpb->last_frag);
525568
hpb->last_frag->size = 0;
526569

527-
hpb->last_frag->prev = NULL;
570+
/* init initial fragment */
571+
hpb->first_frag->size = size - init_overhead;
528572
hpb->first_frag->prev = NULL;
573+
hpb->last_frag->prev = NULL;
529574

530-
/* link initial fragment into the free list*/
531-
532-
hpb->large_space = 0;
533-
hpb->large_limit = hpb->size / 100 * HP_MALLOC_DEFRAG_PERCENT;
534-
535-
if (hpb->large_limit < HP_MALLOC_DEFRAG_LIMIT)
536-
hpb->large_limit = HP_MALLOC_DEFRAG_LIMIT;
575+
hp_frag_attach(hpb, hpb->first_frag);
537576

538577
return hpb;
539578
}
@@ -549,14 +588,6 @@ struct hp_block *hp_pkg_malloc_init(char *address, unsigned long size,
549588
return NULL;
550589
}
551590

552-
hp_frag_attach(hpb, hpb->first_frag);
553-
554-
/* first fragment attach is the equivalent of a split */
555-
#if defined(DBG_MALLOC) && !defined(STATISTICS)
556-
hpb->real_used += FRAG_OVERHEAD;
557-
hpb->total_fragments++;
558-
#endif
559-
560591
return hpb;
561592
}
562593

@@ -575,23 +606,6 @@ struct hp_block *hp_shm_malloc_init(char *address, unsigned long size,
575606
hpb->free_hash[PEEK_HASH_RR(hpb, hpb->first_frag->size)].total_no++;
576607
#endif
577608

578-
hp_frag_attach(hpb, hpb->first_frag);
579-
580-
/* first fragment attach is the equivalent of a split */
581-
if (stats_are_ready()) {
582-
#if defined(STATISTICS) && !defined(HP_MALLOC_FAST_STATS)
583-
update_stat(shm_rused, FRAG_OVERHEAD);
584-
update_stat(shm_frags, 1);
585-
#endif
586-
#if defined(DBG_MALLOC) || defined(STATISTICS)
587-
hpb->real_used += FRAG_OVERHEAD;
588-
hpb->total_fragments++;
589-
#endif
590-
} else {
591-
hpb->real_used += FRAG_OVERHEAD;
592-
hpb->total_fragments++;
593-
}
594-
595609
#ifdef HP_MALLOC_FAST_STATS
596610
#ifdef DBG_MALLOC
597611
hp_stats_lock = hp_shm_malloc_unsafe(hpb, sizeof *hp_stats_lock,
@@ -619,7 +633,7 @@ void hp_stats_core_init(struct hp_block *hp, int core_index)
619633
{
620634
struct hp_frag *f;
621635

622-
for (f=hp->first_frag; (char*)f<(char*)hp->last_frag; f=FRAG_NEXT(f))
636+
for (f=hp->first_frag; f < hp->last_frag; f=FRAG_NEXT(f))
623637
if (!frag_is_free(f))
624638
f->statistic_index = core_index;
625639
}

mem/hp_malloc.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,10 @@ extern stat_var *rpm_frags;
9999

100100
struct hp_frag {
101101
unsigned long size;
102-
union {
103-
struct hp_frag *nxt_free;
104-
long reserved;
105-
} u;
102+
106103
struct hp_frag **prev;
104+
struct hp_frag *nxt_free;
105+
107106
#ifdef DBG_MALLOC
108107
const char *file;
109108
const char *func;
@@ -140,9 +139,6 @@ struct hp_block {
140139
char *name; /* purpose of this memory block */
141140

142141
unsigned long size; /* total size */
143-
unsigned long large_space;
144-
unsigned long large_limit;
145-
146142
unsigned long used; /* alloc'ed size */
147143
unsigned long real_used; /* used+malloc overhead */
148144
unsigned long max_real_used;

0 commit comments

Comments
 (0)