Skip to content
This repository was archived by the owner on Jul 9, 2025. It is now read-only.

Commit cc06afa

Browse files
committed
Bug 1669392 - Add more jemalloc size classes r=glandium
Differential Revision: https://phabricator.services.mozilla.com/D92729
1 parent 16f8694 commit cc06afa

File tree

5 files changed

+198
-90
lines changed

5 files changed

+198
-90
lines changed

memory/build/Utils.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,20 @@ struct Log2 : mozilla::tl::CeilingLog2<N> {
2020
};
2121
#define LOG2(N) Log2<N>::value
2222

23+
// Like Log2, but ignores 0.
24+
template <size_t N>
25+
struct Log2Or0 : mozilla::tl::CeilingLog2<N> {
26+
using mozilla::tl::CeilingLog2<N>::value;
27+
static_assert(1ULL << value == N, "Number is not a power of 2");
28+
};
29+
template <>
30+
struct Log2Or0<0> {
31+
// This makes no sense but neither does any other value. It's just enough
32+
// that this can be used on the unused side of a conditional expression.
33+
static const size_t value = 0;
34+
};
35+
#define LOG2_OR_0(N) Log2Or0<N>::value
36+
2337
enum class Order {
2438
eLess = -1,
2539
eEqual = 0,

memory/build/mozjemalloc.cpp

Lines changed: 143 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -52,40 +52,57 @@
5252
//
5353
// Allocation requests are rounded up to the nearest size class, and no record
5454
// of the original request size is maintained. Allocations are broken into
55-
// categories according to size class. Assuming runtime defaults, 4 kB pages
56-
// and a 16 byte quantum on a 32-bit system, the size classes in each category
57-
// are as follows:
55+
// categories according to size class. Assuming runtime defaults, the size
56+
// classes in each category are as follows (for x86, x86_64 and Apple Silicon):
5857
//
59-
// |=====================================|
60-
// | Category | Subcategory | Size |
61-
// |=====================================|
62-
// | Small | Tiny | 4 |
63-
// | | | 8 |
64-
// | |----------------+---------|
65-
// | | Quantum-spaced | 16 |
66-
// | | | 32 |
67-
// | | | 48 |
68-
// | | | ... |
69-
// | | | 480 |
70-
// | | | 496 |
71-
// | | | 512 |
72-
// | |----------------+---------|
73-
// | | Sub-page | 1 kB |
74-
// | | | 2 kB |
75-
// |=====================================|
76-
// | Large | 4 kB |
77-
// | | 8 kB |
78-
// | | 12 kB |
79-
// | | ... |
80-
// | | 1012 kB |
81-
// | | 1016 kB |
82-
// | | 1020 kB |
83-
// |=====================================|
84-
// | Huge | 1 MB |
85-
// | | 2 MB |
86-
// | | 3 MB |
87-
// | | ... |
88-
// |=====================================|
58+
// |===============================================================|
59+
// | Category | Subcategory | x86 | x86_64 | Apple Silicon |
60+
// |---------------------------+---------+---------+---------------+
61+
// | Word size | 32 bit | 64 bit | 64 bit |
62+
// | Page size | 4 kB | 4 kB | 16 kB |
63+
// |===============================================================|
64+
// | Small | Tiny | 4/- | - | - |
65+
// | | | 8 | 8/- | 8 |
66+
// | |----------------+---------|---------|---------------|
67+
// | | Quantum-spaced | 16 | 16 | 16 |
68+
// | | | 32 | 32 | 32 |
69+
// | | | 48 | 48 | 48 |
70+
// | | | ... | ... | ... |
71+
// | | | 480 | 480 | 480 |
72+
// | | | 496 | 496 | 496 |
73+
// | |----------------+---------|---------|---------------|
74+
// | | Quantum-wide- | 512 | 512 | 512 |
75+
// | | spaced | 768 | 768 | 768 |
76+
// | | | ... | ... | ... |
77+
// | | | 3584 | 3584 | 3584 |
78+
// | | | 3840 | 3840 | 3840 |
79+
// | |----------------+---------|---------|---------------|
80+
// | | Sub-page | - | - | 4096 |
81+
// | | | - | - | 8 kB |
82+
// |===============================================================|
83+
// | Large | 4 kB | 4 kB | - |
84+
// | | 8 kB | 8 kB | - |
85+
// | | 12 kB | 12 kB | - |
86+
// | | 16 kB | 16 kB | 16 kB |
87+
// | | ... | ... | - |
88+
// | | 32 kB | 32 kB | 32 kB |
89+
// | | ... | ... | ... |
90+
// | | 1008 kB | 1008 kB | 1008 kB |
91+
// | | 1012 kB | 1012 kB | - |
92+
// | | 1016 kB | 1016 kB | - |
93+
// | | 1020 kB | 1020 kB | - |
94+
// |===============================================================|
95+
// | Huge | 1 MB | 1 MB | 1 MB |
96+
// | | 2 MB | 2 MB | 2 MB |
97+
// | | 3 MB | 3 MB | 3 MB |
98+
// | | ... | ... | ... |
99+
// |===============================================================|
100+
//
101+
// Legend:
102+
// n: Size class exists for this platform.
103+
// n/-: This size class doesn't exist on Windows (see kMinTinyClass).
104+
// -: This size class doesn't exist for this platform.
105+
// ...: Size classes follow a pattern here.
89106
//
90107
// NOTE: Due to Mozilla bug 691003, we cannot reserve less than one word for an
91108
// allocation on Linux or Mac. So on 32-bit *nix, the smallest bucket size is
@@ -377,6 +394,10 @@ struct arena_chunk_t {
377394
// negatively affect performance.
378395
static const size_t kCacheLineSize = 64;
379396

397+
// Our size classes are inclusive ranges of memory sizes. By describing the
398+
// minimums and how memory is allocated in each range the maximums can be
399+
// calculated.
400+
380401
// Smallest size class to support. On Windows the smallest allocation size
381402
// must be 8 bytes on 32-bit, 16 bytes on 64-bit. On Linux and Mac, even
382403
// malloc(1) must reserve a word's worth of memory (see Mozilla bug 691003).
@@ -389,28 +410,47 @@ static const size_t kMinTinyClass = sizeof(void*);
389410
// Maximum tiny size class.
390411
static const size_t kMaxTinyClass = 8;
391412

392-
// Amount (quantum) separating quantum-spaced size classes.
393-
static const size_t kQuantum = 16;
394-
static const size_t kQuantumMask = kQuantum - 1;
395-
396413
// Smallest quantum-spaced size class. It could actually also be labelled a
397414
// tiny allocation, and is spaced as such from the largest tiny size class.
398415
// Tiny classes being powers of 2, this is twice as large as the largest of
399416
// them.
400417
static const size_t kMinQuantumClass = kMaxTinyClass * 2;
418+
static const size_t kMinQuantumWideClass = 512;
419+
static const size_t kMinSubPageClass = 4_KiB;
420+
421+
// Amount (quantum) separating quantum-spaced size classes.
422+
static const size_t kQuantum = 16;
423+
static const size_t kQuantumMask = kQuantum - 1;
424+
static const size_t kQuantumWide = 256;
425+
static const size_t kQuantumWideMask = kQuantumWide - 1;
401426

402-
// Largest quantum-spaced size classes.
403-
static const size_t kMaxQuantumClass = 512;
427+
static const size_t kMaxQuantumClass = kMinQuantumWideClass - kQuantum;
428+
static const size_t kMaxQuantumWideClass = kMinSubPageClass - kQuantumWide;
429+
430+
// We can optimise some divisions to shifts if these are powers of two.
431+
static_assert(mozilla::IsPowerOfTwo(kQuantum),
432+
"kQuantum is not a power of two");
433+
static_assert(mozilla::IsPowerOfTwo(kQuantumWide),
434+
"kQuantumWide is not a power of two");
404435

405436
static_assert(kMaxQuantumClass % kQuantum == 0,
406437
"kMaxQuantumClass is not a multiple of kQuantum");
438+
static_assert(kMaxQuantumWideClass % kQuantumWide == 0,
439+
"kMaxQuantumWideClass is not a multiple of kQuantumWide");
440+
static_assert(kQuantum < kQuantumWide,
441+
"kQuantum must be smaller than kQuantumWide");
442+
static_assert(mozilla::IsPowerOfTwo(kMinSubPageClass),
443+
"kMinSubPageClass is not a power of two");
407444

408445
// Number of (2^n)-spaced tiny classes.
409446
static const size_t kNumTinyClasses =
410-
LOG2(kMinQuantumClass) - LOG2(kMinTinyClass);
447+
LOG2(kMaxTinyClass) - LOG2(kMinTinyClass) + 1;
411448

412449
// Number of quantum-spaced classes.
413-
static const size_t kNumQuantumClasses = kMaxQuantumClass / kQuantum;
450+
static const size_t kNumQuantumClasses =
451+
(kMaxQuantumClass - kMinQuantumClass) / kQuantum + 1;
452+
static const size_t kNumQuantumWideClasses =
453+
(kMaxQuantumWideClass - kMinQuantumWideClass) / kQuantumWide + 1;
414454

415455
// Size and alignment of memory chunks that are allocated by the OS's virtual
416456
// memory system.
@@ -443,6 +483,7 @@ static size_t gPageSize;
443483
# define END_GLOBALS
444484
# define DEFINE_GLOBAL(type) static const type
445485
# define GLOBAL_LOG2 LOG2
486+
# define GLOBAL_LOG2_OR_0 LOG2_OR_0
446487
# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, # x)
447488
# define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
448489
# define GLOBAL_ASSERT(...) \
@@ -455,6 +496,7 @@ static size_t gPageSize;
455496
# define END_GLOBALS }
456497
# define DEFINE_GLOBAL(type)
457498
# define GLOBAL_LOG2 FloorLog2
499+
# define GLOBAL_LOG2_OR_0 FloorLog2
458500
# define GLOBAL_ASSERT MOZ_RELEASE_ASSERT
459501
#endif
460502

@@ -467,15 +509,21 @@ DECLARE_GLOBAL(size_t, gChunkHeaderNumPages)
467509
DECLARE_GLOBAL(size_t, gMaxLargeClass)
468510

469511
DEFINE_GLOBALS
470-
// Largest sub-page size class.
471-
DEFINE_GLOBAL(size_t) gMaxSubPageClass = gPageSize / 2;
512+
513+
// Largest sub-page size class, or zero if there are none
514+
DEFINE_GLOBAL(size_t)
515+
gMaxSubPageClass = gPageSize / 2 >= kMinSubPageClass ? gPageSize / 2 : 0;
472516

473517
// Max size class for bins.
474-
#define gMaxBinClass gMaxSubPageClass
518+
#define gMaxBinClass \
519+
(gMaxSubPageClass ? gMaxSubPageClass : kMaxQuantumWideClass)
475520

476-
// Number of (2^n)-spaced sub-page bins.
521+
// Number of sub-page bins.
477522
DEFINE_GLOBAL(uint8_t)
478-
gNumSubPageClasses = GLOBAL_LOG2(gMaxSubPageClass) - LOG2(kMaxQuantumClass);
523+
gNumSubPageClasses =
524+
static_cast<uint8_t>(gMaxSubPageClass ? GLOBAL_LOG2_OR_0(gMaxSubPageClass) -
525+
LOG2(kMinSubPageClass) + 1
526+
: 0);
479527

480528
DEFINE_GLOBAL(uint8_t) gPageSize2Pow = GLOBAL_LOG2(gPageSize);
481529
DEFINE_GLOBAL(size_t) gPageSizeMask = gPageSize - 1;
@@ -500,9 +548,16 @@ gMaxLargeClass =
500548
GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize,
501549
"Page size is not a power of two");
502550
GLOBAL_ASSERT(kQuantum >= sizeof(void*));
503-
GLOBAL_ASSERT(kQuantum <= gPageSize);
551+
GLOBAL_ASSERT(kQuantum <= kQuantumWide);
552+
GLOBAL_ASSERT(kQuantumWide <= (kMinSubPageClass - kMaxQuantumClass));
553+
554+
GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass);
555+
556+
GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0);
557+
GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass);
504558
GLOBAL_ASSERT(kChunkSize >= gPageSize);
505559
GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize);
560+
506561
END_GLOBALS
507562

508563
// Recycle at most 128 MiB of chunks. This means we retain at most
@@ -526,13 +581,19 @@ static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
526581

527582
// Return the smallest quantum multiple that is >= a.
528583
#define QUANTUM_CEILING(a) (((a) + (kQuantumMask)) & ~(kQuantumMask))
584+
#define QUANTUM_WIDE_CEILING(a) \
585+
(((a) + (kQuantumWideMask)) & ~(kQuantumWideMask))
586+
587+
// Return the smallest sub-page size class (a power of two) that is >= a.
588+
#define SUBPAGE_CEILING(a) (RoundUpPow2(a))
529589

530590
// Return the smallest pagesize multiple that is >= s.
531591
#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask)
532592

533593
// Number of all the small-allocated classes
534-
#define NUM_SMALL_CLASSES \
535-
(kNumTinyClasses + kNumQuantumClasses + gNumSubPageClasses)
594+
#define NUM_SMALL_CLASSES \
595+
(kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + \
596+
gNumSubPageClasses)
536597

537598
// ***************************************************************************
538599
// MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
@@ -658,6 +719,7 @@ class SizeClass {
658719
enum ClassType {
659720
Tiny,
660721
Quantum,
722+
QuantumWide,
661723
SubPage,
662724
Large,
663725
};
@@ -669,9 +731,12 @@ class SizeClass {
669731
} else if (aSize <= kMaxQuantumClass) {
670732
mType = Quantum;
671733
mSize = QUANTUM_CEILING(aSize);
734+
} else if (aSize <= kMaxQuantumWideClass) {
735+
mType = QuantumWide;
736+
mSize = QUANTUM_WIDE_CEILING(aSize);
672737
} else if (aSize <= gMaxSubPageClass) {
673738
mType = SubPage;
674-
mSize = RoundUpPow2(aSize);
739+
mSize = SUBPAGE_CEILING(aSize);
675740
} else if (aSize <= gMaxLargeClass) {
676741
mType = Large;
677742
mSize = PAGE_CEILING(aSize);
@@ -878,7 +943,10 @@ struct arena_bin_t {
878943
// 304 12 KiB 320 12 KiB 336 4 KiB 352 8 KiB
879944
// 368 4 KiB 384 8 KiB 400 20 KiB 416 16 KiB
880945
// 432 12 KiB 448 4 KiB 464 16 KiB 480 8 KiB
881-
// 496 20 KiB 512 32 KiB 1024 64 KiB 2048 128 KiB
946+
// 496 20 KiB 512 32 KiB 768 16 KiB 1024 64 KiB
947+
// 1280 24 KiB 1536 32 KiB 1792 16 KiB 2048 128 KiB
948+
// 2304 16 KiB 2560 48 KiB 2816 36 KiB 3072 64 KiB
949+
// 3328 36 KiB 3584 32 KiB 3840 64 KiB
882950
inline void Init(SizeClass aSizeClass);
883951
};
884952

@@ -972,8 +1040,12 @@ struct arena_t {
9721040
// 33 | 496 |
9731041
// 34 | 512 |
9741042
// --------+------+
975-
// 35 | 1024 |
976-
// 36 | 2048 |
1043+
// 35 | 768 |
1044+
// 36 | 1024 |
1045+
// : :
1046+
// : :
1047+
// 46 | 3584 |
1048+
// 47 | 3840 |
9771049
// --------+------+
9781050
arena_bin_t mBins[1]; // Dynamically sized.
9791051

@@ -2821,11 +2893,21 @@ void* arena_t::MallocSmall(size_t aSize, bool aZero) {
28212893
bin = &mBins[FloorLog2(aSize / kMinTinyClass)];
28222894
break;
28232895
case SizeClass::Quantum:
2824-
bin = &mBins[kNumTinyClasses + (aSize / kQuantum) - 1];
2896+
// Although we divide 2 things by kQuantum, the compiler will
2897+
// reduce `kMinQuantumClass / kQuantum` and `kNumTinyClasses` to a
2898+
// single constant.
2899+
bin = &mBins[kNumTinyClasses + (aSize / kQuantum) -
2900+
(kMinQuantumClass / kQuantum)];
2901+
break;
2902+
case SizeClass::QuantumWide:
2903+
bin =
2904+
&mBins[kNumTinyClasses + kNumQuantumClasses + (aSize / kQuantumWide) -
2905+
(kMinQuantumWideClass / kQuantumWide)];
28252906
break;
28262907
case SizeClass::SubPage:
2827-
bin = &mBins[kNumTinyClasses + kNumQuantumClasses +
2828-
(FloorLog2(aSize / kMaxQuantumClass) - 1)];
2908+
bin =
2909+
&mBins[kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses +
2910+
(FloorLog2(aSize) - LOG2(kMinSubPageClass))];
28292911
break;
28302912
default:
28312913
MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Unexpected size class type");
@@ -3558,8 +3640,8 @@ arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
35583640
arena_bin_t& bin = mBins[i];
35593641
bin.Init(sizeClass);
35603642

3561-
// SizeClass doesn't want sizes larger than gMaxSubPageClass for now.
3562-
if (sizeClass.Size() == gMaxSubPageClass) {
3643+
// SizeClass doesn't want sizes larger than gMaxBinClass for now.
3644+
if (sizeClass.Size() == gMaxBinClass) {
35633645
break;
35643646
}
35653647
sizeClass = sizeClass.Next();
@@ -4253,6 +4335,9 @@ inline void MozJemalloc::jemalloc_stats_internal(
42534335
aStats->opt_zero = opt_zero;
42544336
aStats->quantum = kQuantum;
42554337
aStats->quantum_max = kMaxQuantumClass;
4338+
aStats->quantum_wide = kQuantumWide;
4339+
aStats->quantum_wide_max = kMaxQuantumWideClass;
4340+
aStats->subpage_max = gMaxSubPageClass;
42564341
aStats->large_max = gMaxLargeClass;
42574342
aStats->chunksize = kChunkSize;
42584343
aStats->page_size = gPageSize;

memory/build/mozjemalloc_types.h

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,18 @@ typedef struct arena_params_s {
7777
// file.
7878
typedef struct {
7979
// Run-time configuration settings.
80-
bool opt_junk; // Fill allocated memory with kAllocJunk?
81-
bool opt_zero; // Fill allocated memory with 0x0?
82-
size_t narenas; // Number of arenas.
83-
size_t quantum; // Allocation quantum.
84-
size_t quantum_max; // Max quantum-spaced allocation size.
85-
// The next size class, sub-pagesize's max is always page_size/2.
86-
size_t large_max; // Max sub-chunksize allocation size.
87-
size_t chunksize; // Size of each virtual memory mapping.
88-
size_t page_size; // Size of pages.
89-
size_t dirty_max; // Max dirty pages per arena.
80+
bool opt_junk; // Fill allocated memory with kAllocJunk?
81+
bool opt_zero; // Fill allocated memory with 0x0?
82+
size_t narenas; // Number of arenas.
83+
size_t quantum; // Allocation quantum.
84+
size_t quantum_max; // Max quantum-spaced allocation size.
85+
size_t quantum_wide; // Allocation quantum (QuantumWide).
86+
size_t quantum_wide_max; // Max quantum-wide-spaced allocation size.
87+
size_t subpage_max; // Max subpage allocation size.
88+
size_t large_max; // Max sub-chunksize allocation size.
89+
size_t chunksize; // Size of each virtual memory mapping.
90+
size_t page_size; // Size of pages.
91+
size_t dirty_max; // Max dirty pages per arena.
9092

9193
// Current memory usage statistics.
9294
size_t mapped; // Bytes mapped (not necessarily committed).
@@ -111,7 +113,8 @@ typedef struct {
111113
size_t bytes_per_run; // The number of bytes per run, including headers.
112114
} jemalloc_bin_stats_t;
113115

114-
#define JEMALLOC_MAX_STATS_BINS 40
116+
// This is the total number of bins.
117+
#define JEMALLOC_MAX_STATS_BINS 51
115118

116119
enum PtrInfoTag {
117120
// The pointer is not currently known to the allocator.

0 commit comments

Comments
 (0)