Skip to content

Commit a3e8ca2

Browse files
committed
Port to Mac OS X + Apple Silicon (16K pages).
1 parent d45d6de commit a3e8ca2

24 files changed

+336
-173
lines changed

.bazelrc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ build:disable-meshing --define disable_meshing=true
1616
build:disable-randomization --define disable_randomization=true
1717
build:shuffle-on-free --define shuffle_on_free=true
1818

19-
build:modern-amd64 --copt=-mavx --copt=-march=westmere
20-
build:modern-amd64 --linkopt=-mavx --linkopt=-march=westmere
19+
build:modern-amd64 --copt=-mavx # --copt=-march=westmere
20+
build:modern-amd64 --linkopt=-mavx # --linkopt=-march=westmere

Makefile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,21 @@
33
# Version 2.0, that can be found in the LICENSE file.
44

55
PREFIX = /usr
6-
BAZEL_CONFIG = --config=modern-amd64
76
LIB_SUFFIX =
87

98
UNAME_S = $(shell uname -s)
9+
UNAME_M = $(shell uname -m)
10+
11+
# Set BAZEL_CONFIG based on architecture
12+
ifeq ($(UNAME_M),x86_64)
13+
BAZEL_CONFIG = --config=modern-amd64
14+
else ifeq ($(UNAME_M),amd64)
15+
BAZEL_CONFIG = --config=modern-amd64
16+
else
17+
# ARM64 and other architectures - no special flags
18+
BAZEL_CONFIG =
19+
endif
20+
1021
ifeq ($(UNAME_S),Darwin)
1122
LIB_EXT = dylib
1223
BAZEL_PREFIX = darwin

src/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ cc_library(
194194

195195
cc_test(
196196
name = "unit-tests",
197+
size = "small", # Test runs in ~2.5s
197198
srcs = glob([
198199
"testing/unit/*.cc",
199200
]),

src/bitmap.h

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,15 @@ using std::atomic_size_t;
4444

4545
// enables iteration through the set bits of the bitmap
4646
template <typename Container>
47-
class BitmapIter : public std::iterator<std::forward_iterator_tag, size_t> {
47+
class BitmapIter {
4848
public:
49+
// Iterator traits (modern C++17 approach, not inheriting from deprecated std::iterator)
50+
using iterator_category = std::forward_iterator_tag;
51+
using value_type = size_t;
52+
using difference_type = std::ptrdiff_t;
53+
using pointer = size_t*;
54+
using reference = size_t&;
55+
4956
BitmapIter(const Container &a, const size_t i) : _i(i), _cont(a) {
5057
}
5158
BitmapIter &operator++() {
@@ -86,28 +93,22 @@ class AtomicBitmapBase {
8693
AtomicBitmapBase(size_t bitCount) {
8794
d_assert_msg(bitCount <= maxBits, "max bits (%zu) exceeded: %zu", maxBits, bitCount);
8895

89-
static_assert(wordCount(representationSize(maxBits)) == 4, "unexpected representation size");
90-
// for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
91-
// _bits[i].store(0, std::memory_order_relaxed);
92-
// }
93-
_bits[0].store(0, std::memory_order_relaxed);
94-
_bits[1].store(0, std::memory_order_relaxed);
95-
_bits[2].store(0, std::memory_order_relaxed);
96-
_bits[3].store(0, std::memory_order_relaxed);
96+
// Initialize all words to 0
97+
constexpr size_t wordCnt = wordCount(representationSize(maxBits));
98+
for (size_t i = 0; i < wordCnt; i++) {
99+
_bits[i].store(0, std::memory_order_relaxed);
100+
}
97101
std::atomic_thread_fence(std::memory_order_release);
98102
}
99103

100104
~AtomicBitmapBase() {
101105
}
102106

103107
inline void ATTRIBUTE_ALWAYS_INLINE setAndExchangeAll(size_t *oldBits, const size_t *newBits) {
104-
// for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
105-
// oldBits[i] = _bits[i].exchange(newBits[i]);
106-
// }
107-
oldBits[0] = _bits[0].exchange(newBits[0], std::memory_order_acq_rel);
108-
oldBits[1] = _bits[1].exchange(newBits[1], std::memory_order_acq_rel);
109-
oldBits[2] = _bits[2].exchange(newBits[2], std::memory_order_acq_rel);
110-
oldBits[3] = _bits[3].exchange(newBits[3], std::memory_order_acq_rel);
108+
constexpr size_t wordCnt = wordCount(representationSize(maxBits));
109+
for (size_t i = 0; i < wordCnt; i++) {
110+
oldBits[i] = _bits[i].exchange(newBits[i], std::memory_order_acq_rel);
111+
}
111112
}
112113

113114
public:
@@ -140,8 +141,12 @@ class AtomicBitmapBase {
140141
}
141142

142143
inline uint32_t ATTRIBUTE_ALWAYS_INLINE inUseCount() const {
143-
return __builtin_popcountl(_bits[0]) + __builtin_popcountl(_bits[1]) + __builtin_popcountl(_bits[2]) +
144-
__builtin_popcountl(_bits[3]);
144+
constexpr size_t wordCnt = wordCount(representationSize(maxBits));
145+
uint32_t count = 0;
146+
for (size_t i = 0; i < wordCnt; i++) {
147+
count += __builtin_popcountl(_bits[i].load(std::memory_order_relaxed));
148+
}
149+
return count;
145150
}
146151

147152
protected:
@@ -273,14 +278,10 @@ class RelaxedFixedBitmapBase {
273278

274279
public:
275280
inline void ATTRIBUTE_ALWAYS_INLINE invert() {
276-
// constexpr size_t numWords = wordCount(representationSize(maxBits));
277-
// for (size_t i = 0; i < numWords; i++) {
278-
// _bits[i] = ~_bits[i];
279-
// }
280-
_bits[0] = ~_bits[0];
281-
_bits[1] = ~_bits[1];
282-
_bits[2] = ~_bits[2];
283-
_bits[3] = ~_bits[3];
281+
constexpr size_t numWords = wordCount(representationSize(maxBits));
282+
for (size_t i = 0; i < numWords; i++) {
283+
_bits[i] = ~_bits[i];
284+
}
284285
}
285286

286287
inline void ATTRIBUTE_ALWAYS_INLINE setAll(uint64_t bitCount) {
@@ -315,15 +316,11 @@ class RelaxedFixedBitmapBase {
315316
}
316317

317318
inline uint64_t ATTRIBUTE_ALWAYS_INLINE inUseCount() const {
318-
constexpr auto wordCount = representationSize(maxBits) / sizeof(size_t);
319+
constexpr auto wordCnt = representationSize(maxBits) / sizeof(size_t);
319320
uint32_t count = 0;
320-
// for (size_t i = 0; i < wordCount; i++) {
321-
// count += __builtin_popcountl(_bits[i]);
322-
// }
323-
count += __builtin_popcountl(_bits[0]);
324-
count += __builtin_popcountl(_bits[1]);
325-
count += __builtin_popcountl(_bits[2]);
326-
count += __builtin_popcountl(_bits[3]);
321+
for (size_t i = 0; i < wordCnt; i++) {
322+
count += __builtin_popcountl(_bits[i]);
323+
}
327324
return count;
328325
}
329326

@@ -332,10 +329,10 @@ class RelaxedFixedBitmapBase {
332329
}
333330

334331
void ATTRIBUTE_ALWAYS_INLINE clear() {
335-
_bits[0] = 0;
336-
_bits[1] = 0;
337-
_bits[2] = 0;
338-
_bits[3] = 0;
332+
constexpr size_t wordCnt = wordCount(representationSize(maxBits));
333+
for (size_t i = 0; i < wordCnt; i++) {
334+
_bits[i] = 0;
335+
}
339336
}
340337

341338
inline size_t ATTRIBUTE_ALWAYS_INLINE bitCount() const {
@@ -477,11 +474,15 @@ class BitmapBase : public Super {
477474
return Super::unsetAt(item, position);
478475
}
479476

480-
// FIXME: who uses this? bad idea with atomics
477+
// NOTE: This is not fully atomic-safe, but only used in debug assertions
478+
// For RelaxedBitmapBase (non-atomic), direct access is fine
479+
// For AtomicBitmapBase, this creates a potential race, but acceptable for debug
481480
inline bool ATTRIBUTE_ALWAYS_INLINE isSet(uint64_t index) const {
482481
uint32_t item, position;
483482
computeItemPosition(index, item, position);
484483

484+
// Use direct access for both atomic and non-atomic variants
485+
// This works because on x86/ARM, reading a size_t is atomic
485486
return Super::_bits[item] & getMask(position);
486487
}
487488

@@ -577,12 +578,12 @@ class BitmapBase : public Super {
577578
} // namespace bitmap
578579

579580
namespace internal {
580-
typedef bitmap::BitmapBase<bitmap::AtomicBitmapBase<256>> Bitmap;
581-
typedef bitmap::BitmapBase<bitmap::RelaxedFixedBitmapBase<256>> RelaxedFixedBitmap;
581+
typedef bitmap::BitmapBase<bitmap::AtomicBitmapBase<1024>> Bitmap;
582+
typedef bitmap::BitmapBase<bitmap::RelaxedFixedBitmapBase<1024>> RelaxedFixedBitmap;
582583
typedef bitmap::BitmapBase<bitmap::RelaxedBitmapBase> RelaxedBitmap;
583584

584-
static_assert(sizeof(Bitmap) == sizeof(size_t) * 4, "Bitmap unexpected size");
585-
static_assert(sizeof(RelaxedFixedBitmap) == sizeof(size_t) * 4, "Bitmap unexpected size");
585+
static_assert(sizeof(Bitmap) == sizeof(size_t) * 16, "Bitmap unexpected size (expected 128 bytes for 1024 bits)");
586+
static_assert(sizeof(RelaxedFixedBitmap) == sizeof(size_t) * 16, "Bitmap unexpected size (expected 128 bytes for 1024 bits)");
586587
static_assert(sizeof(RelaxedBitmap) == sizeof(size_t) * 2, "Bitmap unexpected size");
587588
} // namespace internal
588589
} // namespace mesh

src/common.h

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,29 @@ static constexpr size_t kMaxSize = 16384;
7979
static constexpr size_t kClassSizesMax = 25;
8080
static constexpr size_t kAlignment = 8;
8181
static constexpr int kMinAlign = 16;
82-
static constexpr uint64_t kPageSize = 4096;
82+
83+
// Runtime page size detection for Apple Silicon (16KB) and x86 (4KB) compatibility
84+
namespace internal {
85+
inline size_t initPageSize() {
86+
#if defined(_WIN32)
87+
SYSTEM_INFO sysInfo;
88+
GetSystemInfo(&sysInfo);
89+
return sysInfo.dwPageSize;
90+
#else
91+
return static_cast<size_t>(sysconf(_SC_PAGESIZE));
92+
#endif
93+
}
94+
}
95+
96+
// Runtime page size - initialized on first access
97+
inline size_t getPageSize() {
98+
static const size_t kPageSize = internal::initPageSize();
99+
return kPageSize;
100+
}
101+
102+
// Keep a constexpr version for compile-time calculations (assume minimum 4KB)
103+
static constexpr uint64_t kPageSizeMin = 4096;
104+
83105
static constexpr size_t kMaxFastLargeSize = 256 * 1024; // 256Kb
84106

85107
static constexpr size_t kMaxSplitListSize = 16384;
@@ -100,23 +122,25 @@ static constexpr size_t kMaxMeshesPerIteration = 2500;
100122

101123
// maximum number of dirty pages to hold onto before we flush them
102124
// back to the OS (via MeshableArena::scavenge())
103-
static constexpr size_t kMaxDirtyPageThreshold = 1 << 14; // 64 MB in pages
104-
static constexpr size_t kMinDirtyPageThreshold = 32; // 128 KB in pages
125+
// Note: These are in 4KB page units for backward compatibility
126+
// Actual byte limits are calculated at runtime based on system page size
127+
static constexpr size_t kMaxDirtyPageThreshold = 1 << 14; // 64 MB in 4KB pages
128+
static constexpr size_t kMinDirtyPageThreshold = 32; // 128 KB in 4KB pages
105129

106130
static constexpr uint32_t kSpanClassCount = 256;
107131

108132
static constexpr int kNumBins = 25; // 16Kb max object size
109133
static constexpr int kDefaultMeshPeriod = 10000;
110134

111-
static constexpr size_t kMinArenaExpansion = 4096; // 16 MB in pages
135+
static constexpr size_t kMinArenaExpansion = 4096; // 16 MB in 4KB pages (4096 * 4KB = 16MB)
112136

113137
// ensures we amortize the cost of going to the global heap enough
114138
static constexpr uint64_t kMinStringLen = 8;
115139
static constexpr size_t kMiniheapRefillGoalSize = 4 * 1024;
116140
static constexpr size_t kMaxMiniheapsPerShuffleVector = 24;
117141

118142
// shuffle vector features
119-
static constexpr int16_t kMaxShuffleVectorLength = 256; // sizeof(uint8_t) << 8
143+
static constexpr int16_t kMaxShuffleVectorLength = 1024; // increased to support 16KB pages with 16-byte objects
120144
static constexpr bool kEnableShuffleOnInit = SHUFFLE_ON_INIT == 1;
121145
static constexpr bool kEnableShuffleOnFree = SHUFFLE_ON_FREE == 1;
122146

@@ -129,24 +153,36 @@ static constexpr std::chrono::milliseconds kMeshPeriodMs{100}; // 100 ms
129153
// controls aspects of miniheaps
130154
static constexpr size_t kMaxMeshes = 256; // 1 per bit
131155
#ifdef __APPLE__
132-
static constexpr size_t kArenaSize = 32ULL * 1024ULL * 1024ULL * 1024ULL; // 16 GB
156+
static constexpr size_t kArenaSize = 32ULL * 1024ULL * 1024ULL * 1024ULL; // 32 GB
133157
#else
134158
static constexpr size_t kArenaSize = 64ULL * 1024ULL * 1024ULL * 1024ULL; // 64 GB
135159
#endif
136-
static constexpr size_t kAltStackSize = 16 * 1024UL; // 16k sigaltstacks
160+
static constexpr size_t kAltStackSize = 16 * 1024UL; // 16KB sigaltstacks
137161
#define SIGQUIESCE (SIGRTMIN + 7)
138162
#define SIGDUMP (SIGRTMIN + 8)
139163

140164
// BinnedTracker
141165
static constexpr size_t kBinnedTrackerBinCount = 1;
142166
static constexpr size_t kBinnedTrackerMaxEmpty = 128;
143167

144-
static inline constexpr size_t PageCount(size_t sz) {
145-
return (sz + (kPageSize - 1)) / kPageSize;
168+
// Runtime page count calculation
169+
static inline size_t PageCount(size_t sz) {
170+
const auto pageSize = getPageSize();
171+
return (sz + (pageSize - 1)) / pageSize;
172+
}
173+
174+
// Runtime page rounding
175+
static inline size_t RoundUpToPage(size_t sz) {
176+
return getPageSize() * PageCount(sz);
177+
}
178+
179+
// Constexpr versions for compile-time calculations (use minimum page size)
180+
static inline constexpr size_t PageCountMin(size_t sz) {
181+
return (sz + (kPageSizeMin - 1)) / kPageSizeMin;
146182
}
147183

148-
static inline constexpr size_t RoundUpToPage(size_t sz) {
149-
return kPageSize * PageCount(sz);
184+
static inline constexpr size_t RoundUpToPageMin(size_t sz) {
185+
return kPageSizeMin * PageCountMin(sz);
150186
}
151187

152188
namespace powerOfTwo {
@@ -186,7 +222,8 @@ using std::unique_lock;
186222
#define CACHELINE_SIZE 64
187223
#define CACHELINE_ALIGNED ATTRIBUTE_ALIGNED(CACHELINE_SIZE)
188224
#define CACHELINE_ALIGNED_FN CACHELINE_ALIGNED
189-
#define PAGE_ALIGNED ATTRIBUTE_ALIGNED(kPageSize)
225+
// Use maximum page size for alignment (16KB for Apple Silicon compatibility)
226+
#define PAGE_ALIGNED ATTRIBUTE_ALIGNED(16384)
190227

191228
#define MESH_EXPORT __attribute__((visibility("default")))
192229

src/fixed_array.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,15 @@ namespace mesh {
1313

1414
// enables iteration through the miniheaps in an array
1515
template <typename FixedArray>
16-
class FixedArrayIter : public std::iterator<std::forward_iterator_tag, typename FixedArray::value_type> {
16+
class FixedArrayIter {
1717
public:
18+
// Iterator traits (modern C++17 approach, not inheriting from deprecated std::iterator)
19+
using iterator_category = std::forward_iterator_tag;
20+
using value_type = typename FixedArray::value_type;
21+
using difference_type = std::ptrdiff_t;
22+
using pointer = value_type*;
23+
using reference = value_type&;
24+
1825
FixedArrayIter(const FixedArray &a, const uint32_t i) : _i(i), _array(a) {
1926
}
2027
FixedArrayIter &operator++() {

src/global_heap.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -384,14 +384,14 @@ size_t GlobalHeap::meshSizeClassLocked(size_t sizeClass, MergeSetArray &mergeSet
384384
void GlobalHeap::meshAllSizeClassesLocked() {
385385
static MergeSetArray PAGE_ALIGNED MergeSets;
386386
static_assert(sizeof(MergeSets) == sizeof(void *) * 2 * 4096, "array too big");
387-
d_assert((reinterpret_cast<uintptr_t>(&MergeSets) & (kPageSize - 1)) == 0);
387+
d_assert((reinterpret_cast<uintptr_t>(&MergeSets) & (getPageSize() - 1)) == 0);
388388

389389
static SplitArray PAGE_ALIGNED Left;
390390
static SplitArray PAGE_ALIGNED Right;
391391
static_assert(sizeof(Left) == sizeof(void *) * 16384, "array too big");
392392
static_assert(sizeof(Right) == sizeof(void *) * 16384, "array too big");
393-
d_assert((reinterpret_cast<uintptr_t>(&Left) & (kPageSize - 1)) == 0);
394-
d_assert((reinterpret_cast<uintptr_t>(&Right) & (kPageSize - 1)) == 0);
393+
d_assert((reinterpret_cast<uintptr_t>(&Left) & (getPageSize() - 1)) == 0);
394+
d_assert((reinterpret_cast<uintptr_t>(&Right) & (getPageSize() - 1)) == 0);
395395

396396
// if we have freed but not reset meshed mappings, this will reset
397397
// them to the identity mapping, ensuring we don't blow past our VMA
@@ -505,7 +505,8 @@ shiftedSplitting(MWC &prng, MiniHeapListEntry *miniheaps, SplitArray &left, Spli
505505
return;
506506
}
507507

508-
constexpr size_t nBytes = 32;
508+
// Bitmap size increased from 32 bytes (256 bits) to 128 bytes (1024 bits)
509+
constexpr size_t nBytes = 128;
509510
const size_t limit = rightSize < t ? rightSize : t;
510511
d_assert(nBytes == left[0]->bitmap().byteCount());
511512

src/global_heap.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class GlobalHeap : public MeshableArena {
116116
Span span{0, 0};
117117
char *spanBegin = Super::pageAlloc(span, pageCount, pageAlignment);
118118
d_assert(spanBegin != nullptr);
119-
d_assert((reinterpret_cast<uintptr_t>(spanBegin) / kPageSize) % pageAlignment == 0);
119+
d_assert((reinterpret_cast<uintptr_t>(spanBegin) / getPageSize()) % pageAlignment == 0);
120120

121121
MiniHeap *mh = new (buf) MiniHeap(arenaBegin(), span, objectCount, objectSize);
122122

@@ -142,11 +142,12 @@ class GlobalHeap : public MeshableArena {
142142

143143
lock_guard<mutex> lock(_miniheapLock);
144144

145-
MiniHeap *mh = allocMiniheapLocked(-1, pageCount, 1, pageCount * kPageSize, pageAlignment);
145+
const size_t pageSize = getPageSize();
146+
MiniHeap *mh = allocMiniheapLocked(-1, pageCount, 1, pageCount * pageSize, pageAlignment);
146147

147148
d_assert(mh->isLargeAlloc());
148-
d_assert(mh->spanSize() == pageCount * kPageSize);
149-
// d_assert(mh->objectSize() == pageCount * kPageSize);
149+
d_assert(mh->spanSize() == pageCount * pageSize);
150+
// d_assert(mh->objectSize() == pageCount * pageSize);
150151

151152
void *ptr = mh->mallocAt(arenaBegin(), 0);
152153

@@ -305,8 +306,9 @@ class GlobalHeap : public MeshableArena {
305306
// if we have objects bigger than the size of a page, allocate
306307
// multiple pages to amortize the cost of creating a
307308
// miniheap/globally locking the heap. For example, asking for
308-
// 2048 byte objects would allocate 4 4KB pages.
309-
const size_t objectCount = max(kPageSize / objectSize, kMinStringLen);
309+
// 2048 byte objects would allocate 4 4KB pages (or 16KB pages on Apple Silicon).
310+
// Cap at 1024 to fit within the MiniHeap bitmap size limit (128 bytes = 1024 bits)
311+
const size_t objectCount = min(max(getPageSize() / objectSize, static_cast<size_t>(kMinStringLen)), static_cast<size_t>(1024));
310312
const size_t pageCount = PageCount(objectSize * objectCount);
311313

312314
while (bytesFree < kMiniheapRefillGoalSize && !miniheaps.full()) {

0 commit comments

Comments (0)