
Commit dd25f06

New allocation algorithm (#1166)

Replaced the algorithm in class arena with one that uses mmap()'d address spaces and demand paging for each semispace, and initializes the current semispace of each arena at start time and the second semispace of each arena at swap time, to avoid relying on Undefined Behavior or having an extra test in kore_arena_alloc().

---------

Co-authored-by: Dwight Guth <[email protected]>

1 parent f8a98ac commit dd25f06
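The commit only declares initialize_semispace() in the header below; its definition lives in the runtime sources, which are not part of this diff. As a minimal sketch of the mmap()-and-demand-paging approach the commit message describes (reserve_semispace is a hypothetical helper, not the actual implementation), reserving one semispace might look like this:

#include <sys/mman.h>
#include <cstddef>

// Hypothetical sketch: reserve `size` bytes of private, zero-filled memory.
// The kernel materializes pages only on first touch (demand paging), so a
// very large up-front reservation costs almost nothing until it is used.
static char *reserve_semispace(std::size_t size) {
  void *addr = mmap(
      nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1,
      0);
  return addr == MAP_FAILED ? nullptr : static_cast<char *>(addr);
}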

3 files changed: +151 −258 lines changed

include/runtime/arena.h

Lines changed: 111 additions & 34 deletions
@@ -1,38 +1,49 @@
 #ifndef ARENA_H
 #define ARENA_H
 
+#include <algorithm>
 #include <cstddef>
+#include <cstdint>
 #include <sys/types.h>
+#include <utility>
 
 #include "runtime/alloc.h"
 
 extern "C" {
 
+size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
+
 // An arena can be used to allocate objects that can then be deallocated all at
 // once.
 class arena {
 public:
   arena(char id)
-      : allocation_semispace_id(id) { }
+      : allocation_semispace_id(id) {
+    initialize_semispace();
+  }
 
   // Allocates the requested number of bytes as a contiguous region and returns a
   // pointer to the first allocated byte.
-  // If called with requested size greater than the maximun single allocation
-  // size, the space is allocated in a general (not garbage collected pool).
   void *kore_arena_alloc(size_t requested);
 
   // Returns the address of the first byte that belongs in the given arena.
   // Returns 0 if nothing has been allocated ever in that arena.
-  char *arena_start_ptr() const;
+  char *arena_start_ptr() const {
+    return current_addr_ptr ? current_addr_ptr + sizeof(memory_block_header)
+                            : nullptr;
+  }
 
   // Returns a pointer to a location holding the address of last allocated
   // byte in the given arena plus 1.
   // This address is 0 if nothing has been allocated ever in that arena.
-  char **arena_end_ptr();
+  char **arena_end_ptr() { return &allocation_ptr; }
 
   // return the total number of allocatable bytes currently in the arena in its
   // active semispace.
-  size_t arena_size() const;
+  size_t arena_size() const {
+    update_num_blocks();
+    return BLOCK_SIZE * std::max(num_blocks, num_collection_blocks);
+  }
 
   // Clears the current allocation space by setting its start back to its first
   // block. It is used during garbage collection to effectively collect all of the
@@ -41,15 +52,18 @@ class arena {
 
   // Resizes the last allocation as long as the resize does not require a new
   // block allocation.
-  // Returns the address of the byte following the last newlly allocated byte when
-  // the resize succeeds, returns 0 otherwise.
-  void *arena_resize_last_alloc(ssize_t increase);
+  // Returns the address of the byte following the last newly allocated byte.
+  void *arena_resize_last_alloc(ssize_t increase) {
+    return (allocation_ptr += increase);
+  }
 
   // Returns the given arena's current collection semispace ID.
   // Each arena has 2 semispace IDs, one equal to the arena ID and the other equal
   // to the 1's complement of the arena ID. At any time one of these semispaces
   // is used for allocation and the other is used for collection.
-  char get_arena_collection_semispace_id() const;
+  char get_arena_collection_semispace_id() const {
+    return ~allocation_semispace_id;
+  }
 
   // Exchanges the current allocation and collection semispaces and clears the new
   // current allocation semispace by setting its start back to its first block.
@@ -61,7 +75,7 @@ class arena {
   // by the blocks of that arena. This difference will include blocks containing
   // sentinel bytes. Undefined behavior will result if the pointers belong to
   // different arenas.
-  static ssize_t ptr_diff(char *ptr1, char *ptr2);
+  static ssize_t ptr_diff(char *ptr1, char *ptr2) { return ptr1 - ptr2; }
 
   // Given a starting pointer to an address allocated in an arena and a size in
   // bytes, this function returns a pointer to an address allocated in the
@@ -72,42 +86,70 @@
   // 3rd argument: the address of last allocated byte in the arena plus 1
   // Return value: the address allocated in the arena after size bytes from the
   // starting pointer, or 0 if this is equal to the 3rd argument.
-  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr);
+  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
+    char *next_ptr = ptr + size;
+    return (next_ptr == arena_end_ptr) ? 0 : next_ptr;
+  }
 
   // Returns the ID of the semispace where the given address was allocated.
   // The behavior is undefined if called with an address that has not been
   // allocated within an arena.
   static char get_arena_semispace_id_of_object(void *ptr);
 
 private:
-  struct memory_block_header {
-    char *next_block;
+  union memory_block_header {
+    //
+    // Currently the header just holds the semispace id. But we need it to be a
+    // multiple of sizeof(char*) for alignment purposes so we add a dummy char*.
+    //
     char semispace;
+    char *alignment_dummy;
   };
 
-  void fresh_block();
-  static memory_block_header *mem_block_header(void *ptr);
+  //
+  // We update the number of 1MB blocks actually written to only when we need
+  // this value, or before a garbage collection, rather than trying to determine
+  // when we write to a fresh block.
+  //
+  void update_num_blocks() const {
+    //
+    // Calculate how many 1MB blocks of the current arena we have used.
+    //
+    size_t num_used_blocks
+        = (allocation_ptr - current_addr_ptr - 1) / BLOCK_SIZE + 1;
+    if (num_used_blocks > num_blocks)
+      num_blocks = num_used_blocks;
+  }
+
+  void initialize_semispace();
 
-  // helper function for `kore_arena_alloc`. Do not call directly.
-  void *do_alloc_slow(size_t requested);
+  static memory_block_header *mem_block_header(void *ptr) {
+    uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+    return reinterpret_cast<arena::memory_block_header *>(
+        (address - 1) & ~(HYPERBLOCK_SIZE - 1));
+  }
 
-  char *first_block; // beginning of first block
-  char *block; // where allocations are being made in current block
-  char *block_start; // start of current block
-  char *block_end; // 1 past end of current block
-  char *first_collection_block; // beginning of other semispace
-  size_t num_blocks; // number of blocks in current semispace
-  size_t num_collection_blocks; // number of blocks in other semispace
+  //
+  // Current semispace where allocations are being made.
+  //
+  char *current_addr_ptr; // pointer to start of current address space
+  char *allocation_ptr; // next available location in current semispace
+  char *tripwire; // allocating past this triggers slow allocation
+  mutable size_t
+      num_blocks; // notional number of BLOCK_SIZE blocks in current semispace
   char allocation_semispace_id; // id of current semispace
+  //
+  // Semispace where allocations will be made during and after garbage collection.
+  //
+  char *collection_addr_ptr
+      = nullptr; // pointer to start of collection address space
+  size_t num_collection_blocks
+      = 0; // notional number of BLOCK_SIZE blocks in collection semispace
 };
 
 // Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
 // 127.
 #define REGISTER_ARENA(name, id) static thread_local arena name(id)
 
-#define MEM_BLOCK_START(ptr) \
-    ((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))
-
 #ifdef __MACH__
 //
 // thread_local disabled for Apple
@@ -120,16 +162,51 @@ extern thread_local bool time_for_collection;
 size_t get_gc_threshold();
 
 inline void *arena::kore_arena_alloc(size_t requested) {
-  if (block + requested > block_end) {
-    return do_alloc_slow(requested);
+  if (allocation_ptr + requested >= tripwire) {
+    //
+    // We got close to or past the last location accessed in this address range
+    // so far, depending on the requested size and the tripwire setting. This
+    // triggers a garbage collect when allowed.
+    //
+    time_for_collection = true;
+    tripwire = current_addr_ptr
+               + HYPERBLOCK_SIZE; // won't trigger again until arena swap
   }
-  void *result = block;
-  block += requested;
+  void *result = allocation_ptr;
+  allocation_ptr += requested;
   MEM_LOG(
-      "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result,
-      requested, block);
+      "Allocation at %p (size %zd), next alloc at %p\n", result, requested,
+      allocation_ptr);
   return result;
 }
+
+inline void arena::arena_clear() {
+  //
+  // We set the allocation pointer to the first available address.
+  //
+  allocation_ptr = arena_start_ptr();
+  //
+  // If the number of blocks we've touched is >= threshold, we want to trigger
+  // a garbage collection if we get within 1 block of the end of this area.
+  // Otherwise we only want to generate a garbage collect if we allocate off the
+  // end of this area.
+  //
+  tripwire = current_addr_ptr
+             + (num_blocks - (num_blocks >= get_gc_threshold())) * BLOCK_SIZE;
 }
 
+inline void arena::arena_swap_and_clear() {
+  update_num_blocks(); // so we save the correct number of touched blocks
+  std::swap(current_addr_ptr, collection_addr_ptr);
+  std::swap(num_blocks, num_collection_blocks);
+  allocation_semispace_id = ~allocation_semispace_id;
+  if (current_addr_ptr == nullptr) {
+    //
+    // The other semispace hasn't been initialized yet.
+    //
+    initialize_semispace();
+  } else
+    arena_clear();
+}
+}
 #endif // ARENA_H
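To make the tripwire arithmetic in arena_clear() above concrete, here is a small self-contained check; the block size and threshold below are illustrative stand-ins for BLOCK_SIZE and get_gc_threshold(), not the runtime's actual configuration:

#include <cassert>
#include <cstddef>

int main() {
  std::size_t const block_size = 1024 * 1024; // stand-in for BLOCK_SIZE
  std::size_t const threshold = 8;            // stand-in for get_gc_threshold()

  // 10 touched blocks >= threshold: the bool comparison contributes 1, so
  // the tripwire lands one block before the end of the touched region and
  // a collection is requested before the arena runs off the end.
  std::size_t num_blocks = 10;
  std::size_t offset = (num_blocks - (num_blocks >= threshold)) * block_size;
  assert(offset == 9 * block_size);

  // 4 touched blocks < threshold: the comparison contributes 0, so the
  // tripwire sits exactly at the end of the touched region and only
  // allocating past it triggers a collection.
  num_blocks = 4;
  offset = (num_blocks - (num_blocks >= threshold)) * block_size;
  assert(offset == 4 * block_size);
}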

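mem_block_header() above depends on each semispace reservation being aligned to HYPERBLOCK_SIZE (an alignment the allocation code must arrange; it is not shown in this header), so the header at the base of the hyperblock can be recovered from any interior pointer by masking off the low bits. The "- 1" makes a one-past-the-end pointer resolve to its own hyperblock rather than the next one. A minimal illustration of the rounding, using an assumed 1 GiB hyperblock for readability:

#include <cassert>
#include <cstdint>

// Illustrative power-of-two hyperblock size; the real HYPERBLOCK_SIZE is
// BLOCK_SIZE * 1024 * 1024.
constexpr std::uintptr_t hyperblock = std::uintptr_t{1} << 30;

// Round an address down to the base of the hyperblock containing it; an
// address exactly on a boundary is treated as one-past-the-end of the
// previous hyperblock, which is what the "- 1" achieves.
std::uintptr_t hyperblock_base(std::uintptr_t address) {
  return (address - 1) & ~(hyperblock - 1);
}

int main() {
  assert(hyperblock_base(3 * hyperblock + 42) == 3 * hyperblock);
  assert(hyperblock_base(4 * hyperblock) == 3 * hyperblock); // one past the end
}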