Skip to content

Commit 9fa1cd9

Browse files
committed
kernel/utils: Add cpu local variable macro
1 parent 6e525fc commit 9fa1cd9

File tree

5 files changed

+164
-50
lines changed

5 files changed

+164
-50
lines changed

lds/aarch64.ld

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,25 @@ OUTPUT_ARCH(aarch64)
33
ENTRY(asm_start)
44

55
PHDRS {
6-
text PT_LOAD FLAGS(1 << 0 | 1 << 2);
7-
rodata PT_LOAD FLAGS(1 << 2);
8-
data PT_LOAD FLAGS(1 << 1 | 1 << 2);
6+
text PT_LOAD FLAGS(1 << 0 | 1 << 2);
7+
rodata PT_LOAD FLAGS(1 << 2);
8+
data PT_LOAD FLAGS(1 << 1 | 1 << 2);
99
dynamic PT_DYNAMIC FLAGS(1 << 1 | 1 << 2);
1010
}
1111

1212
SECTIONS {
1313
. = 0xFFFFFFFF80000000;
14-
/*. = 0x40080000;*/
14+
/*. = 0x40080000;*/
1515
KERNEL_START = .;
1616

1717
TEXT_START = .;
18-
.text : {
19-
KEEP(*(.text.boot))
20-
*(.text .text.*)
21-
} :text
22-
TEXT_END = .;
18+
.text : {
19+
KEEP(*(.text.boot))
20+
*(.text .text.*)
21+
} :text
22+
TEXT_END = .;
2323

24-
. = ALIGN(CONSTANT(MAXPAGESIZE));
24+
. = ALIGN(CONSTANT(MAXPAGESIZE));
2525

2626
RODATA_START = .;
2727
.rodata : {
@@ -37,25 +37,34 @@ SECTIONS {
3737
. = ALIGN(CONSTANT(MAXPAGESIZE));
3838

3939
DATA_START = .;
40-
.data : {
41-
*(.data .data.*)
42-
} :data
40+
.data : {
41+
*(.data .data.*)
42+
} :data
4343

44-
.dynamic : {
45-
*(.dynamic)
46-
} :data :dynamic
44+
.dynamic : {
45+
*(.dynamic)
46+
} :data :dynamic
4747

4848
.init_array : {
49-
INIT_ARRAY_START = .;
50-
crti.o(.init_array)
51-
KEEP(*(SORT(EXCLUDE_FILE(crti.o crtn.o) .init_array.*)))
52-
KEEP(*(EXCLUDE_FILE(crti.o crtn.o) .init_array))
53-
crtn.o(.init_array)
54-
INIT_ARRAY_END = .;
49+
INIT_ARRAY_START = .;
50+
crti.o(.init_array)
51+
KEEP(*(SORT(EXCLUDE_FILE(crti.o crtn.o) .init_array.*)))
52+
KEEP(*(EXCLUDE_FILE(crti.o crtn.o) .init_array))
53+
crtn.o(.init_array)
54+
INIT_ARRAY_END = .;
55+
} :data
56+
57+
.cpu_local_ctors : {
58+
CPU_LOCAL_CTORS_START = .;
59+
KEEP(*(.cpu_local_ctors))
60+
CPU_LOCAL_CTORS_END = .;
5561
} :data
5662

5763
.bss : {
5864
BSS_START = .;
65+
__CPU_LOCAL_START = .;
66+
*(.bss.cpu_local)
67+
__CPU_LOCAL_END = .;
5968
*(.bss .bss.*)
6069
*(COMMON)
6170
BSS_END = .;

lds/x86_64.ld

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@ OUTPUT_ARCH(i386:x86-64)
33
ENTRY(_start)
44

55
PHDRS {
6-
text PT_LOAD FLAGS(1 << 0 | 1 << 2);
7-
rodata PT_LOAD FLAGS(1 << 2);
8-
data PT_LOAD FLAGS(1 << 1 | 1 << 2);
6+
text PT_LOAD FLAGS(1 << 0 | 1 << 2);
7+
rodata PT_LOAD FLAGS(1 << 2);
8+
data PT_LOAD FLAGS(1 << 1 | 1 << 2);
9+
cpu_local PT_NOTE FLAGS(1 << 2);
910
dynamic PT_DYNAMIC FLAGS(1 << 1 | 1 << 2);
1011
}
1112

@@ -14,43 +15,52 @@ SECTIONS {
1415
. = 0xFFFFFFFF80000000;
1516

1617
TEXT_START = .;
17-
.text : {
18-
*(.text .text.*)
19-
} :text
20-
TEXT_END = .;
18+
.text : {
19+
*(.text .text.*)
20+
} :text
21+
TEXT_END = .;
2122

22-
. = ALIGN(CONSTANT(MAXPAGESIZE));
23+
. = ALIGN(CONSTANT(MAXPAGESIZE));
2324

2425
RODATA_START = .;
2526
.rodata : {
2627
*(.rodata .rodata.*)
2728
} :rodata
2829
.drivers : {
29-
DRIVERS_START = .;
30-
KEEP(*(.drivers))
31-
DRIVERS_END = .;
32-
}
30+
DRIVERS_START = .;
31+
KEEP(*(.drivers))
32+
DRIVERS_END = .;
33+
}
3334
RODATA_END = .;
3435

3536
. = ALIGN(CONSTANT(MAXPAGESIZE));
3637

3738
DATA_START = .;
38-
.data : {
39-
*(.data .data.*)
40-
} :data
39+
.data : {
40+
*(.data .data.*)
41+
} :data
4142

42-
.dynamic : {
43-
*(.dynamic)
44-
} :data :dynamic
43+
.dynamic : {
44+
*(.dynamic)
45+
} :data :dynamic
4546

4647
.init_array : {
47-
INIT_ARRAY_START = .;
48-
KEEP(*(SORT(EXCLUDE_FILE(crti.o crtn.o) .init_array.*)))
49-
KEEP(*(EXCLUDE_FILE(crti.o crtn.o) .init_array))
50-
INIT_ARRAY_END = .;
48+
INIT_ARRAY_START = .;
49+
KEEP(*(SORT(EXCLUDE_FILE(crti.o crtn.o) .init_array.*)))
50+
KEEP(*(EXCLUDE_FILE(crti.o crtn.o) .init_array))
51+
INIT_ARRAY_END = .;
52+
} :data
53+
54+
.cpu_local_ctors : {
55+
CPU_LOCAL_CTORS_START = .;
56+
KEEP(*(.cpu_local_ctors))
57+
CPU_LOCAL_CTORS_END = .;
5158
} :data
5259

5360
.bss : {
61+
__CPU_LOCAL_START = .;
62+
*(.bss.cpu_local)
63+
__CPU_LOCAL_END = .;
5464
*(.bss .bss.*)
5565
*(COMMON)
5666
} :data

src/arch/aarch64/smp.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include "mem/mem.hpp"
1010
#include "sched/process.hpp"
1111

12-
static ManuallyInit<Cpu> CPUS[CONFIG_MAX_CPUS] {};
12+
static Cpu* CPUS[CONFIG_MAX_CPUS] {};
1313
static kstd::atomic<u32> NUM_CPUS {1};
1414
static Spinlock<void> SMP_LOCK {};
1515
static constexpr bool LOG_SMP_STARTUP = true;
@@ -135,6 +135,10 @@ extern "C" [[noreturn, gnu::used]] void aarch64_ap_entry_stage0() {
135135
__builtin_unreachable();
136136
}
137137

138+
using CtorFn = void (*)();
139+
extern CtorFn CPU_LOCAL_CTORS_START[];
140+
extern CtorFn CPU_LOCAL_CTORS_END[];
141+
138142
static void aarch64_common_cpu_init(Cpu* self, u32 num, bool bsp) {
139143
self->number = num;
140144

@@ -166,6 +170,10 @@ static void aarch64_common_cpu_init(Cpu* self, u32 num, bool bsp) {
166170
u64 mpidr;
167171
asm volatile("mrs %0, mpidr_el1" : "=r"(mpidr));
168172
self->affinity = (mpidr & 0xFFFFFF) | (mpidr >> 32 & 0xFF) << 24;
173+
174+
for (auto* ctor = CPU_LOCAL_CTORS_START; ctor != CPU_LOCAL_CTORS_END; ++ctor) {
175+
(*ctor)();
176+
}
169177
}
170178

171179
extern char EXCEPTION_HANDLERS_START[];
@@ -179,7 +187,6 @@ extern "C" [[noreturn, gnu::used]] void aarch64_ap_entry() {
179187
u32 num = NUM_CPUS.load(kstd::memory_order::relaxed);
180188

181189
auto lock = SMP_LOCK.lock();
182-
CPUS[num].initialize();
183190
cpu = &*CPUS[num];
184191
aarch64_common_cpu_init(cpu, num, false);
185192
gic_init_on_cpu();
@@ -208,8 +215,13 @@ static void aarch64_cpu_features_init() {
208215
CPU_FEATURES.pan = supports_pan;
209216
}
210217

218+
extern char __CPU_LOCAL_START[];
219+
extern char __CPU_LOCAL_END[];
220+
211221
// Sets up the Cpu object for CPU 0 and runs its initialisation.
//
// The Cpu is placement-constructed at the start of a block requested from
// ALLOCATOR that is sizeof(Cpu) plus the size of the linker-delimited
// [__CPU_LOCAL_START, __CPU_LOCAL_END) region, so the bytes following the Cpu
// can back this CPU's copy of that region. Afterwards the CPU feature probe
// runs, followed by the common per-CPU init with num = 0 and bsp = true.
//
// NOTE(review): the pointer returned by ALLOCATOR.alloc is passed to
// placement-new without a check here — confirm it cannot be null this early.
void aarch64_bsp_init() {
212-
CPUS[0].initialize();
222+
usize cpu_local_size = __CPU_LOCAL_END - __CPU_LOCAL_START;
223+
auto storage = ALLOCATOR.alloc(sizeof(Cpu) + cpu_local_size);
224+
CPUS[0] = new (storage) Cpu {};
213225
aarch64_cpu_features_init();
214226
aarch64_common_cpu_init(&*CPUS[0], 0, true);
215227
}
@@ -233,6 +245,8 @@ void aarch64_smp_init(dtb::Dtb& dtb) {
233245

234246
println("[kernel][aarch64]: smp init");
235247

248+
usize cpu_local_size = __CPU_LOCAL_END - __CPU_LOCAL_START;
249+
236250
for (auto child : cpus.child_iter()) {
237251
if (NUM_CPUS.load(kstd::memory_order::relaxed) >= CONFIG_MAX_CPUS) {
238252
break;
@@ -266,6 +280,9 @@ void aarch64_smp_init(dtb::Dtb& dtb) {
266280

267281
auto old = NUM_CPUS.load(kstd::memory_order::relaxed);
268282

283+
auto storage = ALLOCATOR.alloc(sizeof(Cpu) + cpu_local_size);
284+
CPUS[old] = new (storage) Cpu {};
285+
269286
if constexpr (LOG_SMP_STARTUP) {
270287
println("[kernel][aarch64]: init cpu ", Fmt::Hex, mpidr, Fmt::Reset);
271288
}

src/arch/x86/smp.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static volatile limine_smp_request SMP_REQUEST {
2626
.flags = 0
2727
};
2828

29-
static ManuallyInit<Cpu> CPUS[CONFIG_MAX_CPUS] {};
29+
static Cpu* CPUS[CONFIG_MAX_CPUS] {};
3030
static kstd::atomic<u32> NUM_CPUS {};
3131
static Spinlock<void> SMP_LOCK {};
3232

@@ -147,6 +147,10 @@ static void x86_cpu_resume(Cpu* self, Thread* current_thread, bool initial) {
147147
asm volatile("mov %0, %%cr4" : : "r"(cr4));
148148
}
149149

150+
using CtorFn = void (*)();
151+
extern CtorFn CPU_LOCAL_CTORS_START[];
152+
extern CtorFn CPU_LOCAL_CTORS_END[];
153+
150154
static void x86_init_cpu_common(Cpu* self, u8 lapic_id, bool bsp) {
151155
self->number = NUM_CPUS.fetch_add(1, kstd::memory_order::relaxed);
152156
self->lapic_id = lapic_id;
@@ -164,6 +168,10 @@ static void x86_init_cpu_common(Cpu* self, u8 lapic_id, bool bsp) {
164168
self->tss.iopb = sizeof(Tss);
165169
x86_cpu_resume(self, thread, true);
166170
sched_init(bsp);
171+
172+
for (auto* ctor = CPU_LOCAL_CTORS_START; ctor != CPU_LOCAL_CTORS_END; ++ctor) {
173+
(*ctor)();
174+
}
167175
}
168176

169177
[[noreturn]] static void smp_ap_entry(limine_smp_info* info) {
@@ -188,12 +196,18 @@ static void x86_init_cpu_common(Cpu* self, u8 lapic_id, bool bsp) {
188196
panic("scheduler block returned");
189197
}
190198

199+
extern char __CPU_LOCAL_START[];
200+
extern char __CPU_LOCAL_END[];
201+
191202
void x86_smp_init() {
192203
lapic_first_init();
193204
x86_init_idt();
194205
x86_detect_cpu_features();
195206

196-
CPUS[0].initialize();
207+
usize cpu_local_size = __CPU_LOCAL_END - __CPU_LOCAL_START;
208+
auto storage = ALLOCATOR.alloc(sizeof(Cpu) + cpu_local_size);
209+
CPUS[0] = new (storage) Cpu {};
210+
197211
x86_init_cpu_common(&*CPUS[0], SMP_REQUEST.response->bsp_lapic_id, true);
198212

199213
u32 prev = 0;
@@ -206,7 +220,9 @@ void x86_smp_init() {
206220
NUM_CPUS.store(1, kstd::memory_order::relaxed);
207221
continue;
208222
}
209-
CPUS[i].initialize();
223+
224+
storage = ALLOCATOR.alloc(sizeof(Cpu) + cpu_local_size);
225+
CPUS[i] = new (storage) Cpu {};
210226

211227
auto ap_stack_phys = pmalloc(8);
212228
assert(ap_stack_phys);

src/utils/cpu_var.hpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#pragma once
2+
#include "types.hpp"
3+
#include "arch/cpu.hpp"
4+
5+
extern char __CPU_LOCAL_START[];
6+
7+
template<typename T>
8+
struct CpuVar {
9+
struct Guard {
10+
~Guard() {
11+
arch_enable_irqs(old);
12+
}
13+
14+
T& get() {
15+
return *ptr;
16+
}
17+
18+
T* operator->() {
19+
return ptr;
20+
}
21+
22+
T& operator*() {
23+
return *ptr;
24+
}
25+
26+
private:
27+
friend CpuVar;
28+
constexpr explicit Guard(T* ptr, bool old) : ptr {ptr}, old {old} {}
29+
T* ptr;
30+
bool old;
31+
};
32+
33+
Guard get() {
34+
bool old = arch_enable_irqs(false);
35+
auto ptr = __get_unsafe();
36+
return Guard {ptr, old};
37+
}
38+
39+
T* __get_unsafe() {
40+
usize base;
41+
#ifdef __x86_64__
42+
asm volatile("mov %%gs:160, %0" : "=r"(base));
43+
#elif defined(__aarch64__)
44+
usize thread;
45+
asm volatile("mrs %0, tpidr_el1" : "=r"(thread));
46+
base = *reinterpret_cast<usize*>(thread + 144);
47+
#else
48+
#error missing architecture specific code
49+
#endif
50+
return __builtin_launder(reinterpret_cast<T*>(base + __offset));
51+
}
52+
53+
usize __offset;
54+
};
55+
56+
// per_cpu(type, name, ctor, modifiers...)
//
// Declares a per-CPU variable `name` of type `type`. Expands to three
// declarations:
//   1. A char array `__cpu_local_<name>` of sizeof(type) bytes, aligned as
//      `type`, placed in section ".bss.cpu_local". `modifiers` are pasted into
//      this array declaration only.
//   2. `CpuVar<type> name`, whose offset is sizeof(Cpu) plus the distance of
//      that array from __CPU_LOCAL_START.
//   3. A function pointer `__cpu_local_<name>_ctor`, placed in section
//      ".cpu_local_ctors" and marked used, which calls
//      `ctor(name.__get_unsafe())`.
// The expansion has no trailing semicolon; the use site supplies it.
// NOTE(review): the offset is only correctly aligned for `type` if sizeof(Cpu)
// and __CPU_LOCAL_START are suitably aligned — confirm for over-aligned types.
#define per_cpu(type, name, ctor, modifiers...) \
57+
[[gnu::section(".bss.cpu_local")]] alignas(type) modifiers char __cpu_local_ ## name[sizeof(type)]; \
58+
CpuVar<type> name {sizeof(Cpu) + reinterpret_cast<uintptr_t>(&__cpu_local_ ##name) - reinterpret_cast<uintptr_t>(__CPU_LOCAL_START)}; \
59+
[[gnu::section(".cpu_local_ctors"), gnu::used]] void (*__cpu_local_ ## name ## _ctor)() = []() { ctor(name.__get_unsafe()); }
60+
// per_cpu_trivial(type, name, value, modifiers...)
//
// Convenience wrapper around per_cpu for trivially constructible types: it
// static_asserts __is_trivially_constructible(type) and passes per_cpu a
// constructor lambda that assigns `value` to the instance it is given.
// `modifiers` are forwarded to per_cpu unchanged.
// NOTE(review): initialisation is done by assignment, not construction —
// presumably fine for the intended plain types; confirm that `type` is also
// trivially assignable where this is used.
#define per_cpu_trivial(type, name, value, modifiers...) \
61+
static_assert(__is_trivially_constructible(type)); \
62+
per_cpu(type, name, [](void* ptr) { *__builtin_launder(static_cast<type*>(ptr)) = value; }, modifiers)

0 commit comments

Comments
 (0)