
Commit 8ef8a0c

#61: add fast_context.h module
1 parent 8b40d1b commit 8ef8a0c

File tree

1 file changed: +334 -0 lines changed


internal/fast_context.h

Lines changed: 334 additions & 0 deletions
@@ -0,0 +1,334 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Edmond |
+----------------------------------------------------------------------+
*/
#ifndef FAST_CONTEXT_H
#define FAST_CONTEXT_H

/**
 * Fast Context Switching - Based on Alibaba Photon
 * Ultra-fast context switching for all platforms
 * Supports GCC, Clang, MSVC
 */

/* ===== COMPILER DETECTION ===== */
#if defined(_MSC_VER)
#define FASTCTX_MSVC 1
#define FASTCTX_NAKED __declspec(naked)
#define FASTCTX_INLINE __forceinline
#define FASTCTX_ASM_FUNCTION FASTCTX_NAKED
#elif defined(__clang__)
#define FASTCTX_CLANG 1
#define FASTCTX_NAKED __attribute__((naked))
#define FASTCTX_INLINE static inline
/* For inline assembly functions, naked is more important than inline */
#define FASTCTX_ASM_FUNCTION __attribute__((naked, noinline))
#elif defined(__GNUC__)
#define FASTCTX_GCC 1
#define FASTCTX_NAKED __attribute__((naked))
#define FASTCTX_INLINE static inline
/* For inline assembly functions, naked is more important than inline */
#define FASTCTX_ASM_FUNCTION __attribute__((naked, noinline))
#else
#define FASTCTX_UNKNOWN 1
#define FASTCTX_NAKED
#define FASTCTX_INLINE static inline
#define FASTCTX_ASM_FUNCTION static
#endif
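/*
 * Worked expansion (illustrative, derived directly from the macros above):
 * on GCC or Clang a declaration such as
 *
 *     FASTCTX_ASM_FUNCTION void fast_context_switch(...);
 *
 * expands to
 *
 *     __attribute__((naked, noinline)) void fast_context_switch(...);
 *
 * so the compiler emits no prologue/epilogue around the hand-written
 * assembly and never inlines the switch into its callers.
 */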

/* ===== C++ COMPATIBILITY ===== */
#ifdef __cplusplus
extern "C" {
#endif

/* ===== INTEL CET (Control-flow Enforcement Technology) DETECTION ===== */
#if defined(__CET__)
#include <cet.h>
/* Intel CET has two features:
 * - IBT (Indirect Branch Tracking): __CET__ & 0x1
 * - SHSTK (Shadow Stack): __CET__ & 0x2
 * We only need Shadow Stack for context switching */
#define FASTCTX_IBT_ENABLED (__CET__ & 0x1)
#define FASTCTX_SHSTK_ENABLED (__CET__ & 0x2)

/* Check if both compile-time and runtime support are available */
#if FASTCTX_SHSTK_ENABLED && defined(SHADOW_STACK_SYSCALL)
#define FASTCTX_CET 1
#else
#define FASTCTX_CET 0
#endif
#else
/* No CET support - define empty macros */
#define _CET_ENDBR
#define FASTCTX_CET 0
#define FASTCTX_IBT_ENABLED 0
#define FASTCTX_SHSTK_ENABLED 0
#endif
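/*
 * Worked example (toolchain-dependent, stated as an assumption): building
 * with GCC or Clang and -fcf-protection=full typically predefines
 * __CET__ == 3, so the checks above evaluate to
 *
 *     FASTCTX_IBT_ENABLED   -> (3 & 0x1) == 1   (IBT available)
 *     FASTCTX_SHSTK_ENABLED -> (3 & 0x2) == 2   (Shadow Stack available)
 *
 * and FASTCTX_CET still becomes 1 only if SHADOW_STACK_SYSCALL is also
 * defined by the build.
 */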

/* ===== CONTEXT STRUCTURE ===== */
#if !defined(FASTCTX_FALLBACK_SETJMP)
typedef struct {
    void *stack_ptr;
} coroutine_context;
#endif

/* Forward declaration for all platforms */
/* Note: actual function is defined inline for each platform below */

// x86_64
#if defined(__x86_64__) || defined(_M_X64)

#ifdef FASTCTX_MSVC
/* MSVC x64 calling convention: RCX, RDX, R8, R9
 * Microsoft x64 callee-saved: RBP, RBX, RDI, RSI, R12-R15 */
FASTCTX_NAKED void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm {
        ; Save Microsoft x64 callee-saved registers (like Photon)
        push rbp
        push rbx
        push rdi
        push rsi
        push r12
        push r13
        push r14
        push r15

        mov [rcx], rsp      ; from->stack_ptr = rsp
        mov rsp, [rdx]      ; rsp = to->stack_ptr

        ; Restore Microsoft x64 callee-saved registers
        pop r15
        pop r14
        pop r13
        pop r12
        pop rsi
        pop rdi
        pop rbx
        pop rbp
        ret
    }
}
#else
/* GCC/Clang x64 calling convention: RDI, RSI */
FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to)
{
#if FASTCTX_CET
    __asm__ volatile(
        "_CET_ENDBR\n"          /* IBT: Mark as valid indirect branch target */

        /* Save Shadow Stack Pointer (for SHSTK) */
        "rdsspq %%rcx\n"        /* Read current Shadow Stack Pointer */
        "pushq %%rcx\n"         /* Save SSP to current stack */

        /* Standard context switch - save System V callee-saved registers */
        "pushq %%rbp\n"         /* Save frame pointer */
        "pushq %%rbx\n"         /* Save callee-saved */
        "pushq %%r12\n"         /* Save callee-saved */
        "pushq %%r13\n"         /* Save callee-saved */
        "pushq %%r14\n"         /* Save callee-saved */
        "pushq %%r15\n"         /* Save callee-saved */
        "movq %%rsp, (%0)\n"    /* from->stack_ptr = current rsp */
        "movq (%1), %%rsp\n"    /* rsp = to->stack_ptr (switch stacks) */
        "popq %%r15\n"          /* Restore callee-saved */
        "popq %%r14\n"          /* Restore callee-saved */
        "popq %%r13\n"          /* Restore callee-saved */
        "popq %%r12\n"          /* Restore callee-saved */
        "popq %%rbx\n"          /* Restore callee-saved */
        "popq %%rbp\n"          /* Restore frame pointer */

        /* Restore Shadow Stack (for SHSTK) */
        "popq %%rcx\n"          /* Load SSP from new stack */
        "test %%rcx, %%rcx\n"   /* Check if SSP is non-zero */
        "jz 1f\n"               /* Skip if zero (no shadow stack) */
        "rstorssp -8(%%rcx)\n"  /* Restore shadow stack from token */
        "saveprevssp\n"         /* Save token for previous shadow stack */

        /* CRITICAL: Since we use 'ret' instead of 'jmp', we need to adjust SSP
         * because 'ret' will decrement SSP but we haven't actually returned
         * from a function call - we're jumping to a different context */
        "incsspq $1\n"          /* Increment SSP to compensate */
        "1:\n"
        "ret\n"                 /* Return to new context */
        : : "r"(from), "r"(to) : "rcx", "memory"
    );
#else
    __asm__ volatile(
        /* Save System V AMD64 callee-saved registers (like Photon) */
        "pushq %%rbp\n"
        "pushq %%rbx\n"
        "pushq %%r12\n"
        "pushq %%r13\n"
        "pushq %%r14\n"
        "pushq %%r15\n"
        "movq %%rsp, (%0)\n"
        "movq (%1), %%rsp\n"
        /* Restore System V AMD64 callee-saved registers */
        "popq %%r15\n"
        "popq %%r14\n"
        "popq %%r13\n"
        "popq %%r12\n"
        "popq %%rbx\n"
        "popq %%rbp\n"
        "ret\n"
        : : "r"(from), "r"(to) : "memory"
    );
#endif
}
#endif
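/*
 * Illustrative note (inferred from the pop/ret sequence above, not part of
 * the original sources): for the System V non-CET path, a freshly created
 * context must point stack_ptr at a frame laid out as
 *
 *     stack_ptr -> [ r15 ][ r14 ][ r13 ][ r12 ][ rbx ][ rbp ][ entry address ]
 *                   low addresses -----------------------------------> high
 *
 * The six register slots may simply be zero; the final `ret` consumes the
 * entry address and starts executing the new context there.
 */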

// ARM64
#elif defined(__aarch64__) || defined(_M_ARM64)

#ifdef FASTCTX_MSVC
/*
 * MSVC ARM64 LIMITATION:
 *
 * Microsoft Visual C++ does NOT support inline assembly on ARM64,
 * unlike x86/x64 where __asm blocks are supported. This is because:
 *
 * 1. ARM64 ABI COMPLEXITY: The ARM64 calling convention is more complex
 *    than x86/x64, with different register classes and stricter alignment
 *
 * 2. MICROSOFT'S DESIGN DECISION: MSVC encourages use of compiler intrinsics
 *    rather than inline assembly for better optimization and security
 *
 * 3. TOOLCHAIN SEPARATION: Microsoft separates assembly code into dedicated
 *    .asm files processed by armasm64.exe assembler
 *
 * SOLUTION:
 * We provide the context switching function in a separate .asm file:
 * 'fast_context_arm64_msvc.asm' which must be:
 *   1. Assembled with armasm64.exe
 *   2. Linked with the main program
 *   3. Declared as external function here
 *
 * BUILD INSTRUCTIONS:
 *   armasm64.exe fast_context_arm64_msvc.asm
 *   link.exe your_program.obj fast_context_arm64_msvc.obj
 */

/* External function declaration - implemented in fast_context_arm64_msvc.asm */
extern void fast_context_switch(coroutine_context *from, coroutine_context *to);

#else
/* GCC/Clang ARM64 - supports inline assembly */
FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile(
        "stp x29, x30, [sp, #-16]!\n"   /* Save FP and LR to current stack */
        "mov x29, sp\n"                 /* Update frame pointer */
        "str x29, [%0]\n"               /* from->stack_ptr = current stack */
        "ldr x29, [%1]\n"               /* Load new stack pointer */
        "mov sp, x29\n"                 /* Switch to new stack */
        "ldp x29, x30, [sp], #16\n"     /* Restore FP and LR from new stack */
        "ret\n"                         /* Return to new context */
        : : "r"(from), "r"(to) : "x29", "x30", "memory"
    );
}
#endif

// RISC-V 64
#elif defined(__riscv) && (__riscv_xlen == 64)

FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile(
        "addi sp, sp, -16\n"
        "sd ra, 8(sp)\n"
        "sd s0, 0(sp)\n"
        "sd sp, 0(%0)\n"
        "ld sp, 0(%1)\n"
        "ld s0, 0(sp)\n"
        "ld ra, 8(sp)\n"
        "addi sp, sp, 16\n"
        "jr ra\n"
        : : "r"(from), "r"(to) : "memory"
    );
}

// ARM32
#elif defined(__arm__) || defined(_M_ARM)

FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile(
        "push {r11, lr}\n"
        "mov r11, sp\n"
        "str r11, [%0]\n"
        "ldr r11, [%1]\n"
        "mov sp, r11\n"
        "pop {r11, pc}\n"
        : : "r"(from), "r"(to) : "r11", "lr", "memory"
    );
}

// x86_32
#elif defined(__i386__) || defined(_M_IX86)

#ifdef FASTCTX_MSVC
/* MSVC x86 calling convention: parameters on stack */
FASTCTX_NAKED void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm {
        push ebp            ; Save frame pointer
        mov eax, [esp+8]    ; eax = from parameter (after push ebp)
        mov edx, [esp+12]   ; edx = to parameter (after push ebp)
        mov [eax], esp      ; from->stack_ptr = current esp
        mov esp, [edx]      ; switch to new stack
        pop ebp             ; restore frame pointer from new stack
        ret                 ; return to new context
    }
}
#else
FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile(
        "pushl %%ebp\n"
        "movl %%esp, (%0)\n"
        "movl (%1), %%esp\n"
        "popl %%ebp\n"
        "ret\n"
        : : "r"(from), "r"(to) : "memory"
    );
}
#endif

#else

/* ===== FALLBACK FOR UNSUPPORTED PLATFORMS ===== */
#define FASTCTX_FALLBACK_SETJMP 1
#include <setjmp.h>

#ifdef FASTCTX_FALLBACK_SETJMP
#undef coroutine_context /* Remove previous declaration */
typedef struct {
    jmp_buf ctx;
    int initialized;
} coroutine_context;
#endif

FASTCTX_INLINE void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    if (setjmp(from->ctx) == 0) {
        longjmp(to->ctx, 1);
    }
}

#if defined(__GNUC__) || defined(__clang__)
#warning "Using setjmp/longjmp fallback - performance will be reduced"
#elif defined(_MSC_VER)
#pragma message("Using setjmp/longjmp fallback - performance will be reduced")
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif /* FAST_CONTEXT_H */
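
Usage sketch (not part of this commit): the snippet below shows how the System V x86_64 non-CET path above could be driven from a small test program. The initial stack layout is inferred from the push/pop/ret sequence in the diff; the names worker_entry, STACK_SIZE, the include path, and the whole driver are hypothetical, so treat this as an illustration rather than an API defined by the module.

/* usage_sketch.c - hypothetical driver for the x86_64 System V (non-CET) path */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "fast_context.h"   /* assumed include path for internal/fast_context.h */

#define STACK_SIZE (64 * 1024)

static coroutine_context main_ctx, worker_ctx;

/* Entered via the final `ret` of fast_context_switch; it must never return
 * normally, only switch back to the context that resumed it. */
static void worker_entry(void) {
    puts("worker: first entry");
    fast_context_switch(&worker_ctx, &main_ctx);    /* yield to main */
    puts("worker: resumed");
    fast_context_switch(&worker_ctx, &main_ctx);    /* yield again */
    for (;;) { }                                    /* never fall off the end */
}

int main(void) {
    unsigned char *stack = malloc(STACK_SIZE);
    if (!stack) return 1;

    /* Seed the new stack to mirror the restore sequence in the header:
     * six callee-saved register slots, then the entry address consumed by
     * `ret`. The entry slot sits at a 16-byte-aligned address so the worker
     * starts with the stack alignment the ABI expects after a call. */
    uintptr_t top = ((uintptr_t)stack + STACK_SIZE) & ~(uintptr_t)15;
    uint64_t *frame = (uint64_t *)top - 2;          /* 16-byte-aligned slot */
    frame[0] = (uint64_t)(uintptr_t)worker_entry;   /* consumed by `ret` */
    frame -= 6;                                     /* r15 r14 r13 r12 rbx rbp */
    for (int i = 0; i < 6; i++) frame[i] = 0;
    worker_ctx.stack_ptr = frame;

    fast_context_switch(&main_ctx, &worker_ctx);    /* run worker until it yields */
    puts("main: worker yielded");
    fast_context_switch(&main_ctx, &worker_ctx);    /* resume it once more */
    puts("main: done");

    free(stack);
    return 0;
}

On an x86_64 build that takes the non-CET path, the two contexts then alternate (worker, main, worker, main). The CET path and the other architectures would need a correspondingly different initial frame.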
