/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group                                          |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license,      |
| that is bundled with this package in the file LICENSE, and is        |
| available through the world-wide-web at the following url:           |
| https://www.php.net/license/3_01.txt                                 |
| If you did not receive a copy of the PHP license and are unable to   |
| obtain it through the world-wide-web, please send a note to          |
| [email protected] so we can mail you a copy immediately.               |
+----------------------------------------------------------------------+
| Author: Edmond                                                       |
+----------------------------------------------------------------------+
*/
#ifndef FAST_CONTEXT_H
#define FAST_CONTEXT_H

/**
 * Fast Context Switching - Based on Alibaba Photon
 * Stack-pointer-swapping context switches for x86-64, AArch64, RISC-V 64,
 * ARM32 and x86-32, with a portable setjmp/longjmp fallback
 * Supports GCC, Clang, MSVC
 */

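/*
 * Typical usage (illustrative sketch only; how a new context's stack is
 * prepared is platform specific and is not provided by this header):
 *
 *   coroutine_context main_ctx, co_ctx;
 *   // ...prepare co_ctx.stack_ptr (or co_ctx.ctx for the setjmp fallback)
 *   //    so that fast_context_switch() can resume it...
 *   fast_context_switch(&main_ctx, &co_ctx);  // suspend here, run the coroutine
 *   // the coroutine hands control back with fast_context_switch(&co_ctx, &main_ctx)
 */
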
/* ===== COMPILER DETECTION ===== */
#if defined(_MSC_VER)
#define FASTCTX_MSVC 1
#define FASTCTX_NAKED __declspec(naked)
#define FASTCTX_INLINE __forceinline
#define FASTCTX_ASM_FUNCTION FASTCTX_NAKED
#elif defined(__clang__)
#define FASTCTX_CLANG 1
#define FASTCTX_NAKED __attribute__((naked))
#define FASTCTX_INLINE static inline
/* Functions written entirely in inline assembly must be naked (no compiler
 * prologue/epilogue) and must never be inlined into their callers */
#define FASTCTX_ASM_FUNCTION __attribute__((naked, noinline))
#elif defined(__GNUC__)
#define FASTCTX_GCC 1
#define FASTCTX_NAKED __attribute__((naked))
#define FASTCTX_INLINE static inline
/* Functions written entirely in inline assembly must be naked (no compiler
 * prologue/epilogue) and must never be inlined into their callers */
#define FASTCTX_ASM_FUNCTION __attribute__((naked, noinline))
#else
#define FASTCTX_UNKNOWN 1
#define FASTCTX_NAKED
#define FASTCTX_INLINE static inline
#define FASTCTX_ASM_FUNCTION static
#endif

/* ===== C++ COMPATIBILITY ===== */
#ifdef __cplusplus
extern "C" {
#endif

/* ===== INTEL CET (Control-flow Enforcement Technology) DETECTION ===== */
#if defined(__CET__)
#include <cet.h>
/* Intel CET has two features:
 * - IBT (Indirect Branch Tracking): __CET__ & 0x1
 * - SHSTK (Shadow Stack): __CET__ & 0x2
 * We only need Shadow Stack for context switching */
#define FASTCTX_IBT_ENABLED (__CET__ & 0x1)
#define FASTCTX_SHSTK_ENABLED (__CET__ & 0x2)

/* Check if both compile-time and runtime support are available */
#if FASTCTX_SHSTK_ENABLED && defined(SHADOW_STACK_SYSCALL)
#define FASTCTX_CET 1
#else
#define FASTCTX_CET 0
#endif
#else
/* No CET support - define empty macros */
#define _CET_ENDBR
#define FASTCTX_CET 0
#define FASTCTX_IBT_ENABLED 0
#define FASTCTX_SHSTK_ENABLED 0
#endif

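/* How these macros get enabled is a property of the build rather than of this
 * header. As an illustrative example (the flag is the standard GCC/Clang one;
 * SHADOW_STACK_SYSCALL is assumed to be supplied by the project's own
 * configure/build check for kernel shadow-stack support):
 *
 *   cc -O2 -fcf-protection=full -DSHADOW_STACK_SYSCALL=1 -c your_file.c
 *
 * -fcf-protection=full defines __CET__ with both the IBT and SHSTK bits set.
 */
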
/* ===== CONTEXT STRUCTURE ===== */
/* The pointer-based context is only usable on platforms that have a native
 * switch routine below; unsupported platforms get a setjmp-based context in
 * the fallback section instead. */
#if defined(__x86_64__) || defined(_M_X64) || \
    defined(__aarch64__) || defined(_M_ARM64) || \
    (defined(__riscv) && (__riscv_xlen == 64)) || \
    defined(__arm__) || defined(_M_ARM) || \
    defined(__i386__) || defined(_M_IX86)
typedef struct {
    void *stack_ptr;
} coroutine_context;
#endif

/* fast_context_switch() is defined separately for each platform below;
 * there is no single shared prototype. */

// x86_64
#if defined(__x86_64__) || defined(_M_X64)

#ifdef FASTCTX_MSVC
/*
 * MSVC x64 LIMITATION:
 *
 * Unlike 32-bit x86, MSVC does not support inline assembly (__asm blocks)
 * on x64, so the switch routine has to be supplied as a separate MASM
 * module assembled with ml64.exe and linked into the program. It receives
 * its arguments in RCX/RDX (Microsoft x64 calling convention) and must
 * save/restore the Microsoft x64 callee-saved registers
 * RBP, RBX, RDI, RSI, R12-R15 (like Photon); see the sketch below.
 */

/* External function declaration - must be provided by a separate .asm module */
extern void fast_context_switch(coroutine_context *from, coroutine_context *to);
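
/*
 * A minimal MASM sketch of such a module (illustrative only; the file name
 * and build wiring are project-specific, e.g. "ml64.exe /c fast_context_x64_msvc.asm"):
 *
 *   ; fast_context_x64_msvc.asm
 *   PUBLIC fast_context_switch
 *   .code
 *   fast_context_switch PROC
 *       push rbp                ; save Microsoft x64 callee-saved registers
 *       push rbx
 *       push rdi
 *       push rsi
 *       push r12
 *       push r13
 *       push r14
 *       push r15
 *       mov  [rcx], rsp         ; from->stack_ptr = rsp
 *       mov  rsp, [rdx]         ; rsp = to->stack_ptr
 *       pop  r15                ; restore registers saved by the new context
 *       pop  r14
 *       pop  r13
 *       pop  r12
 *       pop  rsi
 *       pop  rdi
 *       pop  rbx
 *       pop  rbp
 *       ret                     ; return into the new context
 *   fast_context_switch ENDP
 *   END
 */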
#else
/* GCC/Clang x64 calling convention: RDI, RSI */
FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to)
{
#if FASTCTX_CET
    __asm__ volatile (
        "endbr64\n"              /* IBT: mark as a valid indirect branch target */

        /* Save Shadow Stack Pointer (for SHSTK) */
        "xorl %%ecx, %%ecx\n"    /* rdsspq is a NOP without SHSTK, so pre-clear */
        "rdsspq %%rcx\n"         /* Read current Shadow Stack Pointer */
        "pushq %%rcx\n"          /* Save SSP to current stack */

        /* Standard context switch - save System V callee-saved registers */
        "pushq %%rbp\n"          /* Save frame pointer */
        "pushq %%rbx\n"          /* Save callee-saved */
        "pushq %%r12\n"          /* Save callee-saved */
        "pushq %%r13\n"          /* Save callee-saved */
        "pushq %%r14\n"          /* Save callee-saved */
        "pushq %%r15\n"          /* Save callee-saved */
        "movq %%rsp, (%0)\n"     /* from->stack_ptr = current rsp */
        "movq (%1), %%rsp\n"     /* rsp = to->stack_ptr (switch stacks) */
        "popq %%r15\n"           /* Restore callee-saved */
        "popq %%r14\n"           /* Restore callee-saved */
        "popq %%r13\n"           /* Restore callee-saved */
        "popq %%r12\n"           /* Restore callee-saved */
        "popq %%rbx\n"           /* Restore callee-saved */
        "popq %%rbp\n"           /* Restore frame pointer */

        /* Restore Shadow Stack (for SHSTK) */
        "popq %%rcx\n"           /* Load SSP saved by the new context */
        "test %%rcx, %%rcx\n"    /* Check if SSP is non-zero */
        "jz 1f\n"                /* Skip if zero (no shadow stack) */
        "rstorssp -8(%%rcx)\n"   /* Restore shadow stack from its token */
        "saveprevssp\n"          /* Save a token for the previous shadow stack */

        /* CRITICAL: since we leave with 'ret' instead of 'jmp', the SSP has to
         * be adjusted - we never actually returned from a call, we are jumping
         * into a different context */
        "movl $1, %%ecx\n"       /* INCSSPQ takes a register operand, not an immediate */
        "incsspq %%rcx\n"        /* Move SSP past one shadow-stack entry to compensate */
        "1:\n"
        "ret\n"                  /* Return to new context */
        : : "r" (from), "r" (to) : "rcx", "memory"
    );
#else
    __asm__ volatile (
        /* Save System V AMD64 callee-saved registers (like Photon) */
        "pushq %%rbp\n"
        "pushq %%rbx\n"
        "pushq %%r12\n"
        "pushq %%r13\n"
        "pushq %%r14\n"
        "pushq %%r15\n"
        "movq %%rsp, (%0)\n"
        "movq (%1), %%rsp\n"
        /* Restore System V AMD64 callee-saved registers */
        "popq %%r15\n"
        "popq %%r14\n"
        "popq %%r13\n"
        "popq %%r12\n"
        "popq %%rbx\n"
        "popq %%rbp\n"
        "ret\n"
        : : "r" (from), "r" (to) : "memory"
    );
#endif
}
#endif
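
/*
 * Creating a brand-new context is not part of this header, but for the
 * non-CET System V path above a fresh stack only has to mimic what
 * fast_context_switch() expects to pop: six callee-saved slots followed by
 * the address the context should start executing at. A minimal sketch
 * (hypothetical helper, assuming a 16-byte aligned stack top, an entry
 * function that never returns, and uint64_t from <stdint.h>):
 *
 *   void coroutine_context_init(coroutine_context *ctx, void *stack_top,
 *                               void (*entry)(void)) {
 *       uint64_t *sp = (uint64_t *)stack_top; // highest (aligned) address
 *       *--sp = 0;                            // padding so 'entry' starts with ABI stack alignment
 *       *--sp = (uint64_t)entry;              // consumed by the final 'ret'
 *       for (int i = 0; i < 6; i++)           // rbp, rbx, r12-r15 slots
 *           *--sp = 0;
 *       ctx->stack_ptr = sp;                  // first 'popq' will read from here
 *   }
 *
 * The CET variant additionally pops a shadow-stack-pointer slot before its
 * 'ret', so a fresh stack for that path needs one more zeroed slot between
 * the register area and the entry address.
 */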

// ARM64
#elif defined(__aarch64__) || defined(_M_ARM64)

#ifdef FASTCTX_MSVC
/*
 * MSVC ARM64 LIMITATION:
 *
 * Microsoft Visual C++ does NOT support inline assembly on ARM64,
 * unlike 32-bit x86, where __asm blocks are supported. This is because:
 *
 * 1. ARM64 ABI COMPLEXITY: the ARM64 calling convention is more complex,
 *    with different register classes and stricter alignment rules
 *
 * 2. MICROSOFT'S DESIGN DECISION: MSVC encourages compiler intrinsics
 *    rather than inline assembly for better optimization and security
 *
 * 3. TOOLCHAIN SEPARATION: Microsoft separates assembly code into dedicated
 *    .asm files processed by the armasm64.exe assembler
 *
 * SOLUTION:
 * We provide the context switching function in a separate .asm file,
 * 'fast_context_arm64_msvc.asm' (a sketch of its contents follows the
 * declaration below), which must be:
 *   1. Assembled with armasm64.exe
 *   2. Linked with the main program
 *   3. Declared as an external function here
 *
 * BUILD INSTRUCTIONS:
 *   armasm64.exe fast_context_arm64_msvc.asm
 *   link.exe your_program.obj fast_context_arm64_msvc.obj
 */

/* External function declaration - implemented in fast_context_arm64_msvc.asm */
extern void fast_context_switch(coroutine_context *from, coroutine_context *to);
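
/*
 * A minimal sketch of what fast_context_arm64_msvc.asm could contain, in
 * armasm64 syntax, mirroring the GCC/Clang sequence below (illustrative
 * only - not shipped with this header):
 *
 *       AREA    |.text|, CODE, READONLY
 *       EXPORT  fast_context_switch
 *   fast_context_switch PROC
 *       stp     x29, x30, [sp, #-16]!   ; save FP and LR on the current stack
 *       mov     x29, sp
 *       str     x29, [x0]               ; from->stack_ptr = sp
 *       ldr     x29, [x1]               ; load to->stack_ptr
 *       mov     sp, x29                 ; switch stacks
 *       ldp     x29, x30, [sp], #16     ; restore FP and LR from the new stack
 *       ret
 *       ENDP
 *       END
 */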

#else
/* GCC/Clang ARM64 - supports inline assembly */
FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile (
        "stp x29, x30, [sp, #-16]!\n"  /* Save FP and LR to current stack */
        "mov x29, sp\n"                /* Update frame pointer */
        "str x29, [%0]\n"              /* from->stack_ptr = current stack */
        "ldr x29, [%1]\n"              /* Load new stack pointer */
        "mov sp, x29\n"                /* Switch to new stack */
        "ldp x29, x30, [sp], #16\n"    /* Restore FP and LR from new stack */
        "ret\n"                        /* Return to new context */
        : : "r" (from), "r" (to) : "x29", "x30", "memory"
    );
}
#endif

// RISC-V 64
#elif defined(__riscv) && (__riscv_xlen == 64)

FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile (
        "addi sp, sp, -16\n"   /* Make room for ra and s0 */
        "sd ra, 8(sp)\n"       /* Save return address */
        "sd s0, 0(sp)\n"       /* Save frame pointer */
        "sd sp, 0(%0)\n"       /* from->stack_ptr = sp */
        "ld sp, 0(%1)\n"       /* sp = to->stack_ptr (switch stacks) */
        "ld s0, 0(sp)\n"       /* Restore frame pointer */
        "ld ra, 8(sp)\n"       /* Restore return address */
        "addi sp, sp, 16\n"    /* Release the frame */
        "jr ra\n"              /* Return to new context */
        : : "r" (from), "r" (to) : "memory"
    );
}

// ARM32
#elif defined(__arm__) || defined(_M_ARM)

FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile (
        "push {r11, lr}\n"    /* Save frame pointer and return address */
        "mov r11, sp\n"
        "str r11, [%0]\n"     /* from->stack_ptr = sp */
        "ldr r11, [%1]\n"     /* Load to->stack_ptr */
        "mov sp, r11\n"       /* Switch stacks */
        "pop {r11, pc}\n"     /* Restore frame pointer and jump into new context */
        : : "r" (from), "r" (to) : "r11", "lr", "memory"
    );
}

// x86_32
#elif defined(__i386__) || defined(_M_IX86)

#ifdef FASTCTX_MSVC
/* MSVC x86 calling convention: parameters on the stack */
FASTCTX_NAKED void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm {
        push ebp                ; Save frame pointer
        mov eax, [esp + 8]      ; eax = from parameter (after push ebp)
        mov edx, [esp + 12]     ; edx = to parameter (after push ebp)
        mov [eax], esp          ; from->stack_ptr = current esp
        mov esp, [edx]          ; switch to new stack
        pop ebp                 ; restore frame pointer from new stack
        ret                     ; return to new context
    }
}
#else
FASTCTX_ASM_FUNCTION void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    __asm__ volatile (
        "pushl %%ebp\n"          /* Save frame pointer */
        "movl %%esp, (%0)\n"     /* from->stack_ptr = esp */
        "movl (%1), %%esp\n"     /* esp = to->stack_ptr (switch stacks) */
        "popl %%ebp\n"           /* Restore frame pointer */
        "ret\n"                  /* Return to new context */
        : : "r" (from), "r" (to) : "memory"
    );
}
#endif

#else

/* ===== FALLBACK FOR UNSUPPORTED PLATFORMS ===== */
#define FASTCTX_FALLBACK_SETJMP 1
#include <setjmp.h>

#ifdef FASTCTX_FALLBACK_SETJMP
/* The pointer-based coroutine_context is not defined on these platforms,
 * so a setjmp-based one is used instead */
typedef struct {
    jmp_buf ctx;
    int initialized;
} coroutine_context;
#endif

FASTCTX_INLINE void fast_context_switch(coroutine_context *from, coroutine_context *to) {
    if (setjmp(from->ctx) == 0) {
        longjmp(to->ctx, 1);
    }
}
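
/* Note: setjmp/longjmp can only transfer to a context that was previously
 * captured by setjmp() in a frame that is still live; unlike the assembly
 * paths above, this fallback cannot start a coroutine on a freshly allocated
 * stack (the 'initialized' flag is presumably for callers to track whether
 * 'ctx' has been captured yet). */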

#if defined(__GNUC__) || defined(__clang__)
#warning "Using setjmp/longjmp fallback - performance will be reduced"
#elif defined(_MSC_VER)
#pragma message("Using setjmp/longjmp fallback - performance will be reduced")
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif /* FAST_CONTEXT_H */