Skip to content

Commit b70128b

Browse files
committed
Update stacklet
1 parent ac2dee6 commit b70128b

File tree

8 files changed

+409
-33
lines changed

8 files changed

+409
-33
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
os.system('ml64 /nologo /c /Fo src\switch_x64_msvc.obj src\switch_x64_msvc.asm')
3131

3232
ext_modules = [Extension('fibers._cfibers',
33-
sources = glob.glob('src/*.c'),
33+
sources=glob.glob('src/*.c'),
3434
extra_objects=extra_objects,
3535
)]
3636

src/slp_platformselect.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010
#include "switch_x86_gcc.h" /* gcc on X86 */
1111
#elif defined(__GNUC__) && defined(__arm__)
1212
#include "switch_arm_gcc.h" /* gcc on arm */
13+
#elif defined(__GNUC__) && defined(__PPC64__)
14+
#include "switch_ppc64_gcc.h" /* gcc on ppc64 */
1315
#elif defined(__GNUC__) && defined(__mips__) && defined(_ABI64)
1416
#include "switch_mips64_gcc.h" /* gcc on mips64 */
17+
#elif defined(__GNUC__) && defined(__s390x__)
18+
#include "switch_s390x_gcc.h"
1519
#else
1620
#error "Unsupported platform!"
1721
#endif

src/stacklet.c

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#include "stacklet.h"
66

77
#include <stddef.h>
8-
#include <assert.h>
98
#include <string.h>
9+
#include <stdio.h>
1010

1111
/************************************************************
1212
* platform specific code
@@ -16,6 +16,7 @@
1616
* can redefine it to upwards growing, 1.
1717
*/
1818
#define STACK_DIRECTION 0
19+
#define STATIC_NOINLINE __attribute__((noinline)) static
1920

2021
#include "slp_platformselect.h"
2122

@@ -34,7 +35,7 @@
3435
/************************************************************/
3536

3637
struct stacklet_s {
37-
/* The portion of the real stack claimed by this paused tealet. */
38+
/* The portion of the real stack claimed by this paused stacklet. */
3839
char *stack_start; /* the "near" end of the stack */
3940
char *stack_stop; /* the "far" end of the stack */
4041

@@ -56,11 +57,6 @@ struct stacklet_s {
5657
stacklet_thread_handle stack_thrd; /* the thread where the stacklet is */
5758
};
5859

59-
void *(*_stacklet_switchstack)(void*(*)(void*, void*),
60-
void*(*)(void*, void*), void*) = NULL;
61-
void (*_stacklet_initialstub)(struct stacklet_thread_s *,
62-
stacklet_run_fn, void *) = NULL;
63-
6460
struct stacklet_thread_s {
6561
struct stacklet_s *g_stack_chain_head; /* NULL <=> running main */
6662
char *g_current_stack_stop;
@@ -69,6 +65,19 @@ struct stacklet_thread_s {
6965
struct stacklet_s *g_target;
7066
};
7167

68+
#define _check(x) do { if (!(x)) _check_failed(#x); } while (0)
69+
70+
static void _check_failed(const char *check)
71+
{
72+
fprintf(stderr, "FATAL: stacklet: %s failed\n", check);
73+
abort();
74+
}
75+
76+
static void check_valid(struct stacklet_s *g)
77+
{
78+
_check(g->stack_saved >= 0);
79+
}
80+
7281
/***************************************************************/
7382

7483
static void g_save(struct stacklet_s* g, char* stop
@@ -96,7 +105,8 @@ static void g_save(struct stacklet_s* g, char* stop
96105
*/
97106
ptrdiff_t sz1 = g->stack_saved;
98107
ptrdiff_t sz2 = stop - g->stack_start;
99-
assert(stop <= g->stack_stop);
108+
check_valid(g);
109+
_check(stop <= g->stack_stop);
100110

101111
if (sz2 > sz1) {
102112
char *c = (char *)(g + 1);
@@ -146,11 +156,13 @@ static void g_clear_stack(struct stacklet_s *g_target,
146156
{
147157
struct stacklet_s *current = thrd->g_stack_chain_head;
148158
char *target_stop = g_target->stack_stop;
159+
check_valid(g_target);
149160

150-
/* save and unlink tealets that are completely within
161+
/* save and unlink stacklets that are completely within
151162
the area to clear. */
152163
while (current != NULL && current->stack_stop <= target_stop) {
153164
struct stacklet_s *prev = current->stack_prev;
165+
check_valid(current);
154166
current->stack_prev = NULL;
155167
if (current != g_target) {
156168
/* don't bother saving away g_target, because
@@ -222,20 +234,31 @@ static void *g_restore_state(void *new_stack_pointer, void *rawthrd)
222234
struct stacklet_thread_s *thrd = (struct stacklet_thread_s *)rawthrd;
223235
struct stacklet_s *g = thrd->g_target;
224236
ptrdiff_t stack_saved = g->stack_saved;
237+
check_valid(g);
225238

226-
assert(new_stack_pointer == g->stack_start);
239+
_check(new_stack_pointer == g->stack_start);
227240
#if STACK_DIRECTION == 0
228241
memcpy(g->stack_start, g+1, stack_saved);
229242
#else
230243
memcpy(g->stack_start - stack_saved, g+1, stack_saved);
231244
#endif
232245
thrd->g_current_stack_stop = g->stack_stop;
246+
g->stack_saved = -13; /* debugging */
233247
free(g);
234248
return EMPTY_STACKLET_HANDLE;
235249
}
236250

237-
static void g_initialstub(struct stacklet_thread_s *thrd,
238-
stacklet_run_fn run, void *run_arg)
251+
STATIC_NOINLINE
252+
void *_stacklet_switchstack(void *(*save_state)(void*, void*),
253+
void *(*restore_state)(void*, void*),
254+
void *extra)
255+
{
256+
return slp_switch(save_state, restore_state, extra);
257+
}
258+
259+
STATIC_NOINLINE
260+
void g_initialstub(struct stacklet_thread_s *thrd,
261+
stacklet_run_fn run, void *run_arg)
239262
{
240263
struct stacklet_s *result;
241264

@@ -250,10 +273,11 @@ static void g_initialstub(struct stacklet_thread_s *thrd,
250273
result = run(thrd->g_source, run_arg);
251274

252275
/* Then switch to 'result'. */
276+
check_valid(result);
253277
thrd->g_target = result;
254278
_stacklet_switchstack(g_destroy_state, g_restore_state, thrd);
255279

256-
assert(!"stacklet: we should not return here");
280+
_check_failed("we should not return here");
257281
abort();
258282
}
259283
/* The second time it returns. */
@@ -265,13 +289,6 @@ stacklet_thread_handle stacklet_newthread(void)
265289
{
266290
struct stacklet_thread_s *thrd;
267291

268-
if (_stacklet_switchstack == NULL) {
269-
/* set up the following global with an indirection, which is needed
270-
to prevent any inlining */
271-
_stacklet_initialstub = g_initialstub;
272-
_stacklet_switchstack = slp_switch;
273-
}
274-
275292
thrd = malloc(sizeof(struct stacklet_thread_s));
276293
if (thrd != NULL)
277294
memset(thrd, 0, sizeof(struct stacklet_thread_s));
@@ -287,19 +304,20 @@ stacklet_handle stacklet_new(stacklet_thread_handle thrd,
287304
stacklet_run_fn run, void *run_arg)
288305
{
289306
long stackmarker;
290-
assert((char *)NULL < (char *)&stackmarker);
307+
_check((char *)NULL < (char *)&stackmarker);
291308
if (thrd->g_current_stack_stop <= (char *)&stackmarker)
292309
thrd->g_current_stack_stop = ((char *)&stackmarker) + 1;
293310

294311
thrd->g_current_stack_marker = (char *)&stackmarker;
295-
_stacklet_initialstub(thrd, run, run_arg);
312+
g_initialstub(thrd, run, run_arg);
296313
return thrd->g_source;
297314
}
298315

299316
stacklet_handle stacklet_switch(stacklet_handle target)
300317
{
301318
long stackmarker;
302319
stacklet_thread_handle thrd = target->stack_thrd;
320+
check_valid(target);
303321
if (thrd->g_current_stack_stop <= (char *)&stackmarker)
304322
thrd->g_current_stack_stop = ((char *)&stackmarker) + 1;
305323

@@ -310,6 +328,7 @@ stacklet_handle stacklet_switch(stacklet_handle target)
310328

311329
void stacklet_destroy(stacklet_handle target)
312330
{
331+
check_valid(target);
313332
if (target->stack_prev != NULL) {
314333
/* 'target' appears to be in the chained list 'unsaved_stack',
315334
so remove it from there. Note that if 'thrd' was already
@@ -319,12 +338,15 @@ void stacklet_destroy(stacklet_handle target)
319338
we don't even read 'stack_thrd', already deallocated. */
320339
stacklet_thread_handle thrd = target->stack_thrd;
321340
struct stacklet_s **pp = &thrd->g_stack_chain_head;
322-
for (; *pp != NULL; pp = &(*pp)->stack_prev)
341+
for (; *pp != NULL; pp = &(*pp)->stack_prev) {
342+
check_valid(*pp);
323343
if (*pp == target) {
324344
*pp = target->stack_prev;
325345
break;
326346
}
347+
}
327348
}
349+
target->stack_saved = -11; /* debugging */
328350
free(target);
329351
}
330352

@@ -334,6 +356,7 @@ char **_stacklet_translate_pointer(stacklet_handle context, char **ptr)
334356
long delta;
335357
if (context == NULL)
336358
return ptr;
359+
check_valid(context);
337360
delta = p - context->stack_start;
338361
if (((unsigned long)delta) < ((unsigned long)context->stack_saved)) {
339362
/* a pointer to a saved away word */
@@ -345,8 +368,8 @@ char **_stacklet_translate_pointer(stacklet_handle context, char **ptr)
345368
/* out-of-stack pointer! it's only ok if we are the main stacklet
346369
and we are reading past the end, because the main stacklet's
347370
stack stop is not exactly known. */
348-
assert(delta >= 0);
349-
assert(((long)context->stack_stop) & 1);
371+
_check(delta >= 0);
372+
_check(((long)context->stack_stop) & 1);
350373
}
351374
return ptr;
352375
}

src/switch_arm_gcc.h

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,47 @@
55
# define call_reg(x) "blx " #x "\n"
66
#endif
77

8-
static void __attribute__((optimize("O3"))) *slp_switch(void *(*save_state)(void*, void*),
8+
static void *slp_switch(void *(*save_state)(void*, void*),
9+
void *(*restore_state)(void*, void*),
10+
void *extra) __attribute__((noinline));
11+
12+
static void *slp_switch(void *(*save_state)(void*, void*),
913
void *(*restore_state)(void*, void*),
1014
void *extra)
1115
{
1216
void *result;
17+
/*
18+
seven registers to preserve: r2, r3, r7, r8, r9, r10, r11
19+
registers marked as clobbered: r0, r1, r4, r5, r6, r12, lr
20+
others: r13 is sp; r14 is lr; r15 is pc
21+
*/
22+
1323
__asm__ volatile (
14-
"mov r3, %[save_state]\n"
15-
/* save values in calee saved registers for later */
16-
"mov r4, %[restore_state]\n"
17-
"mov r5, %[extra]\n"
24+
25+
/* align the stack and save 7 more registers explicitly */
26+
"mov r0, sp\n"
27+
"and r1, r0, #-16\n"
28+
"mov sp, r1\n"
29+
"push {r0, r2, r3, r7, r8, r9, r10, r11}\n" /* total 8, still aligned */
30+
#ifndef __SOFTFP__
31+
/* We also push d8-d15 to preserve them explicitly. This assumes
32+
* that this code is in a function that doesn't use floating-point
33+
* at all, and so doesn't touch the "d" registers (that's why we mark
34+
* it as non-inlinable). So here by pushing/popping d8-d15 we are
35+
* saving precisely the callee-saved registers in all cases. We
36+
* could also try to list all "d" registers as clobbered, but it
37+
* doesn't work: there is no way I could find to know if we have 16
38+
* or 32 "d" registers (depends on the exact -mcpu=... and we don't
39+
* know it from the C code). If we have 32, then gcc would "save"
40+
* d8-d15 by copying them into d16-d23 for example, and it doesn't
41+
* work. */
42+
"vpush {d8, d9, d10, d11, d12, d13, d14, d15}\n" /* 16 words, still aligned */
43+
#endif
44+
45+
/* save values in callee saved registers for later */
46+
"mov r4, %[restore_state]\n" /* can't be r0 or r1: marked clobbered */
47+
"mov r5, %[extra]\n" /* can't be r0 or r1 or r4: marked clob. */
48+
"mov r3, %[save_state]\n" /* can't be r0, r1, r4, r5: marked clob. */
1849
"mov r0, sp\n" /* arg 1: current (old) stack pointer */
1950
"mov r1, r5\n" /* arg 2: extra */
2051
call_reg(r3) /* call save_state() */
@@ -28,19 +59,26 @@ static void __attribute__((optimize("O3"))) *slp_switch(void *(*save_state)(void
2859
/* From now on, the stack pointer is modified, but the content of the
2960
stack is not restored yet. It contains only garbage here. */
3061
"mov r1, r5\n" /* arg 2: extra */
31-
/* arg 1: current (new) stack pointer is already in r0*/
62+
/* arg 1: current (new) stack pointer is already in r0*/
3263
call_reg(r4) /* call restore_state() */
3364

3465
/* The stack's content is now restored. */
3566
"zero:\n"
67+
68+
#ifndef __SOFTFP__
69+
"vpop {d8, d9, d10, d11, d12, d13, d14, d15}\n"
70+
#endif
71+
"pop {r1, r2, r3, r7, r8, r9, r10, r11}\n"
72+
"mov sp, r1\n"
3673
"mov %[result], r0\n"
3774

3875
: [result]"=r"(result) /* output variables */
3976
/* input variables */
4077
: [restore_state]"r"(restore_state),
4178
[save_state]"r"(save_state),
4279
[extra]"r"(extra)
43-
: "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r13"
80+
: "r0", "r1", "r4", "r5", "r6", "r12", "lr",
81+
"memory", "cc"
4482
);
4583
return result;
4684
}

0 commit comments

Comments
 (0)