Skip to content

Commit 61e084f

Browse files
aescolarnashif
authored andcommitted
arch: POSIX: Fix race with unused threads
Fix a race which seems to have been presenting itself very sporadically on loaded systems. The race seems to have caused tests/kernel/sched/schedule_api to fail at random on native_posix. The case is a bit convoluted: When the kernel calls z_new_thread(), the POSIX arch saves the new thread entry call in that new Zephyr thread stack together with a bit of extra info for the POSIX arch. And spawns a new pthread (posix_thread_starter()) which will eventually (after the Zephyr kernel swapped to it), call that entry function. (Note that in principle a thread spawned by pthreads may be arbitrarily delayed) The POSIX arch does not try to synchronize to that new pthread (because why should it) until the first time the Zephyr kernel tries to swap to that thread. But, the kernel may never try to swap to it. And therefore that thread's posix_thread_starter() may never have got to run before the thread was aborted, and its Zephyr stack reused for something else by the Zephyr app. As posix_thread_starter() was relaying on looking into that thread stack, it may now be looking into another thread stack or anything else. So, this commit fixes it by having posix_thread_starter() get the input it always needs not from the Zephyr stack, but from its own pthread_create() parameter pointing to a structure kept by the POSIX arch. Note that if the thread was aborted before reaching that point posix_thread_starter() will NOT call the Zephyr thread entry function, but just cleanup. With this change all "asynchronous" parts of the POSIX arch should relay only on the POSIX arch own structures. Signed-off-by: Alberto Escolar Piedras <[email protected]>
1 parent 5fcaeaa commit 61e084f

File tree

1 file changed

+19
-13
lines changed

1 file changed

+19
-13
lines changed

arch/posix/core/posix_core.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ struct threads_table_el {
7272
bool running; /* Is this the currently running thread */
7373
pthread_t thread; /* Actual pthread_t as returned by native kernel */
7474
int thead_cnt; /* For debugging: Unique, consecutive, thread number */
75+
/* Pointer to the status kept in the Zephyr thread stack */
76+
posix_thread_status_t *t_status;
7577
};
7678

7779
static struct threads_table_el *threads_table;
@@ -260,11 +262,11 @@ static void posix_cleanup_handler(void *arg)
260262
*/
261263
static void *posix_thread_starter(void *arg)
262264
{
263-
posix_thread_status_t *ptr = (posix_thread_status_t *) arg;
265+
int thread_idx = (intptr_t)arg;
264266

265267
PC_DEBUG("Thread [%i] %i: %s: Starting\n",
266-
threads_table[ptr->thread_idx].thead_cnt,
267-
ptr->thread_idx,
268+
threads_table[thread_idx].thead_cnt,
269+
thread_idx,
268270
__func__);
269271

270272
/*
@@ -286,18 +288,20 @@ static void *posix_thread_starter(void *arg)
286288
pthread_cleanup_push(posix_cleanup_handler, arg);
287289

288290
PC_DEBUG("Thread [%i] %i: %s: After start mutex (hav mut)\n",
289-
threads_table[ptr->thread_idx].thead_cnt,
290-
ptr->thread_idx,
291+
threads_table[thread_idx].thead_cnt,
292+
thread_idx,
291293
__func__);
292294

293295
/*
294296
* The thread would try to execute immediately, so we block it
295297
* until allowed
296298
*/
297-
posix_wait_until_allowed(ptr->thread_idx);
299+
posix_wait_until_allowed(thread_idx);
298300

299301
posix_new_thread_pre_start();
300302

303+
posix_thread_status_t *ptr = threads_table[thread_idx].t_status;
304+
301305
z_thread_entry(ptr->entry_point, ptr->arg1, ptr->arg2, ptr->arg3);
302306

303307
/*
@@ -306,13 +310,13 @@ static void *posix_thread_starter(void *arg)
306310
*/
307311
/* LCOV_EXCL_START */
308312
posix_print_trace(PREFIX"Thread [%i] %i [%lu] ended!?!\n",
309-
threads_table[ptr->thread_idx].thead_cnt,
310-
ptr->thread_idx,
313+
threads_table[thread_idx].thead_cnt,
314+
thread_idx,
311315
pthread_self());
312316

313317

314-
threads_table[ptr->thread_idx].running = false;
315-
threads_table[ptr->thread_idx].state = FAILED;
318+
threads_table[thread_idx].running = false;
319+
threads_table[thread_idx].state = FAILED;
316320

317321
pthread_cleanup_pop(1);
318322

@@ -370,16 +374,18 @@ void posix_new_thread(posix_thread_status_t *ptr)
370374
threads_table[t_slot].state = USED;
371375
threads_table[t_slot].running = false;
372376
threads_table[t_slot].thead_cnt = thread_create_count++;
377+
threads_table[t_slot].t_status = ptr;
373378
ptr->thread_idx = t_slot;
374379

375380
PC_SAFE_CALL(pthread_create(&threads_table[t_slot].thread,
376381
NULL,
377382
posix_thread_starter,
378-
(void *)ptr));
383+
(void *)(intptr_t)t_slot));
379384

380-
PC_DEBUG("created thread [%i] %i [%lu]\n",
385+
PC_DEBUG("%s created thread [%i] %i [%lu]\n",
386+
__func__,
381387
threads_table[t_slot].thead_cnt,
382-
ptr->thread_idx,
388+
t_slot,
383389
threads_table[t_slot].thread);
384390

385391
}

0 commit comments

Comments
 (0)