From 4a9142943be5eb4c2cfe645697ae9b425b194c92 Mon Sep 17 00:00:00 2001 From: Christopher Friedt Date: Sun, 14 May 2023 11:11:22 -0400 Subject: [PATCH 1/3] Revert "tests: posix: pthread: init pthread_attr_t on each iteration" This reverts commit 7c17bda3c23ed1ea33aa65df2732ac9a586640a4. Signed-off-by: Christopher Friedt --- tests/posix/common/src/pthread.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/posix/common/src/pthread.c b/tests/posix/common/src/pthread.c index 9b53468f9fb38..ab1e904986852 100644 --- a/tests/posix/common/src/pthread.c +++ b/tests/posix/common/src/pthread.c @@ -587,10 +587,11 @@ ZTEST(posix_apis, test_pthread_descriptor_leak) pthread_t pthread1; pthread_attr_t attr; + zassert_ok(pthread_attr_init(&attr)); + zassert_ok(pthread_attr_setstack(&attr, &stack_e[0][0], STACKS)); + /* If we are leaking descriptors, then this loop will never complete */ for (size_t i = 0; i < CONFIG_MAX_PTHREAD_COUNT * 2; ++i) { - zassert_ok(pthread_attr_init(&attr)); - zassert_ok(pthread_attr_setstack(&attr, &stack_e[0][0], STACKS)); zassert_ok(pthread_create(&pthread1, &attr, create_thread1, NULL), "unable to create thread %zu", i); zassert_ok(pthread_join(pthread1, NULL), "unable to join thread %zu", i); From 23d36a79993714741a4ae99535fe599347b7eabb Mon Sep 17 00:00:00 2001 From: Christopher Friedt Date: Sun, 14 May 2023 11:26:16 -0400 Subject: [PATCH 2/3] tests: posix: common: add a small delay in pthread_descriptor_leak The `aborted _current back from dead` error may appear in this particular test under Qemu (and in particular Qemu SMP) systems. A small delay between `pthread_create()` and `pthread_join()` is sufficient to mitigate the issue. Signed-off-by: Christopher Friedt --- tests/posix/common/src/pthread.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/posix/common/src/pthread.c b/tests/posix/common/src/pthread.c index ab1e904986852..5e506c4d39535 100644 --- a/tests/posix/common/src/pthread.c +++ b/tests/posix/common/src/pthread.c @@ -594,6 +594,12 @@ ZTEST(posix_apis, test_pthread_descriptor_leak) for (size_t i = 0; i < CONFIG_MAX_PTHREAD_COUNT * 2; ++i) { zassert_ok(pthread_create(&pthread1, &attr, create_thread1, NULL), "unable to create thread %zu", i); + /* + * k_msleep() should not be necessary, but it is added as a workaround + * for #56163 and #58116, which identified race conditions on some + * platforms. + */ + k_msleep(100); zassert_ok(pthread_join(pthread1, NULL), "unable to join thread %zu", i); } } From 02205a6072bfe4d1f9e228290f02551c289ebe0f Mon Sep 17 00:00:00 2001 From: Christopher Friedt Date: Wed, 24 May 2023 00:02:39 -0400 Subject: [PATCH 3/3] tests: posix: stress test for pthread_create and pthread_join Recently, a race condition was discovered in `pthread_create()` and `pthread_join()` that would trigger an assertion in `kernel/sched.c` of the form below. ``` aborted _current back from dead ``` This was mainly observed in Qemu. Unfortunately, Qemu SMP platforms exhibit a real and measurable "scheduler noise" which makes testing rather difficult. Signed-off-by: Christopher Friedt --- tests/posix/pthread_pressure/CMakeLists.txt | 10 + tests/posix/pthread_pressure/Kconfig | 59 ++++++ tests/posix/pthread_pressure/prj.conf | 3 + tests/posix/pthread_pressure/src/main.c | 224 ++++++++++++++++++++ tests/posix/pthread_pressure/testcase.yaml | 12 ++ 5 files changed, 308 insertions(+) create mode 100644 tests/posix/pthread_pressure/CMakeLists.txt create mode 100644 tests/posix/pthread_pressure/Kconfig create mode 100644 tests/posix/pthread_pressure/prj.conf create mode 100644 tests/posix/pthread_pressure/src/main.c create mode 100644 tests/posix/pthread_pressure/testcase.yaml diff --git a/tests/posix/pthread_pressure/CMakeLists.txt b/tests/posix/pthread_pressure/CMakeLists.txt new file mode 100644 index 0000000000000..efbb1bb00e2fd --- /dev/null +++ b/tests/posix/pthread_pressure/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright (c) 2023, Meta +# +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.20.0) +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(pthread_pressure) + +FILE(GLOB app_sources src/*.c) +target_sources(app PRIVATE ${app_sources}) diff --git a/tests/posix/pthread_pressure/Kconfig b/tests/posix/pthread_pressure/Kconfig new file mode 100644 index 0000000000000..536714064ed90 --- /dev/null +++ b/tests/posix/pthread_pressure/Kconfig @@ -0,0 +1,59 @@ +# Copyright (c) 2023, Meta +# +# SPDX-License-Identifier: Apache-2.0 + +source "Kconfig.zephyr" + +config TEST_NUM_CPUS + int "Number of CPUs to use in parallel" + range 1 MP_NUM_CPUS + default MP_NUM_CPUS + help + The number of parallel threads to run during the test. The test + thread itself yields so that all cores have some probability of + causing racey behaviour. + +config TEST_DURATION_S + int "Number of seconds to run the test" + range 1 21600 + default 5 + help + Duration for the test, in seconds. The range has a reblatively high + upper bound because we should expect that pthread_create() and + pthread_join() are stable enough to run for an arbitrarily long + period of time without encountering any race conditions. + + Some exceptions apply, notably Qemu SMP targets. + +config TEST_DELAY_US + int "Microseconds to delay between pthread join and create" + default 0 + help + If there is a race condition, a value of zero here should + cause a crash. + +config TEST_STACK_SIZE + int "Size of each thread stack in this test" + default 512 if !64_BIT + default 1024 if 64_BIT + help + The minimal stack size required to run a no-op thread. + +config TEST_KTHREADS + bool "Test k_threads" + default y + help + Run tests for k_threads + +config TEST_PTHREADS + bool "Test pthreads" + default y + help + Run tests for pthreads + +config TEST_EXTRA_ASSERTIONS + bool "Add extra assertions into the hot path" + default y + help + On Qemu SMP targets, this can potentially lead to "scheduler noise" + leaking in from the host system, which can cause the test to fail. diff --git a/tests/posix/pthread_pressure/prj.conf b/tests/posix/pthread_pressure/prj.conf new file mode 100644 index 0000000000000..d189ad86a6805 --- /dev/null +++ b/tests/posix/pthread_pressure/prj.conf @@ -0,0 +1,3 @@ +CONFIG_ZTEST=y +CONFIG_ZTEST_NEW_API=y +CONFIG_POSIX_API=y diff --git a/tests/posix/pthread_pressure/src/main.c b/tests/posix/pthread_pressure/src/main.c new file mode 100644 index 0000000000000..f876d4bb29f72 --- /dev/null +++ b/tests/posix/pthread_pressure/src/main.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2023, Meta + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include + +#define STACK_SIZE K_THREAD_STACK_LEN(CONFIG_TEST_STACK_SIZE) + +/* update interval for printing stats */ +#if CONFIG_TEST_DURATION_S >= 60 +#define UPDATE_INTERVAL_S 10 +#elif CONFIG_TEST_DURATION_S >= 30 +#define UPDATE_INTERVAL_S 5 +#else +#define UPDATE_INTERVAL_S 1 +#endif + +/* 32 threads is mainly a limitation of find_lsb_set() */ +#define NUM_THREADS MIN(32, MIN(CONFIG_TEST_NUM_CPUS, CONFIG_MAX_PTHREAD_COUNT)) + +typedef int (*create_fn)(int i); +typedef int (*join_fn)(int i); + +static void *setup(void); +static void before(void *fixture); + +/* bitmask of available threads */ +static bool alive[NUM_THREADS]; + +/* array of thread stacks */ +static K_THREAD_STACK_ARRAY_DEFINE(thread_stacks, NUM_THREADS, STACK_SIZE); + +static struct k_thread k_threads[NUM_THREADS]; +static size_t counters[NUM_THREADS]; + +static void print_stats(uint64_t now, uint64_t end) +{ + printk("now (ms): %llu end (ms): %llu\n", now, end); + for (int i = 0; i < NUM_THREADS; ++i) { + printk("Thread %d created and joined %zu times\n", i, counters[i]); + } +} + +static void test_create_join_common(const char *tag, create_fn create, join_fn join) +{ + int i; + int ret; + uint64_t now_ms = k_uptime_get(); + const uint64_t end_ms = now_ms + MSEC_PER_SEC * CONFIG_TEST_DURATION_S; + uint64_t update_ms = now_ms + MSEC_PER_SEC * UPDATE_INTERVAL_S; + + printk("BOARD: %s\n", CONFIG_BOARD); + printk("NUM_THREADS: %u\n", NUM_THREADS); + printk("TEST_NUM_CPUS: %u\n", CONFIG_TEST_NUM_CPUS); + printk("TEST_DURATION_S: %u\n", CONFIG_TEST_DURATION_S); + printk("TEST_DELAY_US: %u\n", CONFIG_TEST_DELAY_US); + + for (i = 0; i < NUM_THREADS; ++i) { + /* spawn thread i */ + ret = create(i); + if (IS_ENABLED(CONFIG_TEST_EXTRA_ASSERTIONS)) { + zassert_ok(ret, "%s_create(%d)[%zu] failed: %d", tag, i, counters[i], ret); + } + } + + do { + if (!IS_ENABLED(CONFIG_SMP)) { + /* allow the test thread to be swapped-out */ + k_yield(); + } + + for (i = 0; i < NUM_THREADS; ++i) { + if (alive[i]) { + ret = join(i); + if (IS_ENABLED(CONFIG_TEST_EXTRA_ASSERTIONS)) { + zassert_ok(ret, "%s_join(%d)[%zu] failed: %d", tag, i, + counters[i], ret); + } + alive[i] = false; + + /* update counter i after each (create,join) pair */ + ++counters[i]; + + if (IS_ENABLED(CONFIG_TEST_DELAY_US)) { + /* success with 0 delay means we are ~raceless */ + k_busy_wait(CONFIG_TEST_DELAY_US); + } + + /* re-spawn thread i */ + ret = create(i); + if (IS_ENABLED(CONFIG_TEST_EXTRA_ASSERTIONS)) { + zassert_ok(ret, "%s_create(%d)[%zu] failed: %d", tag, i, + counters[i], ret); + } + } + } + + /* are we there yet? */ + now_ms = k_uptime_get(); + + /* dump some stats periodically */ + if (now_ms > update_ms) { + update_ms += MSEC_PER_SEC * UPDATE_INTERVAL_S; + + /* at this point, we should have seen many context switches */ + for (i = 0; i < NUM_THREADS; ++i) { + if (IS_ENABLED(CONFIG_TEST_EXTRA_ASSERTIONS)) { + zassert_true(counters[i] > 0, "%s %d was never scheduled", + tag, i); + } + } + + print_stats(now_ms, end_ms); + } + } while (end_ms > now_ms); + + print_stats(now_ms, end_ms); +} + +/* + * Wrappers for k_threads + */ + +static void k_thread_fun(void *arg1, void *arg2, void *arg3) +{ + int i = POINTER_TO_INT(arg1); + + alive[i] = true; +} + +static int k_thread_create_wrapper(int i) +{ + k_thread_create(&k_threads[i], thread_stacks[i], STACK_SIZE, k_thread_fun, + INT_TO_POINTER(i), NULL, NULL, K_HIGHEST_APPLICATION_THREAD_PRIO, 0, + K_NO_WAIT); + + return 0; +} + +static int k_thread_join_wrapper(int i) +{ + return k_thread_join(&k_threads[i], K_FOREVER); +} + +ZTEST(pthread_pressure, test_k_thread_create_join) +{ + if (IS_ENABLED(CONFIG_TEST_KTHREADS)) { + test_create_join_common("k_thread", k_thread_create_wrapper, k_thread_join_wrapper); + } else { + ztest_test_skip(); + } +} + +/* + * Wrappers for pthreads + */ + +static pthread_t pthreads[NUM_THREADS]; +static pthread_attr_t pthread_attrs[NUM_THREADS]; + +static void *pthread_fun(void *arg) +{ + k_thread_fun(arg, NULL, NULL); + return NULL; +} + +static int pthread_create_wrapper(int i) +{ + return pthread_create(&pthreads[i], &pthread_attrs[i], pthread_fun, INT_TO_POINTER(i)); +} + +static int pthread_join_wrapper(int i) +{ + return pthread_join(pthreads[i], NULL); +} + +ZTEST(pthread_pressure, test_pthread_create_join) +{ + if (IS_ENABLED(CONFIG_TEST_PTHREADS)) { + test_create_join_common("pthread", pthread_create_wrapper, pthread_join_wrapper); + } else { + ztest_test_skip(); + } +} + +/* + * Test suite / fixture + */ + +ZTEST_SUITE(pthread_pressure, NULL, setup, before, NULL, NULL); + +static void *setup(void) +{ + if (IS_ENABLED(CONFIG_TEST_PTHREADS)) { + const struct sched_param param = { + .sched_priority = sched_get_priority_max(SCHED_FIFO), + }; + + /* setup pthread stacks */ + for (int i = 0; i < NUM_THREADS; ++i) { + zassert_ok(pthread_attr_init(&pthread_attrs[i])); + zassert_ok(pthread_attr_setstack(&pthread_attrs[i], thread_stacks[i], + STACK_SIZE)); + zassert_ok(pthread_attr_setschedpolicy(&pthread_attrs[i], SCHED_FIFO)); + zassert_ok(pthread_attr_setschedparam(&pthread_attrs[i], ¶m)); + } + } + + return NULL; +} + +static void before(void *fixture) +{ + ARG_UNUSED(before); + + for (int i = 0; i < NUM_THREADS; ++i) { + counters[i] = 0; + } +} diff --git a/tests/posix/pthread_pressure/testcase.yaml b/tests/posix/pthread_pressure/testcase.yaml new file mode 100644 index 0000000000000..cca77f9179a3e --- /dev/null +++ b/tests/posix/pthread_pressure/testcase.yaml @@ -0,0 +1,12 @@ +common: + tags: posix + min_ram: 64 + arch_exclude: + - posix + integration_platforms: + - qemu_riscv64_smp + platform_exclude: + - qemu_cortex_a53 + - qemu_cortex_a53_smp +tests: + portability.posix.pthread_pressure: {}