Skip to content

Commit 5132abe

Browse files
Damian-Nordic authored and rlubos committed
nrf_rpc: rpc_utils: system health command
Add RPC command for getting system health information from a remote device. Currently, the system health information only returns information about selected hung threads. Signed-off-by: Damian Krolik <[email protected]>
1 parent 7c0b4b5 commit 5132abe

File tree

10 files changed

+228
-1
lines changed

10 files changed

+228
-1
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright (c) 2025 Nordic Semiconductor ASA
3+
*
4+
* SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
5+
*/
6+
7+
#ifndef NRF_RPC_SYSTEM_HEALTH_H_
#define NRF_RPC_SYSTEM_HEALTH_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @addtogroup nrf_rpc_utils nRF RPC utility commands
 * @{
 * @defgroup nrf_rpc_system_health nRF RPC system health commands
 * @{
 */

/** @brief System health information. */
struct nrf_rpc_system_health {
	/** @brief Bitmask of hung threads.
	 *
	 * Each bit represents a thread watched by the remote server.
	 * If a thread is hung, the corresponding bit is set.
	 * A value of zero means that no watched thread is reported as hung.
	 *
	 * The bit assignment is defined by the server. In the server
	 * implementation that accompanies this header, bit 0 is the system
	 * work queue, followed by the OpenThread work queue and the Bluetooth
	 * long work queue, each only when enabled in the server build.
	 */
	uint32_t hung_threads;
};

/** @brief Get system health information from the remote server.
 *
 * @param[out] out Pointer to the system health information.
 *                 Must not be NULL: it is written unconditionally.
 */
void nrf_rpc_system_health_get(struct nrf_rpc_system_health *out);

/**
 * @}
 * @}
 */

#ifdef __cplusplus
}
#endif

#endif /* NRF_RPC_SYSTEM_HEALTH_H_ */

samples/nrf_rpc/protocols_serialization/client/prj.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ CONFIG_NRF_RPC_UTILS=y
2828
CONFIG_NRF_RPC_UTILS_CLIENT=y
2929
CONFIG_NRF_RPC_UTILS_DEV_INFO=y
3030
CONFIG_NRF_RPC_UTILS_REMOTE_SHELL=y
31+
CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH=y

samples/nrf_rpc/protocols_serialization/client/src/rpc_utils_shell.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <nrf_rpc/rpc_utils/crash_gen.h>
99
#include <nrf_rpc/rpc_utils/dev_info.h>
1010
#include <nrf_rpc/rpc_utils/remote_shell.h>
11+
#include <nrf_rpc/rpc_utils/system_health.h>
1112

1213
#if defined(CONFIG_NRF_RPC_UTILS_DEV_INFO)
1314
static int remote_version_cmd(const struct shell *sh, size_t argc, char *argv[])
@@ -104,6 +105,19 @@ static int cmd_stack_overflow(const struct shell *sh, size_t argc, char *argv[])
104105
}
105106
#endif /* CONFIG_NRF_RPC_UTILS_CRASH_GEN */
106107

108+
#if defined(CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH)
/*
 * Shell handler that fetches the system health report from the remote
 * device and prints the hung thread bitmask. Always returns 0.
 */
static int cmd_system_health(const struct shell *sh, size_t argc, char *argv[])
{
	struct nrf_rpc_system_health report;

	nrf_rpc_system_health_get(&report);
	shell_print(sh, "Hung threads: %u", report.hung_threads);

	return 0;
}
#endif /* CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH */
120+
107121
SHELL_STATIC_SUBCMD_SET_CREATE(
108122
util_cmds,
109123
#if defined(CONFIG_NRF_RPC_UTILS_DEV_INFO)
@@ -116,6 +130,9 @@ SHELL_STATIC_SUBCMD_SET_CREATE(
116130
SHELL_CMD_ARG(assert, NULL, "Invoke assert", cmd_assert, 1, 1),
117131
SHELL_CMD_ARG(hard_fault, NULL, "Invoke hard fault", cmd_hard_fault, 1, 1),
118132
SHELL_CMD_ARG(stack_overflow, NULL, "Invoke stack overflow", cmd_stack_overflow, 1, 1),
133+
#endif
134+
#if defined(CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH)
135+
SHELL_CMD_ARG(system_health, NULL, "Get system health", cmd_system_health, 0, 0),
119136
#endif
120137
SHELL_SUBCMD_SET_END);
121138

samples/nrf_rpc/protocols_serialization/server/prj.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ CONFIG_NRF_RPC_UTILS=y
2020
CONFIG_NRF_RPC_UTILS_SERVER=y
2121
CONFIG_NRF_RPC_UTILS_DEV_INFO=y
2222
CONFIG_NRF_RPC_UTILS_REMOTE_SHELL=y
23+
CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH=y
2324

2425
CONFIG_SETTINGS=y
2526
CONFIG_FLASH=y

subsys/nrf_rpc/rpc_utils/Kconfig

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,24 @@ config NRF_RPC_UTILS_REMOTE_SHELL
4444
config NRF_RPC_UTILS_CRASH_GEN
4545
bool "Crash generator"
4646
help
47-
Enables commands for generating a crash on the remote device.
47+
Enables nRF RPC commands for generating a crash on the remote device.
48+
49+
config NRF_RPC_UTILS_SYSTEM_HEALTH
50+
bool "System health"
51+
help
52+
Enables nRF RPC commands for getting the system health information.
53+
54+
if NRF_RPC_UTILS_SYSTEM_HEALTH && NRF_RPC_UTILS_SERVER

config NRF_RPC_UTILS_WATCHDOG_PERIOD
	int "Watchdog period (s)"
	default 10
	# Lower bound: the value is used as the period of a kernel timer, and
	# a zero period makes the timer one-shot.
	# Upper bound: 2147483 s * 1000 still fits in a signed 32-bit value.
	range 1 2147483
	help
	  Configures how frequently the nRF RPC utils server kicks watched
	  threads, and how quickly it concludes a thread is hung.
	  The period must be greater than zero, otherwise the watchdog would
	  run only once and hung threads would never be reported.

endif # NRF_RPC_UTILS_SYSTEM_HEALTH && NRF_RPC_UTILS_SERVER
4865

4966
config SHELL_BACKEND_DUMMY_BUF_SIZE
5067
default 1024 if NRF_RPC_UTILS_REMOTE_SHELL && NRF_RPC_UTILS_SERVER

subsys/nrf_rpc/rpc_utils/client/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ zephyr_library()
99
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_CRASH_GEN crash_gen.c)
1010
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_DEV_INFO dev_info.c)
1111
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_REMOTE_SHELL remote_shell.c)
12+
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH system_health.c)
1213

1314
zephyr_library_include_directories(
1415
${CMAKE_CURRENT_SOURCE_DIR}/../common
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright (c) 2025 Nordic Semiconductor ASA
3+
*
4+
* SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
5+
*/
6+
7+
#include <nrf_rpc/nrf_rpc_serialize.h>
8+
#include <nrf_rpc/rpc_utils/system_health.h>
9+
#include <rpc_utils_group.h>
10+
11+
#include <nrf_rpc_cbor.h>
12+
13+
void nrf_rpc_system_health_get(struct nrf_rpc_system_health *out)
14+
{
15+
struct nrf_rpc_cbor_ctx ctx;
16+
17+
NRF_RPC_CBOR_ALLOC(&rpc_utils_group, ctx, 0);
18+
nrf_rpc_cbor_cmd_rsp_no_err(&rpc_utils_group, RPC_UTIL_SYSTEM_HEALTH_GET, &ctx);
19+
20+
out->hung_threads = nrf_rpc_decode_uint(&ctx);
21+
22+
if (!nrf_rpc_decoding_done_and_check(&rpc_utils_group, &ctx)) {
23+
nrf_rpc_err(-EBADMSG, NRF_RPC_ERR_SRC_RECV, &rpc_utils_group,
24+
RPC_UTIL_SYSTEM_HEALTH_GET, NRF_RPC_PACKET_TYPE_RSP);
25+
}
26+
}

subsys/nrf_rpc/rpc_utils/common/rpc_utils_group.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ enum rpc_utils_cmd_server {
1919
RPC_UTIL_CRASH_GEN_ASSERT = 2,
2020
RPC_UTIL_CRASH_GEN_HARD_FAULT = 3,
2121
RPC_UTIL_CRASH_GEN_STACK_OVERFLOW = 4,
22+
RPC_UTIL_SYSTEM_HEALTH_GET = 5,
2223
};
2324

2425
#endif /* RPC_UTILS_GROUP_H_ */

subsys/nrf_rpc/rpc_utils/server/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ zephyr_library()
99
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_CRASH_GEN crash_gen.c)
1010
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_DEV_INFO dev_info.c)
1111
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_REMOTE_SHELL remote_shell.c)
12+
zephyr_library_sources_ifdef(CONFIG_NRF_RPC_UTILS_SYSTEM_HEALTH system_health.c)
1213

1314
zephyr_library_include_directories(
1415
${CMAKE_CURRENT_SOURCE_DIR}/../common
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* Copyright (c) 2025 Nordic Semiconductor ASA
3+
*
4+
* SPDX-License-Identifier: LicenseRef-Nordic-5-Clause
5+
*/
6+
7+
#include <nrf_rpc/nrf_rpc_serialize.h>
8+
#include <rpc_utils_group.h>
9+
10+
#include <nrf_rpc_cbor.h>
11+
12+
#include <zephyr/kernel.h>
13+
#include <zephyr/sys/atomic.h>
14+
#include <zephyr/sys/util.h>
15+
16+
#ifdef CONFIG_OPENTHREAD
17+
#include <zephyr/net/openthread.h>
18+
#endif
19+
20+
/*
 * Pointer to a function that "kicks" a watched thread.
 *
 * Currently, only work queue threads can be watched.
 * "Kicking" means submitting the given work item to the watched thread's work queue.
 * The thread is marked as alive (not hung) only once it actually runs that work item;
 * in this file the work item handler is watchdog_feed().
 */
26+
typedef void (*watched_thread_kick_fn)(struct k_work *work);
27+
28+
/* Kick the system work queue thread by submitting @p work to it. */
static void kick_system_workq(struct k_work *work)
{
	/* The submission result is intentionally ignored. */
	const int rc = k_work_submit(work);

	(void)rc;
}
32+
33+
#ifdef CONFIG_OPENTHREAD
/*
 * Kick the OpenThread thread by submitting @p work to its work queue.
 *
 * The queue is recovered from the thread ID, which assumes that the
 * OpenThread thread is the `thread` member of a struct k_work_q.
 */
static void kick_openthread(struct k_work *work)
{
	k_tid_t ot_tid = openthread_thread_id_get();
	struct k_work_q *ot_workq = CONTAINER_OF(ot_tid, struct k_work_q, thread);
	const int rc = k_work_submit_to_queue(ot_workq, work);

	/* The submission result is intentionally ignored. */
	(void)rc;
}
#endif /* CONFIG_OPENTHREAD */
42+
43+
#ifdef CONFIG_BT_LONG_WQ
/* Declared locally; no header providing this prototype is included by this file. */
extern int bt_long_wq_submit(struct k_work *work);

/* Kick the Bluetooth long work queue thread by submitting @p work to it. */
static void kick_bluetooth_long(struct k_work *work)
{
	/* The submission result is intentionally ignored. */
	const int rc = bt_long_wq_submit(work);

	(void)rc;
}
#endif /* CONFIG_BT_LONG_WQ */
51+
52+
const static watched_thread_kick_fn watched_thread_kick[] = {
53+
kick_system_workq,
54+
#ifdef CONFIG_OPENTHREAD
55+
kick_openthread,
56+
#endif
57+
#ifdef CONFIG_BT_LONG_WQ
58+
kick_bluetooth_long,
59+
#endif
60+
};
61+
62+
enum {
63+
NUM_WATCHED_THREADS = ARRAY_SIZE(watched_thread_kick),
64+
};
65+
66+
static atomic_t hung_threads;
67+
static atomic_t hung_thread_candidates;
68+
static void watchdog_timer_handler(struct k_timer *timer);
69+
static K_TIMER_DEFINE(watchdog_timer, watchdog_timer_handler, NULL);
70+
static struct k_work watched_thread_work[NUM_WATCHED_THREADS];
71+
72+
/*
73+
* Function called by a watched thread to prove it's not hung.
74+
*/
75+
static void watchdog_feed(struct k_work *work)
76+
{
77+
const size_t index = ARRAY_INDEX(watched_thread_work, work);
78+
79+
atomic_clear_bit(&hung_thread_candidates, index);
80+
}
81+
82+
static void watchdog_timer_handler(struct k_timer *timer)
83+
{
84+
const atomic_val_t ALL_THREADS = GENMASK(NUM_WATCHED_THREADS - 1, 0);
85+
86+
atomic_set(&hung_threads, atomic_set(&hung_thread_candidates, ALL_THREADS));
87+
88+
for (size_t i = 0; i < NUM_WATCHED_THREADS; i++) {
89+
watched_thread_kick[i](&watched_thread_work[i]);
90+
}
91+
}
92+
93+
static int watchdog_init(void)
94+
{
95+
for (size_t i = 0; i < NUM_WATCHED_THREADS; i++) {
96+
k_work_init(&watched_thread_work[i], watchdog_feed);
97+
}
98+
99+
k_timer_start(&watchdog_timer, K_NO_WAIT, K_SECONDS(CONFIG_NRF_RPC_UTILS_WATCHDOG_PERIOD));
100+
101+
return 0;
102+
}
103+
104+
SYS_INIT(watchdog_init, APPLICATION, CONFIG_APPLICATION_INIT_PRIORITY);
105+
106+
static void system_health_get_handler(const struct nrf_rpc_group *group,
107+
struct nrf_rpc_cbor_ctx *ctx, void *handler_data)
108+
{
109+
nrf_rpc_cbor_decoding_done(group, ctx);
110+
nrf_rpc_rsp_send_uint(group, atomic_get(&hung_threads));
111+
}
112+
113+
NRF_RPC_CBOR_CMD_DECODER(rpc_utils_group, system_health_get, RPC_UTIL_SYSTEM_HEALTH_GET,
114+
system_health_get_handler, NULL);

0 commit comments

Comments
 (0)