Skip to content

Commit 45f6872

Browse files
authored
Merge pull request #48 from CHERIoT-Platform/hlefeuvre/stack-overflow-resilient-error-handler
Move to a stack-overflow-resilient error handler
2 parents a258eca + 6f0296b commit 45f6872

File tree

4 files changed

+119
-179
lines changed

4 files changed

+119
-179
lines changed

lib/tcpip/FreeRTOS_IP_wrapper.c

Lines changed: 56 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ void ip_thread_start(void);
1717
#include <futex.h>
1818
#include <locks.h>
1919
#include <stdatomic.h>
20+
#include <unwind.h>
2021

2122
/**
2223
* Flag used to synchronize the network stack thread and user threads at
@@ -37,11 +38,16 @@ static uint32_t threadEntryGuard;
3738
uint16_t networkThreadID;
3839

3940
/**
40-
* Global lock acquired by the IP thread at startup time. This lock is never
41-
* released and acquiring it after startup will always fail. This lock can be
42-
* used to force the IP thread to run for a short amount of time, e.g., when
43-
* the internal FreeRTOS message queue is full and we want to let the IP thread
44-
* empty it to close a socket.
41+
* Global lock acquired by the IP thread at startup time. In normal running
42+
* conditions, this lock is never released and acquiring it after startup will
43+
* always fail. This lock can be used to force the IP thread to run for a short
44+
* amount of time, e.g., when the internal FreeRTOS message queue is full and
45+
* we want to let the IP thread empty it to close a socket. See such an example
46+
* in `close_socket_retry`.
47+
*
48+
* Corner case: during a network stack reset, the lock is shortly released to
49+
* notify other threads that the IP thread is restarting. See comment in
50+
* `ip_thread_entry` for more details.
4551
*/
4652
struct FlagLockState ipThreadLockState;
4753

@@ -106,6 +112,8 @@ void ip_cleanup(void)
106112
// modifiable by a potentially compromised compartment.
107113
}
108114

115+
extern void reset_network_stack_state(bool isIpThread);
116+
109117
void __cheri_compartment("TCPIP") ip_thread_entry(void)
110118
{
111119
FreeRTOS_printf(("ip_thread_entry\n"));
@@ -114,36 +122,53 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
114122

115123
while (1)
116124
{
117-
while (threadEntryGuard == 0)
125+
CHERIOT_DURING
118126
{
127+
while (threadEntryGuard == 0)
128+
{
129+
FreeRTOS_printf(
130+
("Sleeping until the IP task is supposed to start\n"));
131+
futex_wait(&threadEntryGuard, 0);
132+
}
133+
134+
// Reset the guard now: we will only ever re-iterate
135+
// the loop in the case of a network stack reset, in
136+
// which case we want to wait again for a call to
137+
// `ip_thread_start`.
138+
//
139+
// Note that we cannot reset this in `ip_cleanup`,
140+
// because that would overwrite the 1 written by
141+
// `ip_thread_start` if the latter was called before we
142+
// call `ip_cleanup`. This will happen if the IP thread
143+
// crashes, in which case we would first call
144+
// `ip_thread_start` in the error handler, and then
145+
// reset the context to `ip_thread_entry` (itself then
146+
// calling `ip_cleanup`).
147+
threadEntryGuard = 0;
148+
149+
xIPTaskHandle = networkThreadID;
119150
FreeRTOS_printf(
120-
("Sleeping until the IP task is supposed to start\n"));
121-
futex_wait(&threadEntryGuard, 0);
122-
}
151+
("ip_thread_entry starting, thread ID is %p\n", xIPTaskHandle));
123152

124-
// Reset the guard now: we will only ever re-enter this
125-
// function in the case of a network stack reset, in which case
126-
// we want to wait again for a call to `ip_thread_start`.
127-
//
128-
// Note that we cannot reset this in `ip_cleanup`, because that
129-
// would overwrite the 1 written by `ip_thread_start` if the
130-
// latter was called before we call `ip_cleanup`. This will
131-
// happen if the IP thread crashes, in which case we would
132-
// first call `ip_thread_start` in the error handler, and then
133-
// reset the context to `ip_thread_entry` (itself then calling
134-
// `ip_cleanup`).
135-
threadEntryGuard = 0;
136-
137-
xIPTaskHandle = networkThreadID;
138-
FreeRTOS_printf(
139-
("ip_thread_entry starting, thread ID is %p\n", xIPTaskHandle));
140-
141-
flaglock_priority_inheriting_lock(&ipThreadLockState);
142-
143-
// FreeRTOS event loop. This will only return if a user thread
144-
// crashed.
145-
prvIPTask(NULL);
153+
flaglock_priority_inheriting_lock(&ipThreadLockState);
146154

155+
// FreeRTOS event loop. This will only return if a user
156+
// thread crashed.
157+
prvIPTask(NULL);
158+
}
159+
CHERIOT_HANDLER
160+
{
161+
// Call the network stack error handler for the network thread.
162+
reset_network_stack_state(true /* this is the IP thread */);
163+
}
164+
CHERIOT_END_HANDLER
165+
166+
// Release the IP thread lock. We will re-acquire it in the
167+
// next loop iteration. This is necessary if a user thread
168+
// triggered a reset. In such a case, the user thread error
169+
// handler will wait for the IP thread to restart by acquiring
170+
// the `ipThreadLockState` with an infinite timeout (and
171+
// immediately releasing it thereafter).
147172
flaglock_unlock(&ipThreadLockState);
148173
}
149174
}

lib/tcpip/tcpip-internal.h

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,16 @@
55
#include <FreeRTOS_IP.h>
66
#include <ds/linked_list.h>
77
#include <locks.hh>
8+
#include <unwind.h>
89

910
/**
1011
* Internal helpers and data structures for use inside of the TCP/IP
1112
* compartment. These should be called or used only from/in the TCP/IP
1213
* compartment.
1314
*/
1415

16+
extern "C" void reset_network_stack_state(bool isIpThread);
17+
1518
/**
1619
* Flags for the state of restart.
1720
*/
@@ -97,30 +100,38 @@ extern std::atomic<uint8_t> userThreadCount;
97100

98101
/**
99102
* Helper to run a function ensuring that the thread counters are updated
100-
* appropriately. Every entry point of the TCP/IP stack API (with the exception
101-
* of the driver thread, see below) should go through this unless it
102-
* manipulates `userThreadCount` manually.
103+
* appropriately and that the thread is running under the error handler. Every
104+
* entry point of the TCP/IP stack API (with the exception of the driver
105+
* thread, see below) should go through this unless it manipulates
106+
* `userThreadCount` and sets up the error handler manually.
103107
*/
104108
auto with_restarting_checks(auto operation, auto errorValue)
105109
{
106-
if (restartState.load() != 0)
107-
{
108-
// yield to give a chance to the restart code to make some
109-
// progress, in case applications are aggressively trying to
110-
// re-open the socket.
111-
yield();
112-
return errorValue;
113-
}
114-
userThreadCount++;
115-
auto ret = operation();
116-
// The decrement will happen in the error handler if the thread
117-
// crashes.
118-
//
119-
// FIXME The decrement will *not* happen when a thread runs out of call
120-
// stack in the TCP/IP compartment (because the error handler does not
121-
// execute in that case). This will be fixed when we enable the error
122-
// handler to run on stack overflow.
123-
userThreadCount--;
110+
auto ret = errorValue;
111+
112+
on_error(
113+
[&]() {
114+
// Is a reset ongoing?
115+
if (restartState.load() == 0)
116+
{
117+
// We are not resetting.
118+
userThreadCount++;
119+
ret = operation();
120+
// The decrement will happen in the error handler if
121+
// the thread crashes.
122+
userThreadCount--;
123+
return;
124+
}
125+
// A reset is ongoing. yield to give a chance to the restart
126+
// code to make some progress, in case applications are
127+
// aggressively trying to re-open the socket.
128+
yield();
129+
},
130+
[&]() {
131+
// Call the network stack error handler.
132+
reset_network_stack_state(false /* this is not the IP thread */);
133+
});
134+
124135
return ret;
125136
}
126137

@@ -135,8 +146,20 @@ auto with_restarting_checks(auto operation, auto errorValue)
135146
*/
136147
auto with_restarting_checks_driver(auto operation, auto errorValue)
137148
{
138-
userThreadCount++;
139-
auto ret = operation();
140-
userThreadCount--;
149+
auto ret = errorValue;
150+
151+
on_error(
152+
[&]() {
153+
userThreadCount++;
154+
ret = operation();
155+
// The decrement will happen in the error handler if the thread
156+
// crashes.
157+
userThreadCount--;
158+
},
159+
[&]() {
160+
// Call the network stack error handler.
161+
reset_network_stack_state(false /* this is not the IP thread */);
162+
});
163+
141164
return ret;
142165
}

0 commit comments

Comments
 (0)