Skip to content

Commit 9d475d8

Browse files
glemcorostedt
authored andcommitted
rv: Retry when da monitor detects race conditions
DA monitor can be accessed from multiple cores simultaneously, this is likely, for instance when dealing with per-task monitors reacting on events that do not always occur on the CPU where the task is running. This can cause race conditions where two events change the next state and we see inconsistent values. E.g.: [62] event_srs: 27: sleepable x sched_wakeup -> running (final) [63] event_srs: 27: sleepable x sched_set_state_sleepable -> sleepable [63] error_srs: 27: event sched_switch_suspend not expected in the state running In this case the monitor fails because the event on CPU 62 wins against the one on CPU 63, although the correct state should have been sleepable, since the task get suspended. Detect if the current state was modified by using try_cmpxchg while storing the next value. If it was, try again reading the current state. After a maximum number of failed retries, react by calling a special tracepoint, print on the console and reset the monitor. Remove the functions da_monitor_curr_state() and da_monitor_set_state() as they only hide the underlying implementation in this case. Monitors where this type of condition can occur must be able to account for racing events in any possible order, as we cannot know the winner. Cc: Ingo Molnar <[email protected]> Cc: Masami Hiramatsu <[email protected]> Cc: Tomas Glozar <[email protected]> Cc: Juri Lelli <[email protected]> Cc: Clark Williams <[email protected]> Cc: John Kacur <[email protected]> Cc: Peter Zijlstra <[email protected]> Link: https://lore.kernel.org/[email protected] Signed-off-by: Gabriele Monaco <[email protected]> Reviewed-by: Nam Cao <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 79de661 commit 9d475d8

File tree

4 files changed

+85
-54
lines changed

4 files changed

+85
-54
lines changed

include/linux/rv.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
#include <linux/types.h>
1111
#include <linux/list.h>
1212

13-
#define MAX_DA_NAME_LEN 32
13+
#define MAX_DA_NAME_LEN 32
14+
#define MAX_DA_RETRY_RACING_EVENTS 3
1415

1516
#ifdef CONFIG_RV
1617
#include <linux/bitops.h>

include/rv/da_monitor.h

Lines changed: 54 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -54,23 +54,6 @@ static inline void da_monitor_reset_##name(struct da_monitor *da_mon) \
5454
da_mon->curr_state = model_get_initial_state_##name(); \
5555
} \
5656
\
57-
/* \
58-
* da_monitor_curr_state_##name - return the current state \
59-
*/ \
60-
static inline type da_monitor_curr_state_##name(struct da_monitor *da_mon) \
61-
{ \
62-
return da_mon->curr_state; \
63-
} \
64-
\
65-
/* \
66-
* da_monitor_set_state_##name - set the new current state \
67-
*/ \
68-
static inline void \
69-
da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name state) \
70-
{ \
71-
da_mon->curr_state = state; \
72-
} \
73-
\
7457
/* \
7558
* da_monitor_start_##name - start monitoring \
7659
* \
@@ -127,63 +110,81 @@ static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon)
127110
* Event handler for implicit monitors. Implicit monitor is the one which the
128111
* handler does not need to specify which da_monitor to manipulate. Examples
129112
* of implicit monitor are the per_cpu or the global ones.
113+
*
114+
* Retry in case there is a race between getting and setting the next state,
115+
* warn and reset the monitor if it runs out of retries. The monitor should be
116+
* able to handle various orders.
130117
*/
131118
#define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \
132119
\
133120
static inline bool \
134121
da_event_##name(struct da_monitor *da_mon, enum events_##name event) \
135122
{ \
136-
type curr_state = da_monitor_curr_state_##name(da_mon); \
137-
type next_state = model_get_next_state_##name(curr_state, event); \
138-
\
139-
if (next_state != INVALID_STATE) { \
140-
da_monitor_set_state_##name(da_mon, next_state); \
141-
\
142-
trace_event_##name(model_get_state_name_##name(curr_state), \
143-
model_get_event_name_##name(event), \
144-
model_get_state_name_##name(next_state), \
145-
model_is_final_state_##name(next_state)); \
146-
\
147-
return true; \
123+
enum states_##name curr_state, next_state; \
124+
\
125+
curr_state = READ_ONCE(da_mon->curr_state); \
126+
for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \
127+
next_state = model_get_next_state_##name(curr_state, event); \
128+
if (next_state == INVALID_STATE) { \
129+
cond_react_##name(curr_state, event); \
130+
trace_error_##name(model_get_state_name_##name(curr_state), \
131+
model_get_event_name_##name(event)); \
132+
return false; \
133+
} \
134+
if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) { \
135+
trace_event_##name(model_get_state_name_##name(curr_state), \
136+
model_get_event_name_##name(event), \
137+
model_get_state_name_##name(next_state), \
138+
model_is_final_state_##name(next_state)); \
139+
return true; \
140+
} \
148141
} \
149142
\
150-
cond_react_##name(curr_state, event); \
151-
\
152-
trace_error_##name(model_get_state_name_##name(curr_state), \
153-
model_get_event_name_##name(event)); \
154-
\
143+
trace_rv_retries_error(#name, model_get_event_name_##name(event)); \
144+
pr_warn("rv: " __stringify(MAX_DA_RETRY_RACING_EVENTS) \
145+
" retries reached for event %s, resetting monitor %s", \
146+
model_get_event_name_##name(event), #name); \
155147
return false; \
156148
} \
157149

158150
/*
159151
* Event handler for per_task monitors.
152+
*
153+
* Retry in case there is a race between getting and setting the next state,
154+
* warn and reset the monitor if it runs out of retries. The monitor should be
155+
* able to handle various orders.
160156
*/
161157
#define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \
162158
\
163159
static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \
164160
enum events_##name event) \
165161
{ \
166-
type curr_state = da_monitor_curr_state_##name(da_mon); \
167-
type next_state = model_get_next_state_##name(curr_state, event); \
168-
\
169-
if (next_state != INVALID_STATE) { \
170-
da_monitor_set_state_##name(da_mon, next_state); \
171-
\
172-
trace_event_##name(tsk->pid, \
173-
model_get_state_name_##name(curr_state), \
174-
model_get_event_name_##name(event), \
175-
model_get_state_name_##name(next_state), \
176-
model_is_final_state_##name(next_state)); \
177-
\
178-
return true; \
162+
enum states_##name curr_state, next_state; \
163+
\
164+
curr_state = READ_ONCE(da_mon->curr_state); \
165+
for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \
166+
next_state = model_get_next_state_##name(curr_state, event); \
167+
if (next_state == INVALID_STATE) { \
168+
cond_react_##name(curr_state, event); \
169+
trace_error_##name(tsk->pid, \
170+
model_get_state_name_##name(curr_state), \
171+
model_get_event_name_##name(event)); \
172+
return false; \
173+
} \
174+
if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) { \
175+
trace_event_##name(tsk->pid, \
176+
model_get_state_name_##name(curr_state), \
177+
model_get_event_name_##name(event), \
178+
model_get_state_name_##name(next_state), \
179+
model_is_final_state_##name(next_state)); \
180+
return true; \
181+
} \
179182
} \
180183
\
181-
cond_react_##name(curr_state, event); \
182-
\
183-
trace_error_##name(tsk->pid, \
184-
model_get_state_name_##name(curr_state), \
185-
model_get_event_name_##name(event)); \
186-
\
184+
trace_rv_retries_error(#name, model_get_event_name_##name(event)); \
185+
pr_warn("rv: " __stringify(MAX_DA_RETRY_RACING_EVENTS) \
186+
" retries reached for event %s, resetting monitor %s", \
187+
model_get_event_name_##name(event), #name); \
187188
return false; \
188189
}
189190

kernel/trace/rv/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@
33
config RV_MON_EVENTS
44
bool
55

6+
config RV_MON_MAINTENANCE_EVENTS
7+
bool
8+
69
config DA_MON_EVENTS_IMPLICIT
710
select RV_MON_EVENTS
11+
select RV_MON_MAINTENANCE_EVENTS
812
bool
913

1014
config DA_MON_EVENTS_ID
1115
select RV_MON_EVENTS
16+
select RV_MON_MAINTENANCE_EVENTS
1217
bool
1318

1419
config LTL_MON_EVENTS_ID

kernel/trace/rv/rv_trace.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,30 @@ DECLARE_EVENT_CLASS(error_ltl_monitor_id,
176176
#include <monitors/sleep/sleep_trace.h>
177177
// Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here
178178
#endif /* CONFIG_LTL_MON_EVENTS_ID */
179+
180+
#ifdef CONFIG_RV_MON_MAINTENANCE_EVENTS
181+
/* Tracepoint useful for monitors development, currenly only used in DA */
182+
TRACE_EVENT(rv_retries_error,
183+
184+
TP_PROTO(char *name, char *event),
185+
186+
TP_ARGS(name, event),
187+
188+
TP_STRUCT__entry(
189+
__string( name, name )
190+
__string( event, event )
191+
),
192+
193+
TP_fast_assign(
194+
__assign_str(name);
195+
__assign_str(event);
196+
),
197+
198+
TP_printk(__stringify(MAX_DA_RETRY_RACING_EVENTS)
199+
" retries reached for event %s, resetting monitor %s",
200+
__get_str(event), __get_str(name))
201+
);
202+
#endif /* CONFIG_RV_MON_MAINTENANCE_EVENTS */
179203
#endif /* _TRACE_RV_H */
180204

181205
/* This part must be outside protection */

0 commit comments

Comments
 (0)