2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

rv: Retry when da monitor detects race conditions

A DA monitor can be accessed from multiple cores simultaneously. This is
likely, for instance, when dealing with per-task monitors reacting to
events that do not always occur on the CPU where the task is running.
This can cause race conditions where two events change the next state
and we see inconsistent values. E.g.:

  [62] event_srs: 27: sleepable x sched_wakeup -> running (final)
  [63] event_srs: 27: sleepable x sched_set_state_sleepable -> sleepable
  [63] error_srs: 27: event sched_switch_suspend not expected in the state running

In this case the monitor fails because the event on CPU 62 wins against
the one on CPU 63, although the correct state should have been
sleepable, since the task gets suspended.

Detect if the current state was modified by using try_cmpxchg while
storing the next value. If it was, try again reading the current state.
After a maximum number of failed retries, react by calling a special
tracepoint, print on the console and reset the monitor.

Remove the functions da_monitor_curr_state() and da_monitor_set_state()
as they only hide the underlying implementation in this case.

Monitors where this type of condition can occur must be able to account
for racing events in any possible order, as we cannot know the winner.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Tomas Glozar <tglozar@redhat.com>
Cc: Juri Lelli <jlelli@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/20250728135022.255578-6-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Reviewed-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This commit is contained in:
Gabriele Monaco 2025-07-28 15:50:17 +02:00 committed by Steven Rostedt (Google)
parent 79de661707
commit 9d475d80c9
4 changed files with 83 additions and 52 deletions

View File

@ -11,6 +11,7 @@
#include <linux/list.h> #include <linux/list.h>
#define MAX_DA_NAME_LEN 32 #define MAX_DA_NAME_LEN 32
#define MAX_DA_RETRY_RACING_EVENTS 3
#ifdef CONFIG_RV #ifdef CONFIG_RV
#include <linux/bitops.h> #include <linux/bitops.h>

View File

@ -54,23 +54,6 @@ static inline void da_monitor_reset_##name(struct da_monitor *da_mon) \
da_mon->curr_state = model_get_initial_state_##name(); \ da_mon->curr_state = model_get_initial_state_##name(); \
} \ } \
\ \
/* \
* da_monitor_curr_state_##name - return the current state \
*/ \
static inline type da_monitor_curr_state_##name(struct da_monitor *da_mon) \
{ \
return da_mon->curr_state; \
} \
\
/* \
* da_monitor_set_state_##name - set the new current state \
*/ \
static inline void \
da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name state) \
{ \
da_mon->curr_state = state; \
} \
\
/* \ /* \
* da_monitor_start_##name - start monitoring \ * da_monitor_start_##name - start monitoring \
* \ * \
@ -127,63 +110,81 @@ static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon)
* Event handler for implicit monitors. Implicit monitor is the one which the * Event handler for implicit monitors. Implicit monitor is the one which the
* handler does not need to specify which da_monitor to manipulate. Examples * handler does not need to specify which da_monitor to manipulate. Examples
* of implicit monitor are the per_cpu or the global ones. * of implicit monitor are the per_cpu or the global ones.
*
* Retry in case there is a race between getting and setting the next state,
* warn and reset the monitor if it runs out of retries. The monitor should be
* able to handle various orders.
*/ */
#define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ #define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \
\ \
static inline bool \ static inline bool \
da_event_##name(struct da_monitor *da_mon, enum events_##name event) \ da_event_##name(struct da_monitor *da_mon, enum events_##name event) \
{ \ { \
type curr_state = da_monitor_curr_state_##name(da_mon); \ enum states_##name curr_state, next_state; \
type next_state = model_get_next_state_##name(curr_state, event); \
\
if (next_state != INVALID_STATE) { \
da_monitor_set_state_##name(da_mon, next_state); \
\ \
curr_state = READ_ONCE(da_mon->curr_state); \
for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \
next_state = model_get_next_state_##name(curr_state, event); \
if (next_state == INVALID_STATE) { \
cond_react_##name(curr_state, event); \
trace_error_##name(model_get_state_name_##name(curr_state), \
model_get_event_name_##name(event)); \
return false; \
} \
if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) { \
trace_event_##name(model_get_state_name_##name(curr_state), \ trace_event_##name(model_get_state_name_##name(curr_state), \
model_get_event_name_##name(event), \ model_get_event_name_##name(event), \
model_get_state_name_##name(next_state), \ model_get_state_name_##name(next_state), \
model_is_final_state_##name(next_state)); \ model_is_final_state_##name(next_state)); \
\
return true; \ return true; \
} \
} \ } \
\ \
cond_react_##name(curr_state, event); \ trace_rv_retries_error(#name, model_get_event_name_##name(event)); \
\ pr_warn("rv: " __stringify(MAX_DA_RETRY_RACING_EVENTS) \
trace_error_##name(model_get_state_name_##name(curr_state), \ " retries reached for event %s, resetting monitor %s", \
model_get_event_name_##name(event)); \ model_get_event_name_##name(event), #name); \
\
return false; \ return false; \
} \ } \
/* /*
* Event handler for per_task monitors. * Event handler for per_task monitors.
*
* Retry in case there is a race between getting and setting the next state,
* warn and reset the monitor if it runs out of retries. The monitor should be
* able to handle various orders.
*/ */
#define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \ #define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \
\ \
static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \ static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \
enum events_##name event) \ enum events_##name event) \
{ \ { \
type curr_state = da_monitor_curr_state_##name(da_mon); \ enum states_##name curr_state, next_state; \
type next_state = model_get_next_state_##name(curr_state, event); \
\
if (next_state != INVALID_STATE) { \
da_monitor_set_state_##name(da_mon, next_state); \
\ \
curr_state = READ_ONCE(da_mon->curr_state); \
for (int i = 0; i < MAX_DA_RETRY_RACING_EVENTS; i++) { \
next_state = model_get_next_state_##name(curr_state, event); \
if (next_state == INVALID_STATE) { \
cond_react_##name(curr_state, event); \
trace_error_##name(tsk->pid, \
model_get_state_name_##name(curr_state), \
model_get_event_name_##name(event)); \
return false; \
} \
if (likely(try_cmpxchg(&da_mon->curr_state, &curr_state, next_state))) { \
trace_event_##name(tsk->pid, \ trace_event_##name(tsk->pid, \
model_get_state_name_##name(curr_state), \ model_get_state_name_##name(curr_state), \
model_get_event_name_##name(event), \ model_get_event_name_##name(event), \
model_get_state_name_##name(next_state), \ model_get_state_name_##name(next_state), \
model_is_final_state_##name(next_state)); \ model_is_final_state_##name(next_state)); \
\
return true; \ return true; \
} \
} \ } \
\ \
cond_react_##name(curr_state, event); \ trace_rv_retries_error(#name, model_get_event_name_##name(event)); \
\ pr_warn("rv: " __stringify(MAX_DA_RETRY_RACING_EVENTS) \
trace_error_##name(tsk->pid, \ " retries reached for event %s, resetting monitor %s", \
model_get_state_name_##name(curr_state), \ model_get_event_name_##name(event), #name); \
model_get_event_name_##name(event)); \
\
return false; \ return false; \
} }

View File

@ -3,12 +3,17 @@
config RV_MON_EVENTS config RV_MON_EVENTS
bool bool
config RV_MON_MAINTENANCE_EVENTS
bool
config DA_MON_EVENTS_IMPLICIT config DA_MON_EVENTS_IMPLICIT
select RV_MON_EVENTS select RV_MON_EVENTS
select RV_MON_MAINTENANCE_EVENTS
bool bool
config DA_MON_EVENTS_ID config DA_MON_EVENTS_ID
select RV_MON_EVENTS select RV_MON_EVENTS
select RV_MON_MAINTENANCE_EVENTS
bool bool
config LTL_MON_EVENTS_ID config LTL_MON_EVENTS_ID

View File

@ -176,6 +176,30 @@ DECLARE_EVENT_CLASS(error_ltl_monitor_id,
#include <monitors/sleep/sleep_trace.h> #include <monitors/sleep/sleep_trace.h>
// Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here // Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here
#endif /* CONFIG_LTL_MON_EVENTS_ID */ #endif /* CONFIG_LTL_MON_EVENTS_ID */
#ifdef CONFIG_RV_MON_MAINTENANCE_EVENTS
/* Tracepoint useful for monitor development, currently only used in DA */
TRACE_EVENT(rv_retries_error,
TP_PROTO(char *name, char *event),
TP_ARGS(name, event),
TP_STRUCT__entry(
__string( name, name )
__string( event, event )
),
TP_fast_assign(
__assign_str(name);
__assign_str(event);
),
TP_printk(__stringify(MAX_DA_RETRY_RACING_EVENTS)
" retries reached for event %s, resetting monitor %s",
__get_str(event), __get_str(name))
);
#endif /* CONFIG_RV_MON_MAINTENANCE_EVENTS */
#endif /* _TRACE_RV_H */ #endif /* _TRACE_RV_H */
/* This part must be outside protection */ /* This part must be outside protection */