Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)
Scheduler updates for v6.16:

Core & fair scheduler changes:
 - Tweak wait_task_inactive() to force dequeue sched_delayed tasks (John Stultz)
 - Adhere to place_entity() constraints (Peter Zijlstra)
 - Allow decaying util_est when util_avg > CPU capacity (Pierre Gondois)
 - Fix up wake_up_sync() vs DELAYED_DEQUEUE (Xuewen Yan)

Energy management:
 - Introduce sched_update_asym_prefer_cpu() (K Prateek Nayak)
 - cpufreq/amd-pstate: Update asym_prefer_cpu when core rankings change (K Prateek Nayak)
 - Align uclamp and util_est and call before freq update (Xuewen Yan)

CPU isolation:
 - Make use of more than one housekeeping CPU (Phil Auld)

RT scheduler:
 - Fix race in push_rt_task() (Harshit Agarwal)
 - Add kernel cmdline option for rt_group_sched (Michal Koutný)

Scheduler topology support:
 - Improve topology_span_sane speed (Steve Wahl)

Scheduler debugging:
 - Move and extend the sched_process_exit() tracepoint (Andrii Nakryiko)
 - Add RT_GROUP WARN checks for non-root task_groups (Michal Koutný)
 - Fix trace_sched_switch(.prev_state) (Peter Zijlstra)
 - Untangle cond_resched() and live-patching (Peter Zijlstra)

Fixes and cleanups:
 - Misc fixes and cleanups (K Prateek Nayak, Michal Koutný, Peter Zijlstra, Xuewen Yan)

Signed-off-by: Ingo Molnar <mingo@kernel.org>

-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmgy50ARHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1jFQQ/+KXl2XDg1V/VVmMG8GmtDlR29V3M3ricy
D7/2s0D1Y1ErHb+pRMBG31EubT9/bXjUshWIuuf51DciSLBmpELHxY5J+AevRa0L
/pHFwSvP6H5pDakI/xZ01FlYt7PxZGs+1m1o2615Mbwq6J2bjZTan54CYzrdpLOy
Nqb3OT4tSqU1+7SV7hVForBpZp9u3CvVBRt/wE6vcHltW/I486bM8OCOd2XrUlnb
QoIRliGI9KHpqCpbAeKPRSKXpf9tZv/AijZ+0WUu2yY8iwSN4p3RbbbwdCipjVQj
w5I5oqKI6cylFfl2dEFWXVO+tLBihs06w8KSQrhYmQ9DUu4RGBVM9ORINGDBPejL
bvoQh1mAkqvIL+oodujdbMDIqLupvOEtVSvwzR7SJn8BJSB00js88ngCWLjo/CcU
imLbWy9FSBLvOswLBzQthgAJEj+ejCkOIbcvM2lINWhX/zNsMFaaqYcO1wRunGGR
SavTI1s+ZksCQY6vCwRkwPrOZjyg91TA/q4FK102fHL1IcthH6xubE4yi4lTIUYs
L56HuGm8e7Shc8M2Y5rAYsVG3GoIHFLXnptOn2HnCRWaAAJYsBaLUlzoBy9MxCfw
I2YVDCylkQxevosSi2XxXo3tbM6auISU9SelAT/dAz32V1rsjWQojRJXeGYKIbu7
KBuN/dLItW0=
=s/ra
-----END PGP SIGNATURE-----

Merge tag 'sched-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar (same summary as the tag message above).

* tag 'sched-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  sched/uclamp: Align uclamp and util_est and call before freq update
  sched/util_est: Simplify condition for util_est_{en,de}queue()
  sched/fair: Fixup wake_up_sync() vs DELAYED_DEQUEUE
  sched,livepatch: Untangle cond_resched() and live-patching
  sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks
  sched/fair: Adhere to place_entity() constraints
  sched/debug: Print the local group's asym_prefer_cpu
  cpufreq/amd-pstate: Update asym_prefer_cpu when core rankings change
  sched/topology: Introduce sched_update_asym_prefer_cpu()
  sched/fair: Use READ_ONCE() to read sg->asym_prefer_cpu
  sched/isolation: Make use of more than one housekeeping cpu
  sched/rt: Fix race in push_rt_task
  sched: Add annotations to RT_GROUP_SCHED fields
  sched: Add RT_GROUP WARN checks for non-root task_groups
  sched: Do not construct nor expose RT_GROUP_SCHED structures if disabled
  sched: Bypass bandwitdh checks with runtime disabled RT_GROUP_SCHED
  sched: Skip non-root task_groups with disabled RT_GROUP_SCHED
  sched: Add commadline option for RT_GROUP_SCHED toggling
  sched: Always initialize rt_rq's task_group
  sched: Remove unneeed macro wrap
  ...
This commit is contained in: commit eaed94d1f6
@@ -6320,6 +6320,11 @@
                        Memory area to be used by remote processor image,
                        managed by CMA.

    rt_group_sched= [KNL] Enable or disable SCHED_RR/FIFO group scheduling
                        when CONFIG_RT_GROUP_SCHED=y. Defaults to
                        !CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
                        Format: <bool>

    rw              [KNL] Mount root device read-write on boot

    S               [KNL] Run init in single mode
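For illustration only (not part of the patch): a kernel built with CONFIG_RT_GROUP_SCHED=y and CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED=y keeps RT group scheduling off unless the boot command line opts back in, e.g.:

    rt_group_sched=1

and conversely booting with rt_group_sched=0 disables it on a kernel where the default-disabled option is not set.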
@@ -831,8 +831,10 @@ static void amd_pstate_update_limits(unsigned int cpu)
    if (highest_perf_changed) {
        WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

        if (cur_high < CPPC_MAX_PERF)
        if (cur_high < CPPC_MAX_PERF) {
            sched_set_itmt_core_prio((int)cur_high, cpu);
            sched_update_asym_prefer_cpu(cpu, prev_high, cur_high);
        }
    }
}
@@ -3,27 +3,23 @@
#define _LINUX_LIVEPATCH_SCHED_H_

#include <linux/jump_label.h>
#include <linux/static_call_types.h>
#include <linux/sched.h>

#ifdef CONFIG_LIVEPATCH

void __klp_sched_try_switch(void);

#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);

static __always_inline void klp_sched_try_switch(void)
static __always_inline void klp_sched_try_switch(struct task_struct *curr)
{
    if (static_branch_unlikely(&klp_sched_try_switch_key))
    if (static_branch_unlikely(&klp_sched_try_switch_key) &&
        READ_ONCE(curr->__state) & TASK_FREEZABLE)
        __klp_sched_try_switch();
}

#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* !CONFIG_LIVEPATCH */
static inline void klp_sched_try_switch(void) {}
static inline void __klp_sched_try_switch(void) {}
static inline void klp_sched_try_switch(struct task_struct *curr) {}
#endif /* CONFIG_LIVEPATCH */

#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
@@ -44,7 +44,6 @@
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
#include <linux/livepatch_sched.h>
#include <linux/uidgid_types.h>
#include <linux/tracepoint-defs.h>
#include <asm/kmap_size.h>
@@ -2089,9 +2088,6 @@ extern int __cond_resched(void);

#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

void sched_dynamic_klp_enable(void);
void sched_dynamic_klp_disable(void);

DECLARE_STATIC_CALL(cond_resched, __cond_resched);

static __always_inline int _cond_resched(void)
@@ -2112,7 +2108,6 @@ static __always_inline int _cond_resched(void)

static inline int _cond_resched(void)
{
    klp_sched_try_switch();
    return __cond_resched();
}

@@ -2122,7 +2117,6 @@ static inline int _cond_resched(void)

static inline int _cond_resched(void)
{
    klp_sched_try_switch();
    return 0;
}

@@ -195,6 +195,8 @@ struct sched_domain_topology_level {
};

extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);


# define SD_INIT_NAME(type) .name = #type

@@ -223,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu)
    return true;
}

static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
{
}

#endif /* !CONFIG_SMP */

#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
@@ -326,11 +326,37 @@ DEFINE_EVENT(sched_process_template, sched_process_free,
         TP_ARGS(p));

/*
 * Tracepoint for a task exiting:
 * Tracepoint for a task exiting.
 * Note, it's a superset of sched_process_template and should be kept
 * compatible as much as possible. sched_process_exits has an extra
 * `group_dead` argument, so sched_process_template can't be used,
 * unfortunately, just like sched_migrate_task above.
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
         TP_PROTO(struct task_struct *p),
         TP_ARGS(p));
TRACE_EVENT(sched_process_exit,

    TP_PROTO(struct task_struct *p, bool group_dead),

    TP_ARGS(p, group_dead),

    TP_STRUCT__entry(
        __array( char, comm, TASK_COMM_LEN )
        __field( pid_t, pid )
        __field( int, prio )
        __field( bool, group_dead )
    ),

    TP_fast_assign(
        memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
        __entry->pid = p->pid;
        __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
        __entry->group_dead = group_dead;
    ),

    TP_printk("comm=%s pid=%d prio=%d group_dead=%s",
          __entry->comm, __entry->pid, __entry->prio,
          __entry->group_dead ? "true" : "false"
    )
);

/*
 * Tracepoint for waiting on task to unschedule:
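Not part of the diff: a minimal, hypothetical sketch of how a consumer of this tracepoint would adapt to the extra group_dead argument. It assumes the usual tracepoint convention that probes receive a void *data cookie ahead of the TP_PROTO arguments, and that the registration helpers generated by TRACE_EVENT() are reachable from the caller (built-in code, or a module if the tracepoint symbol is exported).

    #include <linux/module.h>
    #include <linux/sched.h>
    #include <trace/events/sched.h>

    /* Hypothetical probe matching the new TP_PROTO(struct task_struct *p, bool group_dead). */
    static void probe_process_exit(void *data, struct task_struct *p, bool group_dead)
    {
        pr_info("exit: comm=%s pid=%d group_dead=%d\n", p->comm, p->pid, group_dead);
    }

    static int __init exit_probe_init(void)
    {
        /* register_/unregister_trace_sched_process_exit() are generated by TRACE_EVENT() above. */
        return register_trace_sched_process_exit(probe_process_exit, NULL);
    }

    static void __exit exit_probe_exit(void)
    {
        unregister_trace_sched_process_exit(probe_process_exit, NULL);
        tracepoint_synchronize_unregister();
    }

    module_init(exit_probe_init);
    module_exit(exit_probe_exit);
    MODULE_LICENSE("GPL");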
init/Kconfig (11)
@@ -1075,6 +1075,17 @@ config RT_GROUP_SCHED
      realtime bandwidth for them.
      See Documentation/scheduler/sched-rt-group.rst for more information.

config RT_GROUP_SCHED_DEFAULT_DISABLED
    bool "Require boot parameter to enable group scheduling for SCHED_RR/FIFO"
    depends on RT_GROUP_SCHED
    default n
    help
      When set, the RT group scheduling is disabled by default. The option
      is in inverted form so that mere RT_GROUP_SCHED enables the group
      scheduling.

      Say N if unsure.

config EXT_GROUP_SCHED
    bool
    depends on SCHED_CLASS_EXT && CGROUP_SCHED
@@ -942,12 +942,12 @@ void __noreturn do_exit(long code)

    tsk->exit_code = code;
    taskstats_exit(tsk, group_dead);
    trace_sched_process_exit(tsk, group_dead);

    exit_mm();

    if (group_dead)
        acct_process();
    trace_sched_process_exit(tsk);

    exit_sem(tsk);
    exit_shm(tsk);
@@ -29,22 +29,13 @@ static unsigned int klp_signals_cnt;

/*
 * When a livepatch is in progress, enable klp stack checking in
 * cond_resched(). This helps CPU-bound kthreads get patched.
 * schedule(). This helps CPU-bound kthreads get patched.
 */
#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

#define klp_cond_resched_enable() sched_dynamic_klp_enable()
#define klp_cond_resched_disable() sched_dynamic_klp_disable()

#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
EXPORT_SYMBOL(klp_sched_try_switch_key);

#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)

#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
#define klp_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
#define klp_resched_disable() static_branch_disable(&klp_sched_try_switch_key)

/*
 * This work can be performed periodically to finish patching or unpatching any
@@ -365,27 +356,19 @@ static bool klp_try_switch_task(struct task_struct *task)

void __klp_sched_try_switch(void)
{
    /*
     * This function is called from __schedule() while a context switch is
     * about to happen. Preemption is already disabled and klp_mutex
     * can't be acquired.
     * Disabled preemption is used to prevent racing with other callers of
     * klp_try_switch_task(). Thanks to task_call_func() they won't be
     * able to switch to this task while it's running.
     */
    lockdep_assert_preemption_disabled();

    if (likely(!klp_patch_pending(current)))
        return;

    /*
     * This function is called from cond_resched() which is called in many
     * places throughout the kernel. Using the klp_mutex here might
     * deadlock.
     *
     * Instead, disable preemption to prevent racing with other callers of
     * klp_try_switch_task(). Thanks to task_call_func() they won't be
     * able to switch this task while it's running.
     */
    preempt_disable();

    /*
     * Make sure current didn't get patched between the above check and
     * preempt_disable().
     */
    if (unlikely(!klp_patch_pending(current)))
        goto out;

    /*
     * Enforce the order of the TIF_PATCH_PENDING read above and the
     * klp_target_state read in klp_try_switch_task(). The corresponding
@@ -395,11 +378,7 @@ void __klp_sched_try_switch(void)
    smp_rmb();

    klp_try_switch_task(current);

out:
    preempt_enable();
}
EXPORT_SYMBOL(__klp_sched_try_switch);

/*
 * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
@@ -508,7 +487,7 @@ void klp_try_complete_transition(void)
    }

    /* Done! Now cleanup the data structures. */
    klp_cond_resched_disable();
    klp_resched_disable();
    patch = klp_transition_patch;
    klp_complete_transition();

@@ -560,7 +539,7 @@ void klp_start_transition(void)
            set_tsk_thread_flag(task, TIF_PATCH_PENDING);
    }

    klp_cond_resched_enable();
    klp_resched_enable();

    klp_signals_cnt = 0;
}
@@ -66,6 +66,7 @@
#include <linux/vtime.h>
#include <linux/wait_api.h>
#include <linux/workqueue_api.h>
#include <linux/livepatch_sched.h>

#ifdef CONFIG_PREEMPT_DYNAMIC
# ifdef CONFIG_GENERIC_ENTRY
@@ -1752,7 +1753,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
    }
}

static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
{
    enum uclamp_id clamp_id;

@@ -1768,7 +1769,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
    if (unlikely(!p->sched_class->uclamp_enabled))
        return;

    if (p->se.sched_delayed)
    /* Only inc the delayed task which being woken up. */
    if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
        return;

    for_each_clamp_id(clamp_id)
@@ -2036,7 +2038,7 @@ static void __init init_uclamp(void)
}

#else /* !CONFIG_UCLAMP_TASK */
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
@@ -2072,12 +2074,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
    if (!(flags & ENQUEUE_NOCLOCK))
        update_rq_clock(rq);

    p->sched_class->enqueue_task(rq, p, flags);
    /*
     * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
     * ->sched_delayed.
     * Can be before ->enqueue_task() because uclamp considers the
     * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
     * in ->enqueue_task().
     */
    uclamp_rq_inc(rq, p);
    uclamp_rq_inc(rq, p, flags);

    p->sched_class->enqueue_task(rq, p, flags);

    psi_enqueue(p, flags);
@@ -2283,6 +2287,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
         * just go back and repeat.
         */
        rq = task_rq_lock(p, &rf);
        /*
         * If task is sched_delayed, force dequeue it, to avoid always
         * hitting the tick timeout in the queued case
         */
        if (p->se.sched_delayed)
            dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
        trace_sched_wait_task(p);
        running = task_on_cpu(rq, p);
        queued = task_on_rq_queued(p);
@@ -6571,12 +6581,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 * Otherwise marks the task's __state as RUNNING
 */
static bool try_to_block_task(struct rq *rq, struct task_struct *p,
                  unsigned long task_state)
                  unsigned long *task_state_p)
{
    unsigned long task_state = *task_state_p;
    int flags = DEQUEUE_NOCLOCK;

    if (signal_pending_state(task_state, p)) {
        WRITE_ONCE(p->__state, TASK_RUNNING);
        *task_state_p = TASK_RUNNING;
        return false;
    }

@@ -6668,6 +6680,8 @@ static void __sched notrace __schedule(int sched_mode)
    if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
        hrtick_clear(rq);

    klp_sched_try_switch(prev);

    local_irq_disable();
    rcu_note_context_switch(preempt);

@@ -6713,7 +6727,7 @@
            goto picked;
        }
    } else if (!preempt && prev_state) {
        try_to_block_task(rq, prev, prev_state);
        try_to_block_task(rq, prev, &prev_state);
        switch_count = &prev->nvcsw;
    }

@@ -7328,7 +7342,6 @@ EXPORT_STATIC_CALL_TRAMP(might_resched);
static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
int __sched dynamic_cond_resched(void)
{
    klp_sched_try_switch();
    if (!static_branch_unlikely(&sk_dynamic_cond_resched))
        return 0;
    return __cond_resched();
@@ -7500,7 +7513,6 @@ int sched_dynamic_mode(const char *str)
#endif

static DEFINE_MUTEX(sched_dynamic_mutex);
static bool klp_override;

static void __sched_dynamic_update(int mode)
{
@@ -7508,7 +7520,6 @@ static void __sched_dynamic_update(int mode)
     * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
     * the ZERO state, which is invalid.
     */
    if (!klp_override)
        preempt_dynamic_enable(cond_resched);
    preempt_dynamic_enable(might_resched);
    preempt_dynamic_enable(preempt_schedule);
@@ -7518,7 +7529,6 @@ static void __sched_dynamic_update(int mode)

    switch (mode) {
    case preempt_dynamic_none:
        if (!klp_override)
            preempt_dynamic_enable(cond_resched);
        preempt_dynamic_disable(might_resched);
        preempt_dynamic_disable(preempt_schedule);
@@ -7530,7 +7540,6 @@ static void __sched_dynamic_update(int mode)
        break;

    case preempt_dynamic_voluntary:
        if (!klp_override)
            preempt_dynamic_enable(cond_resched);
        preempt_dynamic_enable(might_resched);
        preempt_dynamic_disable(preempt_schedule);
@@ -7542,7 +7551,6 @@ static void __sched_dynamic_update(int mode)
        break;

    case preempt_dynamic_full:
        if (!klp_override)
            preempt_dynamic_disable(cond_resched);
        preempt_dynamic_disable(might_resched);
        preempt_dynamic_enable(preempt_schedule);
@@ -7554,7 +7562,6 @@ static void __sched_dynamic_update(int mode)
        break;

    case preempt_dynamic_lazy:
        if (!klp_override)
            preempt_dynamic_disable(cond_resched);
        preempt_dynamic_disable(might_resched);
        preempt_dynamic_enable(preempt_schedule);
@@ -7576,36 +7583,6 @@ void sched_dynamic_update(int mode)
    mutex_unlock(&sched_dynamic_mutex);
}

#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL

static int klp_cond_resched(void)
{
    __klp_sched_try_switch();
    return __cond_resched();
}

void sched_dynamic_klp_enable(void)
{
    mutex_lock(&sched_dynamic_mutex);

    klp_override = true;
    static_call_update(cond_resched, klp_cond_resched);

    mutex_unlock(&sched_dynamic_mutex);
}

void sched_dynamic_klp_disable(void)
{
    mutex_lock(&sched_dynamic_mutex);

    klp_override = false;
    __sched_dynamic_update(preempt_dynamic_mode);

    mutex_unlock(&sched_dynamic_mutex);
}

#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

static int __init setup_preempt_mode(char *str)
{
    int mode = sched_dynamic_mode(str);
@@ -9018,7 +8995,7 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
    unsigned long flags;

    spin_lock_irqsave(&task_group_lock, flags);
    list_add_rcu(&tg->list, &task_groups);
    list_add_tail_rcu(&tg->list, &task_groups);

    /* Root should already exist: */
    WARN_ON(!parent);
@@ -9204,11 +9181,15 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
    struct task_struct *task;
    struct cgroup_subsys_state *css;

    if (!rt_group_sched_enabled())
        goto scx_check;

    cgroup_taskset_for_each(task, css, tset) {
        if (!sched_rt_can_attach(css_tg(css), task))
            return -EINVAL;
    }
#endif
scx_check:
#endif /* CONFIG_RT_GROUP_SCHED */
    return scx_cgroup_can_attach(tset);
}

@@ -9861,18 +9842,6 @@ static struct cftype cpu_legacy_files[] = {
        .seq_show = cpu_cfs_local_stat_show,
    },
#endif
#ifdef CONFIG_RT_GROUP_SCHED
    {
        .name = "rt_runtime_us",
        .read_s64 = cpu_rt_runtime_read,
        .write_s64 = cpu_rt_runtime_write,
    },
    {
        .name = "rt_period_us",
        .read_u64 = cpu_rt_period_read_uint,
        .write_u64 = cpu_rt_period_write_uint,
    },
#endif
#ifdef CONFIG_UCLAMP_TASK_GROUP
    {
        .name = "uclamp.min",
@@ -9890,6 +9859,55 @@ static struct cftype cpu_legacy_files[] = {
    { } /* Terminate */
};

#ifdef CONFIG_RT_GROUP_SCHED
static struct cftype rt_group_files[] = {
    {
        .name = "rt_runtime_us",
        .read_s64 = cpu_rt_runtime_read,
        .write_s64 = cpu_rt_runtime_write,
    },
    {
        .name = "rt_period_us",
        .read_u64 = cpu_rt_period_read_uint,
        .write_u64 = cpu_rt_period_write_uint,
    },
    { } /* Terminate */
};

# ifdef CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED
DEFINE_STATIC_KEY_FALSE(rt_group_sched);
# else
DEFINE_STATIC_KEY_TRUE(rt_group_sched);
# endif

static int __init setup_rt_group_sched(char *str)
{
    long val;

    if (kstrtol(str, 0, &val) || val < 0 || val > 1) {
        pr_warn("Unable to set rt_group_sched\n");
        return 1;
    }
    if (val)
        static_branch_enable(&rt_group_sched);
    else
        static_branch_disable(&rt_group_sched);

    return 1;
}
__setup("rt_group_sched=", setup_rt_group_sched);

static int __init cpu_rt_group_init(void)
{
    if (!rt_group_sched_enabled())
        return 0;

    WARN_ON(cgroup_add_legacy_cftypes(&cpu_cgrp_subsys, rt_group_files));
    return 0;
}
subsys_initcall(cpu_rt_group_init);
#endif /* CONFIG_RT_GROUP_SCHED */

static int cpu_extra_stat_show(struct seq_file *sf,
                   struct cgroup_subsys_state *css)
{
@@ -588,6 +588,10 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
    debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
    debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
    debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);

    if (sd->flags & SD_ASYM_PACKING)
        debugfs_create_u32("group_asym_prefer_cpu", 0444, parent,
                   (u32 *)&sd->groups->asym_prefer_cpu);
}

void update_sched_domain_debugfs(void)
@@ -3795,6 +3795,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
        update_entity_lag(cfs_rq, se);
        se->deadline -= se->vruntime;
        se->rel_deadline = 1;
        cfs_rq->nr_queued--;
        if (!curr)
            __dequeue_entity(cfs_rq, se);
        update_load_sub(&cfs_rq->load, se->load.weight);
@@ -3821,10 +3822,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,

    enqueue_load_avg(cfs_rq, se);
    if (se->on_rq) {
        update_load_add(&cfs_rq->load, se->load.weight);
        place_entity(cfs_rq, se, 0);
        update_load_add(&cfs_rq->load, se->load.weight);
        if (!curr)
            __enqueue_entity(cfs_rq, se);
        cfs_rq->nr_queued++;

        /*
         * The entity's vruntime has been adjusted, so let's check
@@ -4932,13 +4934,6 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
    if (last_ewma_diff < UTIL_EST_MARGIN)
        goto done;

    /*
     * To avoid overestimation of actual task utilization, skip updates if
     * we cannot grant there is idle time in this CPU.
     */
    if (dequeued > arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq))))
        return;

    /*
     * To avoid underestimate of task utilization, skip updates of EWMA if
     * we cannot grant that thread got all CPU time it wanted.
@@ -6941,7 +6936,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     * Let's add the task's estimated utilization to the cfs_rq's
     * estimated utilization, before we update schedutil.
     */
    if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
    if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
        util_est_enqueue(&rq->cfs, p);

    if (flags & ENQUEUE_DELAYED) {
@@ -7181,7 +7176,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 */
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
    if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
    if (!p->se.sched_delayed)
        util_est_dequeue(&rq->cfs, p);

    util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
@@ -7196,6 +7191,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
    return true;
}

static inline unsigned int cfs_h_nr_delayed(struct rq *rq)
{
    return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable);
}

#ifdef CONFIG_SMP

/* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */
@@ -7357,8 +7357,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
    if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
        return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;

    if (sync && cpu_rq(this_cpu)->nr_running == 1)
    if (sync) {
        struct rq *rq = cpu_rq(this_cpu);

        if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1)
            return this_cpu;
    }

    if (available_idle_cpu(prev_cpu))
        return prev_cpu;
@@ -10256,7 +10260,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group
        (sgs->group_weight - sgs->idle_cpus != 1))
        return false;

    return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
    return sched_asym(env->sd, env->dst_cpu, READ_ONCE(group->asym_prefer_cpu));
}

/* One group has more than one SMT CPU while the other group does not */
@@ -10493,7 +10497,8 @@ static bool update_sd_pick_busiest(struct lb_env *env,

    case group_asym_packing:
        /* Prefer to move from lowest priority CPU's work */
        return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
        return sched_asym_prefer(READ_ONCE(sds->busiest->asym_prefer_cpu),
                     READ_ONCE(sg->asym_prefer_cpu));

    case group_misfit_task:
        /*
@@ -40,7 +40,7 @@ int housekeeping_any_cpu(enum hk_type type)
        if (cpu < nr_cpu_ids)
            return cpu;

        cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
        cpu = cpumask_any_and_distribute(housekeeping.cpumasks[type], cpu_online_mask);
        if (likely(cpu < nr_cpu_ids))
            return cpu;
        /*
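Not part of the diff: a hedged note on why the helper swap matters. cpumask_any_and() deterministically returns the first CPU of the intersection, so every caller of housekeeping_any_cpu() would land on the same housekeeping CPU, whereas cpumask_any_and_distribute() remembers the previous pick and rotates, spreading successive callers across all housekeeping CPUs. A minimal, hypothetical sketch of the same pattern:

    #include <linux/cpumask.h>

    /* Hypothetical helper: pick an online CPU from a housekeeping mask,
     * rotating through the candidates instead of always returning the
     * first one, so repeated callers spread across the whole mask. */
    static unsigned int pick_hk_cpu(const struct cpumask *hk_mask)
    {
        return cpumask_any_and_distribute(hk_mask, cpu_online_mask);
    }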
@@ -89,6 +89,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
    rt_rq->rt_throttled = 0;
    rt_rq->rt_runtime = 0;
    raw_spin_lock_init(&rt_rq->rt_runtime_lock);
    rt_rq->tg = &root_task_group;
#endif
}

@@ -175,11 +176,14 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
    /* Cannot fold with non-CONFIG_RT_GROUP_SCHED version, layout */
    WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
    return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
    WARN_ON(!rt_group_sched_enabled() && rt_se->rt_rq->tg != &root_task_group);
    return rt_se->rt_rq;
}

@@ -187,11 +191,15 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
    struct rt_rq *rt_rq = rt_se->rt_rq;

    WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
    return rt_rq->rq;
}

void unregister_rt_sched_group(struct task_group *tg)
{
    if (!rt_group_sched_enabled())
        return;

    if (tg->rt_se)
        destroy_rt_bandwidth(&tg->rt_bandwidth);
}
@@ -200,6 +208,9 @@ void free_rt_sched_group(struct task_group *tg)
{
    int i;

    if (!rt_group_sched_enabled())
        return;

    for_each_possible_cpu(i) {
        if (tg->rt_rq)
            kfree(tg->rt_rq[i]);
@@ -244,6 +255,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
    struct sched_rt_entity *rt_se;
    int i;

    if (!rt_group_sched_enabled())
        return 1;

    tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
    if (!tg->rt_rq)
        goto err;
@@ -482,9 +496,6 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
    if (!rt_rq->tg)
        return RUNTIME_INF;

    return rt_rq->rt_runtime;
}

@@ -497,6 +508,11 @@ typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
    if (!rt_group_sched_enabled()) {
        WARN_ON(tg != &root_task_group);
        return NULL;
    }

    do {
        tg = list_entry_rcu(tg->list.next,
            typeof(struct task_group), list);
@@ -509,9 +525,9 @@ static inline struct task_group *next_task_group(struct task_group *tg)
}

#define for_each_rt_rq(rt_rq, iter, rq) \
    for (iter = container_of(&task_groups, typeof(*iter), list); \
        (iter = next_task_group(iter)) && \
        (rt_rq = iter->rt_rq[cpu_of(rq)]);)
    for (iter = &root_task_group; \
        iter && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
        iter = next_task_group(iter))

#define for_each_sched_rt_entity(rt_se) \
    for (; rt_se; rt_se = rt_se->parent)
@@ -1066,13 +1082,12 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
    struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
    /*
     * Change rq's cpupri only if rt_rq is the top queue.
     */
    if (&rq->rt != rt_rq)
    if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
        return;
#endif

    if (rq->online && prio < prev_prio)
        cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}
@@ -1082,13 +1097,12 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
    struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
    /*
     * Change rq's cpupri only if rt_rq is the top queue.
     */
    if (&rq->rt != rt_rq)
    if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
        return;
#endif

    if (rq->online && rt_rq->highest_prio.curr != prev_prio)
        cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
@@ -1156,7 +1170,6 @@ inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    if (rt_se_boosted(rt_se))
        rt_rq->rt_nr_boosted++;

    if (rt_rq->tg)
        start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

@@ -1257,11 +1270,9 @@ static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_arr
static inline struct sched_statistics *
__schedstats_from_rt_se(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
    /* schedstats is not supported for rt group. */
    if (!rt_entity_is_task(rt_se))
        return NULL;
#endif

    return &rt_task_of(rt_se)->stats;
}
@@ -1883,6 +1894,27 @@ static int find_lowest_rq(struct task_struct *task)
    return -1;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
    struct task_struct *p;

    if (!has_pushable_tasks(rq))
        return NULL;

    p = plist_first_entry(&rq->rt.pushable_tasks,
                  struct task_struct, pushable_tasks);

    BUG_ON(rq->cpu != task_cpu(p));
    BUG_ON(task_current(rq, p));
    BUG_ON(task_current_donor(rq, p));
    BUG_ON(p->nr_cpus_allowed <= 1);

    BUG_ON(!task_on_rq_queued(p));
    BUG_ON(!rt_task(p));

    return p;
}

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
@@ -1913,18 +1945,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
            /*
             * We had to unlock the run queue. In
             * the mean time, task could have
             * migrated already or had its affinity changed.
             * Also make sure that it wasn't scheduled on its rq.
             * migrated already or had its affinity changed,
             * therefore check if the task is still at the
             * head of the pushable tasks list.
             * It is possible the task was scheduled, set
             * "migrate_disabled" and then got preempted, so we must
             * check the task migration disable flag here too.
             */
            if (unlikely(task_rq(task) != rq ||
            if (unlikely(is_migration_disabled(task) ||
                     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
                     task_on_cpu(rq, task) ||
                     !rt_task(task) ||
                     is_migration_disabled(task) ||
                     !task_on_rq_queued(task))) {
                     task != pick_next_pushable_task(rq))) {

                double_unlock_balance(rq, lowest_rq);
                lowest_rq = NULL;
@@ -1944,27 +1974,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
    return lowest_rq;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
    struct task_struct *p;

    if (!has_pushable_tasks(rq))
        return NULL;

    p = plist_first_entry(&rq->rt.pushable_tasks,
                  struct task_struct, pushable_tasks);

    BUG_ON(rq->cpu != task_cpu(p));
    BUG_ON(task_current(rq, p));
    BUG_ON(task_current_donor(rq, p));
    BUG_ON(p->nr_cpus_allowed <= 1);

    BUG_ON(!task_on_rq_queued(p));
    BUG_ON(!rt_task(p));

    return p;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
@@ -2602,8 +2611,9 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu)
{
    struct rt_rq *rt_rq;

#ifdef CONFIG_RT_GROUP_SCHED
#ifdef CONFIG_RT_GROUP_SCHED // XXX maybe add task_rt_rq(), see also sched_rt_period_rt_rq
    rt_rq = task_group(p)->rt_rq[cpu];
    WARN_ON(!rt_group_sched_enabled() && rt_rq->tg != &root_task_group);
#else
    rt_rq = &cpu_rq(cpu)->rt;
#endif
@@ -2713,6 +2723,9 @@ static int tg_rt_schedulable(struct task_group *tg, void *data)
        tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
        return -EBUSY;

    if (WARN_ON(!rt_group_sched_enabled() && tg != &root_task_group))
        return -EBUSY;

    total = to_ratio(period, runtime);

    /*
@@ -2868,7 +2881,7 @@ static int sched_rt_global_constraints(void)
int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
    /* Don't accept real-time tasks when there is no way for them to run */
    if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
    if (rt_group_sched_enabled() && rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
        return 0;

    return 1;
@@ -813,15 +813,17 @@ struct rt_rq {

#ifdef CONFIG_RT_GROUP_SCHED
    int rt_throttled;
    u64 rt_time;
    u64 rt_runtime;
    u64 rt_time; /* consumed RT time, goes up in update_curr_rt */
    u64 rt_runtime; /* allotted RT time, "slice" from rt_bandwidth, RT sharing/balancing */
    /* Nests inside the rq lock: */
    raw_spinlock_t rt_runtime_lock;

    unsigned int rt_nr_boosted;

    struct rq *rq;
    struct task_group *tg;
    struct rq *rq; /* this is always top-level rq, cache? */
#endif
#ifdef CONFIG_CGROUP_SCHED
    struct task_group *tg; /* this tg has "this" rt_rq on given CPU for runnable entities */
#endif
};

@@ -1498,6 +1500,23 @@ static inline bool sched_group_cookie_match(struct rq *rq,
}

#endif /* !CONFIG_SCHED_CORE */
#ifdef CONFIG_RT_GROUP_SCHED
# ifdef CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED
DECLARE_STATIC_KEY_FALSE(rt_group_sched);
static inline bool rt_group_sched_enabled(void)
{
    return static_branch_unlikely(&rt_group_sched);
}
# else
DECLARE_STATIC_KEY_TRUE(rt_group_sched);
static inline bool rt_group_sched_enabled(void)
{
    return static_branch_likely(&rt_group_sched);
}
# endif /* CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED */
#else
# define rt_group_sched_enabled() false
#endif /* CONFIG_RT_GROUP_SCHED */

static inline void lockdep_assert_rq_held(struct rq *rq)
{
@@ -2146,6 +2165,13 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif

#ifdef CONFIG_RT_GROUP_SCHED
    /*
     * p->rt.rt_rq is NULL initially and it is easier to assign
     * root_task_group's rt_rq than switching in rt_rq_of_se()
     * Clobbers tg(!)
     */
    if (!rt_group_sched_enabled())
        tg = &root_task_group;
    p->rt.rt_rq  = tg->rt_rq[cpu];
    p->rt.parent = tg->rt_se[cpu];
#endif
@@ -634,13 +634,14 @@ change:
         * Do not allow real-time tasks into groups that have no runtime
         * assigned.
         */
        if (rt_bandwidth_enabled() && rt_policy(policy) &&
        if (rt_group_sched_enabled() &&
            rt_bandwidth_enabled() && rt_policy(policy) &&
            task_group(p)->rt_bandwidth.rt_runtime == 0 &&
            !task_group_is_autogroup(task_group(p))) {
            retval = -EPERM;
            goto unlock;
        }
#endif
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_SMP
        if (dl_bandwidth_enabled() && dl_policy(policy) &&
            !(attr->sched_flags & SCHED_FLAG_SUGOV)) {
@@ -1333,6 +1333,64 @@ next:
    update_group_capacity(sd, cpu);
}

#ifdef CONFIG_SMP

/* Update the "asym_prefer_cpu" when arch_asym_cpu_priority() changes. */
void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
{
    int asym_prefer_cpu = cpu;
    struct sched_domain *sd;

    guard(rcu)();

    for_each_domain(cpu, sd) {
        struct sched_group *sg;
        int group_cpu;

        if (!(sd->flags & SD_ASYM_PACKING))
            continue;

        /*
         * Groups of overlapping domain are replicated per NUMA
         * node and will require updating "asym_prefer_cpu" on
         * each local copy.
         *
         * If you are hitting this warning, consider moving
         * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
         * which is shared by all the overlapping groups.
         */
        WARN_ON_ONCE(sd->flags & SD_OVERLAP);

        sg = sd->groups;
        if (cpu != sg->asym_prefer_cpu) {
            /*
             * Since the parent is a superset of the current group,
             * if the cpu is not the "asym_prefer_cpu" at the
             * current level, it cannot be the preferred CPU at a
             * higher levels either.
             */
            if (!sched_asym_prefer(cpu, sg->asym_prefer_cpu))
                return;

            WRITE_ONCE(sg->asym_prefer_cpu, cpu);
            continue;
        }

        /* Ranking has improved; CPU is still the preferred one. */
        if (new_prio >= old_prio)
            continue;

        for_each_cpu(group_cpu, sched_group_span(sg)) {
            if (sched_asym_prefer(group_cpu, asym_prefer_cpu))
                asym_prefer_cpu = group_cpu;
        }

        WRITE_ONCE(sg->asym_prefer_cpu, asym_prefer_cpu);
    }
}

#endif /* CONFIG_SMP */

/*
 * Set of available CPUs grouped by their corresponding capacities
 * Each list entry contains a CPU mask reflecting CPUs that share the same
@@ -2098,7 +2156,7 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
    for (i = 0; i < sched_domains_numa_levels; i++) {
        if (!masks[i][j])
            break;
        cpu = cpumask_any_and(cpus, masks[i][j]);
        cpu = cpumask_any_and_distribute(cpus, masks[i][j]);
        if (cpu < nr_cpu_ids) {
            found = cpu;
            break;
@@ -2347,16 +2405,26 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve

/*
 * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
 * any two given CPUs at this (non-NUMA) topology level.
 * any two given CPUs on non-NUMA topology levels.
 */
static bool topology_span_sane(struct sched_domain_topology_level *tl,
                   const struct cpumask *cpu_map, int cpu)
static bool topology_span_sane(const struct cpumask *cpu_map)
{
    int i = cpu + 1;
    struct sched_domain_topology_level *tl;
    struct cpumask *covered, *id_seen;
    int cpu;

    lockdep_assert_held(&sched_domains_mutex);
    covered = sched_domains_tmpmask;
    id_seen = sched_domains_tmpmask2;

    for_each_sd_topology(tl) {

        /* NUMA levels are allowed to overlap */
        if (tl->flags & SDTL_OVERLAP)
            return true;
            continue;

        cpumask_clear(covered);
        cpumask_clear(id_seen);

        /*
         * Non-NUMA levels cannot partially overlap - they must be either
@@ -2364,18 +2432,27 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
         * breaking the sched_group lists - i.e. a later get_group() pass
         * breaks the linking done for an earlier span.
         */
        for_each_cpu_from(i, cpu_map) {
            /*
             * We should 'and' all those masks with 'cpu_map' to exactly
             * match the topology we're about to build, but that can only
             * remove CPUs, which only lessens our ability to detect
             * overlaps
             */
            if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
                cpumask_intersects(tl->mask(cpu), tl->mask(i)))
                return false;
        }
        for_each_cpu(cpu, cpu_map) {
            const struct cpumask *tl_cpu_mask = tl->mask(cpu);
            int id;

            /* lowest bit set in this mask is used as a unique id */
            id = cpumask_first(tl_cpu_mask);

            if (cpumask_test_cpu(id, id_seen)) {
                /* First CPU has already been seen, ensure identical spans */
                if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
                    return false;
            } else {
                /* First CPU hasn't been seen before, ensure it's a completely new span */
                if (cpumask_intersects(tl_cpu_mask, covered))
                    return false;

                cpumask_or(covered, covered, tl_cpu_mask);
                cpumask_set_cpu(id, id_seen);
            }
        }
    }
    return true;
}

@@ -2408,9 +2485,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        sd = NULL;
        for_each_sd_topology(tl) {

            if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
                goto error;

            sd = build_sched_domain(tl, cpu_map, attr, sd, i);

            has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
@@ -2424,6 +2498,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
        }
    }

    if (WARN_ON(!topology_span_sane(cpu_map)))
        goto error;

    /* Build the groups for the domains */
    for_each_cpu(i, cpu_map) {
        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {