2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

sched_ext: Do not enable LLC/NUMA optimizations when domains overlap

When the LLC and NUMA domains fully overlap, enabling both optimizations
in the built-in idle CPU selection policy is redundant, as it leads to
searching for an idle CPU within the same domain twice.

Likewise, if all online CPUs are within a single LLC domain, LLC
optimization is unnecessary.

Therefore, detect overlapping domains and enable topology optimizations
only when necessary.

Moreover, rely on the online CPUs for this detection logic, instead of
using the possible CPUs.

Fixes: 860a45219b ("sched_ext: Introduce NUMA awareness to the default idle selection policy")
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
Andrea Righi 2024-11-08 01:01:36 +01:00 committed by Tejun Heo
parent 860a45219b
commit f6ce6b9493

View File

@ -3129,12 +3129,63 @@ found:
goto retry; goto retry;
} }
/*
* Return true if the LLC domains do not perfectly overlap with the NUMA
* domains, false otherwise.
*/
static bool llc_numa_mismatch(void)
{
int cpu;
/*
* We need to scan all online CPUs to verify whether their scheduling
* domains overlap.
*
* While it is rare to encounter architectures with asymmetric NUMA
* topologies, CPU hotplugging or virtualized environments can result
* in asymmetric configurations.
*
* For example:
*
* NUMA 0:
* - LLC 0: cpu0..cpu7
* - LLC 1: cpu8..cpu15 [offline]
*
* NUMA 1:
* - LLC 0: cpu16..cpu23
* - LLC 1: cpu24..cpu31
*
* In this case, if we only check the first online CPU (cpu0), we might
* incorrectly assume that the LLC and NUMA domains are fully
* overlapping, which is incorrect (as NUMA 1 has two distinct LLC
* domains).
*/
for_each_online_cpu(cpu) {
const struct cpumask *numa_cpus;
struct sched_domain *sd;
sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (!sd)
return true;
numa_cpus = cpumask_of_node(cpu_to_node(cpu));
if (sd->span_weight != cpumask_weight(numa_cpus))
return true;
}
return false;
}
/* /*
* Initialize topology-aware scheduling. * Initialize topology-aware scheduling.
* *
* Detect if the system has multiple LLC or multiple NUMA domains and enable * Detect if the system has multiple LLC or multiple NUMA domains and enable
* cache-aware / NUMA-aware scheduling optimizations in the default CPU idle * cache-aware / NUMA-aware scheduling optimizations in the default CPU idle
* selection policy. * selection policy.
*
* Assumption: the kernel's internal topology representation assumes that each
* CPU belongs to a single LLC domain, and that each LLC domain is entirely
* contained within a single NUMA node.
*/ */
static void update_selcpu_topology(void) static void update_selcpu_topology(void)
{ {
@ -3144,26 +3195,34 @@ static void update_selcpu_topology(void)
s32 cpu = cpumask_first(cpu_online_mask); s32 cpu = cpumask_first(cpu_online_mask);
/* /*
* We only need to check the NUMA node and LLC domain of the first * Enable LLC domain optimization only when there are multiple LLC
* available CPU to determine if they cover all CPUs. * domains among the online CPUs. If all online CPUs are part of a
* single LLC domain, the idle CPU selection logic can choose any
* online CPU without bias.
* *
* If all CPUs belong to the same NUMA node or share the same LLC * Note that it is sufficient to check the LLC domain of the first
* domain, enabling NUMA or LLC optimizations is unnecessary. * online CPU to determine whether a single LLC domain includes all
* Otherwise, these optimizations can be enabled. * CPUs.
*/ */
rcu_read_lock(); rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_llc, cpu)); sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (sd) { if (sd) {
cpus = sched_domain_span(sd); if (sd->span_weight < num_online_cpus())
if (cpumask_weight(cpus) < num_possible_cpus())
enable_llc = true; enable_llc = true;
} }
sd = highest_flag_domain(cpu, SD_NUMA);
if (sd) { /*
cpus = sched_group_span(sd->groups); * Enable NUMA optimization only when there are multiple NUMA domains
if (cpumask_weight(cpus) < num_possible_cpus()) * among the online CPUs and the NUMA domains don't perfectly overlaps
enable_numa = true; * with the LLC domains.
} *
* If all CPUs belong to the same NUMA node and the same LLC domain,
* enabling both NUMA and LLC optimizations is unnecessary, as checking
* for an idle CPU in the same domain twice is redundant.
*/
cpus = cpumask_of_node(cpu_to_node(cpu));
if ((cpumask_weight(cpus) < num_online_cpus()) & llc_numa_mismatch())
enable_numa = true;
rcu_read_unlock(); rcu_read_unlock();
pr_debug("sched_ext: LLC idle selection %s\n", pr_debug("sched_ext: LLC idle selection %s\n",