mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-13 01:39:06 +08:00
Merge branches 'pm-cpuidle', 'pm-core' and 'pm-sleep'
Merge cpuidle updates, PM core updates and changes related to system sleep handling for 6.3-rc1:

- Make the TEO cpuidle governor check CPU utilization in order to refine idle state selection (Kajetan Puchalski).
- Make Kconfig select the haltpoll cpuidle governor when the haltpoll cpuidle driver is selected and replace a default_idle() call in that driver with arch_cpu_idle() which allows MWAIT to be used (Li RongQing).
- Add Emerald Rapids Xeon support to the intel_idle driver (Artem Bityutskiy).
- Add ARCH_SUSPEND_POSSIBLE dependencies for ARMv4 cpuidle drivers to avoid randconfig build failures (Arnd Bergmann).
- Make kobj_type structures used in the cpuidle sysfs interface constant (Thomas Weißschuh).
- Make the cpuidle driver registration code update microsecond values of idle state parameters in accordance with their nanosecond values if they are provided (Rafael Wysocki).
- Make the PSCI cpuidle driver prevent topology CPUs from being suspended on PREEMPT_RT (Krzysztof Kozlowski).
- Document that pm_runtime_force_suspend() cannot be used with DPM_FLAG_SMART_SUSPEND (Richard Fitzgerald).
- Add EXPORT macros for exporting PM functions from drivers (Richard Fitzgerald).
- Drop "select SRCU" from system sleep Kconfig (Paul E. McKenney).
- Remove /** from non-kernel-doc comments in hibernation code (Randy Dunlap).
* pm-cpuidle:
  cpuidle: psci: Do not suspend topology CPUs on PREEMPT_RT
  cpuidle: driver: Update microsecond values of state parameters as needed
  cpuidle: sysfs: make kobj_type structures constant
  cpuidle: add ARCH_SUSPEND_POSSIBLE dependencies
  intel_idle: add Emerald Rapids Xeon support
  cpuidle-haltpoll: Replace default_idle() with arch_cpu_idle()
  cpuidle-haltpoll: select haltpoll governor
  cpuidle: teo: Introduce util-awareness
  cpuidle: teo: Optionally skip polling states in teo_find_shallower_state()

* pm-core:
  PM: Add EXPORT macros for exporting PM functions
  PM: runtime: Document that force_suspend() is incompatible with SMART_SUSPEND

* pm-sleep:
  PM: sleep: Remove "select SRCU"
  PM: hibernate: swap: don't use /** for non-kernel-doc comments
This commit is contained in:
@@ -721,6 +721,7 @@ void arch_cpu_idle(void)
|
||||
{
|
||||
x86_idle();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(arch_cpu_idle);
|
||||
|
||||
/*
|
||||
* We use this if we don't have any better idle routine..
|
||||
|
||||
@@ -1864,6 +1864,10 @@ static bool pm_runtime_need_not_resume(struct device *dev)
|
||||
* sure the device is put into low power state and it should only be used during
|
||||
* system-wide PM transitions to sleep states. It assumes that the analogous
|
||||
* pm_runtime_force_resume() will be used to resume the device.
|
||||
*
|
||||
* Do not use with DPM_FLAG_SMART_SUSPEND as this can lead to an inconsistent
|
||||
* state where this function has called the ->runtime_suspend callback but the
|
||||
* PM core marks the driver as runtime active.
|
||||
*/
|
||||
int pm_runtime_force_suspend(struct device *dev)
|
||||
{
|
||||
|
||||
@@ -74,6 +74,7 @@ endmenu
|
||||
config HALTPOLL_CPUIDLE
|
||||
tristate "Halt poll cpuidle driver"
|
||||
depends on X86 && KVM_GUEST
|
||||
select CPU_IDLE_GOV_HALTPOLL
|
||||
default y
|
||||
help
|
||||
This option enables halt poll cpuidle driver, which allows to poll
|
||||
|
||||
@@ -24,6 +24,14 @@ config ARM_PSCI_CPUIDLE
|
||||
It provides an idle driver that is capable of detecting and
|
||||
managing idle states through the PSCI firmware interface.
|
||||
|
||||
The driver has limitations when used with PREEMPT_RT:
|
||||
- If the idle states are described with the non-hierarchical layout,
|
||||
all idle states are still available.
|
||||
|
||||
- If the idle states are described with the hierarchical layout,
|
||||
only the idle states defined per CPU are available, but not the ones
|
||||
being shared among a group of CPUs (aka cluster idle states).
|
||||
|
||||
config ARM_PSCI_CPUIDLE_DOMAIN
|
||||
bool "PSCI CPU idle Domain"
|
||||
depends on ARM_PSCI_CPUIDLE
|
||||
@@ -102,6 +110,7 @@ config ARM_MVEBU_V7_CPUIDLE
|
||||
config ARM_TEGRA_CPUIDLE
|
||||
bool "CPU Idle Driver for NVIDIA Tegra SoCs"
|
||||
depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU
|
||||
depends on ARCH_SUSPEND_POSSIBLE
|
||||
select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
|
||||
select ARM_CPU_SUSPEND
|
||||
help
|
||||
@@ -110,6 +119,7 @@ config ARM_TEGRA_CPUIDLE
|
||||
config ARM_QCOM_SPM_CPUIDLE
|
||||
bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
|
||||
depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
|
||||
depends on ARCH_SUSPEND_POSSIBLE
|
||||
select ARM_CPU_SUSPEND
|
||||
select CPU_IDLE_MULTIPLE_DRIVERS
|
||||
select DT_IDLE_STATES
|
||||
|
||||
@@ -32,7 +32,7 @@ static int default_enter_idle(struct cpuidle_device *dev,
|
||||
local_irq_enable();
|
||||
return index;
|
||||
}
|
||||
default_idle();
|
||||
arch_cpu_idle();
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
@@ -64,8 +64,11 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
|
||||
|
||||
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
|
||||
|
||||
/* Allow power off when OSI has been successfully enabled. */
|
||||
if (use_osi)
|
||||
/*
|
||||
* Allow power off when OSI has been successfully enabled.
|
||||
* PREEMPT_RT is not yet ready to enter domain idle states.
|
||||
*/
|
||||
if (use_osi && !IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
pd->power_off = psci_pd_power_off;
|
||||
else
|
||||
pd->flags |= GENPD_FLAG_ALWAYS_ON;
|
||||
|
||||
@@ -231,6 +231,9 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
|
||||
if (!psci_has_osi_support())
|
||||
return 0;
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
return 0;
|
||||
|
||||
data->dev = psci_dt_attach_cpu(cpu);
|
||||
if (IS_ERR_OR_NULL(data->dev))
|
||||
return PTR_ERR_OR_ZERO(data->dev);
|
||||
|
||||
@@ -183,11 +183,15 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
|
||||
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
|
||||
else if (s->target_residency_ns < 0)
|
||||
s->target_residency_ns = 0;
|
||||
else
|
||||
s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC);
|
||||
|
||||
if (s->exit_latency > 0)
|
||||
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
|
||||
else if (s->exit_latency_ns < 0)
|
||||
s->exit_latency_ns = 0;
|
||||
else
|
||||
s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,13 @@
|
||||
/*
|
||||
* Timer events oriented CPU idle governor
|
||||
*
|
||||
* TEO governor:
|
||||
* Copyright (C) 2018 - 2021 Intel Corporation
|
||||
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
*
|
||||
* Util-awareness mechanism:
|
||||
* Copyright (C) 2022 Arm Ltd.
|
||||
* Author: Kajetan Puchalski <kajetan.puchalski@arm.com>
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -99,14 +104,55 @@
|
||||
* select the given idle state instead of the candidate one.
|
||||
*
|
||||
* 3. By default, select the candidate state.
|
||||
*
|
||||
* Util-awareness mechanism:
|
||||
*
|
||||
* The idea behind the util-awareness extension is that there are two distinct
|
||||
* scenarios for the CPU which should result in two different approaches to idle
|
||||
* state selection - utilized and not utilized.
|
||||
*
|
||||
* In this case, 'utilized' means that the average runqueue util of the CPU is
|
||||
* above a certain threshold.
|
||||
*
|
||||
* When the CPU is utilized while going into idle, more likely than not it will
|
||||
* be woken up to do more work soon and so a shallower idle state should be
|
||||
* selected to minimise latency and maximise performance. When the CPU is not
|
||||
* being utilized, the usual metrics-based approach to selecting the deepest
|
||||
* available idle state should be preferred to take advantage of the power
|
||||
* saving.
|
||||
*
|
||||
* In order to achieve this, the governor uses a utilization threshold.
|
||||
* The threshold is computed per-CPU as a percentage of the CPU's capacity
|
||||
* by bit shifting the capacity value. Based on testing, the shift of 6 (~1.56%)
|
||||
* seems to be getting the best results.
|
||||
*
|
||||
* Before selecting the next idle state, the governor compares the current CPU
|
||||
* util to the precomputed util threshold. If it's below, it defaults to the
|
||||
* TEO metrics mechanism. If it's above, the closest shallower idle state will
|
||||
* be selected instead, as long as it is not a polling state.
|
||||
*/
|
||||
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/topology.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
/*
|
||||
* The number of bits to shift the CPU's capacity by in order to determine
|
||||
* the utilized threshold.
|
||||
*
|
||||
* 6 was chosen based on testing as the number that achieved the best balance
|
||||
* of power and performance on average.
|
||||
*
|
||||
* The resulting threshold is high enough to not be triggered by background
|
||||
* noise and low enough to react quickly when activity starts to ramp up.
|
||||
*/
|
||||
#define UTIL_THRESHOLD_SHIFT 6
|
||||
|
||||
|
||||
/*
|
||||
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
|
||||
* is used for decreasing metrics on a regular basis.
|
||||
@@ -137,9 +183,11 @@ struct teo_bin {
|
||||
* @time_span_ns: Time between idle state selection and post-wakeup update.
|
||||
* @sleep_length_ns: Time till the closest timer event (at the selection time).
|
||||
* @state_bins: Idle state data bins for this CPU.
|
||||
* @total: Grand total of the "intercepts" and "hits" mertics for all bins.
|
||||
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
|
||||
* @next_recent_idx: Index of the next @recent_idx entry to update.
|
||||
* @recent_idx: Indices of bins corresponding to recent "intercepts".
|
||||
* @util_threshold: Threshold above which the CPU is considered utilized
|
||||
* @utilized: Whether the last sleep on the CPU happened while utilized
|
||||
*/
|
||||
struct teo_cpu {
|
||||
s64 time_span_ns;
|
||||
@@ -148,10 +196,29 @@ struct teo_cpu {
|
||||
unsigned int total;
|
||||
int next_recent_idx;
|
||||
int recent_idx[NR_RECENT];
|
||||
unsigned long util_threshold;
|
||||
bool utilized;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
|
||||
|
||||
/**
|
||||
* teo_cpu_is_utilized - Check if the CPU's util is above the threshold
|
||||
* @cpu: Target CPU
|
||||
* @cpu_data: Governor CPU data for the target CPU
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
|
||||
{
|
||||
return sched_cpu_util(cpu) > cpu_data->util_threshold;
|
||||
}
|
||||
#else
|
||||
static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* teo_update - Update CPU metrics after wakeup.
|
||||
* @drv: cpuidle driver containing state data.
|
||||
@@ -258,15 +325,17 @@ static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
|
||||
* @dev: Target CPU.
|
||||
* @state_idx: Index of the capping idle state.
|
||||
* @duration_ns: Idle duration value to match.
|
||||
* @no_poll: Don't consider polling states.
|
||||
*/
|
||||
static int teo_find_shallower_state(struct cpuidle_driver *drv,
|
||||
struct cpuidle_device *dev, int state_idx,
|
||||
s64 duration_ns)
|
||||
s64 duration_ns, bool no_poll)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = state_idx - 1; i >= 0; i--) {
|
||||
if (dev->states_usage[i].disable)
|
||||
if (dev->states_usage[i].disable ||
|
||||
(no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
|
||||
continue;
|
||||
|
||||
state_idx = i;
|
||||
@@ -321,6 +390,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
|
||||
goto end;
|
||||
}
|
||||
|
||||
cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
|
||||
/*
|
||||
* If the CPU is being utilized over the threshold and there are only 2
|
||||
* states to choose from, the metrics need not be considered, so choose
|
||||
* the shallowest non-polling state and exit.
|
||||
*/
|
||||
if (drv->state_count < 3 && cpu_data->utilized) {
|
||||
for (i = 0; i < drv->state_count; ++i) {
|
||||
if (!dev->states_usage[i].disable &&
|
||||
!(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
|
||||
idx = i;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the deepest idle state whose target residency does not exceed
|
||||
* the current sleep length and the deepest idle state not deeper than
|
||||
@@ -452,6 +537,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
|
||||
if (idx > constraint_idx)
|
||||
idx = constraint_idx;
|
||||
|
||||
/*
|
||||
* If the CPU is being utilized over the threshold, choose a shallower
|
||||
* non-polling state to improve latency
|
||||
*/
|
||||
if (cpu_data->utilized)
|
||||
idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
|
||||
|
||||
end:
|
||||
/*
|
||||
* Don't stop the tick if the selected state is a polling one or if the
|
||||
@@ -469,7 +561,7 @@ end:
|
||||
*/
|
||||
if (idx > idx0 &&
|
||||
drv->states[idx].target_residency_ns > delta_tick)
|
||||
idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
|
||||
idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
|
||||
}
|
||||
|
||||
return idx;
|
||||
@@ -508,9 +600,11 @@ static int teo_enable_device(struct cpuidle_driver *drv,
|
||||
struct cpuidle_device *dev)
|
||||
{
|
||||
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
|
||||
unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu);
|
||||
int i;
|
||||
|
||||
memset(cpu_data, 0, sizeof(*cpu_data));
|
||||
cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT;
|
||||
|
||||
for (i = 0; i < NR_RECENT; i++)
|
||||
cpu_data->recent_idx[i] = -1;
|
||||
|
||||
@@ -200,7 +200,7 @@ static void cpuidle_sysfs_release(struct kobject *kobj)
|
||||
complete(&kdev->kobj_unregister);
|
||||
}
|
||||
|
||||
static struct kobj_type ktype_cpuidle = {
|
||||
static const struct kobj_type ktype_cpuidle = {
|
||||
.sysfs_ops = &cpuidle_sysfs_ops,
|
||||
.release = cpuidle_sysfs_release,
|
||||
};
|
||||
@@ -447,7 +447,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj)
|
||||
complete(&state_obj->kobj_unregister);
|
||||
}
|
||||
|
||||
static struct kobj_type ktype_state_cpuidle = {
|
||||
static const struct kobj_type ktype_state_cpuidle = {
|
||||
.sysfs_ops = &cpuidle_state_sysfs_ops,
|
||||
.default_groups = cpuidle_state_default_groups,
|
||||
.release = cpuidle_state_sysfs_release,
|
||||
@@ -594,7 +594,7 @@ static struct attribute *cpuidle_driver_default_attrs[] = {
|
||||
};
|
||||
ATTRIBUTE_GROUPS(cpuidle_driver_default);
|
||||
|
||||
static struct kobj_type ktype_driver_cpuidle = {
|
||||
static const struct kobj_type ktype_driver_cpuidle = {
|
||||
.sysfs_ops = &cpuidle_driver_sysfs_ops,
|
||||
.default_groups = cpuidle_driver_default_groups,
|
||||
.release = cpuidle_driver_sysfs_release,
|
||||
|
||||
@@ -1430,6 +1430,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
|
||||
@@ -1862,6 +1863,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
|
||||
skx_idle_state_table_update();
|
||||
break;
|
||||
case INTEL_FAM6_SAPPHIRERAPIDS_X:
|
||||
case INTEL_FAM6_EMERALDRAPIDS_X:
|
||||
spr_idle_state_table_update();
|
||||
break;
|
||||
case INTEL_FAM6_ALDERLAKE:
|
||||
|
||||
@@ -379,9 +379,13 @@ const struct dev_pm_ops name = { \
|
||||
const struct dev_pm_ops name; \
|
||||
__EXPORT_SYMBOL(name, sec, ns); \
|
||||
const struct dev_pm_ops name
|
||||
#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name)
|
||||
#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns)
|
||||
#else
|
||||
#define _EXPORT_DEV_PM_OPS(name, sec, ns) \
|
||||
static __maybe_unused const struct dev_pm_ops __static_##name
|
||||
#define EXPORT_PM_FN_GPL(name)
|
||||
#define EXPORT_PM_FN_NS_GPL(name, ns)
|
||||
#endif
|
||||
|
||||
#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "")
|
||||
|
||||
@@ -118,7 +118,6 @@ config PM_SLEEP
|
||||
def_bool y
|
||||
depends on SUSPEND || HIBERNATE_CALLBACKS
|
||||
select PM
|
||||
select SRCU
|
||||
|
||||
config PM_SLEEP_SMP
|
||||
def_bool y
|
||||
|
||||
@@ -581,7 +581,7 @@ static int save_image(struct swap_map_handle *handle,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Structure used for CRC32.
|
||||
*/
|
||||
struct crc_data {
|
||||
@@ -596,7 +596,7 @@ struct crc_data {
|
||||
unsigned char *unc[LZO_THREADS]; /* uncompressed data */
|
||||
};
|
||||
|
||||
/**
|
||||
/*
|
||||
* CRC32 update function that runs in its own thread.
|
||||
*/
|
||||
static int crc32_threadfn(void *data)
|
||||
@@ -623,7 +623,7 @@ static int crc32_threadfn(void *data)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/**
|
||||
/*
|
||||
* Structure used for LZO data compression.
|
||||
*/
|
||||
struct cmp_data {
|
||||
@@ -640,7 +640,7 @@ struct cmp_data {
|
||||
unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
|
||||
};
|
||||
|
||||
/**
|
||||
/*
|
||||
* Compression function that runs in its own thread.
|
||||
*/
|
||||
static int lzo_compress_threadfn(void *data)
|
||||
@@ -948,9 +948,9 @@ out_finish:
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* The following functions allow us to read data using a swap map
|
||||
* in a file-alike way
|
||||
* in a file-like way.
|
||||
*/
|
||||
|
||||
static void release_swap_reader(struct swap_map_handle *handle)
|
||||
@@ -1107,7 +1107,7 @@ static int load_image(struct swap_map_handle *handle,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* Structure used for LZO data decompression.
|
||||
*/
|
||||
struct dec_data {
|
||||
@@ -1123,7 +1123,7 @@ struct dec_data {
|
||||
unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
|
||||
};
|
||||
|
||||
/**
|
||||
/*
|
||||
* Decompression function that runs in its own thread.
|
||||
*/
|
||||
static int lzo_decompress_threadfn(void *data)
|
||||
|
||||
Reference in New Issue
Block a user