2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

cgroup/cpuset: Keep track of CPUs in isolated partitions

Add a new internal isolated_cpus mask to keep track of the CPUs that are in
isolated partitions. Expose that new cpumask as a new root-only control file
".cpuset.cpus.isolated".

tj: Updated patch description to reflect dropping __DEBUG__ prefix.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
Waiman Long 2023-10-25 14:25:54 -04:00 committed by Tejun Heo
parent 14060dfc48
commit 11e5f407b6

View File

@ -204,6 +204,11 @@ struct cpuset {
*/ */
static cpumask_var_t subpartitions_cpus; static cpumask_var_t subpartitions_cpus;
/*
 * Exclusive CPUs in isolated partitions
 *
 * Maintained by partition_xcpus_newstate(): CPUs are set in this mask when
 * their new partition root state is PRS_ISOLATED and cleared otherwise.
 * Allocated zeroed in cpuset_init() and printed for FILE_ISOLATED_CPULIST.
 */
static cpumask_var_t isolated_cpus;
/* List of remote partition root children */ /* List of remote partition root children */
static struct list_head remote_children; static struct list_head remote_children;
@ -1317,6 +1322,7 @@ static void compute_effective_cpumask(struct cpumask *new_cpus,
*/ */
enum partition_cmd { enum partition_cmd {
partcmd_enable, /* Enable partition root */ partcmd_enable, /* Enable partition root */
partcmd_enablei, /* Enable isolated partition root */
partcmd_disable, /* Disable partition root */ partcmd_disable, /* Disable partition root */
partcmd_update, /* Update parent's effective_cpus */ partcmd_update, /* Update parent's effective_cpus */
partcmd_invalidate, /* Make partition invalid */ partcmd_invalidate, /* Make partition invalid */
@ -1418,6 +1424,74 @@ static void reset_partition_data(struct cpuset *cs)
} }
} }
/*
 * partition_xcpus_newstate - Record a state transition of exclusive CPUs
 * @old_prs: partition_root_state the CPUs are leaving
 * @new_prs: partition_root_state the CPUs are entering
 * @xcpus: exclusive CPUs whose state is changing
 *
 * CPUs entering PRS_ISOLATED are set in isolated_cpus; for any other new
 * state they are cleared from it.
 */
static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
{
	/* A call without an actual state change is unexpected. */
	WARN_ON_ONCE(old_prs == new_prs);

	if (new_prs != PRS_ISOLATED) {
		cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
		return;
	}

	cpumask_or(isolated_cpus, isolated_cpus, xcpus);
}
/*
 * partition_xcpus_add - Hand exclusive CPUs over to a partition
 * @new_prs: new partition_root_state (must not be negative)
 * @parent: parent cpuset, or NULL for a remote partition
 * @xcpus: exclusive CPUs to be added
 *
 * A NULL @parent is treated as top_cpuset. The CPUs in @xcpus are removed
 * from the parent's effective_cpus; when the parent is top_cpuset they are
 * also recorded in subpartitions_cpus. Must be called with callback_lock
 * held.
 */
static void partition_xcpus_add(int new_prs, struct cpuset *parent,
				struct cpumask *xcpus)
{
	struct cpuset *donor;
	int donor_prs;

	WARN_ON_ONCE(new_prs < 0);
	lockdep_assert_held(&callback_lock);

	/* Remote partitions take their CPUs directly from the top cpuset. */
	donor = parent ? parent : &top_cpuset;
	donor_prs = donor->partition_root_state;

	if (donor == &top_cpuset)
		cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus);

	/* Only a differing state needs the isolated_cpus bookkeeping. */
	if (donor_prs != new_prs)
		partition_xcpus_newstate(donor_prs, new_prs, xcpus);

	cpumask_andnot(donor->effective_cpus, donor->effective_cpus, xcpus);
}
/*
 * partition_xcpus_del - Give exclusive CPUs of a partition back
 * @old_prs: old partition_root_state (must not be negative)
 * @parent: parent cpuset, or NULL for a remote partition
 * @xcpus: exclusive CPUs to be removed
 *
 * A NULL @parent is treated as top_cpuset. When the parent is top_cpuset
 * the CPUs are dropped from subpartitions_cpus. The currently active CPUs
 * of @xcpus are then added back to the parent's effective_cpus. Must be
 * called with callback_lock held.
 *
 * Note that @xcpus is modified: on return it holds only the CPUs that are
 * also in cpu_active_mask.
 */
static void partition_xcpus_del(int old_prs, struct cpuset *parent,
				struct cpumask *xcpus)
{
	struct cpuset *receiver;
	int receiver_prs;

	WARN_ON_ONCE(old_prs < 0);
	lockdep_assert_held(&callback_lock);

	/* Remote partitions return their CPUs directly to the top cpuset. */
	receiver = parent ? parent : &top_cpuset;
	receiver_prs = receiver->partition_root_state;

	if (receiver == &top_cpuset)
		cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus);

	/* Update isolated_cpus with the full mask, before the active filter. */
	if (receiver_prs != old_prs)
		partition_xcpus_newstate(old_prs, receiver_prs, xcpus);

	/* Only CPUs that are still active go back to effective_cpus. */
	cpumask_and(xcpus, xcpus, cpu_active_mask);
	cpumask_or(receiver->effective_cpus, receiver->effective_cpus, xcpus);
}
/* /*
* compute_effective_exclusive_cpumask - compute effective exclusive CPUs * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
* @cs: cpuset * @cs: cpuset
@ -1456,13 +1530,15 @@ static inline bool is_local_partition(struct cpuset *cs)
/* /*
* remote_partition_enable - Enable current cpuset as a remote partition root * remote_partition_enable - Enable current cpuset as a remote partition root
* @cs: the cpuset to update * @cs: the cpuset to update
* @new_prs: new partition_root_state
* @tmp: temporary masks * @tmp: temporary masks
* Return: 1 if successful, 0 if error * Return: 1 if successful, 0 if error
* *
* Enable the current cpuset to become a remote partition root taking CPUs * Enable the current cpuset to become a remote partition root taking CPUs
* directly from the top cpuset. cpuset_mutex must be held by the caller. * directly from the top cpuset. cpuset_mutex must be held by the caller.
*/ */
static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp) static int remote_partition_enable(struct cpuset *cs, int new_prs,
struct tmpmasks *tmp)
{ {
/* /*
* The user must have sysadmin privilege. * The user must have sysadmin privilege.
@ -1485,18 +1561,14 @@ static int remote_partition_enable(struct cpuset *cs, struct tmpmasks *tmp)
return 0; return 0;
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
cpumask_andnot(top_cpuset.effective_cpus, partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
top_cpuset.effective_cpus, tmp->new_cpus); list_add(&cs->remote_sibling, &remote_children);
cpumask_or(subpartitions_cpus,
subpartitions_cpus, tmp->new_cpus);
if (cs->use_parent_ecpus) { if (cs->use_parent_ecpus) {
struct cpuset *parent = parent_cs(cs); struct cpuset *parent = parent_cs(cs);
cs->use_parent_ecpus = false; cs->use_parent_ecpus = false;
parent->child_ecpus_count--; parent->child_ecpus_count--;
} }
list_add(&cs->remote_sibling, &remote_children);
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
/* /*
@ -1524,13 +1596,8 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus));
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
cpumask_andnot(subpartitions_cpus,
subpartitions_cpus, tmp->new_cpus);
cpumask_and(tmp->new_cpus,
tmp->new_cpus, cpu_active_mask);
cpumask_or(top_cpuset.effective_cpus,
top_cpuset.effective_cpus, tmp->new_cpus);
list_del_init(&cs->remote_sibling); list_del_init(&cs->remote_sibling);
partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus);
cs->partition_root_state = -cs->partition_root_state; cs->partition_root_state = -cs->partition_root_state;
if (!cs->prs_err) if (!cs->prs_err)
cs->prs_err = PERR_INVCPUS; cs->prs_err = PERR_INVCPUS;
@ -1557,6 +1624,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
struct tmpmasks *tmp) struct tmpmasks *tmp)
{ {
bool adding, deleting; bool adding, deleting;
int prs = cs->partition_root_state;
if (WARN_ON_ONCE(!is_remote_partition(cs))) if (WARN_ON_ONCE(!is_remote_partition(cs)))
return; return;
@ -1580,20 +1648,10 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
goto invalidate; goto invalidate;
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
if (adding) { if (adding)
cpumask_or(subpartitions_cpus, partition_xcpus_add(prs, NULL, tmp->addmask);
subpartitions_cpus, tmp->addmask); if (deleting)
cpumask_andnot(top_cpuset.effective_cpus, partition_xcpus_del(prs, NULL, tmp->delmask);
top_cpuset.effective_cpus, tmp->addmask);
}
if (deleting) {
cpumask_andnot(subpartitions_cpus,
subpartitions_cpus, tmp->delmask);
cpumask_and(tmp->delmask,
tmp->delmask, cpu_active_mask);
cpumask_or(top_cpuset.effective_cpus,
top_cpuset.effective_cpus, tmp->delmask);
}
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
/* /*
@ -1676,11 +1734,11 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
* @tmp: Temporary addmask and delmask * @tmp: Temporary addmask and delmask
* Return: 0 or a partition root state error code * Return: 0 or a partition root state error code
* *
* For partcmd_enable, the cpuset is being transformed from a non-partition * For partcmd_enable*, the cpuset is being transformed from a non-partition
* root to a partition root. The effective_xcpus (cpus_allowed if effective_xcpus * root to a partition root. The effective_xcpus (cpus_allowed if
* not set) mask of the given cpuset will be taken away from parent's * effective_xcpus not set) mask of the given cpuset will be taken away from
* effective_cpus. The function will return 0 if all the CPUs listed in * parent's effective_cpus. The function will return 0 if all the CPUs listed
* effective_xcpus can be granted or an error code will be returned. * in effective_xcpus can be granted or an error code will be returned.
* *
* For partcmd_disable, the cpuset is being transformed from a partition * For partcmd_disable, the cpuset is being transformed from a partition
* root back to a non-partition root. Any CPUs in effective_xcpus will be * root back to a non-partition root. Any CPUs in effective_xcpus will be
@ -1695,7 +1753,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
* *
* For partcmd_invalidate, the current partition will be made invalid. * For partcmd_invalidate, the current partition will be made invalid.
* *
* The partcmd_enable and partcmd_disable commands are used by * The partcmd_enable* and partcmd_disable commands are used by
* update_prstate(). An error code may be returned and the caller will check * update_prstate(). An error code may be returned and the caller will check
* for error. * for error.
* *
@ -1760,7 +1818,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
nocpu = tasks_nocpu_error(parent, cs, xcpus); nocpu = tasks_nocpu_error(parent, cs, xcpus);
if (cmd == partcmd_enable) { if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) {
/* /*
* Enabling partition root is not allowed if its * Enabling partition root is not allowed if its
* effective_xcpus is empty or doesn't overlap with * effective_xcpus is empty or doesn't overlap with
@ -1783,6 +1841,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
cpumask_copy(tmp->delmask, xcpus); cpumask_copy(tmp->delmask, xcpus);
deleting = true; deleting = true;
subparts_delta++; subparts_delta++;
new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED;
} else if (cmd == partcmd_disable) { } else if (cmd == partcmd_disable) {
/* /*
* May need to add cpus to parent's effective_cpus for * May need to add cpus to parent's effective_cpus for
@ -1792,6 +1851,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus);
if (adding) if (adding)
subparts_delta--; subparts_delta--;
new_prs = PRS_MEMBER;
} else if (newmask) { } else if (newmask) {
/* /*
* Empty cpumask is not allowed * Empty cpumask is not allowed
@ -1940,37 +2000,24 @@ write_error:
* newly deleted ones will be added back to effective_cpus. * newly deleted ones will be added back to effective_cpus.
*/ */
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
if (adding) {
if (parent == &top_cpuset)
cpumask_andnot(subpartitions_cpus,
subpartitions_cpus, tmp->addmask);
/*
* Some of the CPUs in effective_xcpus might have been offlined.
*/
cpumask_or(parent->effective_cpus,
parent->effective_cpus, tmp->addmask);
cpumask_and(parent->effective_cpus,
parent->effective_cpus, cpu_active_mask);
}
if (deleting) {
if (parent == &top_cpuset)
cpumask_or(subpartitions_cpus,
subpartitions_cpus, tmp->delmask);
cpumask_andnot(parent->effective_cpus,
parent->effective_cpus, tmp->delmask);
}
if (is_partition_valid(parent)) {
parent->nr_subparts += subparts_delta;
WARN_ON_ONCE(parent->nr_subparts < 0);
}
if (old_prs != new_prs) { if (old_prs != new_prs) {
cs->partition_root_state = new_prs; cs->partition_root_state = new_prs;
if (new_prs <= 0) if (new_prs <= 0)
cs->nr_subparts = 0; cs->nr_subparts = 0;
} }
/*
* Adding to parent's effective_cpus means deleting CPUs from cs
* and vice versa.
*/
if (adding)
partition_xcpus_del(old_prs, parent, tmp->addmask);
if (deleting)
partition_xcpus_add(new_prs, parent, tmp->delmask);
if (is_partition_valid(parent)) {
parent->nr_subparts += subparts_delta;
WARN_ON_ONCE(parent->nr_subparts < 0);
}
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
if ((old_prs != new_prs) && (cmd == partcmd_update)) if ((old_prs != new_prs) && (cmd == partcmd_update))
@ -2948,6 +2995,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
int err = PERR_NONE, old_prs = cs->partition_root_state; int err = PERR_NONE, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs); struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask; struct tmpmasks tmpmask;
bool new_xcpus_state = false;
if (old_prs == new_prs) if (old_prs == new_prs)
return 0; return 0;
@ -2977,6 +3025,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
goto out; goto out;
if (!old_prs) { if (!old_prs) {
enum partition_cmd cmd = (new_prs == PRS_ROOT)
? partcmd_enable : partcmd_enablei;
/* /*
* cpus_allowed cannot be empty. * cpus_allowed cannot be empty.
*/ */
@ -2985,19 +3036,18 @@ static int update_prstate(struct cpuset *cs, int new_prs)
goto out; goto out;
} }
err = update_parent_effective_cpumask(cs, partcmd_enable, err = update_parent_effective_cpumask(cs, cmd, NULL, &tmpmask);
NULL, &tmpmask);
/* /*
* If an attempt to become local partition root fails, * If an attempt to become local partition root fails,
* try to become a remote partition root instead. * try to become a remote partition root instead.
*/ */
if (err && remote_partition_enable(cs, &tmpmask)) if (err && remote_partition_enable(cs, new_prs, &tmpmask))
err = 0; err = 0;
} else if (old_prs && new_prs) { } else if (old_prs && new_prs) {
/* /*
* A change in load balance state only, no change in cpumasks. * A change in load balance state only, no change in cpumasks.
*/ */
; new_xcpus_state = true;
} else { } else {
/* /*
* Switching back to member is always allowed even if it * Switching back to member is always allowed even if it
@ -3029,6 +3079,8 @@ out:
WRITE_ONCE(cs->prs_err, err); WRITE_ONCE(cs->prs_err, err);
if (!is_partition_valid(cs)) if (!is_partition_valid(cs))
reset_partition_data(cs); reset_partition_data(cs);
else if (new_xcpus_state)
partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
/* Force update if switching back to member */ /* Force update if switching back to member */
@ -3386,6 +3438,7 @@ typedef enum {
FILE_SUBPARTS_CPULIST, FILE_SUBPARTS_CPULIST,
FILE_EXCLUSIVE_CPULIST, FILE_EXCLUSIVE_CPULIST,
FILE_EFFECTIVE_XCPULIST, FILE_EFFECTIVE_XCPULIST,
FILE_ISOLATED_CPULIST,
FILE_CPU_EXCLUSIVE, FILE_CPU_EXCLUSIVE,
FILE_MEM_EXCLUSIVE, FILE_MEM_EXCLUSIVE,
FILE_MEM_HARDWALL, FILE_MEM_HARDWALL,
@ -3582,6 +3635,9 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
case FILE_SUBPARTS_CPULIST: case FILE_SUBPARTS_CPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(subpartitions_cpus)); seq_printf(sf, "%*pbl\n", cpumask_pr_args(subpartitions_cpus));
break; break;
case FILE_ISOLATED_CPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(isolated_cpus));
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
} }
@ -3875,6 +3931,13 @@ static struct cftype dfl_files[] = {
.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG, .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
}, },
{
.name = "cpus.isolated",
.seq_show = cpuset_common_seq_show,
.private = FILE_ISOLATED_CPULIST,
.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
},
{ } /* terminate */ { } /* terminate */
}; };
@ -4194,6 +4257,7 @@ int __init cpuset_init(void)
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_xcpus, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_xcpus, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL));
cpumask_setall(top_cpuset.cpus_allowed); cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed); nodes_setall(top_cpuset.mems_allowed);