mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 88264981f2
			
		
	
	
		88264981f2
		
	
	
	
	
		
			
			This is the initial pull request of sched_ext. The v7 patchset (https://lkml.kernel.org/r/20240618212056.2833381-1-tj@kernel.org) is applied on top of tip/sched/core + bpf/master as of Jun 18th. tip/sched/core 793a62823d1c ("sched/core: Drop spinlocks on contention iff kernel is preemptible") bpf/master f6afdaf72a ("Merge branch 'bpf-support-resilient-split-btf'") Since then, the following pulls were made: - v6.11-rc1 is pulled to keep up with the mainline. - tip/sched/core was pulled several times: - 7b9f6c864a, 0df340ceae, 5ac998574f, 0b1777f0fa: To resolve conflicts. See each commit for details on conflicts and their resolutions. - d7b01aef9d: To receive fd03c5b858 ("sched: Rework pick_next_task()") and related commits. @prev is added to sched_class->put_prev_task() and put_prev_task() is reordered after ->pick_task(), which makes sched_class->switch_class() unnecessary. The follow-up commits update sched_ext accordingly and drop sched_class->switch_class(). - bpf/master was pulled to receive baebe9aaba ("bpf: allow passing struct bpf_iter_<type> as kfunc arguments") and related changes in preparation for the DSQ iterator patchset. To obtain the net sched_ext changes, diff against: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git for-6.12-base which is the merge of: tip/sched/core bc9057da1a ("sched/cpufreq: Use NSEC_PER_MSEC for deadline task") bpf/master 2ad6d23f46 ("selftests/bpf: Do not update vmlinux.h unnecessarily") Since the v7 patchset, the following changes were made: - cpuperf support which was a part of the v6 patchset was posted separately and then applied after reviews. - cgroup support which was a part of the v6 patchset was posted separately, iterated and then applied. - Improve integration with sched core. - Double locking usage in migration paths dropped. Depend on TASK_ON_RQ_MIGRATING synchronization instead. - The BPF scheduler couldn't directly dispatch to the local DSQ of another CPU using a SCX_DSQ_LOCAL_ON verdict. 
This caused difficulties around handling non-wakeup enqueues. Updated so that SCX_DSQ_LOCAL_ON can be used in the enqueue path too. - DSQ iterator which was a part of the v6 patchset was posted separately. The iterator itself was applied after a couple revisions. The associated selective consumption kfunc can use further improvements and is still being worked on. - scx_bpf_dispatch[_vtime]_from_dsq() added to increase flexibility. A task can now be transferred between two DSQs from almost any context. This involved significant refactoring of migration code. - Various fixes and improvements. As the branch is based on top of tip/sched/core + bpf/master, please merge after both are applied. -----BEGIN PGP SIGNATURE----- iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZuOSuA4cdGpAa2VybmVs Lm9yZwAKCRCxYfJx3gVYGVZyAQDBU3WPkYKB8gl6a6YQ+/PzBXorOK7mioS9A2iJ vBR3FgEAg1vtcss1S+2juWmVq7ItiFNWCqtXzUr/bVmL9CqqDwA= =bOOC -----END PGP SIGNATURE----- Merge tag 'sched_ext-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext Pull sched_ext support from Tejun Heo: "This implements a new scheduler class called ‘ext_sched_class’, or sched_ext, which allows scheduling policies to be implemented as BPF programs. The goals of this are: - Ease of experimentation and exploration: Enabling rapid iteration of new scheduling policies. - Customization: Building application-specific schedulers which implement policies that are not applicable to general-purpose schedulers. 
- Rapid scheduler deployments: Non-disruptive swap outs of scheduling policies in production environments" See individual commits for more documentation, but also the cover letter for the latest series: Link: https://lore.kernel.org/all/20240618212056.2833381-1-tj@kernel.org/ * tag 'sched_ext-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: (110 commits) sched: Move update_other_load_avgs() to kernel/sched/pelt.c sched_ext: Don't trigger ops.quiescent/runnable() on migrations sched_ext: Synchronize bypass state changes with rq lock scx_qmap: Implement highpri boosting sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq() sched_ext: Compact struct bpf_iter_scx_dsq_kern sched_ext: Replace consume_local_task() with move_local_task_to_local_dsq() sched_ext: Move consume_local_task() upward sched_ext: Move sanity check and dsq_mod_nr() into task_unlink_from_dsq() sched_ext: Reorder args for consume_local/remote_task() sched_ext: Restructure dispatch_to_local_dsq() sched_ext: Fix processs_ddsp_deferred_locals() by unifying DTL_INVALID handling sched_ext: Make find_dsq_for_dispatch() handle SCX_DSQ_LOCAL_ON sched_ext: Refactor consume_remote_task() sched_ext: Rename scx_kfunc_set_sleepable to unlocked and relocate sched_ext: Add missing static to scx_dump_data sched_ext: Add missing static to scx_has_op[] sched_ext: Temporarily work around pick_task_scx() being called without balance_scx() sched_ext: Add a cgroup scheduler which uses flattened hierarchy sched_ext: Add cgroup support ...
		
			
				
	
	
		
			232 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			232 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| #include <linux/init_task.h>
 | |
| #include <linux/export.h>
 | |
| #include <linux/mqueue.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/sched/sysctl.h>
 | |
| #include <linux/sched/rt.h>
 | |
| #include <linux/sched/task.h>
 | |
| #include <linux/sched/ext.h>
 | |
| #include <linux/init.h>
 | |
| #include <linux/fs.h>
 | |
| #include <linux/mm.h>
 | |
| #include <linux/audit.h>
 | |
| #include <linux/numa.h>
 | |
| #include <linux/scs.h>
 | |
| #include <linux/plist.h>
 | |
| 
 | |
| #include <linux/uaccess.h>
 | |
| 
 | |
/*
 * init_signals - statically initialized signal_struct for the initial task.
 *
 * init_task.signal (defined later in this file) points at this object.
 * Members that are not named below receive C's default zero initialization.
 */
| static struct signal_struct init_signals = {
 | |
| 	.nr_threads	= 1,
 | |
	/*
	 * Set up from init_task.thread_node; init_task.thread_node is in turn
	 * set up from this head, so the two initializers refer to each other.
	 */
| 	.thread_head	= LIST_HEAD_INIT(init_task.thread_node),
 | |
| 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(init_signals.wait_chldexit),
 | |
| 	.shared_pending	= {
 | |
| 		.list = LIST_HEAD_INIT(init_signals.shared_pending.list),
 | |
| 		.signal =  {{0}}
 | |
| 	},
 | |
| 	.multiprocess	= HLIST_HEAD_INIT,
 | |
| 	.rlim		= INIT_RLIMITS,
 | |
| 	.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
 | |
| 	.exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
 | |
| #ifdef CONFIG_POSIX_TIMERS	/* timer members are only initialized with this option */
 | |
| 	.posix_timers	= HLIST_HEAD_INIT,
 | |
| 	.cputimer	= {
 | |
| 		.cputime_atomic	= INIT_CPUTIME_ATOMIC,
 | |
| 	},
 | |
| #endif
 | |
	/*
	 * No trailing comma here: the macro (defined outside this file) is
	 * expected to expand to complete initializer entries or to nothing.
	 */
| 	INIT_CPU_TIMERS(init_signals)
 | |
	/* Every PIDTYPE_* slot listed here refers to the same init_struct_pid. */
| 	.pids = {
 | |
| 		[PIDTYPE_PID]	= &init_struct_pid,
 | |
| 		[PIDTYPE_TGID]	= &init_struct_pid,
 | |
| 		[PIDTYPE_PGID]	= &init_struct_pid,
 | |
| 		[PIDTYPE_SID]	= &init_struct_pid,
 | |
| 	},
 | |
	/* Same comma-less macro convention as INIT_CPU_TIMERS() above. */
| 	INIT_PREV_CPUTIME(init_signals)
 | |
| };
 | |
| 
 | |
/*
 * init_sighand - statically initialized sighand_struct for the initial task.
 *
 * init_task.sighand (defined later in this file) points at this object.
 * The reference count starts at 1.
 */
| static struct sighand_struct init_sighand = {
 | |
| 	.count		= REFCOUNT_INIT(1),
 | |
	/*
	 * Only the first nested element is spelled out (sa_handler = SIG_DFL);
	 * anything not named falls back to C's default zero initialization.
	 */
| 	.action		= { { { .sa_handler = SIG_DFL, } }, },
 | |
| 	.siglock	= __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
 | |
| 	.signalfd_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
 | |
| };
 | |
| 
 | |
/*
 * init_shadow_call_stack - array of SCS_SIZE / sizeof(long) unsigned longs,
 * built only when CONFIG_SHADOW_CALL_STACK is enabled.
 *
 * Only the last slot is given a value (SCS_END_MAGIC); every other slot is
 * zero by C's default initialization. The consumer of this array is not
 * visible in this file - presumably it backs the initial task's shadow call
 * stack; confirm against the SCS code.
 */
| #ifdef CONFIG_SHADOW_CALL_STACK
 | |
| unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = {
 | |
| 	[(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
 | |
| };
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * init_task - statically initialized task_struct for the initial task.
 | |
|  *
 | |
|  * Set up the first task table, touch at your own risk! Base=0,
 | |
|  * limit=0x1fffff (=2MB)
 | |
|  * NOTE(review): no field below is visibly tied to the Base/limit values
 | |
|  * quoted above; the remark looks historical - confirm before relying on it.
 | |
|  *
 | |
|  * The object is aligned to L1_CACHE_BYTES, has .flags = PF_KTHREAD and
 | |
|  * .policy = SCHED_NORMAL, and is its own parent, real_parent and
 | |
|  * group_leader. .mm is NULL while .active_mm points at init_mm. Many
 | |
|  * members are only initialized when the matching CONFIG_* option is
 | |
|  * enabled; members that are not named get C's default zero
 | |
|  * initialization. The symbol is exported with EXPORT_SYMBOL() right after
 | |
|  * the definition.
 | |
|  */
 | |
| struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
 | |
| #ifdef CONFIG_THREAD_INFO_IN_TASK	/* thread_info lives inside the task in this configuration */
 | |
| 	.thread_info	= INIT_THREAD_INFO(init_task),
 | |
| 	.stack_refcount	= REFCOUNT_INIT(1),
 | |
| #endif
 | |
| 	.__state	= 0,
 | |
| 	.stack		= init_stack,
 | |
| 	.usage		= REFCOUNT_INIT(2),
 | |
| 	.flags		= PF_KTHREAD,
 | |
	/* All three priority fields start at the same value. */
| 	.prio		= MAX_PRIO - 20,
 | |
| 	.static_prio	= MAX_PRIO - 20,
 | |
| 	.normal_prio	= MAX_PRIO - 20,
 | |
| 	.policy		= SCHED_NORMAL,
 | |
	/* cpus_ptr refers to the task's own cpus_mask, set to CPU_MASK_ALL below. */
| 	.cpus_ptr	= &init_task.cpus_mask,
 | |
| 	.user_cpus_ptr	= NULL,
 | |
| 	.cpus_mask	= CPU_MASK_ALL,
 | |
| 	.max_allowed_capacity	= SCHED_CAPACITY_SCALE,
 | |
| 	.nr_cpus_allowed= NR_CPUS,
 | |
| 	.mm		= NULL,
 | |
| 	.active_mm	= &init_mm,
 | |
| 	.faults_disabled_mapping = NULL,
 | |
| 	.restart_block	= {
 | |
| 		.fn = do_no_restart_syscall,
 | |
| 	},
 | |
| 	.se		= {
 | |
| 		.group_node 	= LIST_HEAD_INIT(init_task.se.group_node),
 | |
| 	},
 | |
| 	.rt		= {
 | |
| 		.run_list	= LIST_HEAD_INIT(init_task.rt.run_list),
 | |
| 		.time_slice	= RR_TIMESLICE,
 | |
| 	},
 | |
| 	.tasks		= LIST_HEAD_INIT(init_task.tasks),
 | |
| #ifdef CONFIG_SMP
 | |
| 	.pushable_tasks	= PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO),
 | |
| #endif
 | |
| #ifdef CONFIG_CGROUP_SCHED
 | |
| 	.sched_task_group = &root_task_group,
 | |
| #endif
 | |
| #ifdef CONFIG_SCHED_CLASS_EXT	/* sched_ext per-task state, only with this option */
 | |
| 	.scx		= {
 | |
| 		.dsq_list.node	= LIST_HEAD_INIT(init_task.scx.dsq_list.node),
 | |
| 		.sticky_cpu	= -1,
 | |
| 		.holding_cpu	= -1,
 | |
| 		.runnable_node	= LIST_HEAD_INIT(init_task.scx.runnable_node),
 | |
| 		.runnable_at	= INITIAL_JIFFIES,
 | |
| 		.ddsp_dsq_id	= SCX_DSQ_INVALID,
 | |
| 		.slice		= SCX_SLICE_DFL,
 | |
| 	},
 | |
| #endif
 | |
| 	.ptraced	= LIST_HEAD_INIT(init_task.ptraced),
 | |
| 	.ptrace_entry	= LIST_HEAD_INIT(init_task.ptrace_entry),
 | |
	/* The task is its own real_parent, parent and (below) group_leader. */
| 	.real_parent	= &init_task,
 | |
| 	.parent		= &init_task,
 | |
| 	.children	= LIST_HEAD_INIT(init_task.children),
 | |
| 	.sibling	= LIST_HEAD_INIT(init_task.sibling),
 | |
| 	.group_leader	= &init_task,
 | |
	/* Both credential pointers refer to the same init_cred. */
| 	RCU_POINTER_INITIALIZER(real_cred, &init_cred),
 | |
| 	RCU_POINTER_INITIALIZER(cred, &init_cred),
 | |
| 	.comm		= INIT_TASK_COMM,
 | |
| 	.thread		= INIT_THREAD,
 | |
| 	.fs		= &init_fs,
 | |
| 	.files		= &init_files,
 | |
| #ifdef CONFIG_IO_URING
 | |
| 	.io_uring	= NULL,
 | |
| #endif
 | |
	/* Signal state comes from the static objects defined earlier in this file. */
| 	.signal		= &init_signals,
 | |
| 	.sighand	= &init_sighand,
 | |
| 	.nsproxy	= &init_nsproxy,
 | |
| 	.pending	= {
 | |
| 		.list = LIST_HEAD_INIT(init_task.pending.list),
 | |
| 		.signal = {{0}}
 | |
| 	},
 | |
| 	.blocked	= {{0}},
 | |
| 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
 | |
| 	.journal_info	= NULL,
 | |
	/*
	 * No trailing comma here: the macro (defined outside this file) is
	 * expected to expand to complete initializer entries or to nothing.
	 */
| 	INIT_CPU_TIMERS(init_task)
 | |
| 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
 | |
| 	.timer_slack_ns = 50000, /* 50 usec default slack */
 | |
| 	.thread_pid	= &init_struct_pid,
 | |
	/*
	 * Set up from init_signals.thread_head, which is itself set up from
	 * this node in the init_signals initializer.
	 */
| 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 | |
| #ifdef CONFIG_AUDIT
 | |
| 	.loginuid	= INVALID_UID,
 | |
| 	.sessionid	= AUDIT_SID_UNSET,
 | |
| #endif
 | |
| #ifdef CONFIG_PERF_EVENTS
 | |
| 	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
 | |
| 	.perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list),
 | |
| #endif
 | |
| #ifdef CONFIG_PREEMPT_RCU
 | |
| 	.rcu_read_lock_nesting = 0,
 | |
| 	.rcu_read_unlock_special.s = 0,
 | |
| 	.rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry),
 | |
| 	.rcu_blocked_node = NULL,
 | |
| #endif
 | |
| #ifdef CONFIG_TASKS_RCU
 | |
| 	.rcu_tasks_holdout = false,
 | |
| 	.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
 | |
| 	.rcu_tasks_idle_cpu = -1,
 | |
| 	.rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
 | |
| #endif
 | |
| #ifdef CONFIG_TASKS_TRACE_RCU
 | |
| 	.trc_reader_nesting = 0,
 | |
| 	.trc_reader_special.s = 0,
 | |
| 	.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
 | |
| 	.trc_blkd_node = LIST_HEAD_INIT(init_task.trc_blkd_node),
 | |
| #endif
 | |
| #ifdef CONFIG_CPUSETS
 | |
	/* The seqcount initializer is handed &init_task.alloc_lock as its lock argument. */
| 	.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
 | |
| 						 &init_task.alloc_lock),
 | |
| #endif
 | |
| #ifdef CONFIG_RT_MUTEXES
 | |
| 	.pi_waiters	= RB_ROOT_CACHED,
 | |
| 	.pi_top_task	= NULL,
 | |
| #endif
 | |
	/* Same comma-less macro convention as INIT_CPU_TIMERS() above. */
| 	INIT_PREV_CPUTIME(init_task)
 | |
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 | |
	/*
	 * NOTE(review): the macro argument is spelled init_task.vtime_seqcount
	 * while the member being initialized is .vtime.seqcount - confirm that
	 * SEQCNT_ZERO() only uses its argument as a name, not as an lvalue.
	 */
| 	.vtime.seqcount	= SEQCNT_ZERO(init_task.vtime_seqcount),
 | |
| 	.vtime.starttime = 0,
 | |
| 	.vtime.state	= VTIME_SYS,
 | |
| #endif
 | |
| #ifdef CONFIG_NUMA_BALANCING
 | |
| 	.numa_preferred_nid = NUMA_NO_NODE,
 | |
| 	.numa_group	= NULL,
 | |
| 	.numa_faults	= NULL,
 | |
| #endif
 | |
| #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 | |
| 	.kasan_depth	= 1,
 | |
| #endif
 | |
| #ifdef CONFIG_KCSAN
 | |
| 	.kcsan_ctx = {
 | |
| 		.scoped_accesses	= {LIST_POISON1, NULL},
 | |
| 	},
 | |
| #endif
 | |
| #ifdef CONFIG_TRACE_IRQFLAGS
 | |
| 	.softirqs_enabled = 1,
 | |
| #endif
 | |
| #ifdef CONFIG_LOCKDEP
 | |
| 	.lockdep_depth = 0, /* no locks held yet */
 | |
| 	.curr_chain_key = INITIAL_CHAIN_KEY,
 | |
| 	.lockdep_recursion = 0,
 | |
| #endif
 | |
| #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 | |
| 	.ret_stack		= NULL,
 | |
| 	.tracing_graph_pause	= ATOMIC_INIT(0),
 | |
| #endif
 | |
| #if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
 | |
| 	.trace_recursion = 0,
 | |
| #endif
 | |
| #ifdef CONFIG_LIVEPATCH
 | |
| 	.patch_state	= KLP_TRANSITION_IDLE,
 | |
| #endif
 | |
| #ifdef CONFIG_SECURITY
 | |
| 	.security	= NULL,
 | |
| #endif
 | |
| #ifdef CONFIG_SECCOMP_FILTER
 | |
| 	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
 | |
| #endif
 | |
| };
 | |
| EXPORT_SYMBOL(init_task);
 | |
| 
 | |
| /*
 | |
|  * Initial thread structure. Alignment of this is handled by a special
 | |
|  * linker map entry.
 | |
|  *
 | |
|  * Only defined when CONFIG_THREAD_INFO_IN_TASK is not set. With that option
 | |
|  * enabled, the same INIT_THREAD_INFO(init_task) initializer is used for the
 | |
|  * init_task.thread_info member instead (see the init_task definition).
 | |
|  */
 | |
| #ifndef CONFIG_THREAD_INFO_IN_TASK
 | |
| struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task);
 | |
| #endif
 |