These changes add support for the new system call and set the cpus_preferred mask as requested by the application. This patch only records the mask; it does not yet make cpus_preferred take effect.
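For illustration, a minimal sketch of how a task might express a soft-affinity preference through the new interface (assumptions: the x86-64 syscall number 333 and the SCHED_SOFT_AFFINITY value added by this patch; there is no glibc wrapper, so syscall(2) is used directly):

  /*
   * Illustrative sketch only: invoke the new call on x86-64 with the
   * syscall number and flag value introduced by this patch.
   */
  #define _GNU_SOURCE
  #include <sched.h>
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>

  #ifndef __NR_sched_setaffinity_flags
  #define __NR_sched_setaffinity_flags 333  /* from syscall_64.tbl in this patch */
  #endif
  #ifndef SCHED_SOFT_AFFINITY
  #define SCHED_SOFT_AFFINITY 1             /* from uapi/linux/sched.h in this patch */
  #endif

  int main(void)
  {
          cpu_set_t mask;

          CPU_ZERO(&mask);
          CPU_SET(0, &mask);
          CPU_SET(1, &mask);

          /* Record a preference for CPUs 0-1 for the calling task (pid 0 = self). */
          if (syscall(__NR_sched_setaffinity_flags, 0, sizeof(mask),
                      &mask, SCHED_SOFT_AFFINITY)) {
                  perror("sched_setaffinity_flags");
                  return 1;
          }
          return 0;
  }

Passing SCHED_HARD_AFFINITY instead behaves like the existing sched_setaffinity(2); with this patch, SCHED_SOFT_AFFINITY only records the preference in cpus_preferred and has no scheduling effect yet.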
Signed-off-by: Rohit Jain <rohit.k.jain@oracle.com>
---
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 include/linux/init_task.h              |   1 +
 include/linux/sched.h                  |   4 +-
 include/linux/syscalls.h               |   3 +
 include/uapi/asm-generic/unistd.h      |   4 +-
 include/uapi/linux/sched.h             |   3 +
 kernel/compat.c                        |   2 +-
 kernel/sched/core.c                    | 167 ++++++++++++++++++++++++++++-----
 kernel/time/tick-sched.c               |   1 +
 9 files changed, 159 insertions(+), 27 deletions(-)
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5aef183..bd5f346 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -339,6 +339,7 @@
 330	common	pkey_alloc		sys_pkey_alloc
 331	common	pkey_free		sys_pkey_free
 332	common	statx			sys_statx
+333	common	sched_setaffinity_flags	sys_sched_setaffinity_flags
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 0e84971..bb8a8e1 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -235,6 +235,7 @@ extern struct cred init_cred;
 	.normal_prio	= MAX_PRIO-20,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
+	.cpus_preferred	= CPU_MASK_ALL,					\
 	.nr_cpus_allowed= NR_CPUS,					\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 534542d..7e08ae8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -582,6 +582,7 @@ struct task_struct {
 	unsigned int			policy;
 	int				nr_cpus_allowed;
 	cpumask_t			cpus_allowed;
+	cpumask_t			cpus_preferred;
 
 #ifdef CONFIG_PREEMPT_RCU
 	int				rcu_read_lock_nesting;
@@ -1647,7 +1648,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 # define vcpu_is_preempted(cpu)	false
 #endif
 
-extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask,
+			      int flags);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
 #ifndef TASK_SIZE_OF
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d4dfac8..83d04da 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -326,6 +326,9 @@ asmlinkage long sys_sched_get_priority_max(int policy);
 asmlinkage long sys_sched_get_priority_min(int policy);
 asmlinkage long sys_sched_rr_get_interval(pid_t pid,
 				struct timespec __user *interval);
+asmlinkage long sys_sched_setaffinity_flags(pid_t pid, unsigned int len,
+				unsigned long __user *user_mask_ptr,
+				int flags);
 asmlinkage long sys_setpriority(int which, int who, int niceval);
 asmlinkage long sys_getpriority(int which, int who);
 
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 061185a..5e88941 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -376,6 +376,8 @@ __SYSCALL(__NR_sched_getparam, sys_sched_getparam)
 #define __NR_sched_setaffinity 122
 __SC_COMP(__NR_sched_setaffinity, sys_sched_setaffinity, \
 	  compat_sys_sched_setaffinity)
+#define __NR_sched_setaffinity_flags 293
+__SYSCALL(__NR_sched_setaffinity_flags, sys_sched_setaffinity_flags)
 #define __NR_sched_getaffinity 123
 __SC_COMP(__NR_sched_getaffinity, sys_sched_getaffinity, \
 	  compat_sys_sched_getaffinity)
@@ -733,7 +735,7 @@ __SYSCALL(__NR_pkey_free, sys_pkey_free)
 __SYSCALL(__NR_statx, sys_statx)
 
 #undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 293
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index e2a6c7b..81c17f5 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -49,4 +49,7 @@
 #define SCHED_FLAG_RESET_ON_FORK	0x01
 #define SCHED_FLAG_RECLAIM		0x02
 
+#define SCHED_HARD_AFFINITY	0
+#define SCHED_SOFT_AFFINITY	1
+
 #endif /* _UAPI_LINUX_SCHED_H */
diff --git a/kernel/compat.c b/kernel/compat.c
index 6f0a0e7..0ec60ea 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -356,7 +356,7 @@ COMPAT_SYSCALL_DEFINE3(sched_setaffinity, compat_pid_t, pid,
 	if (retval)
 		goto out;
 
-	retval = sched_setaffinity(pid, new_mask);
+	retval = sched_setaffinity(pid, new_mask, SCHED_HARD_AFFINITY);
 out:
 	free_cpumask_var(new_mask);
 	return retval;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec80d2f..2e8d392 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1031,6 +1031,11 @@ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_ma
 	p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
+void set_cpus_preferred_common(struct task_struct *p, const struct cpumask *new_mask)
+{
+	cpumask_copy(&p->cpus_preferred, new_mask);
+}
+
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
 	struct rq *rq = task_rq(p);
@@ -1053,6 +1058,36 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 		put_prev_task(rq, p);
 
 	p->sched_class->set_cpus_allowed(p, new_mask);
+	set_cpus_preferred_common(p, new_mask);
+
+	if (queued)
+		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
+	if (running)
+		set_curr_task(rq, p);
+}
+
+void do_set_cpus_preferred(struct task_struct *p, const struct cpumask *new_mask)
+{
+	struct rq *rq = task_rq(p);
+	bool queued, running;
+
+	lockdep_assert_held(&p->pi_lock);
+
+	queued = task_on_rq_queued(p);
+	running = task_current(rq, p);
+
+	if (queued) {
+		/*
+		 * Because __kthread_bind() calls this on blocked tasks without
+		 * holding rq->lock.
+		 */
+		lockdep_assert_held(&rq->lock);
+		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
+	}
+	if (running)
+		put_prev_task(rq, p);
+
+	set_cpus_preferred_common(p, new_mask);
 
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -1142,6 +1177,63 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	return ret;
 }
 
+static int
+__set_cpus_preferred_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
+	unsigned int dest_cpu;
+	struct rq_flags rf;
+	struct rq *rq;
+	int ret = 0;
+
+	rq = task_rq_lock(p, &rf);
+	update_rq_clock(rq);
+
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
+	if (cpumask_equal(&p->cpus_preferred, new_mask))
+		goto out;
+
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	do_set_cpus_preferred(p, new_mask);
+
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-CPU threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
+	/* Can the task run on the task's current CPU? If so, we're done */
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
+		goto out;
+
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
+	if (task_on_rq_queued(p)) {
+		/*
+		 * OK, since we're going to drop the lock immediately
+		 * afterwards anyway.
+		 */
+		rq = move_queued_task(rq, &rf, p, dest_cpu);
+	}
+out:
+	task_rq_unlock(rq, p, &rf);
+
+	return ret;
+}
+
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	return __set_cpus_allowed_ptr(p, new_mask, false);
@@ -4620,7 +4712,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 	return retval;
 }
 
-long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask, int flags)
 {
 	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
@@ -4686,19 +4778,23 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	}
 #endif
 again:
-	retval = __set_cpus_allowed_ptr(p, new_mask, true);
-
-	if (!retval) {
-		cpuset_cpus_allowed(p, cpus_allowed);
-		if (!cpumask_subset(new_mask, cpus_allowed)) {
-			/*
-			 * We must have raced with a concurrent cpuset
-			 * update. Just reset the cpus_allowed to the
-			 * cpuset's cpus_allowed
-			 */
-			cpumask_copy(new_mask, cpus_allowed);
-			goto again;
+	if (flags == SCHED_HARD_AFFINITY) {
+		retval = __set_cpus_allowed_ptr(p, new_mask, true);
+
+		if (!retval) {
+			cpuset_cpus_allowed(p, cpus_allowed);
+			if (!cpumask_subset(new_mask, cpus_allowed)) {
+				/*
+				 * We must have raced with a concurrent cpuset
+				 * update. Just reset the cpus_allowed to the
+				 * cpuset's cpus_allowed
+				 */
+				cpumask_copy(new_mask, cpus_allowed);
+				goto again;
+			}
 		}
+	} else if (flags == SCHED_SOFT_AFFINITY) {
+		retval = __set_cpus_preferred_ptr(p, new_mask);
 	}
 out_free_new_mask:
 	free_cpumask_var(new_mask);
@@ -4720,30 +4816,53 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
 	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
 }
 
-/**
- * sys_sched_setaffinity - set the CPU affinity of a process
- * @pid: pid of the process
- * @len: length in bytes of the bitmask pointed to by user_mask_ptr
- * @user_mask_ptr: user-space pointer to the new CPU mask
- *
- * Return: 0 on success. An error code otherwise.
- */
-SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
-		unsigned long __user *, user_mask_ptr)
+static bool
+valid_affinity_flags(int flags)
+{
+	return flags == SCHED_HARD_AFFINITY || flags == SCHED_SOFT_AFFINITY;
+}
+
+static int
+sched_setaffinity_common(pid_t pid, unsigned int len,
+			 unsigned long __user *user_mask_ptr, int flags)
 {
 	cpumask_var_t new_mask;
 	int retval;
 
+	if (!valid_affinity_flags(flags))
+		return -EINVAL;
+
 	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
 		return -ENOMEM;
 
 	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
 	if (retval == 0)
-		retval = sched_setaffinity(pid, new_mask);
+		retval = sched_setaffinity(pid, new_mask, flags);
 	free_cpumask_var(new_mask);
 	return retval;
 }
 
+SYSCALL_DEFINE4(sched_setaffinity_flags, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr, int, flags)
+{
+	return sched_setaffinity_common(pid, len, user_mask_ptr, flags);
+}
+
+/**
+ * sys_sched_setaffinity - set the CPU affinity of a process
+ * @pid: pid of the process
+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr
+ * @user_mask_ptr: user-space pointer to the new CPU mask
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+		unsigned long __user *, user_mask_ptr)
+{
+	return sched_setaffinity_common(pid, len, user_mask_ptr,
+					SCHED_HARD_AFFINITY);
+}
+
 long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index eb0e975..ede1add 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,6 +19,7 @@
 #include <linux/percpu.h>
 #include <linux/nmi.h>
 #include <linux/profile.h>
+#include <linux/vmstat.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/stat.h>
-- 
2.7.4