3.16.60-rc1 review patch. If anyone has any objections, please let me know.
------------------
From: "Eric W. Biederman" ebiederm@xmission.com
commit 51d6f2635b39709ee5e62479be23d423b760292c upstream.
Today the last process to update a semaphore is remembered and reported in the pid namespace of that process. If there are processes in any other pid namespace querying that process id with GETPID the result will be unusable nonsense as it does not make any sense in your own pid namespace.
Due to ipc_update_pid I don't think you will be able to get System V ipc semaphores into a troublesome cache line ping-pong. Using struct pids from separate process are not a problem because they do not share a cache line. Using struct pid from different threads of the same process are unlikely to be a problem as the reference count update can be avoided.
Further linux futexes are a much better tool for the job of mutual exclusion between processes than System V semaphores. So I expect programs that are performance limited by their interprocess mutual exclusion primitive will be using futexes.
So while it is possible that enhancing the storage of the last rocess of a System V semaphore from an integer to a struct pid will cause a performance regression because of the effect of frequently updating the pid reference count. I don't expect that to happen in practice.
This change updates semctl(..., GETPID, ...) to return the process id of the last process to update a semphore inthe pid namespace of the calling process.
Fixes: b488893a390e ("pid namespaces: changes to show virtual ids to user") Signed-off-by: "Eric W. Biederman" ebiederm@xmission.com [bwh: Backported to 3.16: - sem_queue::pid was also used to store an error temporarily; add a new wake_error field for this purpose - Adjust context] Signed-off-by: Ben Hutchings ben@decadent.org.uk --- ipc/sem.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-)
--- a/ipc/sem.c +++ b/ipc/sem.c @@ -99,7 +99,7 @@ struct sem { * - semctl, via SETVAL and SETALL. * - at task exit when performing undo adjustments (see exit_sem). */ - int sempid; + struct pid *sempid; spinlock_t lock; /* spinlock for fine-grained semtimedop */ struct list_head pending_alter; /* pending single-sop operations */ /* that alter the semaphore */ @@ -113,7 +113,8 @@ struct sem_queue { struct list_head list; /* queue of pending operations */ struct task_struct *sleeper; /* this process */ struct sem_undo *undo; /* undo structure */ - int pid; /* process id of requesting process */ + struct pid *pid; /* process id of requesting process */ + int wake_error; int status; /* completion status of operation */ struct sembuf *sops; /* array of pending operations */ struct sembuf *blocking; /* the operation that blocked */ @@ -644,7 +645,8 @@ SYSCALL_DEFINE3(semget, key_t, key, int, */ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) { - int result, sem_op, nsops, pid; + int result, sem_op, nsops; + struct pid *pid; struct sembuf *sop; struct sem *curr; struct sembuf *sops; @@ -682,7 +684,7 @@ static int perform_atomic_semop(struct s sop--; pid = q->pid; while (sop >= sops) { - sma->sem_base[sop->sem_num].sempid = pid; + ipc_update_pid(&sma->sem_base[sop->sem_num].sempid, pid); sop--; }
@@ -730,7 +732,7 @@ static void wake_up_sem_queue_prepare(st preempt_disable(); } q->status = IN_WAKEUP; - q->pid = error; + q->wake_error = error;
list_add_tail(&q->list, pt); } @@ -754,7 +756,7 @@ static void wake_up_sem_queue_do(struct wake_up_process(q->sleeper); /* q can disappear immediately after writing q->status. */ smp_wmb(); - q->status = q->pid; + q->status = q->wake_error; } if (did_something) preempt_enable(); @@ -812,7 +814,7 @@ static int check_restart(struct sem_arra * be called with semnum = -1, as well as with the number of each modified * semaphore. * The tasks that must be woken up are added to @pt. The return code - * is stored in q->pid. + * is stored in q->wake_error. * The function returns 1 if at least one operation was completed successfully. */ static int wake_const_ops(struct sem_array *sma, int semnum, @@ -912,7 +914,7 @@ static int do_smart_wakeup_zero(struct s * be called with semnum = -1, as well as with the number of each modified * semaphore. * The tasks that must be woken up are added to @pt. The return code - * is stored in q->pid. + * is stored in q->wake_error. * The function internally checks if const operations can now succeed. * * The function return 1 if at least one semop was completed successfully. @@ -1156,6 +1158,7 @@ static void freeary(struct ipc_namespace unlink_queue(sma, q); wake_up_sem_queue_prepare(&tasks, q, -EIDRM); } + ipc_update_pid(&sem->sempid, NULL); }
/* Remove the semaphore set from the IDR */ @@ -1357,7 +1360,7 @@ static int semctl_setval(struct ipc_name un->semadj[semnum] = 0;
curr->semval = val; - curr->sempid = task_tgid_vnr(current); + ipc_update_pid(&curr->sempid, task_tgid(current)); sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ do_smart_update(sma, NULL, 0, 0, &tasks); @@ -1478,7 +1481,7 @@ static int semctl_main(struct ipc_namesp
for (i = 0; i < nsems; i++) { sma->sem_base[i].semval = sem_io[i]; - sma->sem_base[i].sempid = task_tgid_vnr(current); + ipc_update_pid(&sma->sem_base[i].sempid, task_tgid(current)); }
ipc_assert_locked_object(&sma->sem_perm); @@ -1510,7 +1513,7 @@ static int semctl_main(struct ipc_namesp err = curr->semval; goto out_unlock; case GETPID: - err = curr->sempid; + err = pid_vnr(curr->sempid); goto out_unlock; case GETNCNT: err = count_semcnt(sma, semnum, 0); @@ -1933,7 +1936,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, queue.sops = sops; queue.nsops = nsops; queue.undo = un; - queue.pid = task_tgid_vnr(current); + queue.pid = task_tgid(current); queue.alter = alter;
error = perform_atomic_semop(sma, &queue); @@ -2193,7 +2196,7 @@ void exit_sem(struct task_struct *tsk) semaphore->semval = 0; if (semaphore->semval > SEMVMX) semaphore->semval = SEMVMX; - semaphore->sempid = task_tgid_vnr(current); + ipc_update_pid(&semaphore->sempid, task_tgid(current)); } } /* maybe some queued-up processes were waiting for this */