From: Eugeniy Paltsev <Eugeniy.Paltsev(a)synopsys.com>
As of today if userspace process tries to access a kernel virtual addres
(0x7000_0000 to 0x7ffff_ffff) such that a legit kernel mapping already
exists, that process hangs instead of being killed with SIGSEGV
Fix that by ensuring that do_page_fault() handles kenrel vaddr only if
in kernel mode.
And given this, we can also simplify the code a bit. Now a vmalloc fault
implies kernel mode so its failure (for some reason) can reuse the
@no_context label and we can remove @bad_area_nosemaphore.
Reproduce user test for original problem:
------------------------>8-----------------
#include <stdlib.h>
#include <stdint.h>
int main(int argc, char *argv[])
{
volatile uint32_t temp;
temp = *(uint32_t *)(0x70000000);
}
------------------------>8-----------------
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev(a)synopsys.com>
Signed-off-by: Vineet Gupta <vgupta(a)synopsys.com>
---
arch/arc/mm/fault.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 8df1638259f3..6836095251ed 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -66,7 +66,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
struct vm_area_struct *vma = NULL;
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- int si_code = 0;
+ int si_code = SEGV_MAPERR;
int ret;
vm_fault_t fault;
int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
@@ -81,16 +81,14 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (address >= VMALLOC_START) {
+ if (address >= VMALLOC_START && !user_mode(regs)) {
ret = handle_kernel_vaddr_fault(address);
if (unlikely(ret))
- goto bad_area_nosemaphore;
+ goto no_context;
else
return;
}
- si_code = SEGV_MAPERR;
-
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
@@ -198,7 +196,6 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
bad_area:
up_read(&mm->mmap_sem);
-bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
tsk->thread.fault_address = address;
--
2.7.4
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Subject: drivers/virt/fsl_hypervisor.c: prevent integer overflow in ioctl
The "param.count" value is a u64 thatcomes from the user. The code later
in the function assumes that param.count is at least one and if it's not
then it leads to an Oops when we dereference the ZERO_SIZE_PTR.
Also the addition can have an integer overflow which would lead us to
allocate a smaller "pages" array than required. I can't immediately tell
what the possible run times implications are, but it's safest to prevent
the overflow.
Link: http://lkml.kernel.org/r/20181218082129.GE32567@kadam
Fixes: 6db7199407ca ("drivers/virt: introduce Freescale hypervisor management driver")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Timur Tabi <timur(a)freescale.com>
Cc: Mihai Caraman <mihai.caraman(a)freescale.com>
Cc: Kumar Gala <galak(a)kernel.crashing.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/virt/fsl_hypervisor.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/virt/fsl_hypervisor.c~fsl_hypervisor-prevent-integer-overflow-in-ioctl
+++ a/drivers/virt/fsl_hypervisor.c
@@ -215,6 +215,9 @@ static long ioctl_memcpy(struct fsl_hv_i
* hypervisor.
*/
lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
+ if (param.count == 0 ||
+ param.count > U64_MAX - lb_offset - PAGE_SIZE + 1)
+ return -EINVAL;
num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* Allocate the buffers we need */
_
From: Andrea Arcangeli <aarcange(a)redhat.com>
Subject: userfaultfd: use RCU to free the task struct when fork fails
The task structure is freed while get_mem_cgroup_from_mm() holds
rcu_read_lock() and dereferences mm->owner.
get_mem_cgroup_from_mm() failing fork()
---- ---
task = mm->owner
mm->owner = NULL;
free(task)
if (task) *task; /* use after free */
The fix consists in freeing the task with RCU also in the fork failure
case, exactly like it always happens for the regular exit(2) path. That
is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm() (left
side above) effective to avoid a use after free when dereferencing the
task structure.
An alternate possible fix would be to defer the delivery of the
userfaultfd contexts to the monitor until after fork() is guaranteed to
succeed. Such a change would require more changes because it would create
a strict ordering dependency where the uffd methods would need to be
called beyond the last potentially failing branch in order to be safe.
This solution as opposed only adds the dependency to common code to set
mm->owner to NULL and to free the task struct that was pointed by
mm->owner with RCU, if fork ends up failing. The userfaultfd methods can
still be called anywhere during the fork runtime and the monitor will keep
discarding orphaned "mm" coming from failed forks in userland.
This race condition couldn't trigger if CONFIG_MEMCG was set =n at build
time.
[aarcange(a)redhat.com: improve changelog, reduce #ifdefs per Michal]
Link: http://lkml.kernel.org/r/20190429035752.4508-1-aarcange@redhat.com
Link: http://lkml.kernel.org/r/20190325225636.11635-2-aarcange@redhat.com
Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event")
Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com>
Tested-by: zhong jiang <zhongjiang(a)huawei.com>
Reported-by: syzbot+cbb52e396df3e565ab02(a)syzkaller.appspotmail.com
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov(a)linux.intel.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: zhong jiang <zhongjiang(a)huawei.com>
Cc: syzbot+cbb52e396df3e565ab02(a)syzkaller.appspotmail.com
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/fork.c | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
--- a/kernel/fork.c~userfaultfd-use-rcu-to-free-the-task-struct-when-fork-fails
+++ a/kernel/fork.c
@@ -955,6 +955,15 @@ static void mm_init_aio(struct mm_struct
#endif
}
+static __always_inline void mm_clear_owner(struct mm_struct *mm,
+ struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+ if (mm->owner == p)
+ WRITE_ONCE(mm->owner, NULL);
+#endif
+}
+
static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
#ifdef CONFIG_MEMCG
@@ -1343,6 +1352,7 @@ static struct mm_struct *dup_mm(struct t
free_pt:
/* don't put binfmt in mmput, we haven't got module yet */
mm->binfmt = NULL;
+ mm_init_owner(mm, NULL);
mmput(mm);
fail_nomem:
@@ -1726,6 +1736,21 @@ static int pidfd_create(struct pid *pid)
return fd;
}
+static void __delayed_free_task(struct rcu_head *rhp)
+{
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ free_task(tsk);
+}
+
+static __always_inline void delayed_free_task(struct task_struct *tsk)
+{
+ if (IS_ENABLED(CONFIG_MEMCG))
+ call_rcu(&tsk->rcu, __delayed_free_task);
+ else
+ free_task(tsk);
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -2233,8 +2258,10 @@ bad_fork_cleanup_io:
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_mm:
- if (p->mm)
+ if (p->mm) {
+ mm_clear_owner(p->mm, p);
mmput(p->mm);
+ }
bad_fork_cleanup_signal:
if (!(clone_flags & CLONE_THREAD))
free_signal_struct(p->signal);
@@ -2265,7 +2292,7 @@ bad_fork_cleanup_count:
bad_fork_free:
p->state = TASK_DEAD;
put_task_stack(p);
- free_task(p);
+ delayed_free_task(p);
fork_out:
spin_lock_irq(¤t->sighand->siglock);
hlist_del_init(&delayed.node);
_