From: Lyude <cpaul(a)redhat.com>
As observed with the latest ThinkPad docks, we unfortunately can't rely
on docks keeping us updated with hotplug events that happened while we
were suspended. On top of that, even if the number of connectors remains
the same between suspend and resume it's still not safe to assume that
there were no hotplugs, since a different monitor might have been
plugged into a port another monitor previously occupied. As such, we
need to go through all of the MST ports and check whether or not their
EDIDs have changed.
In addition to that, we also now return -EINVAL from
drm_dp_mst_topology_mgr_resume to indicate to callers that they need to
reset the MST connection, and that they can't rely on any other method
of reprobing.
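For illustration, a driver's resume path could consume the new return
value roughly as follows (a sketch only; the fallback strategy shown is
an assumption, not something this patch mandates):

  /* hypothetical driver resume handler */
  ret = drm_dp_mst_topology_mgr_resume(mgr);
  if (ret == -EINVAL) {
          /* topology can no longer be trusted: force a full
           * disconnect/reconnect of the MST link */
          drm_dp_mst_topology_mgr_set_mst(mgr, false);
          drm_dp_mst_topology_mgr_set_mst(mgr, true);
  } else if (ret < 0) {
          /* classic -1 failure: fall back to a full reprobe */
          drm_kms_helper_hotplug_event(dev);
  }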
Cc: stable(a)vger.kernel.org
Signed-off-by: Lyude <cpaul(a)redhat.com>
Signed-off-by: Juston Li <juston.li(a)intel.com>
---
drivers/gpu/drm/drm_dp_mst_topology.c | 94 ++++++++++++++++++++++++++-
1 file changed, 93 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 5ff1d79b86c4..88abebe52021 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -29,6 +29,7 @@
#include <linux/i2c.h>
#include <drm/drm_dp_mst_helper.h>
#include <drm/drmP.h>
+#include <drm/drm_edid.h>
#include <drm/drm_fixed.h>
#include <drm/drm_atomic.h>
@@ -2201,6 +2202,64 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr)
}
EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend);
+static bool drm_dp_mst_edids_changed(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_port *port)
+{
+ struct drm_device *dev;
+ struct drm_connector *connector;
+ struct drm_dp_mst_port *dport;
+ struct drm_dp_mst_branch *mstb;
+ struct edid *current_edid, *cached_edid;
+ bool ret = false;
+
+ port = drm_dp_get_validated_port_ref(mgr, port);
+ if (!port)
+ return false;
+
+ mstb = drm_dp_get_validated_mstb_ref(mgr, port->mstb);
+ if (mstb) {
+ list_for_each_entry(dport, &port->mstb->ports, next) {
+ ret = drm_dp_mst_edids_changed(mgr, dport);
+ if (ret)
+ break;
+ }
+
+ drm_dp_put_mst_branch_device(mstb);
+ if (ret)
+ goto out;
+ }
+
+ connector = port->connector;
+ if (!connector || !port->aux.ddc.algo)
+ goto out;
+
+ dev = connector->dev;
+ mutex_lock(&dev->mode_config.mutex);
+
+ current_edid = drm_get_edid(connector, &port->aux.ddc);
+ if (connector->edid_blob_ptr) {
+ cached_edid = (void *)connector->edid_blob_ptr->data;
+ } else {
+ /* no cached EDID to compare against; don't leak the
+ * mode_config lock, the fresh EDID or the port ref */
+ mutex_unlock(&dev->mode_config.mutex);
+ kfree(current_edid);
+ goto out;
+ }
+
+ if ((current_edid && cached_edid && memcmp(current_edid, cached_edid,
+ sizeof(struct edid)) != 0) ||
+ (!current_edid && cached_edid) || (current_edid && !cached_edid)) {
+ ret = true;
+ DRM_DEBUG_KMS("EDID on %s changed, reprobing connectors\n",
+ connector->name);
+ }
+
+ mutex_unlock(&dev->mode_config.mutex);
+
+ kfree(current_edid);
+
+out:
+ drm_dp_put_port(port);
+
+ return ret;
+}
+
/**
* drm_dp_mst_topology_mgr_resume() - resume the MST manager
* @mgr: manager to resume
@@ -2210,9 +2269,15 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend);
*
* if the device fails this returns -1, and the driver should do
* a full MST reprobe, in case we were undocked.
+ *
+ * if the device can no longer be trusted, this returns -EINVAL
+ * and the driver should unconditionally disconnect and reconnect
+ * the dock.
*/
int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
{
+ struct drm_dp_mst_branch *mstb;
+ struct drm_dp_mst_port *port;
int ret = 0;
mutex_lock(&mgr->lock);
@@ -2246,8 +2311,35 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
drm_dp_check_mstb_guid(mgr->mst_primary, guid);
ret = 0;
- } else
+
+ /*
+ * Some hubs also forget to notify us of hotplugs that happened
+ * while we were in suspend, so we need to verify that the edid
+ * hasn't changed for any of the connectors. If it has,
+ * we unfortunately can't rely on the dock updating us with
+ * hotplug events, so indicate we need a full reconnect.
+ */
+
+ /* MST's I2C helpers can't be used while holding this lock */
+ mutex_unlock(&mgr->lock);
+
+ mstb = drm_dp_get_validated_mstb_ref(mgr, mgr->mst_primary);
+ if (mstb) {
+ list_for_each_entry(port, &mstb->ports, next) {
+ if (drm_dp_mst_edids_changed(mgr, port)) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+ drm_dp_put_mst_branch_device(mstb);
+ }
+ } else {
ret = -1;
+ mutex_unlock(&mgr->lock);
+ }
+
+ return ret;
out_unlock:
mutex_unlock(&mgr->lock);
--
2.17.2
coprocessor_flush_all may be called from a context of a thread that is
different from the thread being flushed. In that case contents of the
cpenable special register may not match ti->cpenable of the target
thread, resulting in unhandled coprocessor exception in the kernel
context.
Set the cpenable special register to the ti->cpenable of the target thread
for the duration of the flush and restore it afterwards.
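For reference, RSR_CPENABLE and WSR_CPENABLE read and write the xtensa
CPENABLE special register; they are defined in the arch headers along
these lines (shown as a sketch for readers unfamiliar with xtensa):

  #define RSR_CPENABLE(x) \
          __asm__ __volatile__("rsr %0, cpenable" : "=a" (x))
  #define WSR_CPENABLE(x) \
          __asm__ __volatile__("wsr %0, cpenable; rsync" : : "a" (x))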
This fixes the following crash caused by coprocessor register inspection
in native gdb:
(gdb) p/x $w0
Illegal instruction in kernel: sig: 9 [#1] PREEMPT
Call Trace:
___might_sleep+0x184/0x1a4
__might_sleep+0x41/0xac
exit_signals+0x14/0x218
do_exit+0xc9/0x8b8
die+0x99/0xa0
do_illegal_instruction+0x18/0x6c
common_exception+0x77/0x77
coprocessor_flush+0x16/0x3c
arch_ptrace+0x46c/0x674
sys_ptrace+0x2ce/0x3b4
system_call+0x54/0x80
common_exception+0x77/0x77
note: gdb[100] exited with preempt_count 1
Killed
Cc: stable(a)vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
---
arch/xtensa/kernel/process.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 9363629eb7cc..3fa0c440f664 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti)
void coprocessor_flush_all(struct thread_info *ti)
{
- unsigned long cpenable;
+ unsigned long cpenable, old_cpenable;
int i;
preempt_disable();
+ RSR_CPENABLE(old_cpenable);
cpenable = ti->cpenable;
+ WSR_CPENABLE(cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
cpenable >>= 1;
}
+ WSR_CPENABLE(old_cpenable);
preempt_enable();
}
--
2.11.0
Hi Greg,
This was not marked for stable, but it seems it should be in the stable
trees, together with another commit which fixes it.
Please apply to your queue of 4.14-stable.
--
Regards
Sudip
> From: Sasha Levin <sashal(a)kernel.org>
> Sent: Monday, November 26, 2018 1:53 AM
> Hi,
>
> [This is an automated email]
>
> This commit has been processed because it contains a -stable tag.
> The stable tag indicates that it's relevant for the following trees: all
>
> The bot has tested the following trees: v4.19.4, v4.14.83, v4.9.140, v4.4.164,
> v3.18.126.
>
> v4.19.4: Build OK!
> v4.14.83: Build OK!
> v4.9.140: Build OK!
> v4.4.164: Failed to apply! Possible dependencies:
> v3.18.126: Failed to apply! Possible dependencies:
> How should we proceed with this patch?
>
> Sasha
Hi Sasha,
Please see the attached patch, which is a rebase for both v4.4.164 and v3.18.126.
Thanks,
--Dexuan
Jerry Zuo pointed out a rather obscure hotplugging issue that it seems I
accidentally introduced into DRM two years ago.
Pretend we have a topology like this:
|- DP-1: mst_primary
|- DP-4: active display
|- DP-5: disconnected
|- DP-6: active hub
|- DP-7: active display
|- DP-8: disconnected
|- DP-9: disconnected
If we unplug DP-6, the topology starting at DP-7 will be destroyed but
its payloads will live on in DP-1's VCPI allocations and thus require
removal. However, this removal currently fails because
drm_dp_update_payload_part1() will (rightly so) try to validate the port
before accessing it, fail then abort. If we keep going, eventually we
run the MST hub out of bandwidth and all new allocations will start to
fail (or in my case; all new displays just start flickering a ton).
We could just teach drm_dp_update_payload_part1() not to drop the port
ref in this case, but then we also need to teach
drm_dp_destroy_payload_step1() to do the same thing, then hope no one
ever adds anything that requires a validated port reference to
drm_dp_destroy_connector_work(). Kind of sketchy.
So let's go with a more clever solution: any port that
drm_dp_destroy_connector_work() interacts with is guaranteed to still
exist in memory until we say so. While said port might not be valid we
don't really care: that's the whole reason we're destroying it in the
first place! So, teach drm_dp_get_validated_port_ref() to use the
almighty current_work() function to avoid attempting to validate ports
from the context of mgr->destroy_connector_work. I can't see any
situation where this wouldn't be safe, and this avoids having to play
whack-a-mole in the future of trying to work around port validation.
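For context, current_work() simply reports which work item the calling
kthread is currently executing, or NULL if we aren't running off a
workqueue at all; its implementation in kernel/workqueue.c is roughly:

  struct work_struct *current_work(void)
  {
          struct worker *worker = current_wq_worker();

          return worker ? worker->current_work : NULL;
  }

Comparing its return value against &mgr->destroy_connector_work is
therefore a race-free way to detect the destroy worker's own context.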
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Fixes: 263efde31f97 ("drm/dp/mst: Get validated port ref in drm_dp_update_payload_part1()")
Reported-by: Jerry Zuo <Jerry.Zuo(a)amd.com>
Cc: Jerry Zuo <Jerry.Zuo(a)amd.com>
Cc: Harry Wentland <Harry.Wentland(a)amd.com>
Cc: <stable(a)vger.kernel.org> # v4.6+
---
drivers/gpu/drm/drm_dp_mst_topology.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 529414556962..08978ad72f33 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1023,9 +1023,20 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_
static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port)
{
struct drm_dp_mst_port *rport = NULL;
+
mutex_lock(&mgr->lock);
- if (mgr->mst_primary)
- rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port);
+ /*
+ * Port may or may not be 'valid' but we don't care about that when
+ * destroying the port and we are guaranteed that the port pointer
+ * will be valid until we've finished
+ */
+ if (current_work() == &mgr->destroy_connector_work) {
+ kref_get(&port->kref);
+ rport = port;
+ } else if (mgr->mst_primary) {
+ rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary,
+ port);
+ }
mutex_unlock(&mgr->lock);
return rport;
}
--
2.19.1
The patch titled
Subject: userfaultfd: shmem: add i_size checks
has been added to the -mm tree. Its filename is
userfaultfd-shmem-add-i_size-checks.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/userfaultfd-shmem-add-i_size-check…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/userfaultfd-shmem-add-i_size-check…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Andrea Arcangeli <aarcange(a)redhat.com>
Subject: userfaultfd: shmem: add i_size checks
With MAP_SHARED: recheck the i_size after taking the PT lock, to serialize
against truncate with the PT lock. Delete the page from the pagecache if
the i_size_read check fails.
With MAP_PRIVATE: check the i_size after the PT lock before mapping
anonymous memory or zeropages into the MAP_PRIVATE shmem mapping.
A mostly irrelevant cleanup: the PT lock is a spinlock, so drop it
before taking the sleepable page lock, just as the
delete_from_page_cache() pagecache removal is done only after dropping
the PT lock.
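The check added to both callers can be distilled into a helper like this
(a hypothetical illustration; the patch open-codes it instead):

  static bool uffd_beyond_eof(struct vm_area_struct *dst_vma,
                              unsigned long dst_addr, struct inode *inode)
  {
          pgoff_t offset = linear_page_index(dst_vma, dst_addr);
          /* first page index past EOF */
          pgoff_t max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

          /* true if truncate raced us and the target page is past EOF */
          return offset >= max_off;
  }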
Link: http://lkml.kernel.org/r/20181126173452.26955-5-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reviewed-by: Mike Rapoport <rppt(a)linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd(a)google.com>
Reported-by: Jann Horn <jannh(a)google.com>
Cc: <stable(a)vger.kernel.org>
Cc: "Dr. David Alan Gilbert" <dgilbert(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/shmem.c~userfaultfd-shmem-add-i_size-checks
+++ a/mm/shmem.c
@@ -2216,6 +2216,7 @@ static int shmem_mfill_atomic_pte(struct
struct page *page;
pte_t _dst_pte, *dst_pte;
int ret;
+ pgoff_t offset, max_off;
ret = -ENOMEM;
if (!shmem_inode_acct_block(inode, 1))
@@ -2253,6 +2254,12 @@ static int shmem_mfill_atomic_pte(struct
__SetPageSwapBacked(page);
__SetPageUptodate(page);
+ ret = -EFAULT;
+ offset = linear_page_index(dst_vma, dst_addr);
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(offset >= max_off))
+ goto out_release;
+
ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
if (ret)
goto out_release;
@@ -2268,8 +2275,14 @@ static int shmem_mfill_atomic_pte(struct
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
- ret = -EEXIST;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+
+ ret = -EFAULT;
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(offset >= max_off))
+ goto out_release_uncharge_unlock;
+
+ ret = -EEXIST;
if (!pte_none(*dst_pte))
goto out_release_uncharge_unlock;
@@ -2287,13 +2300,14 @@ static int shmem_mfill_atomic_pte(struct
/* No need to invalidate - it was non-present before */
update_mmu_cache(dst_vma, dst_addr, dst_pte);
- unlock_page(page);
pte_unmap_unlock(dst_pte, ptl);
+ unlock_page(page);
ret = 0;
out:
return ret;
out_release_uncharge_unlock:
pte_unmap_unlock(dst_pte, ptl);
+ delete_from_page_cache(page);
out_release_uncharge:
mem_cgroup_cancel_charge(page, memcg, false);
out_release:
--- a/mm/userfaultfd.c~userfaultfd-shmem-add-i_size-checks
+++ a/mm/userfaultfd.c
@@ -33,6 +33,8 @@ static int mcopy_atomic_pte(struct mm_st
void *page_kaddr;
int ret;
struct page *page;
+ pgoff_t offset, max_off;
+ struct inode *inode;
if (!*pagep) {
ret = -ENOMEM;
@@ -73,8 +75,17 @@ static int mcopy_atomic_pte(struct mm_st
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
- ret = -EEXIST;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+ if (dst_vma->vm_file) {
+ /* the shmem MAP_PRIVATE case requires checking the i_size */
+ inode = dst_vma->vm_file->f_inode;
+ offset = linear_page_index(dst_vma, dst_addr);
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ ret = -EFAULT;
+ if (unlikely(offset >= max_off))
+ goto out_release_uncharge_unlock;
+ }
+ ret = -EEXIST;
if (!pte_none(*dst_pte))
goto out_release_uncharge_unlock;
@@ -108,11 +119,22 @@ static int mfill_zeropage_pte(struct mm_
pte_t _dst_pte, *dst_pte;
spinlock_t *ptl;
int ret;
+ pgoff_t offset, max_off;
+ struct inode *inode;
_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
dst_vma->vm_page_prot));
- ret = -EEXIST;
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+ if (dst_vma->vm_file) {
+ /* the shmem MAP_PRIVATE case requires checking the i_size */
+ inode = dst_vma->vm_file->f_inode;
+ offset = linear_page_index(dst_vma, dst_addr);
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ ret = -EFAULT;
+ if (unlikely(offset >= max_off))
+ goto out_unlock;
+ }
+ ret = -EEXIST;
if (!pte_none(*dst_pte))
goto out_unlock;
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
_
Patches currently in -mm which might be from aarcange(a)redhat.com are
userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails.patch
userfaultfd-shmem-allocate-anonymous-memory-for-map_private-shmem.patch
userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas.patch
userfaultfd-shmem-add-i_size-checks.patch
userfaultfd-shmem-uffdio_copy-set-the-page-dirty-if-vm_write-is-not-set.patch
The patch titled
Subject: userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas
has been added to the -mm tree. Its filename is
userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/userfaultfd-shmem-hugetlbfs-only-a…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/userfaultfd-shmem-hugetlbfs-only-a…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Andrea Arcangeli <aarcange(a)redhat.com>
Subject: userfaultfd: shmem/hugetlbfs: only allow to register VM_MAYWRITE vmas
After the VMA to register the uffd onto is found, check that it has
VM_MAYWRITE set before allowing registration. This way we inherit all
common code checks before allowing to fill file holes in shmem and
hugetlbfs with UFFDIO_COPY.
The userfaultfd memory model is not applicable to readonly files unless
the mapping is MAP_PRIVATE.
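From userland the new check is observable like this (a hedged sketch
assuming 4k pages and a pre-existing tmpfs file; error handling elided):

  #define _GNU_SOURCE
  #include <errno.h>
  #include <fcntl.h>
  #include <linux/userfaultfd.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  int main(void)
  {
          long uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
          struct uffdio_api api = { .api = UFFD_API };
          int fd = open("/dev/shm/some-file", O_RDONLY);
          void *addr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
          struct uffdio_register reg = {
                  .range = { .start = (unsigned long)addr, .len = 4096 },
                  .mode = UFFDIO_REGISTER_MODE_MISSING,
          };

          ioctl(uffd, UFFDIO_API, &api);
          /* readonly MAP_SHARED => VM_MAYWRITE clear => EPERM now */
          if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1 && errno == EPERM)
                  printf("registration rejected as expected\n");
          return 0;
  }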
Link: http://lkml.kernel.org/r/20181126173452.26955-4-aarcange@redhat.com
Fixes: ff62a3421044 ("hugetlb: implement memfd sealing")
Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reviewed-by: Mike Rapoport <rppt(a)linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd(a)google.com>
Reported-by: Jann Horn <jannh(a)google.com>
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Cc: <stable(a)vger.kernel.org>
Cc: "Dr. David Alan Gilbert" <dgilbert(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/fs/userfaultfd.c~userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas
+++ a/fs/userfaultfd.c
@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct u
ret = -EINVAL;
if (!vma_can_userfault(cur))
goto out_unlock;
+
+ /*
+ * UFFDIO_COPY will fill file holes even without
+ * PROT_WRITE. This check enforces that if this is a
+ * MAP_SHARED, the process has write permission to the backing
+ * file. If VM_MAYWRITE is set it also enforces that on a
+ * MAP_SHARED vma: there is no F_SEAL_WRITE and no further
+ * F_SEAL_WRITE can be taken until the vma is destroyed.
+ */
+ ret = -EPERM;
+ if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+ goto out_unlock;
+
/*
* If this vma contains ending address, and huge pages
* check alignment.
@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct u
BUG_ON(!vma_can_userfault(vma));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
@@ -1552,6 +1566,7 @@ static int userfaultfd_unregister(struct
cond_resched();
BUG_ON(!vma_can_userfault(vma));
+ WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
/*
* Nothing to do: this vma is already registered into this
--- a/mm/userfaultfd.c~userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas
+++ a/mm/userfaultfd.c
@@ -205,8 +205,9 @@ retry:
if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
goto out_unlock;
/*
- * Only allow __mcopy_atomic_hugetlb on userfaultfd
- * registered ranges.
+ * Check the vma is registered in uffd, this is
+ * required to enforce the VM_MAYWRITE check done at
+ * uffd registration time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;
@@ -459,13 +460,9 @@ retry:
if (!dst_vma)
goto out_unlock;
/*
- * Be strict and only allow __mcopy_atomic on userfaultfd
- * registered ranges to prevent userland errors going
- * unnoticed. As far as the VM consistency is concerned, it
- * would be perfectly safe to remove this check, but there's
- * no useful usage for __mcopy_atomic ouside of userfaultfd
- * registered ranges. This is after all why these are ioctls
- * belonging to the userfaultfd and not syscalls.
+ * Check the vma is registered in uffd, this is required to
+ * enforce the VM_MAYWRITE check done at uffd registration
+ * time.
*/
if (!dst_vma->vm_userfaultfd_ctx.ctx)
goto out_unlock;
_
Patches currently in -mm which might be from aarcange(a)redhat.com are
userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails.patch
userfaultfd-shmem-allocate-anonymous-memory-for-map_private-shmem.patch
userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas.patch
userfaultfd-shmem-add-i_size-checks.patch
userfaultfd-shmem-uffdio_copy-set-the-page-dirty-if-vm_write-is-not-set.patch
The patch titled
Subject: userfaultfd: shmem: allocate anonymous memory for MAP_PRIVATE shmem
has been added to the -mm tree. Its filename is
userfaultfd-shmem-allocate-anonymous-memory-for-map_private-shmem.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/userfaultfd-shmem-allocate-anonymo…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/userfaultfd-shmem-allocate-anonymo…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Andrea Arcangeli <aarcange(a)redhat.com>
Subject: userfaultfd: shmem: allocate anonymous memory for MAP_PRIVATE shmem
Userfaultfd did not create private memory when UFFDIO_COPY was invoked on
a MAP_PRIVATE shmem mapping. Instead it wrote to the shmem file, even
when that had not been opened for writing. Though, fortunately, that
could only happen where there was a hole in the file.
Fix the shmem-backed implementation of UFFDIO_COPY to create private
memory for MAP_PRIVATE mappings. The hugetlbfs-backed implementation was
already correct.
This change is visible to userland if userfaultfd has been used in
unintended ways, so it introduces a small risk of incompatibility, but
it is necessary in order to respect file permissions.
An app that uses UFFDIO_COPY for anything like postcopy live migration
won't notice the difference, and in fact it'll run faster because there
will be no copy-on-write and memory waste in the tmpfs pagecache anymore.
Userfaults on MAP_PRIVATE shmem keep triggering only on file holes like
before.
The real zeropage can also be built on a MAP_PRIVATE shmem mapping
through UFFDIO_ZEROPAGE, and that's safe because the zeropage pte is
never dirty; in turn, even an mprotect upgrading the vma permission
from PROT_READ to PROT_READ|PROT_WRITE won't make the zeropage pte
writable.
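The user-visible effect can be sketched as follows (a fragment under
stated assumptions: fd is a shmem file with a hole at offset 0, uffd is
already registered on the private mapping, and src_page holds the data):

  char *priv = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
  char *shrd = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
  struct uffdio_copy copy = {
          .dst = (unsigned long)priv,
          .src = (unsigned long)src_page,
          .len = 4096,
  };

  ioctl(uffd, UFFDIO_COPY, &copy);
  assert(priv[0] == ((char *)src_page)[0]); /* anon copy populated */
  assert(shrd[0] == 0); /* the file hole itself stays untouched */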
Link: http://lkml.kernel.org/r/20181126173452.26955-3-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reported-by: Mike Rapoport <rppt(a)linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
Cc: "Dr. David Alan Gilbert" <dgilbert(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/userfaultfd.c~userfaultfd-shmem-allocate-anonymous-memory-for-map_private-shmem
+++ a/mm/userfaultfd.c
@@ -380,7 +380,17 @@ static __always_inline ssize_t mfill_ato
{
ssize_t err;
- if (vma_is_anonymous(dst_vma)) {
+ /*
+ * The normal page fault path for a shmem will invoke the
+ * fault, fill the hole in the file and COW it right away. The
+ * result generates plain anonymous memory. So when we are
+ * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
+ * generate anonymous memory directly without actually filling
+ * the hole. For the MAP_PRIVATE case the robustness check
+ * only happens in the pagetable (to verify it's still none)
+ * and not in the radix tree.
+ */
+ if (!(dst_vma->vm_flags & VM_SHARED)) {
if (!zeropage)
err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
dst_addr, src_addr, page);
@@ -489,7 +499,8 @@ retry:
* dst_vma.
*/
err = -ENOMEM;
- if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
+ if (!(dst_vma->vm_flags & VM_SHARED) &&
+ unlikely(anon_vma_prepare(dst_vma)))
goto out_unlock;
while (src_addr < src_start + len) {
_
Patches currently in -mm which might be from aarcange(a)redhat.com are
userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails.patch
userfaultfd-shmem-allocate-anonymous-memory-for-map_private-shmem.patch
userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas.patch
userfaultfd-shmem-add-i_size-checks.patch
userfaultfd-shmem-uffdio_copy-set-the-page-dirty-if-vm_write-is-not-set.patch
The patch titled
Subject: userfaultfd: use ENOENT instead of EFAULT if the atomic copy user fails
has been added to the -mm tree. Its filename is
userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/userfaultfd-use-enoent-instead-of-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/userfaultfd-use-enoent-instead-of-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Andrea Arcangeli <aarcange(a)redhat.com>
Subject: userfaultfd: use ENOENT instead of EFAULT if the atomic copy user fails
Patch series "userfaultfd shmem updates".
Jann found two bugs in the userfaultfd shmem MAP_SHARED backend: the lack
of the VM_MAYWRITE check and the lack of i_size checks.
While looking into the above, we also fixed the MAP_PRIVATE case.
Hugh, by source review, also found a source of data loss if UFFDIO_COPY
is used on shmem MAP_SHARED PROT_READ mappings (production usages such
as QEMU incidentally run with PROT_READ|PROT_WRITE, so the data loss
couldn't happen there).
The whole patchset is marked for stable.
We verified that QEMU postcopy live migration with the guest running
on shmem MAP_PRIVATE runs as well after the shmem MAP_PRIVATE fix as
it did before. Regardless of whether it's shmem or hugetlbfs,
MAP_PRIVATE or MAP_SHARED, QEMU unconditionally invokes a hole punch
if the guest mapping is file-backed, and a MADV_DONTNEED too (needed
to get rid of the MAP_PRIVATE COWs and for the anon backend).
This patch (of 5):
We internally used EFAULT to communicate with the caller; switch to
ENOENT, so EFAULT can be used as a non-internal retval.
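After this change the internal retry protocol looks roughly like the
following (a simplified sketch of the loop in mm/userfaultfd.c, not a
verbatim quote):

  err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                         src_addr, &page);
  if (unlikely(err == -ENOENT)) {
          /* the atomic copy_from_user failed: drop mmap_sem and
           * redo the copy with a sleepable copy_from_user */
          up_read(&dst_mm->mmap_sem);
          /* ... copy_from_user(page_kaddr, src, PAGE_SIZE) ... */
          goto retry;
  }
  /* -EFAULT is now free to report a real fault to the caller */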
Link: http://lkml.kernel.org/r/20181126173452.26955-2-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange(a)redhat.com>
Reviewed-by: Mike Rapoport <rppt(a)linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd(a)google.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/hugetlb.c~userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails
+++ a/mm/hugetlb.c
@@ -4080,7 +4080,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
/* fallback to copy_from_user outside mmap_sem */
if (unlikely(ret)) {
- ret = -EFAULT;
+ ret = -ENOENT;
*pagep = page;
/* don't free the page */
goto out;
--- a/mm/shmem.c~userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails
+++ a/mm/shmem.c
@@ -2238,7 +2238,7 @@ static int shmem_mfill_atomic_pte(struct
*pagep = page;
shmem_inode_unacct_blocks(inode, 1);
/* don't free the page */
- return -EFAULT;
+ return -ENOENT;
}
} else { /* mfill_zeropage_atomic */
clear_highpage(page);
--- a/mm/userfaultfd.c~userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails
+++ a/mm/userfaultfd.c
@@ -48,7 +48,7 @@ static int mcopy_atomic_pte(struct mm_st
/* fallback to copy_from_user outside mmap_sem */
if (unlikely(ret)) {
- ret = -EFAULT;
+ ret = -ENOENT;
*pagep = page;
/* don't free the page */
goto out;
@@ -274,7 +274,7 @@ retry:
cond_resched();
- if (unlikely(err == -EFAULT)) {
+ if (unlikely(err == -ENOENT)) {
up_read(&dst_mm->mmap_sem);
BUG_ON(!page);
@@ -530,7 +530,7 @@ retry:
src_addr, &page, zeropage);
cond_resched();
- if (unlikely(err == -EFAULT)) {
+ if (unlikely(err == -ENOENT)) {
void *page_kaddr;
up_read(&dst_mm->mmap_sem);
_
Patches currently in -mm which might be from aarcange(a)redhat.com are
userfaultfd-use-enoent-instead-of-efault-if-the-atomic-copy-user-fails.patch
userfaultfd-shmem-allocate-anonymous-memory-for-map_private-shmem.patch
userfaultfd-shmem-hugetlbfs-only-allow-to-register-vm_maywrite-vmas.patch
userfaultfd-shmem-add-i_size-checks.patch
userfaultfd-shmem-uffdio_copy-set-the-page-dirty-if-vm_write-is-not-set.patch