4.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: Yang Shi <yang.shi@linux.alibaba.com>
commit a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 upstream.
When MPOL_MF_STRICT was specified and an existing page was already on a node that does not follow the policy, mbind() should return -EIO. But commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") broke the rule.
And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support thp migration") didn't return the correct value for THP mbind() either.
If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure the walk reaches queue_pages_pte_range() or queue_pages_pmd() and checks whether an existing page is already on a node that does not follow the policy. Because a non-migratable vma may now be walked, also return -EIO for it if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified.
Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind...
[akpm@linux-foundation.org: tweak code comment]
Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.al...
Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Acked-by: Rafael Aquini <aquini@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
--- mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-)
--- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -427,6 +427,13 @@ static inline bool queue_pages_required( return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); }
+/* + * queue_pages_pmd() has three possible return values: + * 1 - pages are placed on the right node or queued successfully. + * 0 - THP was split. + * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing + * page was already on a node that does not follow the policy. + */ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -436,7 +443,7 @@ static int queue_pages_pmd(pmd_t *pmd, s unsigned long flags;
if (unlikely(is_pmd_migration_entry(*pmd))) { - ret = 1; + ret = -EIO; goto unlock; } page = pmd_page(*pmd); @@ -462,8 +469,15 @@ static int queue_pages_pmd(pmd_t *pmd, s ret = 1; flags = qp->flags; /* go to thp migration */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma)) { + ret = -EIO; + goto unlock; + } + migrate_page_add(page, qp->pagelist, flags); + } else + ret = -EIO; unlock: spin_unlock(ptl); out: @@ -488,8 +502,10 @@ static int queue_pages_pte_range(pmd_t * ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { ret = queue_pages_pmd(pmd, ptl, addr, end, walk); - if (ret) + if (ret > 0) return 0; + else if (ret < 0) + return ret; }
if (pmd_trans_unstable(pmd)) @@ -526,11 +542,16 @@ retry: goto retry; }
- migrate_page_add(page, qp->pagelist, flags); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(vma)) + break; + migrate_page_add(page, qp->pagelist, flags); + } else + break; } pte_unmap_unlock(pte - 1, ptl); cond_resched(); - return 0; + return addr != end ? -EIO : 0; }
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, @@ -600,7 +621,12 @@ static int queue_pages_test_walk(unsigne unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags;
- if (!vma_migratable(vma)) + /* + * Need check MPOL_MF_STRICT to return -EIO if possible + * regardless of vma_migratable + */ + if (!vma_migratable(vma) && + !(flags & MPOL_MF_STRICT)) return 1;
if (endvma > end) @@ -627,7 +653,7 @@ static int queue_pages_test_walk(unsigne }
/* queue pages from current vma */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & MPOL_MF_VALID) return 0; return 1; }