On Wed, Mar 20, 2019 at 12:06 AM Yang Shi <yang.shi@linux.alibaba.com> wrote:
When MPOL_MF_STRICT was specified and an existing page was already on a node that does not follow the policy, mbind() should return -EIO. But commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") broke the rule.
And, commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support thp migration") didn't return the correct value for THP mbind() either.
If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure the walk reaches queue_pages_pte_range() or queue_pages_pmd(), which check whether an existing page is already on a node that does not follow the policy. Since a non-migratable vma may be reached this way, return -EIO there as well if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified.
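To illustrate the user-visible rule being restored, here is a minimal userspace sketch (not from the patch; it assumes a machine with more than one NUMA node, libnuma's <numaif.h> mbind() wrapper, and an illustrative target node 1):

#include <numaif.h>		/* mbind(), MPOL_BIND, MPOL_MF_STRICT; link with -lnuma */
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	/* Map and touch one page so it is already allocated on some node. */
	char *p = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	p[0] = 1;

	/*
	 * Illustrative: bind strictly to node 1. If the existing page sits
	 * on another node, mbind() must fail with EIO, because
	 * MPOL_MF_STRICT is set and no MPOL_MF_MOVE* flag allows moving it.
	 */
	unsigned long nodemask = 1UL << 1;	/* assumes node 1 exists */
	if (mbind(p, page_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
		  MPOL_MF_STRICT) == -1)
		printf("mbind: %s (EIO expected for a misplaced page)\n",
		       strerror(errno));
	else
		printf("mbind succeeded (page already on node 1)\n");

	return 0;
}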
Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind...
Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: stable@vger.kernel.org
Suggested-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Signed-off-by: Oscar Salvador <osalvador@suse.de>
 mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index abe7a67..401c817 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -447,6 +447,13 @@ static inline bool queue_pages_required(struct page *page,
 	return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
 }
 
+/*
+ * The queue_pages_pmd() may have three kind of return value.
+ * 1 - pages are placed on he right node or queued successfully.
Minor typo -> s/he/the ?
+ * 0 - THP get split.
+ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
+ *        page was already on a node that does not follow the policy.
+ */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 {
@@ -456,7 +463,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	unsigned long flags;
 
 	if (unlikely(is_pmd_migration_entry(*pmd))) {
-		ret = 1;
+		ret = -EIO;
 		goto unlock;
 	}
 	page = pmd_page(*pmd);
@@ -473,8 +480,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	ret = 1;
 	flags = qp->flags;
 	/* go to thp migration */
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+		if (!vma_migratable(walk->vma)) {
+			ret = -EIO;
+			goto unlock;
+		}
+
 		migrate_page_add(page, qp->pagelist, flags);
+	} else
+		ret = -EIO;
 unlock:
 	spin_unlock(ptl);
 out:
@@ -499,8 +513,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-		if (ret)
+		if (ret > 0)
 			return 0;
+		else if (ret < 0)
+			return ret;
 	}
 
 	if (pmd_trans_unstable(pmd))
@@ -521,11 +537,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			continue;
 		if (!queue_pages_required(page, qp))
 			continue;
-		migrate_page_add(page, qp->pagelist, flags);
+		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+			if (!vma_migratable(vma))
+				break;
+			migrate_page_add(page, qp->pagelist, flags);
+		} else
+			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
-	return 0;
+	return addr != end ? -EIO : 0;
 }
 
 static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -595,7 +616,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	unsigned long endvma = vma->vm_end;
 	unsigned long flags = qp->flags;
 
-	if (!vma_migratable(vma))
+	/*
+	 * Need check MPOL_MF_STRICT to return -EIO if possible
+	 * regardless of vma_migratable
+	 */
+	if (!vma_migratable(vma) &&
+	    !(flags & MPOL_MF_STRICT))
 		return 1;
 
 	if (endvma > end)
@@ -622,7 +648,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	}
 
 	/* queue pages from current vma */
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & MPOL_MF_VALID)
 		return 0;
 	return 1;
 }
1.8.3.1
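As a side illustration of the migration path the patch also touches, a similar sketch combining MPOL_MF_MOVE with MPOL_MF_STRICT (again an assumption-laden example, not part of the patch or the LTP test; per mbind(2), with both flags the kernel tries to migrate misplaced pages and reports EIO only if some could not be moved; get_mempolicy() with MPOL_F_NODE | MPOL_F_ADDR is used just to show where the page ended up):

#include <numaif.h>		/* mbind(), get_mempolicy(); link with -lnuma */
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	p[0] = 1;				/* fault the page in first */

	unsigned long nodemask = 1UL << 1;	/* illustrative: target node 1 */

	/*
	 * MPOL_MF_MOVE asks the kernel to migrate the existing page;
	 * adding MPOL_MF_STRICT makes the call fail with EIO if the page
	 * could not be moved to a node allowed by the policy.
	 */
	if (mbind(p, page_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
		  MPOL_MF_MOVE | MPOL_MF_STRICT) == -1) {
		printf("mbind: %s\n", strerror(errno));
		return 1;
	}

	/* Ask which node the page is on now (MPOL_F_NODE | MPOL_F_ADDR). */
	int node = -1;
	if (get_mempolicy(&node, NULL, 0, p, MPOL_F_NODE | MPOL_F_ADDR) == 0)
		printf("page is now on node %d\n", node);

	return 0;
}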