The patch titled
Subject: mm/khugepaged: minor reorderings in collapse_shmem()
has been added to the -mm tree. Its filename is
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-khugepaged-minor-reorderings-in…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-khugepaged-minor-reorderings-in…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/khugepaged: minor reorderings in collapse_shmem()
Several cleanups in collapse_shmem(): most of which probably do not really
matter, beyond doing things in a more familiar and reassuring order.
Simplify the failure gotos in the main loop, and on success update stats
while interrupts still disabled from the last iteration.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261526400.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/khugepaged.c~mm-khugepaged-minor-reorderings-in-collapse_shmem
+++ a/mm/khugepaged.c
@@ -1329,10 +1329,10 @@ static void collapse_shmem(struct mm_str
goto out;
}
+ __SetPageLocked(new_page);
+ __SetPageSwapBacked(new_page);
new_page->index = start;
new_page->mapping = mapping;
- __SetPageSwapBacked(new_page);
- __SetPageLocked(new_page);
BUG_ON(!page_ref_freeze(new_page, 1));
/*
@@ -1366,13 +1366,13 @@ static void collapse_shmem(struct mm_str
if (index == start) {
if (!xas_next_entry(&xas, end - 1)) {
result = SCAN_TRUNCATED;
- break;
+ goto xa_locked;
}
xas_set(&xas, index);
}
if (!shmem_charge(mapping->host, 1)) {
result = SCAN_FAIL;
- break;
+ goto xa_locked;
}
xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
nr_none++;
@@ -1387,13 +1387,12 @@ static void collapse_shmem(struct mm_str
result = SCAN_FAIL;
goto xa_unlocked;
}
- xas_lock_irq(&xas);
- xas_set(&xas, index);
} else if (trylock_page(page)) {
get_page(page);
+ xas_unlock_irq(&xas);
} else {
result = SCAN_PAGE_LOCK;
- break;
+ goto xa_locked;
}
/*
@@ -1408,11 +1407,10 @@ static void collapse_shmem(struct mm_str
result = SCAN_TRUNCATED;
goto out_unlock;
}
- xas_unlock_irq(&xas);
if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU;
- goto out_isolate_failed;
+ goto out_unlock;
}
if (page_mapped(page))
@@ -1432,7 +1430,9 @@ static void collapse_shmem(struct mm_str
*/
if (!page_ref_freeze(page, 3)) {
result = SCAN_PAGE_COUNT;
- goto out_lru;
+ xas_unlock_irq(&xas);
+ putback_lru_page(page);
+ goto out_unlock;
}
/*
@@ -1444,24 +1444,26 @@ static void collapse_shmem(struct mm_str
/* Finally, replace with the new page. */
xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
continue;
-out_lru:
- xas_unlock_irq(&xas);
- putback_lru_page(page);
-out_isolate_failed:
- unlock_page(page);
- put_page(page);
- goto xa_unlocked;
out_unlock:
unlock_page(page);
put_page(page);
- break;
+ goto xa_unlocked;
+ }
+
+ __inc_node_page_state(new_page, NR_SHMEM_THPS);
+ if (nr_none) {
+ struct zone *zone = page_zone(new_page);
+
+ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
}
- xas_unlock_irq(&xas);
+xa_locked:
+ xas_unlock_irq(&xas);
xa_unlocked:
+
if (result == SCAN_SUCCEED) {
struct page *page, *tmp;
- struct zone *zone = page_zone(new_page);
/*
* Replacing old pages with new one has succeeded, now we
@@ -1476,11 +1478,11 @@ xa_unlocked:
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
page);
list_del(&page->lru);
- unlock_page(page);
- page_ref_unfreeze(page, 1);
page->mapping = NULL;
+ page_ref_unfreeze(page, 1);
ClearPageActive(page);
ClearPageUnevictable(page);
+ unlock_page(page);
put_page(page);
index++;
}
@@ -1489,28 +1491,17 @@ xa_unlocked:
index++;
}
- local_irq_disable();
- __inc_node_page_state(new_page, NR_SHMEM_THPS);
- if (nr_none) {
- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
- }
- local_irq_enable();
-
- /*
- * Remove pte page tables, so we can re-fault
- * the page as huge.
- */
- retract_page_tables(mapping, start);
-
/* Everything is ready, let's unfreeze the new_page */
- set_page_dirty(new_page);
SetPageUptodate(new_page);
page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+ set_page_dirty(new_page);
mem_cgroup_commit_charge(new_page, memcg, false, true);
lru_cache_add_anon(new_page);
- unlock_page(new_page);
+ /*
+ * Remove pte page tables, so we can re-fault the page as huge.
+ */
+ retract_page_tables(mapping, start);
*hpage = NULL;
khugepaged_pages_collapsed++;
@@ -1543,8 +1534,8 @@ xa_unlocked:
xas_store(&xas, page);
xas_pause(&xas);
xas_unlock_irq(&xas);
- putback_lru_page(page);
unlock_page(page);
+ putback_lru_page(page);
xas_lock_irq(&xas);
}
VM_BUG_ON(nr_none);
@@ -1553,9 +1544,10 @@ xa_unlocked:
/* Unfreeze new_page, caller would take care about freeing it */
page_ref_unfreeze(new_page, 1);
mem_cgroup_cancel_charge(new_page, memcg, true);
- unlock_page(new_page);
new_page->mapping = NULL;
}
+
+ unlock_page(new_page);
out:
VM_BUG_ON(!list_empty(&pagelist));
/* TODO: tracepoints */
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
The patch titled
Subject: mm/khugepaged: collapse_shmem() remember to clear holes
has been added to the -mm tree. Its filename is
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-khugepaged-collapse_shmem-remem…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-khugepaged-collapse_shmem-remem…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/khugepaged: collapse_shmem() remember to clear holes
Huge tmpfs testing reminds us that there is no __GFP_ZERO in the gfp flags
khugepaged uses to allocate a huge page - in all common cases it would
just be a waste of effort - so collapse_shmem() must remember to clear out
any holes that it instantiates.
The obvious place to do so, where they are put into the page cache tree,
is not a good choice: because interrupts are disabled there. Leave it
until further down, once success is assured, where the other pages are
copied (before setting PageUptodate).
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261525080.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/khugepaged.c~mm-khugepaged-collapse_shmem-remember-to-clear-holes
+++ a/mm/khugepaged.c
@@ -1467,7 +1467,12 @@ xa_unlocked:
* Replacing old pages with new one has succeeded, now we
* need to copy the content and free the old pages.
*/
+ index = start;
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+ while (index < page->index) {
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
+ index++;
+ }
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
page);
list_del(&page->lru);
@@ -1477,6 +1482,11 @@ xa_unlocked:
ClearPageActive(page);
ClearPageUnevictable(page);
put_page(page);
+ index++;
+ }
+ while (index < end) {
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
+ index++;
}
local_irq_disable();
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
The patch titled
Subject: mm/khugepaged: fix crashes due to misaccounted holes
has been added to the -mm tree. Its filename is
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-khugepaged-fix-crashes-due-to-m…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-khugepaged-fix-crashes-due-to-m…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/khugepaged: fix crashes due to misaccounted holes
Huge tmpfs testing on a shortish file mapped into a pmd-rounded extent hit
shmem_evict_inode()'s WARN_ON(inode->i_blocks) followed by clear_inode()'s
BUG_ON(inode->i_data.nrpages) when the file was later closed and unlinked.
khugepaged's collapse_shmem() was forgetting to update mapping->nrpages on
the rollback path, after it had added but then needs to undo some holes.
There is indeed an irritating asymmetry between shmem_charge(), whose
callers want it to increment nrpages after successfully accounting blocks,
and shmem_uncharge(), when __delete_from_page_cache() already decremented
nrpages itself: oh well, just add a comment on that to them both.
And shmem_recalc_inode() is supposed to be called when the accounting is
expected to be in balance (so it can deduce from imbalance that reclaim
discarded some pages): so change shmem_charge() to update nrpages earlier
(though it's rare for the difference to matter at all).
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261523450.2275@eggly.anvils
Fixes: 800d8c63b2e98 ("shmem: add huge pages support")
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/khugepaged.c~mm-khugepaged-fix-crashes-due-to-misaccounted-holes
+++ a/mm/khugepaged.c
@@ -1506,9 +1506,12 @@ xa_unlocked:
khugepaged_pages_collapsed++;
} else {
struct page *page;
+
/* Something went wrong: roll back page cache changes */
- shmem_uncharge(mapping->host, nr_none);
xas_lock_irq(&xas);
+ mapping->nrpages -= nr_none;
+ shmem_uncharge(mapping->host, nr_none);
+
xas_set(&xas, start);
xas_for_each(&xas, page, end - 1) {
page = list_first_entry_or_null(&pagelist,
--- a/mm/shmem.c~mm-khugepaged-fix-crashes-due-to-misaccounted-holes
+++ a/mm/shmem.c
@@ -297,12 +297,14 @@ bool shmem_charge(struct inode *inode, l
if (!shmem_inode_acct_block(inode, pages))
return false;
+ /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
+ inode->i_mapping->nrpages += pages;
+
spin_lock_irqsave(&info->lock, flags);
info->alloced += pages;
inode->i_blocks += pages * BLOCKS_PER_PAGE;
shmem_recalc_inode(inode);
spin_unlock_irqrestore(&info->lock, flags);
- inode->i_mapping->nrpages += pages;
return true;
}
@@ -312,6 +314,8 @@ void shmem_uncharge(struct inode *inode,
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long flags;
+ /* nrpages adjustment done by __delete_from_page_cache() or caller */
+
spin_lock_irqsave(&info->lock, flags);
info->alloced -= pages;
inode->i_blocks -= pages * BLOCKS_PER_PAGE;
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
The patch titled
Subject: mm/khugepaged: collapse_shmem() stop if punched or truncated
has been added to the -mm tree. Its filename is
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-khugepaged-collapse_shmem-stop-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-khugepaged-collapse_shmem-stop-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/khugepaged: collapse_shmem() stop if punched or truncated
Huge tmpfs testing showed that although collapse_shmem() recognizes a
concurrently truncated or hole-punched page correctly, its handling of
holes was liable to refill an emptied extent. Add check to stop that.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261522040.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Reviewed-by: Matthew Wilcox <willy(a)infradead.org>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/khugepaged.c~mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated
+++ a/mm/khugepaged.c
@@ -1359,6 +1359,17 @@ static void collapse_shmem(struct mm_str
VM_BUG_ON(index != xas.xa_index);
if (!page) {
+ /*
+ * Stop if extent has been truncated or hole-punched,
+ * and is now completely empty.
+ */
+ if (index == start) {
+ if (!xas_next_entry(&xas, end - 1)) {
+ result = SCAN_TRUNCATED;
+ break;
+ }
+ xas_set(&xas, index);
+ }
if (!shmem_charge(mapping->host, 1)) {
result = SCAN_FAIL;
break;
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
The patch titled
Subject: mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()
has been added to the -mm tree. Its filename is
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-huge_memory-fix-lockdep-complai…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-huge_memory-fix-lockdep-complai…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()
Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that
__split_huge_page() was using i_size_read() while holding the irq-safe
lru_lock and page tree lock, but the 32-bit i_size_read() uses an
irq-unsafe seqlock which should not be nested inside them.
Instead, read the i_size earlier in split_huge_page_to_list(), and pass
the end offset down to __split_huge_page(): all while holding head page
lock, which is enough to prevent truncation of that extent before the page
tree lock has been taken.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils
Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/huge_memory.c~mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read
+++ a/mm/huge_memory.c
@@ -2439,12 +2439,11 @@ static void __split_huge_page_tail(struc
}
static void __split_huge_page(struct page *page, struct list_head *list,
- unsigned long flags)
+ pgoff_t end, unsigned long flags)
{
struct page *head = compound_head(page);
struct zone *zone = page_zone(head);
struct lruvec *lruvec;
- pgoff_t end = -1;
int i;
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2452,9 +2451,6 @@ static void __split_huge_page(struct pag
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);
- if (!PageAnon(page))
- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
@@ -2626,6 +2622,7 @@ int split_huge_page_to_list(struct page
int count, mapcount, extra_pins, ret;
bool mlocked;
unsigned long flags;
+ pgoff_t end;
VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2648,6 +2645,7 @@ int split_huge_page_to_list(struct page
ret = -EBUSY;
goto out;
}
+ end = -1;
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
@@ -2661,6 +2659,15 @@ int split_huge_page_to_list(struct page
anon_vma = NULL;
i_mmap_lock_read(mapping);
+
+ /*
+ *__split_huge_page() may need to trim off pages beyond EOF:
+ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+ * which cannot be nested inside the page tree lock. So note
+ * end now: i_size itself may be changed at any moment, but
+ * head page lock is good enough to serialize the trimming.
+ */
+ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
}
/*
@@ -2707,7 +2714,7 @@ int split_huge_page_to_list(struct page
if (mapping)
__dec_node_page_state(page, NR_SHMEM_THPS);
spin_unlock(&pgdata->split_queue_lock);
- __split_huge_page(page, list, flags);
+ __split_huge_page(page, list, end, flags);
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
The patch titled
Subject: mm/huge_memory: splitting set mapping+index before unfreeze
has been added to the -mm tree. Its filename is
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-huge_memory-splitting-set-mappi…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-huge_memory-splitting-set-mappi…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/huge_memory: splitting set mapping+index before unfreeze
Huge tmpfs stress testing has occasionally hit shmem_undo_range()'s
VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page).
Move the setting of mapping and index up before the page_ref_unfreeze() in
__split_huge_page_tail() to fix this: so that a page cache lookup cannot
get a reference while the tail's mapping and index are unstable.
In fact, might as well move them up before the smp_wmb(): I don't see an
actual need for that, but if I'm missing something, this way round is
safer than the other, and no less efficient.
You might argue that VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page) is
misplaced, and should be left until after the trylock_page(); but left as
is has not crashed since, and gives more stringent assurance.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261516380.2275@eggly.anvils
Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()")
Requires: 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/huge_memory.c~mm-huge_memory-splitting-set-mappingindex-before-unfreeze
+++ a/mm/huge_memory.c
@@ -2402,6 +2402,12 @@ static void __split_huge_page_tail(struc
(1L << PG_unevictable) |
(1L << PG_dirty)));
+ /* ->mapping in first tail page is compound_mapcount */
+ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+ page_tail);
+ page_tail->mapping = head->mapping;
+ page_tail->index = head->index + tail;
+
/* Page flags must be visible before we make the page non-compound. */
smp_wmb();
@@ -2422,12 +2428,6 @@ static void __split_huge_page_tail(struc
if (page_is_idle(head))
set_page_idle(page_tail);
- /* ->mapping in first tail page is compound_mapcount */
- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
- page_tail);
- page_tail->mapping = head->mapping;
-
- page_tail->index = head->index + tail;
page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
/*
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
The patch titled
Subject: mm/huge_memory: rename freeze_page() to unmap_page()
has been added to the -mm tree. Its filename is
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-huge_memory-rename-freeze_page-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-huge_memory-rename-freeze_page-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/huge_memory: rename freeze_page() to unmap_page()
The term "freeze" is used in several ways in the kernel, and in mm it has
the particular meaning of forcing page refcount temporarily to 0.
freeze_page() is just too confusing a name for a function that unmaps a
page: rename it unmap_page(), and rename unfreeze_page() remap_page().
Went to change the mention of freeze_page() added later in mm/rmap.c, but
found it to be incorrect: ordinary page reclaim reaches there too; but the
substance of the comment still seems correct, so edit it down.
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261514080.2275@eggly.anvils
Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/huge_memory.c~mm-huge_memory-rename-freeze_page-to-unmap_page
+++ a/mm/huge_memory.c
@@ -2350,7 +2350,7 @@ void vma_adjust_trans_huge(struct vm_are
}
}
-static void freeze_page(struct page *page)
+static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
@@ -2365,7 +2365,7 @@ static void freeze_page(struct page *pag
VM_BUG_ON_PAGE(!unmap_success, page);
}
-static void unfreeze_page(struct page *page)
+static void remap_page(struct page *page)
{
int i;
if (PageTransHuge(page)) {
@@ -2483,7 +2483,7 @@ static void __split_huge_page(struct pag
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
- unfreeze_page(head);
+ remap_page(head);
for (i = 0; i < HPAGE_PMD_NR; i++) {
struct page *subpage = head + i;
@@ -2664,7 +2664,7 @@ int split_huge_page_to_list(struct page
}
/*
- * Racy check if we can split the page, before freeze_page() will
+ * Racy check if we can split the page, before unmap_page() will
* split PMDs
*/
if (!can_split_huge_page(head, &extra_pins)) {
@@ -2673,7 +2673,7 @@ int split_huge_page_to_list(struct page
}
mlocked = PageMlocked(page);
- freeze_page(head);
+ unmap_page(head);
VM_BUG_ON_PAGE(compound_mapcount(head), head);
/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2727,7 +2727,7 @@ int split_huge_page_to_list(struct page
fail: if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
- unfreeze_page(head);
+ remap_page(head);
ret = -EBUSY;
}
--- a/mm/rmap.c~mm-huge_memory-rename-freeze_page-to-unmap_page
+++ a/mm/rmap.c
@@ -1627,16 +1627,9 @@ static bool try_to_unmap_one(struct page
address + PAGE_SIZE);
} else {
/*
- * We should not need to notify here as we reach this
- * case only from freeze_page() itself only call from
- * split_huge_page_to_list() so everything below must
- * be true:
- * - page is not anonymous
- * - page is locked
- *
- * So as it is a locked file back page thus it can not
- * be remove from the page cache and replace by a new
- * page before mmu_notifier_invalidate_range_end so no
+ * This is a locked file-backed page, thus it cannot
+ * be removed from the page cache and replaced by a new
+ * page before mmu_notifier_invalidate_range_end, so no
* concurrent thread might update its page table to
* point at new page while a device still is using this
* page.
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-huge_memory-rename-freeze_page-to-unmap_page.patch
mm-huge_memory-splitting-set-mappingindex-before-unfreeze.patch
mm-huge_memory-fix-lockdep-complaint-on-32-bit-i_size_read.patch
mm-khugepaged-collapse_shmem-stop-if-punched-or-truncated.patch
mm-khugepaged-fix-crashes-due-to-misaccounted-holes.patch
mm-khugepaged-collapse_shmem-remember-to-clear-holes.patch
mm-khugepaged-minor-reorderings-in-collapse_shmem.patch
mm-khugepaged-collapse_shmem-without-freezing-new_page.patch
mm-khugepaged-collapse_shmem-do-not-crash-on-compound.patch
mm-khugepaged-fix-the-xas_create_range-error-path.patch
mm-put_and_wait_on_page_locked-while-page-is-migrated.patch
From: Dexuan Cui <decui(a)microsoft.com>
We can concurrently try to open the same sub-channel from 2 paths:
path #1: vmbus_onoffer() -> vmbus_process_offer() -> handle_sc_creation().
path #2: storvsc_probe() -> storvsc_connect_to_vsp() ->
-> storvsc_channel_init() -> handle_multichannel_storage() ->
-> vmbus_are_subchannels_present() -> handle_sc_creation().
They conflict with each other, but it was not an issue before the recent
commit ae6935ed7d42 ("vmbus: split ring buffer allocation from open"),
because at the beginning of vmbus_open() we checked newchannel->state so
only one path could succeed, and the other would return with -EINVAL.
After ae6935ed7d42, the failing path frees the channel's ringbuffer by
vmbus_free_ring(), and this causes a panic later.
Commit ae6935ed7d42 itself is good, and it just reveals the longstanding
race. We can resolve the issue by removing path #2, i.e. removing the
second vmbus_are_subchannels_present() in handle_multichannel_storage().
BTW, the comment "Check to see if sub-channels have already been created"
in handle_multichannel_storage() is incorrect: when we unload the driver,
we first close the sub-channel(s) and then close the primary channel, next
the host sends rescind-offer message(s) so primary->sc_list will become
empty. This means the first vmbus_are_subchannels_present() in
handle_multichannel_storage() is never useful.
Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open")
Cc: stable(a)vger.kernel.org
Cc: Long Li <longli(a)microsoft.com>
Cc: Stephen Hemminger <sthemmin(a)microsoft.com>
Cc: K. Y. Srinivasan <kys(a)microsoft.com>
Cc: Haiyang Zhang <haiyangz(a)microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys(a)microsoft.com>
---
drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 31 deletions(-)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index f03dc03a42c3..8f88348ebe42 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -446,7 +446,6 @@ struct storvsc_device {
bool destroy;
bool drain_notify;
- bool open_sub_channel;
atomic_t num_outstanding_req;
struct Scsi_Host *host;
@@ -636,33 +635,38 @@ static inline struct storvsc_device *get_in_stor_device(
static void handle_sc_creation(struct vmbus_channel *new_sc)
{
struct hv_device *device = new_sc->primary_channel->device_obj;
+ struct device *dev = &device->device;
struct storvsc_device *stor_device;
struct vmstorage_channel_properties props;
+ int ret;
stor_device = get_out_stor_device(device);
if (!stor_device)
return;
- if (stor_device->open_sub_channel == false)
- return;
-
memset(&props, 0, sizeof(struct vmstorage_channel_properties));
- vmbus_open(new_sc,
- storvsc_ringbuffer_size,
- storvsc_ringbuffer_size,
- (void *)&props,
- sizeof(struct vmstorage_channel_properties),
- storvsc_on_channel_callback, new_sc);
+ ret = vmbus_open(new_sc,
+ storvsc_ringbuffer_size,
+ storvsc_ringbuffer_size,
+ (void *)&props,
+ sizeof(struct vmstorage_channel_properties),
+ storvsc_on_channel_callback, new_sc);
- if (new_sc->state == CHANNEL_OPENED_STATE) {
- stor_device->stor_chns[new_sc->target_cpu] = new_sc;
- cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+ /* In case vmbus_open() fails, we don't use the sub-channel. */
+ if (ret != 0) {
+ dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
+ return;
}
+
+ /* Add the sub-channel to the array of available channels. */
+ stor_device->stor_chns[new_sc->target_cpu] = new_sc;
+ cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
}
static void handle_multichannel_storage(struct hv_device *device, int max_chns)
{
+ struct device *dev = &device->device;
struct storvsc_device *stor_device;
int num_cpus = num_online_cpus();
int num_sc;
@@ -679,21 +683,11 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
request = &stor_device->init_request;
vstor_packet = &request->vstor_packet;
- stor_device->open_sub_channel = true;
/*
* Establish a handler for dealing with subchannels.
*/
vmbus_set_sc_create_callback(device->channel, handle_sc_creation);
- /*
- * Check to see if sub-channels have already been created. This
- * can happen when this driver is re-loaded after unloading.
- */
-
- if (vmbus_are_subchannels_present(device->channel))
- return;
-
- stor_device->open_sub_channel = false;
/*
* Request the host to create sub-channels.
*/
@@ -710,23 +704,29 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if (ret != 0)
+ if (ret != 0) {
+ dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
return;
+ }
t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
- if (t == 0)
+ if (t == 0) {
+ dev_err(dev, "Failed to create sub-channel: timed out\n");
return;
+ }
if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
- vstor_packet->status != 0)
+ vstor_packet->status != 0) {
+ dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
+ vstor_packet->operation, vstor_packet->status);
return;
+ }
/*
- * Now that we created the sub-channels, invoke the check; this
- * may trigger the callback.
+ * We need to do nothing here, because vmbus_process_offer()
+ * invokes channel->sc_creation_callback, which will open and use
+ * the sub-channel(s).
*/
- stor_device->open_sub_channel = true;
- vmbus_are_subchannels_present(device->channel);
}
static void cache_wwn(struct storvsc_device *stor_device,
@@ -1794,7 +1794,6 @@ static int storvsc_probe(struct hv_device *device,
}
stor_device->destroy = false;
- stor_device->open_sub_channel = false;
init_waitqueue_head(&stor_device->waiting_to_drain);
stor_device->device = device;
stor_device->host = host;
--
2.19.1