Embarrassingly, the recent fix introduced a worse problem than it solved,
causing the balloon not to inflate. The VM informed the hypervisor that
the pages for lock/unlock are sitting at the wrong address, as it used
the uninitialized page variable (b->page) instead of the batch page.
Fixes: b23220fe054e9 ("vmw_balloon: fixing double free when batching mode is off")
Cc: stable(a)vger.kernel.org
Reviewed-by: Xavier Deguillard <xdeguillard(a)vmware.com>
Signed-off-by: Nadav Amit <namit(a)vmware.com>
---
drivers/misc/vmw_balloon.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index efd733472a35..56c6f79a5c5a 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -467,7 +467,7 @@ static int vmballoon_send_batched_lock(struct vmballoon *b,
unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
unsigned long status;
- unsigned long pfn = page_to_pfn(b->page);
+ unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
STATS_INC(b->stats.lock[is_2m_pages]);
@@ -515,7 +515,7 @@ static bool vmballoon_send_batched_unlock(struct vmballoon *b,
unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
unsigned long status;
- unsigned long pfn = page_to_pfn(b->page);
+ unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
STATS_INC(b->stats.unlock[is_2m_pages]);
--
2.17.1
The patch titled
Subject: mm: teach dump_page() to correctly output poisoned struct pages
has been added to the -mm tree. Its filename is
mm-teach-dump_page-to-correctly-output-poisoned-struct-pages.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-teach-dump_page-to-correctly-ou…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-teach-dump_page-to-correctly-ou…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Subject: mm: teach dump_page() to correctly output poisoned struct pages
If a struct page is poisoned and an uninitialized access is detected via
PF_POISONED_CHECK(page), dump_page() is called to output the page. But
dump_page() itself accesses the struct page to determine how to print it,
and therefore gets into a recursive loop.
For example:
dump_page()
__dump_page()
PageSlab(page)
PF_POISONED_CHECK(page)
VM_BUG_ON_PGFLAGS(PagePoisoned(page), page)
dump_page() recursion loop.
Link: http://lkml.kernel.org/r/20180702180536.2552-1-pasha.tatashin@oracle.com
Fixes: f165b378bbdf ("mm: uninitialized struct page poisoning sanity checking")
Signed-off-by: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/debug.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff -puN mm/debug.c~mm-teach-dump_page-to-correctly-output-poisoned-struct-pages mm/debug.c
--- a/mm/debug.c~mm-teach-dump_page-to-correctly-output-poisoned-struct-pages
+++ a/mm/debug.c
@@ -43,12 +43,25 @@ const struct trace_print_flags vmaflag_n
void __dump_page(struct page *page, const char *reason)
{
+ bool page_poisoned = PagePoisoned(page);
+ int mapcount;
+
+ /*
+ * If struct page is poisoned don't access Page*() functions as that
+ * leads to recursive loop. Page*() check for poisoned pages, and calls
+ * dump_page() when detected.
+ */
+ if (page_poisoned) {
+ pr_emerg("page:%px is uninitialized and poisoned", page);
+ goto hex_only;
+ }
+
/*
* Avoid VM_BUG_ON() in page_mapcount().
* page->_mapcount space in struct page is used by sl[aou]b pages to
* encode own info.
*/
- int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+ mapcount = PageSlab(page) ? 0 : page_mapcount(page);
pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
page, page_ref_count(page), mapcount,
@@ -60,6 +73,7 @@ void __dump_page(struct page *page, cons
pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags);
+hex_only:
print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32,
sizeof(unsigned long), page,
sizeof(struct page), false);
@@ -68,7 +82,7 @@ void __dump_page(struct page *page, cons
pr_alert("page dumped because: %s\n", reason);
#ifdef CONFIG_MEMCG
- if (page->mem_cgroup)
+ if (!page_poisoned && page->mem_cgroup)
pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
#endif
}
_
Patches currently in -mm which might be from pasha.tatashin(a)oracle.com are
mm-teach-dump_page-to-correctly-output-poisoned-struct-pages.patch
mm-skip-invalid-pages-block-at-a-time-in-zero_resv_unresv.patch
sparc64-ng4-memset-32-bits-overflow.patch
Currently, on the AMD board Asus F2A85-M Pro, there is a 100 ms delay, as
the USB bus of each of the two OHCI PCI devices is reset and a 50 ms
delay is done for each reset, per the USB specification.
Commit c6187597 (OHCI: final fix for NVIDIA problems (I hope))
unconditionally does the bus reset for all chipsets, while it was only
done for NVIDIA chipsets before.
As it should not be needed for non-NVIDIA chipsets, only do the reset
for NVIDIA devices.
Tested on Asus F2A85-M PRO and ASRock E350M1. The USB keyboard works and
the LUKS passphrase can be entered.
Signed-off-by: Paul Menzel <pmenzel(a)molgen.mpg.de>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: linux-usb(a)vger.kernel.org
Cc: Alan Stern <stern(a)rowland.harvard.edu>
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
drivers/usb/host/pci-quirks.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 3625a5c1a41b..f6b1a9bbe301 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -784,7 +784,7 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev)
writel((u32) ~0, base + OHCI_INTRDISABLE);
/* Reset the USB bus, if the controller isn't already in RESET */
- if (control & OHCI_HCFS) {
+ if ((pdev->vendor == PCI_VENDOR_ID_NVIDIA) && (control & OHCI_HCFS)) {
/* Go into RESET, preserving RWC (and possibly IR) */
writel(control & OHCI_CTRL_MASK, base + OHCI_CONTROL);
readl(base + OHCI_CONTROL);
--
2.17.1
commit 1bc0299d976e000ececc6acd76e33b4582646cb7 upstream.
The following code fails to allocate a buffer for the
tail address that the hardware DMAs into when the user
context DMA_RTAIL is set.
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
So the rcvhdrtail_kvaddr would then be NULL.
The mmap logic fails to check for a NULL rcvhdrtail_kvaddr.
The fix is to test for both user and kernel DMA_RTAIL options
during the allocation as well as testing for a NULL
rcvhdrtail_kvaddr during the mmap processing.
Additionally, all downstream testing of the capmask for DMA_RTAIL
has been eliminated in favor of testing rcvhdrtail_kvaddr.
The patch had to be adjusted for the lack of VNIC and for iterating
over contexts differently in the 4.9 code base.
Reviewed-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro(a)intel.com>
Signed-off-by: Jason Gunthorpe <jgg(a)mellanox.com>
---
drivers/infiniband/hw/hfi1/chip.c | 8 ++++----
drivers/infiniband/hw/hfi1/file_ops.c | 2 +-
drivers/infiniband/hw/hfi1/init.c | 9 ++++-----
3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 148b313..d30b3b9 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6717,7 +6717,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
for (i = 0; i < dd->n_krcv_queues; i++) {
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+ rcvmask |= dd->rcd[i]->rcvhdrtail_kvaddr ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, i);
}
@@ -8211,7 +8211,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
u32 tail;
int present;
- if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+ if (!rcd->rcvhdrtail_kvaddr)
present = (rcd->seq_cnt ==
rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
else /* is RDMA rtail */
@@ -11550,7 +11550,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (rcd->rcvhdrtail_kvaddr)
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
@@ -11630,7 +11630,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bb72976..d612f9d 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -609,7 +609,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
- if (flags & VM_WRITE) {
+ if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
ret = -EPERM;
goto done;
}
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index ae1f90d..28f4079 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1605,7 +1605,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1628,13 +1627,13 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
goto bail;
}
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
+ HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
- gfp_flags);
+ &dd->pcidev->dev, PAGE_SIZE,
+ &rcd->rcvhdrqtailaddr_dma, gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;