This is a preparation patch for fixing issues on x86_64 virtual-machines with more then 4G of RAM, atm we pass __GFP_DMA32 to kmalloc, but kmalloc does not honor that, so we need to switch to get_pages, which means we will not be able to use kfree to free memory allocated with vbg_alloc_req.
While at it also remove a comment on a vbg_alloc_req call which talks about Windows (inherited from the vbox upstream cross-platform code).
Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede hdegoede@redhat.com --- drivers/virt/vboxguest/vboxguest_core.c | 66 +++++++++++++----------- drivers/virt/vboxguest/vboxguest_utils.c | 14 +++-- include/linux/vbox_utils.h | 7 +++ 3 files changed, 53 insertions(+), 34 deletions(-)
diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index 190dbf8cfcb5..7411a535fda2 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -114,7 +114,7 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev) }
out: - kfree(req); + vbg_req_free(req, sizeof(*req)); kfree(pages); }
@@ -144,7 +144,7 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev)
rc = vbg_req_perform(gdev, req);
- kfree(req); + vbg_req_free(req, sizeof(*req));
if (rc < 0) { vbg_err("%s error: %d\n", __func__, rc); @@ -214,8 +214,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) ret = vbg_status_code_to_errno(rc);
out_free: - kfree(req2); - kfree(req1); + vbg_req_free(req2, sizeof(*req2)); + vbg_req_free(req1, sizeof(*req1)); return ret; }
@@ -245,7 +245,7 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active) if (rc == VERR_NOT_IMPLEMENTED) /* Compatibility with older hosts. */ rc = VINF_SUCCESS;
- kfree(req); + vbg_req_free(req, sizeof(*req));
return vbg_status_code_to_errno(rc); } @@ -431,7 +431,7 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled) rc = vbg_req_perform(gdev, req); do_div(req->interval_ns, 1000000); /* ns -> ms */ gdev->heartbeat_interval_ms = req->interval_ns; - kfree(req); + vbg_req_free(req, sizeof(*req));
return vbg_status_code_to_errno(rc); } @@ -454,12 +454,6 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev) if (ret < 0) return ret;
- /* - * Preallocate the request to use it from the timer callback because: - * 1) on Windows vbg_req_alloc must be called at IRQL <= APC_LEVEL - * and the timer callback runs at DISPATCH_LEVEL; - * 2) avoid repeated allocations. - */ gdev->guest_heartbeat_req = vbg_req_alloc( sizeof(*gdev->guest_heartbeat_req), VMMDEVREQ_GUEST_HEARTBEAT); @@ -481,8 +475,8 @@ static void vbg_heartbeat_exit(struct vbg_dev *gdev) { del_timer_sync(&gdev->heartbeat_timer); vbg_heartbeat_host_config(gdev, false); - kfree(gdev->guest_heartbeat_req); - + vbg_req_free(gdev->guest_heartbeat_req, + sizeof(*gdev->guest_heartbeat_req)); }
/** @@ -543,7 +537,7 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev, if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc);
- kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); }
@@ -617,7 +611,7 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev,
out: mutex_unlock(&gdev->session_mutex); - kfree(req); + vbg_req_free(req, sizeof(*req));
return ret; } @@ -642,7 +636,7 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev) if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc);
- kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); }
@@ -712,7 +706,7 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev,
out: mutex_unlock(&gdev->session_mutex); - kfree(req); + vbg_req_free(req, sizeof(*req));
return ret; } @@ -749,7 +743,7 @@ static int vbg_query_host_version(struct vbg_dev *gdev) }
out: - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; }
@@ -847,11 +841,16 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events) return 0;
err_free_reqs: - kfree(gdev->mouse_status_req); - kfree(gdev->ack_events_req); - kfree(gdev->cancel_req); - kfree(gdev->mem_balloon.change_req); - kfree(gdev->mem_balloon.get_req); + vbg_req_free(gdev->mouse_status_req, + sizeof(*gdev->mouse_status_req)); + vbg_req_free(gdev->ack_events_req, + sizeof(*gdev->ack_events_req)); + vbg_req_free(gdev->cancel_req, + sizeof(*gdev->cancel_req)); + vbg_req_free(gdev->mem_balloon.change_req, + sizeof(*gdev->mem_balloon.change_req)); + vbg_req_free(gdev->mem_balloon.get_req, + sizeof(*gdev->mem_balloon.get_req)); return ret; }
@@ -872,11 +871,16 @@ void vbg_core_exit(struct vbg_dev *gdev) vbg_reset_host_capabilities(gdev); vbg_core_set_mouse_status(gdev, 0);
- kfree(gdev->mouse_status_req); - kfree(gdev->ack_events_req); - kfree(gdev->cancel_req); - kfree(gdev->mem_balloon.change_req); - kfree(gdev->mem_balloon.get_req); + vbg_req_free(gdev->mouse_status_req, + sizeof(*gdev->mouse_status_req)); + vbg_req_free(gdev->ack_events_req, + sizeof(*gdev->ack_events_req)); + vbg_req_free(gdev->cancel_req, + sizeof(*gdev->cancel_req)); + vbg_req_free(gdev->mem_balloon.change_req, + sizeof(*gdev->mem_balloon.change_req)); + vbg_req_free(gdev->mem_balloon.get_req, + sizeof(*gdev->mem_balloon.get_req)); }
/** @@ -1415,7 +1419,7 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, req->flags = dump->u.in.flags; dump->hdr.rc = vbg_req_perform(gdev, req);
- kfree(req); + vbg_req_free(req, sizeof(*req)); return 0; }
@@ -1513,7 +1517,7 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features) if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc);
- kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); }
diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index 0f0dab8023cf..bad915463359 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -82,6 +82,14 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) return req; }
+void vbg_req_free(void *req, size_t len) +{ + if (!req) + return; + + kfree(req); +} + /* Note this function returns a VBox status code, not a negative errno!! */ int vbg_req_perform(struct vbg_dev *gdev, void *req) { @@ -137,7 +145,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, rc = hgcm_connect->header.result; }
- kfree(hgcm_connect); + vbg_req_free(hgcm_connect, sizeof(*hgcm_connect));
*vbox_status = rc; return 0; @@ -166,7 +174,7 @@ int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status) if (rc >= 0) rc = hgcm_disconnect->header.result;
- kfree(hgcm_disconnect); + vbg_req_free(hgcm_disconnect, sizeof(*hgcm_disconnect));
*vbox_status = rc; return 0; @@ -623,7 +631,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, }
if (!leak_it) - kfree(call); + vbg_req_free(call, size);
free_bounce_bufs: if (bounce_bufs) { diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h index c71def6b310f..0b7cd179f2ee 100644 --- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -33,6 +33,13 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); */ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type);
+/** + * Free a request allocated by vbg_req_alloc() + * @req: Request to free. + * @len: Length, must be the same as the value used for alloc. + */ +void vbg_req_free(void *req, size_t len); + /** * Perform a generic request. *
It is not possible to get DMA32 zone memory through kmalloc, causing the vboxguest driver to malfunction due to getting memory above 4G which the PCI device cannot handle.
This commit changes the kmalloc calls where the 4G limit matters to using __get_free_pages() fixing vboxguest not working on x86_64 guests with more then 4G RAM.
Cc: stable@vger.kernel.org Reported-by: Eloy Coto Pereiro eloy.coto@gmail.com Signed-off-by: Hans de Goede hdegoede@redhat.com --- drivers/virt/vboxguest/vboxguest_linux.c | 19 ++++++++++++++++--- drivers/virt/vboxguest/vboxguest_utils.c | 5 +++-- 2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c index 82e280d38cc2..398d22693234 100644 --- a/drivers/virt/vboxguest/vboxguest_linux.c +++ b/drivers/virt/vboxguest/vboxguest_linux.c @@ -87,6 +87,7 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, struct vbg_session *session = filp->private_data; size_t returned_size, size; struct vbg_ioctl_hdr hdr; + bool is_vmmdev_req; int ret = 0; void *buf;
@@ -106,8 +107,17 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, if (size > SZ_16M) return -E2BIG;
- /* __GFP_DMA32 because IOCTL_VMMDEV_REQUEST passes this to the host */ - buf = kmalloc(size, GFP_KERNEL | __GFP_DMA32); + /* + * IOCTL_VMMDEV_REQUEST needs the buffer to be below 4G to avoid + * the need for a bounce-buffer and another copy later on. + */ + is_vmmdev_req = (req & ~IOCSIZE_MASK) == VBG_IOCTL_VMMDEV_REQUEST(0) || + req == VBG_IOCTL_VMMDEV_REQUEST_BIG; + + if (is_vmmdev_req) + buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT); + else + buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM;
@@ -132,7 +142,10 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, ret = -EFAULT;
out: - kfree(buf); + if (is_vmmdev_req) + vbg_req_free(buf, size); + else + kfree(buf);
return ret; } diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index bad915463359..bf4474214b4d 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -65,8 +65,9 @@ VBG_LOG(vbg_debug, pr_debug); void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) { struct vmmdev_request_header *req; + int order = get_order(PAGE_ALIGN(len));
- req = kmalloc(len, GFP_KERNEL | __GFP_DMA32); + req = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, order); if (!req) return NULL;
@@ -87,7 +88,7 @@ void vbg_req_free(void *req, size_t len) if (!req) return;
- kfree(req); + free_pages((unsigned long)req, get_order(PAGE_ALIGN(len))); }
/* Note this function returns a VBox status code, not a negative errno!! */
On Thu, Mar 29, 2018 at 01:21:15PM +0200, Hans de Goede wrote:
@@ -87,7 +88,7 @@ void vbg_req_free(void *req, size_t len) if (!req) return;
- kfree(req);
- free_pages((unsigned long)req, get_order(PAGE_ALIGN(len)));
}
Really? This feels wrong to me, why is this driver so special that it has to use free_pages and the like?
greg k-h
On Thu, Mar 29, 2018 at 01:21:15PM +0200, Hans de Goede wrote:
It is not possible to get DMA32 zone memory through kmalloc,
Why can't we just fix that issue here instead?
thanks,
greg k-h
Hi,
On 29-03-18 13:58, Greg Kroah-Hartman wrote:
On Thu, Mar 29, 2018 at 01:21:15PM +0200, Hans de Goede wrote:
It is not possible to get DMA32 zone memory through kmalloc,
Why can't we just fix that issue here instead?
AFAIK that would go against the design of the whole slab allocator, it creates buckets (kmem_cache objects) for differently sized allocs out of the normal memzone, you cannot select what memzone you want with kmalloc, if you need to specify a memzone you need to use either dma specific malloc functions such as the dmapool.h functions, or directly call alloc_pages / get_free_pages.
A quick grep on the drivers dir shows 50 files using GFP_DMA32 and almost all are passing it to alloc_page or __get_page.
I do actually see 2 drivers which are passing it to kmalloc, which as said will not work I will mail the maintainers of those about this.
Regards,
Hans
On Thu, Mar 29, 2018 at 03:42:59PM +0200, Hans de Goede wrote:
Hi,
On 29-03-18 13:58, Greg Kroah-Hartman wrote:
On Thu, Mar 29, 2018 at 01:21:15PM +0200, Hans de Goede wrote:
It is not possible to get DMA32 zone memory through kmalloc,
Why can't we just fix that issue here instead?
AFAIK that would go against the design of the whole slab allocator, it creates buckets (kmem_cache objects) for differently sized allocs out of the normal memzone, you cannot select what memzone you want with kmalloc, if you need to specify a memzone you need to use either dma specific malloc functions such as the dmapool.h functions, or directly call alloc_pages / get_free_pages.
A quick grep on the drivers dir shows 50 files using GFP_DMA32 and almost all are passing it to alloc_page or __get_page.
If there are 50 users of such an api, why is the mm core not handling this for drivers automatically? It seems harsh that they all have to do this type of allocation on their own.
I do actually see 2 drivers which are passing it to kmalloc, which as said will not work I will mail the maintainers of those about this.
If they haven't noticed that their hardware isn't working, perhaps they don't need to set that flag? :)
thanks,
greg k-h
Hi,
On 29-03-18 18:39, Greg Kroah-Hartman wrote:
On Thu, Mar 29, 2018 at 03:42:59PM +0200, Hans de Goede wrote:
Hi,
On 29-03-18 13:58, Greg Kroah-Hartman wrote:
On Thu, Mar 29, 2018 at 01:21:15PM +0200, Hans de Goede wrote:
It is not possible to get DMA32 zone memory through kmalloc,
Why can't we just fix that issue here instead?
AFAIK that would go against the design of the whole slab allocator, it creates buckets (kmem_cache objects) for differently sized allocs out of the normal memzone, you cannot select what memzone you want with kmalloc, if you need to specify a memzone you need to use either dma specific malloc functions such as the dmapool.h functions, or directly call alloc_pages / get_free_pages.
A quick grep on the drivers dir shows 50 files using GFP_DMA32 and almost all are passing it to alloc_page or __get_page.
If there are 50 users of such an api, why is the mm core not handling this for drivers automatically? It seems harsh that they all have to do this type of allocation on their own.
True, but fixing that falls outside the scope of this patch-set, which is purely a small bug-fix patchset fixing a bug which is being hit be users in the field atm.
So it would be nice if we can get (version 2) of this patch-set merged.
I do actually see 2 drivers which are passing it to kmalloc, which as said will not work I will mail the maintainers of those about this.
If they haven't noticed that their hardware isn't working, perhaps they don't need to set that flag? :)
Or they have never tested with more then 4G of RAM, which is what caused me to not find this problem in my own testing of the vboxguest driver.
Regards,
Hans
On Thu, Mar 29, 2018 at 01:21:14PM +0200, Hans de Goede wrote:
--- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -33,6 +33,13 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); */ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +/**
- Free a request allocated by vbg_req_alloc()
- @req: Request to free.
- @len: Length, must be the same as the value used for alloc.
- */
+void vbg_req_free(void *req, size_t len);
Why is this in vbox_utils.h and not exported? Shouldn't you just put this in drivers/virt/vboxguest/vboxguest_core.h?
thanks,
greg k-h
Hi,
On 29-03-18 13:55, Greg Kroah-Hartman wrote:
On Thu, Mar 29, 2018 at 01:21:14PM +0200, Hans de Goede wrote:
--- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -33,6 +33,13 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); */ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +/**
- Free a request allocated by vbg_req_alloc()
- @req: Request to free.
- @len: Length, must be the same as the value used for alloc.
- */
+void vbg_req_free(void *req, size_t len);
Why is this in vbox_utils.h and not exported? Shouldn't you just put this in drivers/virt/vboxguest/vboxguest_core.h?
I put it in include/linux/vbox_utils.h because that is where its counterpart vbg_req_alloc() lives. But you're right that neither is used outside of the vboxguest module, so I will move vbg_req_alloc() to vboxguest_core.h for v2 and also put the new vbg_req_free() there.
Regards,
Hans
On Thu, Mar 29, 2018 at 01:21:14PM +0200, Hans de Goede wrote:
+void vbg_req_free(void *req, size_t len) +{
- if (!req)
return;
- kfree(req);
+}
Wait, no, this isn't ok. Don't create wrapper functions that you never use except as a wrapper :(
I can see someone sending me almost the indental revert patch right after this one gets applied...
thanks,
greg k-h
Hi,
On 29-03-18 13:56, Greg Kroah-Hartman wrote:
On Thu, Mar 29, 2018 at 01:21:14PM +0200, Hans de Goede wrote:
+void vbg_req_free(void *req, size_t len) +{
- if (!req)
return;
- kfree(req);
+}
Wait, no, this isn't ok. Don't create wrapper functions that you never use except as a wrapper :(
I can see someone sending me almost the indental revert patch right after this one gets applied...
The 2nd patch in the series changes this function to be more then just a wrapper around kfree. As mentioned in the commit message this is a preparation patch for that 2nd patch, the idea is that by splitting out the patch replacing all the kfree() calls with vbg_req_free() calls the 2nd patch becomes easier to review.
Regards,
Hans
linux-stable-mirror@lists.linaro.org