Hi all,
Peter noticed that with some dumb luck you can toast the kernel address space with exported vmalloc symbols.
I used this as an opportunity to decruft the vmalloc.c API and make it much more systematic. This also removes any chance to create vmalloc mappings outside the designated areas or using executable permissions from modules. Besides that it removes more than 300 lines of code.
A git tree is also available here:
git://git.infradead.org/users/hch/misc.git sanitize-vmalloc-api
Gitweb:
http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/sanitize-vma...
Use the designated helper for allocating executable kernel memory, and remove the now unused PAGE_KERNEL_RX define.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/x86/hyperv/hv_init.c | 2 +- arch/x86/include/asm/pgtable_types.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b0da5320bcff..5a4b363ba67b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -355,7 +355,7 @@ void __init hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); + hv_hypercall_pg = vmalloc_exec(PAGE_SIZE); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b6606fe6cfdf..947867f112ea 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -194,7 +194,6 @@ enum page_cache_mode { #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) -#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) @@ -220,7 +219,6 @@ enum page_cache_mode { #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) #define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) -#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) #define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) #define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
vm_map_ram can keep mappings around after the vm_unmap_ram. Using that with non-PAGE_KERNEL mappings can lead to all kinds of aliasing issues.
Signed-off-by: Christoph Hellwig hch@lst.de --- drivers/staging/android/ion/ion_heap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index 473b465724f1..a2d5c6df4b96 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -99,12 +99,12 @@ int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot) { - void *addr = vm_map_ram(pages, num, -1, pgprot); + void *addr = vmap(pages, num, VM_MAP);
if (!addr) return -ENOMEM; memset(addr, 0, PAGE_SIZE * num); - vm_unmap_ram(addr, num); + vunmap(addr);
return 0; }
On Wed, Apr 08, 2020 at 01:59:00PM +0200, Christoph Hellwig wrote:
vm_map_ram can keep mappings around after the vm_unmap_ram. Using that with non-PAGE_KERNEL mappings can lead to all kinds of aliasing issues.
Signed-off-by: Christoph Hellwig hch@lst.de
Acked-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Just use vmap instead of messing with vmalloc internals.
Signed-off-by: Christoph Hellwig hch@lst.de --- drivers/staging/media/ipu3/ipu3-css-pool.h | 4 +-- drivers/staging/media/ipu3/ipu3-dmamap.c | 30 ++++++---------------- 2 files changed, 9 insertions(+), 25 deletions(-)
diff --git a/drivers/staging/media/ipu3/ipu3-css-pool.h b/drivers/staging/media/ipu3/ipu3-css-pool.h index f4a60b41401b..a8ccd4f70320 100644 --- a/drivers/staging/media/ipu3/ipu3-css-pool.h +++ b/drivers/staging/media/ipu3/ipu3-css-pool.h @@ -15,14 +15,12 @@ struct imgu_device; * @size: size of the buffer in bytes. * @vaddr: kernel virtual address. * @daddr: iova dma address to access IPU3. - * @vma: private, a pointer to &struct vm_struct, - * used for imgu_dmamap_free. */ struct imgu_css_map { size_t size; void *vaddr; dma_addr_t daddr; - struct vm_struct *vma; + struct page **pages; };
/** diff --git a/drivers/staging/media/ipu3/ipu3-dmamap.c b/drivers/staging/media/ipu3/ipu3-dmamap.c index 7431322379f6..8a19b0024152 100644 --- a/drivers/staging/media/ipu3/ipu3-dmamap.c +++ b/drivers/staging/media/ipu3/ipu3-dmamap.c @@ -96,6 +96,7 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map, unsigned long shift = iova_shift(&imgu->iova_domain); struct device *dev = &imgu->pci_dev->dev; size_t size = PAGE_ALIGN(len); + int count = size >> PAGE_SHIFT; struct page **pages; dma_addr_t iovaddr; struct iova *iova; @@ -114,7 +115,7 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map,
/* Call IOMMU driver to setup pgt */ iovaddr = iova_dma_addr(&imgu->iova_domain, iova); - for (i = 0; i < size / PAGE_SIZE; ++i) { + for (i = 0; i < count; ++i) { rval = imgu_mmu_map(imgu->mmu, iovaddr, page_to_phys(pages[i]), PAGE_SIZE); if (rval) @@ -123,33 +124,23 @@ void *imgu_dmamap_alloc(struct imgu_device *imgu, struct imgu_css_map *map, iovaddr += PAGE_SIZE; }
- /* Now grab a virtual region */ - map->vma = __get_vm_area(size, VM_USERMAP, VMALLOC_START, VMALLOC_END); - if (!map->vma) + map->vaddr = vmap(pages, count, VM_USERMAP, PAGE_KERNEL); + if (!map->vaddr) goto out_unmap;
- map->vma->pages = pages; - /* And map it in KVA */ - if (map_vm_area(map->vma, PAGE_KERNEL, pages)) - goto out_vunmap; - + map->pages = pages; map->size = size; map->daddr = iova_dma_addr(&imgu->iova_domain, iova); - map->vaddr = map->vma->addr;
dev_dbg(dev, "%s: allocated %zu @ IOVA %pad @ VA %p\n", __func__, - size, &map->daddr, map->vma->addr); - - return map->vma->addr; + size, &map->daddr, map->vaddr);
-out_vunmap: - vunmap(map->vma->addr); + return map->vaddr;
out_unmap: imgu_dmamap_free_buffer(pages, size); imgu_mmu_unmap(imgu->mmu, iova_dma_addr(&imgu->iova_domain, iova), i * PAGE_SIZE); - map->vma = NULL;
out_free_iova: __free_iova(&imgu->iova_domain, iova); @@ -177,8 +168,6 @@ void imgu_dmamap_unmap(struct imgu_device *imgu, struct imgu_css_map *map) */ void imgu_dmamap_free(struct imgu_device *imgu, struct imgu_css_map *map) { - struct vm_struct *area = map->vma; - dev_dbg(&imgu->pci_dev->dev, "%s: freeing %zu @ IOVA %pad @ VA %p\n", __func__, map->size, &map->daddr, map->vaddr);
@@ -187,11 +176,8 @@ void imgu_dmamap_free(struct imgu_device *imgu, struct imgu_css_map *map)
imgu_dmamap_unmap(imgu, map);
- if (WARN_ON(!area) || WARN_ON(!area->pages)) - return; - - imgu_dmamap_free_buffer(area->pages, map->size); vunmap(map->vaddr); + imgu_dmamap_free_buffer(map->pages, map->size); map->vaddr = NULL; }
Replace the open coded instance of vmap with the actual function. In the non-contiguous (IOMMU) case this requires an extra find_vm_area, but given that this isn't a fast path function that is a small price to pay.
Signed-off-by: Christoph Hellwig hch@lst.de --- kernel/dma/remap.c | 48 ++++++++++++---------------------------------- 1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index d14cbc83986a..7a8ba60951e8 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -20,23 +20,6 @@ struct page **dma_common_find_pages(void *cpu_addr) return area->pages; }
-static struct vm_struct *__dma_common_pages_remap(struct page **pages, - size_t size, pgprot_t prot, const void *caller) -{ - struct vm_struct *area; - - area = get_vm_area_caller(size, VM_DMA_COHERENT, caller); - if (!area) - return NULL; - - if (map_vm_area(area, prot, pages)) { - vunmap(area->addr); - return NULL; - } - - return area; -} - /* * Remaps an array of PAGE_SIZE pages into another vm_area. * Cannot be used in non-sleeping contexts @@ -44,15 +27,12 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages, void *dma_common_pages_remap(struct page **pages, size_t size, pgprot_t prot, const void *caller) { - struct vm_struct *area; + void *vaddr;
- area = __dma_common_pages_remap(pages, size, prot, caller); - if (!area) - return NULL; - - area->pages = pages; - - return area->addr; + vaddr = vmap(pages, count, VM_DMA_COHERENT, prot); + if (vaddr) + find_vm_area(vaddr)->pages = pages; + return vaddr; }
/* @@ -62,24 +42,20 @@ void *dma_common_pages_remap(struct page **pages, size_t size, void *dma_common_contiguous_remap(struct page *page, size_t size, pgprot_t prot, const void *caller) { - int i; + int count = size >> PAGE_SHIFT; struct page **pages; - struct vm_struct *area; + void *vaddr; + int i;
- pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); + pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL); if (!pages) return NULL; - - for (i = 0; i < (size >> PAGE_SHIFT); i++) + for (i = 0; i < count; i++) pages[i] = nth_page(page, i); - - area = __dma_common_pages_remap(pages, size, prot, caller); - + vaddr = vmap(pages, count, VM_DMA_COHERENT, prot); kfree(pages);
- if (!area) - return NULL; - return area->addr; + return vaddr; }
/*
These helpers are only used for remapping the ISA I/O base. Replace the mapping side with a remap_isa_range helper in isa-bridge.c that hard codes all the known arguments, and just remove __iounmap_at in favour of open coding it in the only caller.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/powerpc/include/asm/io.h | 8 ----- arch/powerpc/kernel/isa-bridge.c | 28 +++++++++++++----- arch/powerpc/mm/ioremap_64.c | 50 -------------------------------- 3 files changed, 21 insertions(+), 65 deletions(-)
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 71f1c5d69839..4fdbb9e45dd7 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -699,10 +699,6 @@ static inline void iosync(void) * * * iounmap undoes such a mapping and can be hooked * - * * __ioremap_at (and the pending __iounmap_at) are low level functions to - * create hand-made mappings for use only by the PCI code and cannot - * currently be hooked. Must be page aligned. - * * * __ioremap_caller is the same as above but takes an explicit caller * reference rather than using __builtin_return_address(0) * @@ -729,10 +725,6 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller);
-extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, - unsigned long size, pgprot_t prot); -extern void __iounmap_at(void *ea, unsigned long size); - /* * When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation * which needs some additional definitions here. They basically allow PIO diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 773671b512df..2257d24e6a26 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/notifier.h> +#include <linux/vmalloc.h>
#include <asm/processor.h> #include <asm/io.h> @@ -38,6 +39,22 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev); #define ISA_SPACE_MASK 0x1 #define ISA_SPACE_IO 0x1
+static void remap_isa_base(phys_addr_t pa, unsigned long size) +{ + WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK); + WARN_ON_ONCE(pa & ~PAGE_MASK); + WARN_ON_ONCE(size & ~PAGE_MASK); + + if (slab_is_available()) { + if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, + pgprot_noncached(PAGE_KERNEL))) + unmap_kernel_range(ISA_IO_BASE, size); + } else { + early_ioremap_range(ISA_IO_BASE, pa, size, + pgprot_noncached(PAGE_KERNEL)); + } +} + static void pci_process_ISA_OF_ranges(struct device_node *isa_node, unsigned long phb_io_base_phys) { @@ -105,15 +122,13 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node, if (size > 0x10000) size = 0x10000;
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - size, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(phb_io_base_phys, size); return;
inval_range: printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " "mapping 64k\n"); - __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - 0x10000, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(phb_io_base_phys, 0x10000); }
@@ -248,8 +263,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np) * and map it */ isa_io_base = ISA_IO_BASE; - __ioremap_at(pbase, (void *)ISA_IO_BASE, - size, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(pbase, size);
pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } @@ -297,7 +311,7 @@ static void isa_bridge_remove(void) isa_bridge_pcidev = NULL;
/* Unmap the ISA area */ - __iounmap_at((void *)ISA_IO_BASE, 0x10000); + unmap_kernel_range(ISA_IO_BASE, 0x10000); }
/** diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index 50a99d9684f7..ba5cbb0d66bd 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -4,56 +4,6 @@ #include <linux/slab.h> #include <linux/vmalloc.h>
-/** - * Low level function to establish the page tables for an IO mapping - */ -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) -{ - int ret; - unsigned long va = (unsigned long)ea; - - /* We don't support the 4K PFN hack with ioremap */ - if (pgprot_val(prot) & H_PAGE_4K_PFN) - return NULL; - - if ((ea + size) >= (void *)IOREMAP_END) { - pr_warn("Outside the supported range\n"); - return NULL; - } - - WARN_ON(pa & ~PAGE_MASK); - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - if (slab_is_available()) { - ret = ioremap_page_range(va, va + size, pa, prot); - if (ret) - unmap_kernel_range(va, size); - } else { - ret = early_ioremap_range(va, pa, size, prot); - } - - if (ret) - return NULL; - - return (void __iomem *)ea; -} -EXPORT_SYMBOL(__ioremap_at); - -/** - * Low level function to tear down the page tables for an IO mapping. This is - * used for mappings that are manipulated manually, like partial unmapping of - * PCI IOs or ISA space. - */ -void __iounmap_at(void *ea, unsigned long size) -{ - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - unmap_kernel_range((unsigned long)ea, size); -} -EXPORT_SYMBOL(__iounmap_at); - void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) {
Factor code shared between pci_64 and electra_cf into a ioremap_pbh helper that follows the normal ioremap semantics, and returns a useful __iomem pointer. Note that it opencodes __ioremap_at as we know from the callers the slab is available. Switch pci_64 to also store the result as __iomem pointer, and unmap the result using iounmap instead of force casting and using vmalloc APIs.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/powerpc/include/asm/io.h | 2 + arch/powerpc/include/asm/pci-bridge.h | 2 +- arch/powerpc/kernel/pci_64.c | 53 ++++++++++++++++++--------- drivers/pcmcia/electra_cf.c | 45 ++++++++--------------- 4 files changed, 54 insertions(+), 48 deletions(-)
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 635969b5b58e..71f1c5d69839 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -719,6 +719,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
+void __iomem *ioremap_pbh(phys_addr_t paddr, unsigned long size); + int early_ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot); void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 69f4cb3b7c56..b92e81b256e5 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -66,7 +66,7 @@ struct pci_controller {
void __iomem *io_base_virt; #ifdef CONFIG_PPC64 - void *io_base_alloc; + void __iomem *io_base_alloc; #endif resource_size_t io_base_phys; resource_size_t pci_io_size; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index f83d1f69b1dd..8e86bd9c1eca 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -109,23 +109,46 @@ int pcibios_unmap_io_space(struct pci_bus *bus) /* Get the host bridge */ hose = pci_bus_to_host(bus);
- /* Check if we have IOs allocated */ - if (hose->io_base_alloc == NULL) - return 0; - pr_debug("IO unmapping for PHB %pOF\n", hose->dn); pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
- /* This is a PHB, we fully unmap the IO area */ - vunmap(hose->io_base_alloc); - + iounmap(hose->io_base_alloc); return 0; } EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-static int pcibios_map_phb_io_space(struct pci_controller *hose) +void __iomem *ioremap_pbh(phys_addr_t paddr, unsigned long size) { struct vm_struct *area; + unsigned long addr; + + WARN_ON_ONCE(paddr & ~PAGE_MASK); + WARN_ON_ONCE(size & ~PAGE_MASK); + + /* + * Let's allocate some IO space for that guy. We don't pass VM_IOREMAP + * because we don't care about alignment tricks that the core does in + * that case. Maybe we should due to stupid card with incomplete + * address decoding but I'd rather not deal with those outside of the + * reserved 64K legacy region. + */ + area = __get_vm_area(size, 0, PHB_IO_BASE, PHB_IO_END); + if (!area) + return NULL; + + addr = (unsigned long)area->addr; + if (ioremap_page_range(addr, addr + size, paddr, + pgprot_noncached(PAGE_KERNEL))) { + unmap_kernel_range(addr, size); + return NULL; + } + + return (void __iomem *)addr; +} +EXPORT_SYMBOL_GPL(ioremap_pbh); + +static int pcibios_map_phb_io_space(struct pci_controller *hose) +{ unsigned long phys_page; unsigned long size_page; unsigned long io_virt_offset; @@ -146,12 +169,11 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) * with incomplete address decoding but I'd rather not deal with * those outside of the reserved 64K legacy region. */ - area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); - if (area == NULL) + hose->io_base_alloc = ioremap_pbh(phys_page, size_page); + if (!hose->io_base_alloc) return -ENOMEM; - hose->io_base_alloc = area->addr; - hose->io_base_virt = (void __iomem *)(area->addr + - hose->io_base_phys - phys_page); + hose->io_base_virt = hose->io_base_alloc + + hose->io_base_phys - phys_page;
pr_debug("IO mapping for PHB %pOF\n", hose->dn); pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n", @@ -159,11 +181,6 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) pr_debug(" size=0x%016llx (alloc=0x%016lx)\n", hose->pci_io_size, size_page);
- /* Establish the mapping */ - if (__ioremap_at(phys_page, area->addr, size_page, - pgprot_noncached(PAGE_KERNEL)) == NULL) - return -ENOMEM; - /* Fixup hose IO resource */ io_virt_offset = pcibios_io_space_offset(hose); hose->io_resource.start += io_virt_offset; diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c index f2741c04289d..77b229f8d218 100644 --- a/drivers/pcmcia/electra_cf.c +++ b/drivers/pcmcia/electra_cf.c @@ -178,10 +178,9 @@ static int electra_cf_probe(struct platform_device *ofdev) struct device_node *np = ofdev->dev.of_node; struct electra_cf_socket *cf; struct resource mem, io; - int status; + int status = -ENOMEM; const unsigned int *prop; int err; - struct vm_struct *area;
err = of_address_to_resource(np, 0, &mem); if (err) @@ -202,30 +201,19 @@ static int electra_cf_probe(struct platform_device *ofdev) cf->mem_phys = mem.start; cf->mem_size = PAGE_ALIGN(resource_size(&mem)); cf->mem_base = ioremap(cf->mem_phys, cf->mem_size); + if (!cf->mem_base) + goto out_free_cf; cf->io_size = PAGE_ALIGN(resource_size(&io)); - - area = __get_vm_area(cf->io_size, 0, PHB_IO_BASE, PHB_IO_END); - if (area == NULL) { - status = -ENOMEM; - goto fail1; - } - - cf->io_virt = (void __iomem *)(area->addr); + cf->io_virt = ioremap_pbh(io.start, cf->io_size); + if (!cf->io_virt) + goto out_unmap_mem;
cf->gpio_base = ioremap(0xfc103000, 0x1000); + if (!cf->gpio_base) + goto out_unmap_virt; dev_set_drvdata(device, cf);
- if (!cf->mem_base || !cf->io_virt || !cf->gpio_base || - (__ioremap_at(io.start, cf->io_virt, cf->io_size, - pgprot_noncached(PAGE_KERNEL)) == NULL)) { - dev_err(device, "can't ioremap ranges\n"); - status = -ENOMEM; - goto fail1; - } - - cf->io_base = (unsigned long)cf->io_virt - VMALLOC_END; - cf->iomem.start = (unsigned long)cf->mem_base; cf->iomem.end = (unsigned long)cf->mem_base + (mem.end - mem.start); cf->iomem.flags = IORESOURCE_MEM; @@ -305,14 +293,13 @@ static int electra_cf_probe(struct platform_device *ofdev) if (cf->irq) free_irq(cf->irq, cf);
- if (cf->io_virt) - __iounmap_at(cf->io_virt, cf->io_size); - if (cf->mem_base) - iounmap(cf->mem_base); - if (cf->gpio_base) - iounmap(cf->gpio_base); - if (area) - device_init_wakeup(&ofdev->dev, 0); + iounmap(cf->gpio_base); +out_unmap_virt: + device_init_wakeup(&ofdev->dev, 0); + iounmap(cf->io_virt); +out_unmap_mem: + iounmap(cf->mem_base); +out_free_cf: kfree(cf); return status;
@@ -330,7 +317,7 @@ static int electra_cf_remove(struct platform_device *ofdev) free_irq(cf->irq, cf); del_timer_sync(&cf->timer);
- __iounmap_at(cf->io_virt, cf->io_size); + iounmap(cf->io_virt); iounmap(cf->mem_base); iounmap(cf->gpio_base); release_mem_region(cf->mem_phys, cf->mem_size);
There are no modular users of this function.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d1534d610b48..3375f9508ef6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2029,7 +2029,6 @@ void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { vunmap_page_range(addr, addr + size); } -EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
/** * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
Switch the two remaining callers to use __get_vm_area_caller instead.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/powerpc/kernel/pci_64.c | 3 ++- arch/sh/kernel/cpu/sh4/sq.c | 3 ++- include/linux/vmalloc.h | 2 -- mm/vmalloc.c | 8 -------- 4 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 8e86bd9c1eca..155e2ef60053 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -132,7 +132,8 @@ void __iomem *ioremap_pbh(phys_addr_t paddr, unsigned long size) * address decoding but I'd rather not deal with those outside of the * reserved 64K legacy region. */ - area = __get_vm_area(size, 0, PHB_IO_BASE, PHB_IO_END); + area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END, + __builtin_return_address(0)); if (!area) return NULL;
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 934ff84844fa..d432164b23b7 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -103,7 +103,8 @@ static int __sq_remap(struct sq_mapping *map, pgprot_t prot) #if defined(CONFIG_MMU) struct vm_struct *vma;
- vma = __get_vm_area(map->size, VM_ALLOC, map->sq_addr, SQ_ADDRMAX); + vma = __get_vm_area_caller(map->size, VM_ALLOC, map->sq_addr, + SQ_ADDRMAX, __builtin_return_address(0)); if (!vma) return -ENOMEM;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0507a162ccd0..3070b4dbc2d9 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -161,8 +161,6 @@ static inline size_t get_vm_area_size(const struct vm_struct *area) extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); -extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, - unsigned long start, unsigned long end); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 399f219544f7..d1534d610b48 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2127,14 +2127,6 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return area; }
-struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, - unsigned long start, unsigned long end) -{ - return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, - GFP_KERNEL, __builtin_return_address(0)); -} -EXPORT_SYMBOL_GPL(__get_vm_area); - struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller)
This matches the map_kernel_range_noflush API. Also change to pass a size instead of the end, similar to the noflush version.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 55df5dc6a9fc..a3d810def567 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -272,13 +272,13 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, return nr; }
-static int vmap_page_range(unsigned long start, unsigned long end, +static int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages) { int ret;
- ret = map_kernel_range_noflush(start, end - start, prot, pages); - flush_cache_vmap(start, end); + ret = map_kernel_range_noflush(start, size, prot, pages); + flush_cache_vmap(start, start + size); return ret; }
@@ -1866,7 +1866,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
kasan_unpoison_vmalloc(mem, size);
- if (vmap_page_range(addr, addr + size, prot, pages) < 0) { + if (map_kernel_range(addr, size, prot, pages) < 0) { vm_unmap_ram(mem, count); return NULL; } @@ -2030,10 +2030,9 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { unsigned long addr = (unsigned long)area->addr; - unsigned long end = addr + get_vm_area_size(area); int err;
- err = vmap_page_range(addr, end, prot, pages); + err = map_kernel_range(addr, get_vm_area_size(area), prot, pages);
return err > 0 ? 0 : err; }
Ever use of addr in vb_free casts to unsigned long first, and the caller has an unsigned long version of the address available anyway. Just pass that and avoid all the casts.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9183fc0d365a..aada9e9144bd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1664,7 +1664,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) return vaddr; }
-static void vb_free(const void *addr, unsigned long size) +static void vb_free(unsigned long addr, unsigned long size) { unsigned long offset; unsigned long vb_idx; @@ -1674,24 +1674,22 @@ static void vb_free(const void *addr, unsigned long size) BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
- flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); + flush_cache_vunmap(addr, addr + size);
order = get_order(size);
- offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); - offset >>= PAGE_SHIFT; + offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
- vb_idx = addr_to_vb_idx((unsigned long)addr); + vb_idx = addr_to_vb_idx(addr); rcu_read_lock(); vb = radix_tree_lookup(&vmap_block_tree, vb_idx); rcu_read_unlock(); BUG_ON(!vb);
- vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); + vunmap_page_range(addr, addr + size);
if (debug_pagealloc_enabled_static()) - flush_tlb_kernel_range((unsigned long)addr, - (unsigned long)addr + size); + flush_tlb_kernel_range(addr, addr + size);
spin_lock(&vb->lock);
@@ -1791,7 +1789,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
if (likely(count <= VMAP_MAX_ALLOC)) { debug_check_no_locks_freed(mem, size); - vb_free(mem, size); + vb_free(addr, size); return; }
These have non-static aliases claled map_kernel_range_noflush and unmap_kernel_range_noflush that just differ slightly in the calling conventions that pass addr + size instead of an end.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 98 +++++++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 58 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index aada9e9144bd..55df5dc6a9fc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -127,10 +127,24 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) } while (p4d++, addr = next, addr != end); }
-static void vunmap_page_range(unsigned long addr, unsigned long end) +/** + * unmap_kernel_range_noflush - unmap kernel VM area + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size specify + * should have been allocated using get_vm_area() and its friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is responsible + * for calling flush_cache_vunmap() on to-be-mapped areas before calling this + * function and flush_tlb_kernel_range() after. + */ +void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { - pgd_t *pgd; + unsigned long end = addr + size; unsigned long next; + pgd_t *pgd;
BUG_ON(addr >= end); pgd = pgd_offset_k(addr); @@ -219,18 +233,30 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, return 0; }
-/* - * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and - * will have pfns corresponding to the "pages" array. +/** + * map_kernel_range_noflush - map kernel VM area with the specified pages + * @addr: start of the VM area to map + * @size: size of the VM area to map + * @prot: page protection flags to use + * @pages: pages to map * - * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] + * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size specify should + * have been allocated using get_vm_area() and its friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is responsible for + * calling flush_cache_vmap() on to-be-mapped areas before calling this + * function. + * + * RETURNS: + * The number of pages mapped on success, -errno on failure. */ -static int vmap_page_range_noflush(unsigned long start, unsigned long end, - pgprot_t prot, struct page **pages) +int map_kernel_range_noflush(unsigned long addr, unsigned long size, + pgprot_t prot, struct page **pages) { - pgd_t *pgd; + unsigned long end = addr + size; unsigned long next; - unsigned long addr = start; + pgd_t *pgd; int err = 0; int nr = 0;
@@ -251,7 +277,7 @@ static int vmap_page_range(unsigned long start, unsigned long end, { int ret;
- ret = vmap_page_range_noflush(start, end, prot, pages); + ret = map_kernel_range_noflush(start, end - start, prot, pages); flush_cache_vmap(start, end); return ret; } @@ -1226,7 +1252,7 @@ EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); */ static void unmap_vmap_area(struct vmap_area *va) { - vunmap_page_range(va->va_start, va->va_end); + unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start); }
/* @@ -1686,7 +1712,7 @@ static void vb_free(unsigned long addr, unsigned long size) rcu_read_unlock(); BUG_ON(!vb);
- vunmap_page_range(addr, addr + size); + unmap_kernel_range_noflush(addr, size);
if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(addr, addr + size); @@ -1984,50 +2010,6 @@ void __init vmalloc_init(void) vmap_initialized = true; }
-/** - * map_kernel_range_noflush - map kernel VM area with the specified pages - * @addr: start of the VM area to map - * @size: size of the VM area to map - * @prot: page protection flags to use - * @pages: pages to map - * - * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size - * specify should have been allocated using get_vm_area() and its - * friends. - * - * NOTE: - * This function does NOT do any cache flushing. The caller is - * responsible for calling flush_cache_vmap() on to-be-mapped areas - * before calling this function. - * - * RETURNS: - * The number of pages mapped on success, -errno on failure. - */ -int map_kernel_range_noflush(unsigned long addr, unsigned long size, - pgprot_t prot, struct page **pages) -{ - return vmap_page_range_noflush(addr, addr + size, prot, pages); -} - -/** - * unmap_kernel_range_noflush - unmap kernel VM area - * @addr: start of the VM area to unmap - * @size: size of the VM area to unmap - * - * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size - * specify should have been allocated using get_vm_area() and its - * friends. - * - * NOTE: - * This function does NOT do any cache flushing. The caller is - * responsible for calling flush_cache_vunmap() on to-be-mapped areas - * before calling this function and flush_tlb_kernel_range() after. - */ -void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) -{ - vunmap_page_range(addr, addr + size); -} - /** * unmap_kernel_range - unmap kernel VM area and flush cache and TLB * @addr: start of the VM area to unmap @@ -2041,7 +2023,7 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) unsigned long end = addr + size;
flush_cache_vunmap(addr, end); - vunmap_page_range(addr, end); + unmap_kernel_range_noflush(addr, size); flush_tlb_kernel_range(addr, end); }
Rename the Kconfig variable to clarify the scope.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/arm/configs/omap2plus_defconfig | 2 +- include/linux/zsmalloc.h | 2 +- mm/Kconfig | 2 +- mm/zsmalloc.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 3cc3ca5fa027..583d8abd80a4 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -81,7 +81,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y CONFIG_CMA=y CONFIG_ZSMALLOC=m -CONFIG_PGTABLE_MAPPING=y +CONFIG_ZSMALLOC_PGTABLE_MAPPING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 2219cce81ca4..0fdbf653b173 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -20,7 +20,7 @@ * zsmalloc mapping modes * * NOTE: These only make a difference when a mapped object spans pages. - * They also have no effect when PGTABLE_MAPPING is selected. + * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected. */ enum zs_mapmode { ZS_MM_RW, /* normal read-write mapping */ diff --git a/mm/Kconfig b/mm/Kconfig index 691021492e78..36949a9425b8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -700,7 +700,7 @@ config ZSMALLOC returned by an alloc(). This handle must be mapped in order to access the allocated space.
-config PGTABLE_MAPPING +config ZSMALLOC_PGTABLE_MAPPING bool "Use page table mapping to access object in zsmalloc" depends on ZSMALLOC help diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2f836a2b993f..ac0524330b9b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -293,7 +293,7 @@ struct zspage { };
struct mapping_area { -#ifdef CONFIG_PGTABLE_MAPPING +#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING struct vm_struct *vm; /* vm area for mapping object that span pages */ #else char *vm_buf; /* copy buffer for objects that span pages */ @@ -1113,7 +1113,7 @@ static struct zspage *find_get_zspage(struct size_class *class) return zspage; }
-#ifdef CONFIG_PGTABLE_MAPPING +#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING static inline int __zs_cpu_up(struct mapping_area *area) { /* @@ -1151,7 +1151,7 @@ static inline void __zs_unmap_object(struct mapping_area *area, unmap_kernel_range(addr, PAGE_SIZE * 2); }
-#else /* CONFIG_PGTABLE_MAPPING */ +#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
static inline int __zs_cpu_up(struct mapping_area *area) { @@ -1233,7 +1233,7 @@ static void __zs_unmap_object(struct mapping_area *area, pagefault_enable(); }
-#endif /* CONFIG_PGTABLE_MAPPING */ +#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
static int zs_cpu_prepare(unsigned int cpu) {
On 4/8/20 4:59 AM, Christoph Hellwig wrote:
Rename the Kconfig variable to clarify the scope.
Signed-off-by: Christoph Hellwig hch@lst.de
arch/arm/configs/omap2plus_defconfig | 2 +- include/linux/zsmalloc.h | 2 +- mm/Kconfig | 2 +- mm/zsmalloc.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-)
Looks good. Thanks.
Acked-by: Randy Dunlap rdunlap@infradead.org
On Wed, Apr 08, 2020 at 01:59:07PM +0200, Christoph Hellwig wrote:
Rename the Kconfig variable to clarify the scope.
Signed-off-by: Christoph Hellwig hch@lst.de
Acked-by: Minchan Kim minchan@kernel.org
None of the callers needs the number of pages, and a 0 / -errno return value is a lot more intuitive.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a3d810def567..ca8dc5d42580 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -249,7 +249,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, * function. * * RETURNS: - * The number of pages mapped on success, -errno on failure. + * 0 on success, -errno on failure. */ int map_kernel_range_noflush(unsigned long addr, unsigned long size, pgprot_t prot, struct page **pages) @@ -269,7 +269,7 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, return err; } while (pgd++, addr = next, addr != end);
- return nr; + return 0; }
static int map_kernel_range(unsigned long start, unsigned long size,
This allows to unexport map_vm_area and unmap_kernel_range, which are rather deep internal and should not be available to modules.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/Kconfig | 2 +- mm/vmalloc.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig index 36949a9425b8..614cc786b519 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -702,7 +702,7 @@ config ZSMALLOC
config ZSMALLOC_PGTABLE_MAPPING bool "Use page table mapping to access object in zsmalloc" - depends on ZSMALLOC + depends on ZSMALLOC=y help By default, zsmalloc uses a copy-based object mapping method to access allocations that span two pages. However, if a particular diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3375f9508ef6..9183fc0d365a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2046,7 +2046,6 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } -EXPORT_SYMBOL_GPL(unmap_kernel_range);
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { @@ -2058,7 +2057,6 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
return err > 0 ? 0 : err; } -EXPORT_SYMBOL_GPL(map_vm_area);
static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller)
Hi,
On 4/8/20 4:59 AM, Christoph Hellwig wrote:
diff --git a/mm/Kconfig b/mm/Kconfig index 36949a9425b8..614cc786b519 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -702,7 +702,7 @@ config ZSMALLOC config ZSMALLOC_PGTABLE_MAPPING bool "Use page table mapping to access object in zsmalloc"
- depends on ZSMALLOC
- depends on ZSMALLOC=y
It's a bool so this shouldn't matter... not needed.
help By default, zsmalloc uses a copy-based object mapping method to access allocations that span two pages. However, if a particular
On Wed, Apr 08, 2020 at 08:01:00AM -0700, Randy Dunlap wrote:
Hi,
On 4/8/20 4:59 AM, Christoph Hellwig wrote:
diff --git a/mm/Kconfig b/mm/Kconfig index 36949a9425b8..614cc786b519 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -702,7 +702,7 @@ config ZSMALLOC config ZSMALLOC_PGTABLE_MAPPING bool "Use page table mapping to access object in zsmalloc"
- depends on ZSMALLOC
- depends on ZSMALLOC=y
It's a bool so this shouldn't matter... not needed.
My mm/Kconfig has:
config ZSMALLOC tristate "Memory allocator for compressed pages" depends on MMU
which I think means it can be modular, no?
On Wed, Apr 08, 2020 at 01:59:08PM +0200, Christoph Hellwig wrote:
This allows to unexport map_vm_area and unmap_kernel_range, which are rather deep internal and should not be available to modules.
Even though I don't know how many usecase we have using zsmalloc as module(I heard only once by dumb reason), it could affect existing users. Thus, please include concrete explanation in the patch to justify when the complain occurs.
Signed-off-by: Christoph Hellwig hch@lst.de
mm/Kconfig | 2 +- mm/vmalloc.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig index 36949a9425b8..614cc786b519 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -702,7 +702,7 @@ config ZSMALLOC config ZSMALLOC_PGTABLE_MAPPING bool "Use page table mapping to access object in zsmalloc"
- depends on ZSMALLOC
- depends on ZSMALLOC=y help By default, zsmalloc uses a copy-based object mapping method to access allocations that span two pages. However, if a particular
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3375f9508ef6..9183fc0d365a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2046,7 +2046,6 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } -EXPORT_SYMBOL_GPL(unmap_kernel_range); int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { @@ -2058,7 +2057,6 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) return err > 0 ? 0 : err; } -EXPORT_SYMBOL_GPL(map_vm_area); static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) -- 2.25.1
On Thu, Apr 09, 2020 at 09:08:26AM -0700, Minchan Kim wrote:
On Wed, Apr 08, 2020 at 01:59:08PM +0200, Christoph Hellwig wrote:
This allows to unexport map_vm_area and unmap_kernel_range, which are rather deep internal and should not be available to modules.
Even though I don't know how many usecase we have using zsmalloc as module(I heard only once by dumb reason), it could affect existing users. Thus, please include concrete explanation in the patch to justify when the complain occurs.
The justification is 'we can unexport functions that have no sane reason of being exported in the first place'.
The Changelog pretty much says that.
On Thu, Apr 09, 2020 at 06:50:30PM +0200, Peter Zijlstra wrote:
On Thu, Apr 09, 2020 at 09:08:26AM -0700, Minchan Kim wrote:
On Wed, Apr 08, 2020 at 01:59:08PM +0200, Christoph Hellwig wrote:
This allows to unexport map_vm_area and unmap_kernel_range, which are rather deep internal and should not be available to modules.
Even though I don't know how many usecase we have using zsmalloc as module(I heard only once by dumb reason), it could affect existing users. Thus, please include concrete explanation in the patch to justify when the complain occurs.
The justification is 'we can unexport functions that have no sane reason of being exported in the first place'.
The Changelog pretty much says that.
Okay, I hope there is no affected user since this patch. If there are someone, they need to provide sane reason why they want to have zsmalloc as module.
Acked-by: Minchan Kim minchan@kernel.org
Switch all callers to map_kernel_range, which symmetric to the unmap side (as well as the _noflush versions).
Signed-off-by: Christoph Hellwig hch@lst.de --- Documentation/core-api/cachetlb.rst | 2 +- include/linux/vmalloc.h | 10 ++++------ mm/vmalloc.c | 21 +++++++-------------- mm/zsmalloc.c | 4 +++- net/ceph/ceph_common.c | 3 +-- 5 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index 93cb65d52720..a1582cc79f0f 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -213,7 +213,7 @@ Here are the routines, one by one: there will be no entries in the cache for the kernel address space for virtual addresses in the range 'start' to 'end-1'.
- The first of these two routines is invoked after map_vm_area() + The first of these two routines is invoked after map_kernel_range() has installed the page table entries. The second is invoked before unmap_kernel_range() deletes the page table entries.
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3070b4dbc2d9..15ffbd8e8e65 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -168,11 +168,11 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr);
-extern int map_vm_area(struct vm_struct *area, pgprot_t prot, - struct page **pages); #ifdef CONFIG_MMU extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); +int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, + struct page **pages); extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); static inline void set_vm_flush_reset_perms(void *addr) @@ -189,14 +189,12 @@ map_kernel_range_noflush(unsigned long start, unsigned long size, { return size >> PAGE_SHIFT; } +#define map_kernel_range map_kernel_range_noflush static inline void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { } -static inline void -unmap_kernel_range(unsigned long addr, unsigned long size) -{ -} +#define unmap_kernel_range unmap_kernel_range_noflush static inline void set_vm_flush_reset_perms(void *addr) { } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ca8dc5d42580..b0c7cdc8701a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -272,8 +272,8 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, return 0; }
-static int map_kernel_range(unsigned long start, unsigned long size, - pgprot_t prot, struct page **pages) +int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, + struct page **pages) { int ret;
@@ -2027,16 +2027,6 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) flush_tlb_kernel_range(addr, end); }
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) -{ - unsigned long addr = (unsigned long)area->addr; - int err; - - err = map_kernel_range(addr, get_vm_area_size(area), prot, pages); - - return err > 0 ? 0 : err; -} - static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { @@ -2408,7 +2398,8 @@ void *vmap(struct page **pages, unsigned int count, if (!area) return NULL;
- if (map_vm_area(area, prot, pages)) { + if (map_kernel_range((unsigned long)area->addr, size, prot, + pages) < 0) { vunmap(area->addr); return NULL; } @@ -2471,8 +2462,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (map_vm_area(area, prot, pages)) + if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area), + prot, pages) < 0) goto fail; + return area->addr;
fail: diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index ac0524330b9b..f6dc0673e62c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1138,7 +1138,9 @@ static inline void __zs_cpu_down(struct mapping_area *area) static inline void *__zs_map_object(struct mapping_area *area, struct page *pages[2], int off, int size) { - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); + unsigned long addr = (unsigned long)area->vm->addr; + + BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0); area->vm_addr = area->vm->addr; return area->vm_addr + off; } diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a0e97f6c1072..66f22e8aa529 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -190,8 +190,7 @@ EXPORT_SYMBOL(ceph_compare_options); * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are * compatible with (a superset of) GFP_KERNEL. This is because while the * actual pages are allocated with the specified flags, the page table pages - * are always allocated with GFP_KERNEL. map_vm_area() doesn't even take - * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc(). + * are always allocated with GFP_KERNEL. * * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO. */
This function just has a single caller, open code it there.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b0c7cdc8701a..258220b203f1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1247,14 +1247,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
-/* - * Clear the pagetable entries of a given vmap_area - */ -static void unmap_vmap_area(struct vmap_area *va) -{ - unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start); -} - /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. @@ -1416,7 +1408,7 @@ static void free_vmap_area_noflush(struct vmap_area *va) static void free_unmap_vmap_area(struct vmap_area *va) { flush_cache_vunmap(va->va_start, va->va_end); - unmap_vmap_area(va); + unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start); if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(va->va_start, va->va_end);
To help enforcing the W^X protection don't allow remapping existing pages as executable.
Based on patch from Peter Zijlstra peterz@infradead.org.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/x86/include/asm/pgtable_types.h | 6 ++++++ include/asm-generic/pgtable.h | 4 ++++ mm/vmalloc.c | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 947867f112ea..2e7c442cc618 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -282,6 +282,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
+static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | _PAGE_NX); +} +#define pgprot_nx pgprot_nx + #ifdef CONFIG_X86_PAE
/* diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 329b8c8ca703..8c5f9c29698b 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -491,6 +491,10 @@ static inline int arch_unmap_one(struct mm_struct *mm, #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) #endif
+#ifndef pgprot_nx +#define pgprot_nx(prot) (prot) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7356b3f07bd8..334c75251ddb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2390,7 +2390,7 @@ void *vmap(struct page **pages, unsigned int count, if (!area) return NULL;
- if (map_kernel_range((unsigned long)area->addr, size, prot, + if (map_kernel_range((unsigned long)area->addr, size, pgprot_nx(prot), pages) < 0) { vunmap(area->addr); return NULL;
This is always GFP_KERNEL - for long term mappings with other properties vmap should be used.
Signed-off-by: Christoph Hellwig hch@lst.de --- drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c | 2 +- drivers/media/common/videobuf2/videobuf2-dma-sg.c | 3 +-- drivers/media/common/videobuf2/videobuf2-vmalloc.c | 3 +-- fs/erofs/decompressor.c | 2 +- fs/xfs/xfs_buf.c | 2 +- include/linux/vmalloc.h | 3 +-- mm/nommu.c | 2 +- mm/vmalloc.c | 4 ++-- 8 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c index 9272bef57092..debaf7b18ab5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -66,7 +66,7 @@ static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) { struct mock_dmabuf *mock = to_mock(dma_buf);
- return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); + return vm_map_ram(mock->pages, mock->npages, 0); }
static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c index 6db60e9d5183..92072a08af25 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c @@ -309,8 +309,7 @@ static void *vb2_dma_sg_vaddr(void *buf_priv) if (buf->db_attach) buf->vaddr = dma_buf_vmap(buf->db_attach->dmabuf); else - buf->vaddr = vm_map_ram(buf->pages, - buf->num_pages, -1, PAGE_KERNEL); + buf->vaddr = vm_map_ram(buf->pages, buf->num_pages, -1); }
/* add offset in case userptr is not page-aligned */ diff --git a/drivers/media/common/videobuf2/videobuf2-vmalloc.c b/drivers/media/common/videobuf2/videobuf2-vmalloc.c index 1a4f0ca87c7c..c66fda4a65e4 100644 --- a/drivers/media/common/videobuf2/videobuf2-vmalloc.c +++ b/drivers/media/common/videobuf2/videobuf2-vmalloc.c @@ -107,8 +107,7 @@ static void *vb2_vmalloc_get_userptr(struct device *dev, unsigned long vaddr, buf->vaddr = (__force void *) ioremap(__pfn_to_phys(nums[0]), size + offset); } else { - buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1, - PAGE_KERNEL); + buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1); }
if (!buf->vaddr) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 5d2d81940679..7628816f2453 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -274,7 +274,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
i = 0; while (1) { - dst = vm_map_ram(rq->out, nrpages_out, -1, PAGE_KERNEL); + dst = vm_map_ram(rq->out, nrpages_out, -1);
/* retry two more times (totally 3 times) */ if (dst || ++i >= 3) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f880141a2268..940af9da6db1 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -474,7 +474,7 @@ _xfs_buf_map_pages( nofs_flag = memalloc_nofs_save(); do { bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); + -1); if (bp->b_addr) break; vm_unmap_aliases(); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 15ffbd8e8e65..9273b1a91ca5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -88,8 +88,7 @@ struct vmap_area { * Highlevel APIs for driver use */ extern void vm_unmap_ram(const void *mem, unsigned int count); -extern void *vm_map_ram(struct page **pages, unsigned int count, - int node, pgprot_t prot); +extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void);
#ifdef CONFIG_MMU diff --git a/mm/nommu.c b/mm/nommu.c index 318df4e236c9..4f07b7ef0297 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -351,7 +351,7 @@ void vunmap(const void *addr) } EXPORT_SYMBOL(vunmap);
-void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) +void *vm_map_ram(struct page **pages, unsigned int count, int node) { BUG(); return NULL; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 258220b203f1..7356b3f07bd8 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1834,7 +1834,7 @@ EXPORT_SYMBOL(vm_unmap_ram); * * Returns: a pointer to the address that has been mapped, or %NULL on failure */ -void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) +void *vm_map_ram(struct page **pages, unsigned int count, int node) { unsigned long size = (unsigned long)count << PAGE_SHIFT; unsigned long addr; @@ -1858,7 +1858,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
kasan_unpoison_vmalloc(mem, size);
- if (map_kernel_range(addr, size, prot, pages) < 0) { + if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) { vm_unmap_ram(mem, count); return NULL; }
On Wed, Apr 08, 2020 at 01:59:15PM +0200, Christoph Hellwig wrote:
This is always GFP_KERNEL - for long term mappings with other properties vmap should be used.
PAGE_KERNEL != GFP_KERNEL :-)
- return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL);
- return vm_map_ram(mock->pages, mock->npages, 0);
On Wed, Apr 08, 2020 at 02:21:04PM +0200, Peter Zijlstra wrote:
On Wed, Apr 08, 2020 at 01:59:15PM +0200, Christoph Hellwig wrote:
This is always GFP_KERNEL - for long term mappings with other properties vmap should be used.
PAGE_KERNEL != GFP_KERNEL :-)
Yep. The compiler complained about that a few times :)
If this code was broken for non-coherent caches a crude powerpc hack isn't going to help anyone else. Remove the hack as it is the last user of __vmalloc passing a page protection flag other than PAGE_KERNEL.
Signed-off-by: Christoph Hellwig hch@lst.de --- drivers/gpu/drm/drm_scatter.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index ca520028b2cb..f4e6184d1877 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -43,15 +43,6 @@
#define DEBUG_SCATTER 0
-static inline void *drm_vmalloc_dma(unsigned long size) -{ -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE) - return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL)); -#else - return vmalloc_32(size); -#endif -} - static void drm_sg_cleanup(struct drm_sg_mem * entry) { struct page *page; @@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, return -ENOMEM; }
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT); + entry->virtual = vmalloc_32(pages << PAGE_SHIFT); if (!entry->virtual) { kfree(entry->busaddr); kfree(entry->pagelist);
On Wed, Apr 08, 2020 at 01:59:17PM +0200, Christoph Hellwig wrote:
If this code was broken for non-coherent caches a crude powerpc hack isn't going to help anyone else. Remove the hack as it is the last user of __vmalloc passing a page protection flag other than PAGE_KERNEL.
Well Ben added this to make stuff work on ppc, ofc the home grown dma layer in drm from back then isn't going to work in other places. I guess should have at least an ack from him, in case anyone still cares about this on ppc. Adding Ben to cc. -Daniel
Signed-off-by: Christoph Hellwig hch@lst.de
drivers/gpu/drm/drm_scatter.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index ca520028b2cb..f4e6184d1877 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -43,15 +43,6 @@ #define DEBUG_SCATTER 0 -static inline void *drm_vmalloc_dma(unsigned long size) -{ -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
-#else
- return vmalloc_32(size);
-#endif -}
static void drm_sg_cleanup(struct drm_sg_mem * entry) { struct page *page; @@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, return -ENOMEM; }
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
- entry->virtual = vmalloc_32(pages << PAGE_SHIFT); if (!entry->virtual) { kfree(entry->busaddr); kfree(entry->pagelist);
-- 2.25.1
On Wed, 2020-04-08 at 14:25 +0200, Daniel Vetter wrote:
On Wed, Apr 08, 2020 at 01:59:17PM +0200, Christoph Hellwig wrote:
If this code was broken for non-coherent caches a crude powerpc hack isn't going to help anyone else. Remove the hack as it is the last user of __vmalloc passing a page protection flag other than PAGE_KERNEL.
Well Ben added this to make stuff work on ppc, ofc the home grown dma layer in drm from back then isn't going to work in other places. I guess should have at least an ack from him, in case anyone still cares about this on ppc. Adding Ben to cc.
This was due to some drivers (radeon ?) trying to use vmalloc pages for coherent DMA, which means on those 4xx powerpc's need to be non-cached.
There were machines using that (440 based iirc), though I honestly can't tell if anybody still uses any of it.
Cheers, Ben.
-Daniel
Signed-off-by: Christoph Hellwig hch@lst.de
drivers/gpu/drm/drm_scatter.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index ca520028b2cb..f4e6184d1877 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -43,15 +43,6 @@ #define DEBUG_SCATTER 0 -static inline void *drm_vmalloc_dma(unsigned long size) -{ -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
-#else
- return vmalloc_32(size);
-#endif -}
static void drm_sg_cleanup(struct drm_sg_mem * entry) { struct page *page; @@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, return -ENOMEM; }
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
- entry->virtual = vmalloc_32(pages << PAGE_SHIFT); if (!entry->virtual) { kfree(entry->busaddr); kfree(entry->pagelist);
-- 2.25.1
On Thu, Apr 9, 2020 at 10:54 AM Benjamin Herrenschmidt benh@kernel.crashing.org wrote:
On Wed, 2020-04-08 at 14:25 +0200, Daniel Vetter wrote:
On Wed, Apr 08, 2020 at 01:59:17PM +0200, Christoph Hellwig wrote:
If this code was broken for non-coherent caches a crude powerpc hack isn't going to help anyone else. Remove the hack as it is the last user of __vmalloc passing a page protection flag other than PAGE_KERNEL.
Well Ben added this to make stuff work on ppc, ofc the home grown dma layer in drm from back then isn't going to work in other places. I guess should have at least an ack from him, in case anyone still cares about this on ppc. Adding Ben to cc.
This was due to some drivers (radeon ?) trying to use vmalloc pages for coherent DMA, which means on those 4xx powerpc's need to be non-cached.
There were machines using that (440 based iirc), though I honestly can't tell if anybody still uses any of it.
agp subsystem still seems to happily do that (vmalloc memory for device access), never having been ported to dma apis (or well converted to iommu drivers, which they kinda are really). So I think this all still works exactly as back then, even with the kms radeon drivers. Question really is whether we have users left, and I have no clue about that either.
Now if these boxes didn't ever have agp then I think we can get away with deleting this, since we've already deleted the legacy radeon driver. And that one used vmalloc for everything. The new kms one does use the dma-api if the gpu isn't connected through agp. -Daniel
Cheers, Ben.
-Daniel
Signed-off-by: Christoph Hellwig hch@lst.de
drivers/gpu/drm/drm_scatter.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index ca520028b2cb..f4e6184d1877 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -43,15 +43,6 @@
#define DEBUG_SCATTER 0
-static inline void *drm_vmalloc_dma(unsigned long size) -{ -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
-#else
- return vmalloc_32(size);
-#endif -}
static void drm_sg_cleanup(struct drm_sg_mem * entry) { struct page *page; @@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, return -ENOMEM; }
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
- entry->virtual = vmalloc_32(pages << PAGE_SHIFT); if (!entry->virtual) { kfree(entry->busaddr); kfree(entry->pagelist);
-- 2.25.1
On Thu, Apr 9, 2020 at 5:41 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Apr 9, 2020 at 10:54 AM Benjamin Herrenschmidt benh@kernel.crashing.org wrote:
On Wed, 2020-04-08 at 14:25 +0200, Daniel Vetter wrote:
On Wed, Apr 08, 2020 at 01:59:17PM +0200, Christoph Hellwig wrote:
If this code was broken for non-coherent caches a crude powerpc hack isn't going to help anyone else. Remove the hack as it is the last user of __vmalloc passing a page protection flag other than PAGE_KERNEL.
Well Ben added this to make stuff work on ppc, ofc the home grown dma layer in drm from back then isn't going to work in other places. I guess should have at least an ack from him, in case anyone still cares about this on ppc. Adding Ben to cc.
This was due to some drivers (radeon ?) trying to use vmalloc pages for coherent DMA, which means on those 4xx powerpc's need to be non-cached.
There were machines using that (440 based iirc), though I honestly can't tell if anybody still uses any of it.
agp subsystem still seems to happily do that (vmalloc memory for device access), never having been ported to dma apis (or well converted to iommu drivers, which they kinda are really). So I think this all still works exactly as back then, even with the kms radeon drivers. Question really is whether we have users left, and I have no clue about that either.
Now if these boxes didn't ever have agp then I think we can get away with deleting this, since we've already deleted the legacy radeon driver. And that one used vmalloc for everything. The new kms one does use the dma-api if the gpu isn't connected through agp.
All radeons have a built in remapping table to handle non-AGP systems. On the earlier radeons it wasn't quite as performant as AGP, but it was always more reliable because AGP is AGP. Maybe it's time to let AGP go?
Alex
-Daniel
Cheers, Ben.
-Daniel
Signed-off-by: Christoph Hellwig hch@lst.de
drivers/gpu/drm/drm_scatter.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index ca520028b2cb..f4e6184d1877 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -43,15 +43,6 @@
#define DEBUG_SCATTER 0
-static inline void *drm_vmalloc_dma(unsigned long size) -{ -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
-#else
- return vmalloc_32(size);
-#endif -}
static void drm_sg_cleanup(struct drm_sg_mem * entry) { struct page *page; @@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, return -ENOMEM; }
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
- entry->virtual = vmalloc_32(pages << PAGE_SHIFT); if (!entry->virtual) { kfree(entry->busaddr); kfree(entry->pagelist);
-- 2.25.1
-- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
On Thu, Apr 9, 2020 at 4:19 PM Alex Deucher alexdeucher@gmail.com wrote:
On Thu, Apr 9, 2020 at 5:41 AM Daniel Vetter daniel@ffwll.ch wrote:
On Thu, Apr 9, 2020 at 10:54 AM Benjamin Herrenschmidt benh@kernel.crashing.org wrote:
On Wed, 2020-04-08 at 14:25 +0200, Daniel Vetter wrote:
On Wed, Apr 08, 2020 at 01:59:17PM +0200, Christoph Hellwig wrote:
If this code was broken for non-coherent caches a crude powerpc hack isn't going to help anyone else. Remove the hack as it is the last user of __vmalloc passing a page protection flag other than PAGE_KERNEL.
Well Ben added this to make stuff work on ppc, ofc the home grown dma layer in drm from back then isn't going to work in other places. I guess should have at least an ack from him, in case anyone still cares about this on ppc. Adding Ben to cc.
This was due to some drivers (radeon ?) trying to use vmalloc pages for coherent DMA, which means on those 4xx powerpc's need to be non-cached.
There were machines using that (440 based iirc), though I honestly can't tell if anybody still uses any of it.
agp subsystem still seems to happily do that (vmalloc memory for device access), never having been ported to dma apis (or well converted to iommu drivers, which they kinda are really). So I think this all still works exactly as back then, even with the kms radeon drivers. Question really is whether we have users left, and I have no clue about that either.
Now if these boxes didn't ever have agp then I think we can get away with deleting this, since we've already deleted the legacy radeon driver. And that one used vmalloc for everything. The new kms one does use the dma-api if the gpu isn't connected through agp.
All radeons have a built in remapping table to handle non-AGP systems. On the earlier radeons it wasn't quite as performant as AGP, but it was always more reliable because AGP is AGP. Maybe it's time to let AGP go?
I'd be very much in favour of that, if we can just use the integrated gart and drop agp fast writes wobbliness on the floor. I think the only other modern driver using agp would be nouveau at that point. -Daniel
Alex
-Daniel
Cheers, Ben.
-Daniel
Signed-off-by: Christoph Hellwig hch@lst.de
drivers/gpu/drm/drm_scatter.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index ca520028b2cb..f4e6184d1877 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -43,15 +43,6 @@
#define DEBUG_SCATTER 0
-static inline void *drm_vmalloc_dma(unsigned long size) -{ -#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
-#else
- return vmalloc_32(size);
-#endif -}
static void drm_sg_cleanup(struct drm_sg_mem * entry) { struct page *page; @@ -126,7 +117,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, return -ENOMEM; }
- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
- entry->virtual = vmalloc_32(pages << PAGE_SHIFT); if (!entry->virtual) { kfree(entry->busaddr); kfree(entry->pagelist);
-- 2.25.1
-- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
On Thu, 2020-04-09 at 11:41 +0200, Daniel Vetter wrote:
Now if these boxes didn't ever have agp then I think we can get away with deleting this, since we've already deleted the legacy radeon driver. And that one used vmalloc for everything. The new kms one does use the dma-api if the gpu isn't connected through agp
Definitely no AGP there.
Cheers Ben.
On Fri, Apr 10, 2020 at 12:57 AM Benjamin Herrenschmidt benh@kernel.crashing.org wrote:
On Thu, 2020-04-09 at 11:41 +0200, Daniel Vetter wrote:
Now if these boxes didn't ever have agp then I think we can get away with deleting this, since we've already deleted the legacy radeon driver. And that one used vmalloc for everything. The new kms one does use the dma-api if the gpu isn't connected through agp
Definitely no AGP there.
Ah in that case I think we can be sure that this code is dead.
Acked-by: Daniel Vetter daniel.vetter@ffwll.ch
Cheers, Daniel
The real version just had a few callers that can open code it and remove one layer of indirection. The nommu stub was public but only had a single caller, so remove it and avoid a CONFIG_MMU ifdef in vmalloc.h.
Signed-off-by: Christoph Hellwig hch@lst.de --- include/linux/vmalloc.h | 9 --------- mm/nommu.c | 3 ++- mm/vmalloc.c | 20 ++++++-------------- 3 files changed, 8 insertions(+), 24 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c1b9d6eca05f..4a46d296e70d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,17 +115,8 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); -#ifndef CONFIG_MMU -extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); -static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, - gfp_t flags, void *caller) -{ - return __vmalloc_node_flags(size, node, flags); -} -#else extern void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, void *caller); -#endif
extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/mm/nommu.c b/mm/nommu.c index 2df549adb22b..9553efa59787 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -150,7 +150,8 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask) } EXPORT_SYMBOL(__vmalloc);
-void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) +void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, + void *caller) { return __vmalloc(size, flags); } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index de7952959e82..3d59d848ad48 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2566,14 +2566,6 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask) } EXPORT_SYMBOL(__vmalloc);
-static inline void *__vmalloc_node_flags(unsigned long size, - int node, gfp_t flags) -{ - return __vmalloc_node(size, 1, flags, node, - __builtin_return_address(0)); -} - - void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, void *caller) { @@ -2594,8 +2586,8 @@ void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, */ void *vmalloc(unsigned long size) { - return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL); + return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc);
@@ -2614,8 +2606,8 @@ EXPORT_SYMBOL(vmalloc); */ void *vzalloc(unsigned long size) { - return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_ZERO); + return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, + __builtin_return_address(0)); } EXPORT_SYMBOL(vzalloc);
@@ -2670,8 +2662,8 @@ EXPORT_SYMBOL(vmalloc_node); */ void *vzalloc_node(unsigned long size, int node) { - return __vmalloc_node_flags(size, node, - GFP_KERNEL | __GFP_ZERO); + return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node, + __builtin_return_address(0)); } EXPORT_SYMBOL(vzalloc_node);
This is always PAGE_KERNEL now.
Signed-off-by: Christoph Hellwig hch@lst.de --- mm/vmalloc.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 466a449b3a15..de7952959e82 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2401,8 +2401,7 @@ void *vmap(struct page **pages, unsigned int count, EXPORT_SYMBOL(vmap);
static void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, - int node, const void *caller); + gfp_t gfp_mask, int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, int node) { @@ -2420,7 +2419,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, - PAGE_KERNEL, node, area->caller); + node, area->caller); } else { pages = kmalloc_node(array_size, nested_gfp, node); } @@ -2539,13 +2538,11 @@ EXPORT_SYMBOL_GPL(__vmalloc_node_range); * @size: allocation size * @align: desired alignment * @gfp_mask: flags for the page level allocator - * @prot: protection mask for the allocated pages * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address * - * Allocate enough pages to cover @size from the page level - * allocator with @gfp_mask flags. Map them into contiguous - * kernel virtual space, using a pagetable protection of @prot. + * Allocate enough pages to cover @size from the page level allocator with + * @gfp_mask flags. Map them into contiguous kernel virtual space. * * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL * and __GFP_NOFAIL are not supported @@ -2556,16 +2553,15 @@ EXPORT_SYMBOL_GPL(__vmalloc_node_range); * Return: pointer to the allocated memory or %NULL on error */ static void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, - int node, const void *caller) + gfp_t gfp_mask, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, - gfp_mask, prot, 0, node, caller); + gfp_mask, PAGE_KERNEL, 0, node, caller); }
void *__vmalloc(unsigned long size, gfp_t gfp_mask) { - return __vmalloc_node(size, 1, gfp_mask, PAGE_KERNEL, NUMA_NO_NODE, + return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(__vmalloc); @@ -2573,15 +2569,15 @@ EXPORT_SYMBOL(__vmalloc); static inline void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) { - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, - node, __builtin_return_address(0)); + return __vmalloc_node(size, 1, flags, node, + __builtin_return_address(0)); }
void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, void *caller) { - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); + return __vmalloc_node(size, 1, flags, node, caller); }
/** @@ -2656,8 +2652,8 @@ EXPORT_SYMBOL(vmalloc_user); */ void *vmalloc_node(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, - node, __builtin_return_address(0)); + return __vmalloc_node(size, 1, GFP_KERNEL, node, + __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_node);
@@ -2670,9 +2666,6 @@ EXPORT_SYMBOL(vmalloc_node); * allocator and map them into contiguous kernel virtual space. * The memory allocated is set to zero. * - * For tight control over page level allocator and protection flags - * use __vmalloc_node() instead. - * * Return: pointer to the allocated memory or %NULL on error */ void *vzalloc_node(unsigned long size, int node) @@ -2745,8 +2738,8 @@ void *vmalloc_exec(unsigned long size) */ void *vmalloc_32(unsigned long size) { - return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, - NUMA_NO_NODE, __builtin_return_address(0)); + return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE, + __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32);
Just use __vmalloc_node instead which gets and extra argument. To be able to to use __vmalloc_node in all caller make it available outside of vmalloc and implement it in nommu.c.
Signed-off-by: Christoph Hellwig hch@lst.de --- include/linux/vmalloc.h | 4 ++-- kernel/bpf/syscall.c | 5 ++--- mm/nommu.c | 4 ++-- mm/util.c | 2 +- mm/vmalloc.c | 10 +--------- 5 files changed, 8 insertions(+), 17 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4a46d296e70d..108f49b47756 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,8 +115,8 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); -extern void *__vmalloc_node_flags_caller(unsigned long size, - int node, gfp_t flags, void *caller); +void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, + int node, const void *caller);
extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64783da34202..48d98ea8fad6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -299,9 +299,8 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL | __GFP_RETRY_MAYFAIL | flags); } - return __vmalloc_node_flags_caller(size, numa_node, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | - flags, __builtin_return_address(0)); + return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_RETRY_MAYFAIL | flags, + numa_node, __builtin_return_address(0)); }
void *bpf_map_area_alloc(u64 size, int numa_node) diff --git a/mm/nommu.c b/mm/nommu.c index 9553efa59787..81a86cd85893 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -150,8 +150,8 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask) } EXPORT_SYMBOL(__vmalloc);
-void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, - void *caller) +void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, + int node, const void *caller) { return __vmalloc(size, flags); } diff --git a/mm/util.c b/mm/util.c index 988d11e6c17c..6d5868adbe18 100644 --- a/mm/util.c +++ b/mm/util.c @@ -580,7 +580,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (ret || size <= PAGE_SIZE) return ret;
- return __vmalloc_node_flags_caller(size, node, flags, + return __vmalloc_node(size, 1, flags, node, __builtin_return_address(0)); } EXPORT_SYMBOL(kvmalloc_node); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3d59d848ad48..ae8249ef5821 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2400,8 +2400,6 @@ void *vmap(struct page **pages, unsigned int count, } EXPORT_SYMBOL(vmap);
-static void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, int node) { @@ -2552,7 +2550,7 @@ EXPORT_SYMBOL_GPL(__vmalloc_node_range); * * Return: pointer to the allocated memory or %NULL on error */ -static void *__vmalloc_node(unsigned long size, unsigned long align, +void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, @@ -2566,12 +2564,6 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask) } EXPORT_SYMBOL(__vmalloc);
-void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, - void *caller) -{ - return __vmalloc_node(size, 1, flags, node, caller); -} - /** * vmalloc - allocate virtually contiguous memory * @size: allocation size
Open code it in __bpf_map_area_alloc, which is the only caller. Also clean up __bpf_map_area_alloc to have a single vmalloc call with slightly different flags instead of the current two different calls.
For this to compile for the nommu case add a __vmalloc_node_range stub to nommu.c.
Signed-off-by: Christoph Hellwig hch@lst.de --- include/linux/vmalloc.h | 1 - kernel/bpf/syscall.c | 23 +++++++++++++---------- mm/nommu.c | 14 ++++++++------ mm/vmalloc.c | 20 -------------------- 4 files changed, 21 insertions(+), 37 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 108f49b47756..f90f2946aac2 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,7 +106,6 @@ extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 48d98ea8fad6..249d9bd43321 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -281,26 +281,29 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) * __GFP_RETRY_MAYFAIL to avoid such situations. */
- const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; + const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO; + unsigned int flags = 0; + unsigned long align = 1; void *area;
if (size >= SIZE_MAX) return NULL;
/* kmalloc()'ed memory can't be mmap()'ed */ - if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, + if (mmapable) { + BUG_ON(!PAGE_ALIGNED(size)); + align = SHMLBA; + flags = VM_USERMAP; + } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { + area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, numa_node); if (area != NULL) return area; } - if (mmapable) { - BUG_ON(!PAGE_ALIGNED(size)); - return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL | - __GFP_RETRY_MAYFAIL | flags); - } - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_RETRY_MAYFAIL | flags, - numa_node, __builtin_return_address(0)); + + return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, + gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, + flags, numa_node, __builtin_return_address(0)); }
void *bpf_map_area_alloc(u64 size, int numa_node) diff --git a/mm/nommu.c b/mm/nommu.c index 81a86cd85893..b42cd6003d7d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -150,6 +150,14 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask) } EXPORT_SYMBOL(__vmalloc);
+void *__vmalloc_node_range(unsigned long size, unsigned long align, + unsigned long start, unsigned long end, gfp_t gfp_mask, + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller) +{ + return __vmalloc(size, flags); +} + void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { @@ -180,12 +188,6 @@ void *vmalloc_user(unsigned long size) } EXPORT_SYMBOL(vmalloc_user);
-void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) -{ - return __vmalloc_user_flags(size, flags | __GFP_ZERO); -} -EXPORT_SYMBOL(vmalloc_user_node_flags); - struct page *vmalloc_to_page(const void *addr) { return virt_to_page(addr); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 333fbe77255a..f6f2acdaf70c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2658,26 +2658,6 @@ void *vzalloc_node(unsigned long size, int node) } EXPORT_SYMBOL(vzalloc_node);
-/** - * vmalloc_user_node_flags - allocate memory for userspace on a specific node - * @size: allocation size - * @node: numa node - * @flags: flags for the page level allocator - * - * The resulting memory area is zeroed so it can be mapped to userspace - * without leaking data. - * - * Return: pointer to the allocated memory or %NULL on error - */ -void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) -{ - return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, - flags | __GFP_ZERO, PAGE_KERNEL, - VM_USERMAP, node, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(vmalloc_user_node_flags); - /** * vmalloc_exec - allocate virtually contiguous, executable memory * @size: allocation size
No need to export the very low-level __vmalloc_node_range when the test module can use a slightly higher level variant.
Signed-off-by: Christoph Hellwig hch@lst.de --- lib/test_vmalloc.c | 26 +++++++------------------- mm/vmalloc.c | 17 ++++++++--------- 2 files changed, 15 insertions(+), 28 deletions(-)
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 8bbefcaddfe8..cd6aef05dfb4 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -91,12 +91,8 @@ static int random_size_align_alloc_test(void) */ size = ((rnd % 10) + 1) * PAGE_SIZE;
- ptr = __vmalloc_node_range(size, align, - VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL, - 0, 0, __builtin_return_address(0)); - + ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, + __builtin_return_address(0)); if (!ptr) return -1;
@@ -118,12 +114,8 @@ static int align_shift_alloc_test(void) for (i = 0; i < BITS_PER_LONG; i++) { align = ((unsigned long) 1) << i;
- ptr = __vmalloc_node_range(PAGE_SIZE, align, - VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL, - 0, 0, __builtin_return_address(0)); - + ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL | __GFP_ZERO, + __builtin_return_address(0)); if (!ptr) return -1;
@@ -139,13 +131,9 @@ static int fix_align_alloc_test(void) int i;
for (i = 0; i < test_loop_count; i++) { - ptr = __vmalloc_node_range(5 * PAGE_SIZE, - THREAD_ALIGN << 1, - VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL, - 0, 0, __builtin_return_address(0)); - + ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, + GFP_KERNEL | __GFP_ZERO, + __builtin_return_address(0)); if (!ptr) return -1;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ae8249ef5821..333fbe77255a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2522,15 +2522,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return NULL; }
-/* - * This is only for performance analysis of vmalloc and stress purpose. - * It is required by vmalloc test module, therefore do not use it other - * than that. - */ -#ifdef CONFIG_TEST_VMALLOC_MODULE -EXPORT_SYMBOL_GPL(__vmalloc_node_range); -#endif - /** * __vmalloc_node - allocate virtually contiguous memory * @size: allocation size @@ -2556,6 +2547,14 @@ void *__vmalloc_node(unsigned long size, unsigned long align, return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, 0, node, caller); } +/* + * This is only for performance analysis of vmalloc and stress purpose. + * It is required by vmalloc test module, therefore do not use it other + * than that. + */ +#ifdef CONFIG_TEST_VMALLOC_MODULE +EXPORT_SYMBOL_GPL(__vmalloc_node); +#endif
void *__vmalloc(unsigned long size, gfp_t gfp_mask) {
alloc_vm_stack can use a slightly higher level vmalloc function.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/powerpc/kernel/irq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a25ed47087ee..4518fb1d6bf4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -735,9 +735,8 @@ void do_IRQ(struct pt_regs *regs)
static void *__init alloc_vm_stack(void) { - return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, - VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL, - 0, NUMA_NO_NODE, (void*)_RET_IP_); + return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, + NUMA_NO_NODE, (void *)_RET_IP_); }
static void __init vmap_irqstack_init(void)
arch_alloc_vmap_stack can use a slightly higher level vmalloc function.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/arm64/include/asm/vmap_stack.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 0a12115d9638..0cc6636e3f15 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -19,10 +19,8 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) { BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
- return __vmalloc_node_range(stack_size, THREAD_ALIGN, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, PAGE_KERNEL, 0, node, - __builtin_return_address(0)); + return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, + __builtin_return_address(0)); }
#endif /* __ASM_VMAP_STACK_H */
stack_alloc can use a slightly higher level vmalloc function.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/s390/kernel/setup.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 36445dd40fdb..0f0b140b5558 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,12 +305,9 @@ void *restart_stack __section(.data); unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long) - __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE, + THREADINFO_GFP, NUMA_NO_NODE, + __builtin_return_address(0)); #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif
The pgprot argument to __vmalloc is always PROT_KERNEL now, so remove it.
Signed-off-by: Christoph Hellwig hch@lst.de --- arch/x86/hyperv/hv_init.c | 3 +-- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/svm.c | 3 +-- drivers/block/drbd/drbd_bitmap.c | 4 +--- drivers/gpu/drm/etnaviv/etnaviv_dump.c | 4 ++-- drivers/lightnvm/pblk-init.c | 5 ++--- drivers/md/dm-bufio.c | 4 ++-- drivers/mtd/ubi/io.c | 4 ++-- drivers/scsi/sd_zbc.c | 3 +-- fs/gfs2/dir.c | 9 ++++----- fs/gfs2/quota.c | 2 +- fs/nfs/blocklayout/extent_tree.c | 2 +- fs/ntfs/malloc.h | 2 +- fs/ubifs/debug.c | 2 +- fs/ubifs/lprops.c | 2 +- fs/ubifs/lpt_commit.c | 4 ++-- fs/ubifs/orphan.c | 2 +- fs/xfs/kmem.c | 2 +- include/linux/vmalloc.h | 2 +- kernel/bpf/core.c | 6 +++--- kernel/groups.c | 2 +- kernel/module.c | 3 +-- mm/nommu.c | 15 +++++++-------- mm/page_alloc.c | 2 +- mm/percpu.c | 2 +- mm/vmalloc.c | 4 ++-- net/bridge/netfilter/ebtables.c | 6 ++---- sound/core/memalloc.c | 2 +- sound/core/pcm_memory.c | 2 +- 29 files changed, 47 insertions(+), 59 deletions(-)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 5a4b363ba67b..a3d689dfc745 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -95,8 +95,7 @@ static int hv_cpu_init(unsigned int cpu) * not be stopped in the case of CPU offlining and the VM will hang. */ if (!*hvp) { - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL); + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); }
if (*hvp) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 42a2d0d3984a..71bc09bff01a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1280,8 +1280,7 @@ extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return __vmalloc(kvm_x86_ops.vm_size, - GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); + return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); } void kvm_arch_free_vm(struct kvm *kvm);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 851e9cc79930..83e8323ba4f2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1927,8 +1927,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, /* Avoid using vmalloc for smaller buffers. */ size = npages * sizeof(struct page *); if (size > PAGE_SIZE) - pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO, - PAGE_KERNEL); + pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); else pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 15e99697234a..df53dca5d02c 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -396,9 +396,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) bytes = sizeof(struct page *)*want; new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN); if (!new_pages) { - new_pages = __vmalloc(bytes, - GFP_NOIO | __GFP_ZERO, - PAGE_KERNEL); + new_pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO); if (!new_pages) return NULL; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 648cf0207309..706af0304ca4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -154,8 +154,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit) file_size += sizeof(*iter.hdr) * n_obj;
/* Allocate the file in vmalloc memory, it's likely to be big */ - iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, - PAGE_KERNEL); + iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | + __GFP_NORETRY); if (!iter.start) { mutex_unlock(&gpu->mmu_context->lock); dev_warn(gpu->dev, "failed to allocate devcoredump file\n"); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 9a967a2e83dd..6e677ff62cc9 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -145,9 +145,8 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init) int ret = 0;
map_size = pblk_trans_map_size(pblk); - pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN - | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM, - PAGE_KERNEL); + pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN | + __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM); if (!pblk->trans_map) { pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n", map_size); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 2d519c223562..d1786cfd7f22 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -400,13 +400,13 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, */ if (gfp_mask & __GFP_NORETRY) { unsigned noio_flag = memalloc_noio_save(); - void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); + void *ptr = __vmalloc(c->block_size, gfp_mask);
memalloc_noio_restore(noio_flag); return ptr; }
- return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); + return __vmalloc(c->block_size, gfp_mask); }
/* diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index b57b84fb97d0..14d890b00d2c 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -1297,7 +1297,7 @@ static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, if (!ubi_dbg_chk_io(ubi)) return 0;
- buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); + buf1 = __vmalloc(len, GFP_NOFS); if (!buf1) { ubi_err(ubi, "cannot allocate memory to check writes"); return 0; @@ -1361,7 +1361,7 @@ int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) if (!ubi_dbg_chk_io(ubi)) return 0;
- buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(len, GFP_NOFS); if (!buf) { ubi_err(ubi, "cannot allocate memory to check for 0xFFs"); return 0; diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index f45c22b09726..8be27426aa66 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -136,8 +136,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
while (bufsize >= SECTOR_SIZE) { buf = __vmalloc(bufsize, - GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY, - PAGE_KERNEL); + GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY); if (buf) { *buflen = bufsize; return buf; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c3f7732415be..c0f2875c946c 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -354,7 +354,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); if (hc == NULL) - hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL); + hc = __vmalloc(hsize, GFP_NOFS);
if (hc == NULL) return ERR_PTR(-ENOMEM); @@ -1166,7 +1166,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
hc2 = kmalloc_array(hsize_bytes, 2, GFP_NOFS | __GFP_NOWARN); if (hc2 == NULL) - hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); + hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS);
if (!hc2) return -ENOMEM; @@ -1327,7 +1327,7 @@ static void *gfs2_alloc_sort_buffer(unsigned size) if (size < KMALLOC_MAX_SIZE) ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN); if (!ptr) - ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL); + ptr = __vmalloc(size, GFP_NOFS); return ptr; }
@@ -1987,8 +1987,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); if (ht == NULL) - ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO, - PAGE_KERNEL); + ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO); if (!ht) return -ENOMEM;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index cc0c4b5800be..b84ac5843ec4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1368,7 +1368,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN); if (sdp->sd_quota_bitmap == NULL) sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS | - __GFP_ZERO, PAGE_KERNEL); + __GFP_ZERO); if (!sdp->sd_quota_bitmap) return error;
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 7a57ff2528af..8f7cff7a4293 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -582,7 +582,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) if (!arg->layoutupdate_pages) return -ENOMEM;
- start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL); + start_p = __vmalloc(buffer_size, GFP_NOFS); if (!start_p) { kfree(arg->layoutupdate_pages); return -ENOMEM; diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 842b0bfc3ac9..7068425735f1 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -34,7 +34,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) /* return (void *)__get_free_page(gfp_mask); */ } if (likely((size >> PAGE_SHIFT) < totalram_pages())) - return __vmalloc(size, gfp_mask, PAGE_KERNEL); + return __vmalloc(size, gfp_mask); return NULL; }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 0f5a480fe264..31288d8fa2ce 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -815,7 +815,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum); return; diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 29826c51883a..22bfda158f7f 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1095,7 +1095,7 @@ static int scan_check_cb(struct ubifs_info *c, return LPT_SCAN_CONTINUE; }
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) return -ENOMEM;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index ff5e0411cf2d..d76a19e460cd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1596,7 +1596,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) if (!dbg_is_chk_lprops(c)) return 0;
- buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = p = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory for ltab checking"); return 0; @@ -1845,7 +1845,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) void *buf, *p;
pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); - buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = p = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory to dump LPT"); return; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 283f9eb48410..2c294085ffed 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -977,7 +977,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) if (c->no_orphs) return 0;
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory to check orphans"); return 0; diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 1da94237a8cf..f1366475c389 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -48,7 +48,7 @@ __kmem_vmalloc(size_t size, xfs_km_flags_t flags) if (flags & KM_NOFS) nofs_flag = memalloc_nofs_save();
- ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = __vmalloc(size, lflags);
if (flags & KM_NOFS) memalloc_nofs_restore(nofs_flag); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 9273b1a91ca5..c1b9d6eca05f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -110,7 +110,7 @@ extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 916f5132a984..c712de560357 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -82,7 +82,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag struct bpf_prog *fp;
size = round_up(size, PAGE_SIZE); - fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + fp = __vmalloc(size, gfp_flags); if (fp == NULL) return NULL;
@@ -232,7 +232,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, if (ret) return NULL;
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + fp = __vmalloc(size, gfp_flags); if (fp == NULL) { __bpf_prog_uncharge(fp_old->aux->user, delta); } else { @@ -1089,7 +1089,7 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp;
- fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); + fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags); if (fp != NULL) { /* aux->prog still points to the fp_other one, so * when promoting the clone to the real program, diff --git a/kernel/groups.c b/kernel/groups.c index daae2f2dc6d4..6ee6691f6839 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -20,7 +20,7 @@ struct group_info *groups_alloc(int gidsetsize) len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize; gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY); if (!gi) - gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); + gi = __vmalloc(len, GFP_KERNEL_ACCOUNT); if (!gi) return NULL;
diff --git a/kernel/module.c b/kernel/module.c index 3447f3b74870..c607fed4e617 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2946,8 +2946,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, return err;
/* Suck in entire file: we'll want most of it. */ - info->hdr = __vmalloc(info->len, - GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL); + info->hdr = __vmalloc(info->len, GFP_KERNEL | __GFP_NOWARN); if (!info->hdr) return -ENOMEM;
diff --git a/mm/nommu.c b/mm/nommu.c index 4f07b7ef0297..2df549adb22b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -140,7 +140,7 @@ void vfree(const void *addr) } EXPORT_SYMBOL(vfree);
-void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask) { /* * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() @@ -152,14 +152,14 @@ EXPORT_SYMBOL(__vmalloc);
void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) { - return __vmalloc(size, flags, PAGE_KERNEL); + return __vmalloc(size, flags); }
static void *__vmalloc_user_flags(unsigned long size, gfp_t flags) { void *ret;
- ret = __vmalloc(size, flags, PAGE_KERNEL); + ret = __vmalloc(size, flags); if (ret) { struct vm_area_struct *vma;
@@ -230,7 +230,7 @@ long vwrite(char *buf, char *addr, unsigned long count) */ void *vmalloc(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM); } EXPORT_SYMBOL(vmalloc);
@@ -248,8 +248,7 @@ EXPORT_SYMBOL(vmalloc); */ void *vzalloc(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc);
@@ -302,7 +301,7 @@ EXPORT_SYMBOL(vzalloc_node);
void *vmalloc_exec(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM); }
/** @@ -314,7 +313,7 @@ void *vmalloc_exec(unsigned long size) */ void *vmalloc_32(unsigned long size) { - return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL); } EXPORT_SYMBOL(vmalloc_32);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 114c56c3685d..53d43f72bcd8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8237,7 +8237,7 @@ void *__init alloc_large_system_hash(const char *tablename, table = memblock_alloc_raw(size, SMP_CACHE_BYTES); } else if (get_order(size) >= MAX_ORDER || hashdist) { - table = __vmalloc(size, gfp_flags, PAGE_KERNEL); + table = __vmalloc(size, gfp_flags); virt = true; } else { /* diff --git a/mm/percpu.c b/mm/percpu.c index d7e3bc649f4e..1e278099e185 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -481,7 +481,7 @@ static void *pcpu_mem_zalloc(size_t size, gfp_t gfp) if (size <= PAGE_SIZE) return kzalloc(size, gfp); else - return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL); + return __vmalloc(size, gfp | __GFP_ZERO); }
/** diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 334c75251ddb..466a449b3a15 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2563,9 +2563,9 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_mask, prot, 0, node, caller); }
-void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask) { - return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE, + return __vmalloc_node(size, 1, gfp_mask, PAGE_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(__vmalloc); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 78db58c7aec2..7e869284e052 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1095,16 +1095,14 @@ static int do_replace(struct net *net, const void __user *user, tmp.name[sizeof(tmp.name) - 1] = 0;
countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT, - PAGE_KERNEL); + newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT); if (!newinfo) return -ENOMEM;
if (countersize) memset(newinfo->counters, 0, countersize);
- newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT, - PAGE_KERNEL); + newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index a83553fbedf0..bea46ed157a6 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -143,7 +143,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, break; case SNDRV_DMA_TYPE_VMALLOC: gfp = snd_mem_get_gfp_flags(device, GFP_KERNEL | __GFP_HIGHMEM); - dmab->area = __vmalloc(size, gfp, PAGE_KERNEL); + dmab->area = __vmalloc(size, gfp); dmab->addr = 0; break; #ifdef CONFIG_HAS_DMA diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index fcab37ea6641..860935e3aea4 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -460,7 +460,7 @@ int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream, return 0; /* already large enough */ vfree(runtime->dma_area); } - runtime->dma_area = __vmalloc(size, gfp_flags, PAGE_KERNEL); + runtime->dma_area = __vmalloc(size, gfp_flags); if (!runtime->dma_area) return -ENOMEM; runtime->dma_bytes = size;
On Wed, Apr 08, 2020 at 01:58:58PM +0200, Christoph Hellwig wrote:
Hi all,
Peter noticed that with some dumb luck you can toast the kernel address space with exported vmalloc symbols.
I used this as an opportunity to decruft the vmalloc.c API and make it much more systematic. This also removes any chance to create vmalloc mappings outside the designated areas or using executable permissions from modules. Besides that it removes more than 300 lines of code.
Looks great, thanks for doing this!
Acked-by: Peter Zijlstra (Intel) peterz@infradead.org
On Wed, 8 Apr 2020 13:59:00 +0200
vm_map_ram can keep mappings around after the vm_unmap_ram. Using that with non-PAGE_KERNEL mappings can lead to all kinds of aliasing issues.
Signed-off-by: Christoph Hellwig hch@lst.de
drivers/staging/android/ion/ion_heap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index 473b465724f1..a2d5c6df4b96 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -99,12 +99,12 @@ int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer, static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot) {
- void *addr = vm_map_ram(pages, num, -1, pgprot);
- void *addr = vmap(pages, num, VM_MAP);
A merge glitch?
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
if (!addr) return -ENOMEM; memset(addr, 0, PAGE_SIZE * num);
- vm_unmap_ram(addr, num);
- vunmap(addr);
return 0; } -- 2.25.1
On Wed, Apr 08, 2020 at 08:48:33PM +0800, Hillf Danton wrote:
- void *addr = vm_map_ram(pages, num, -1, pgprot);
- void *addr = vmap(pages, num, VM_MAP);
A merge glitch?
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
Yes, thanks for the headsup, you were as fast as the build bot :)
Fixed now.
On Wed, Apr 08, 2020 at 01:58:58PM +0200, Christoph Hellwig wrote:
Hi all,
Peter noticed that with some dumb luck you can toast the kernel address space with exported vmalloc symbols.
I used this as an opportunity to decruft the vmalloc.c API and make it much more systematic. This also removes any chance to create vmalloc mappings outside the designated areas or using executable permissions from modules. Besides that it removes more than 300 lines of code.
I haven't read all your patches yet.
Have you tested it on 32-bit ARM, where the module area is located _below_ PAGE_OFFSET and outside of the vmalloc area?
On Wed, Apr 08, 2020 at 05:03:24PM +0100, Russell King - ARM Linux admin wrote:
I haven't read all your patches yet.
Have you tested it on 32-bit ARM, where the module area is located _below_ PAGE_OFFSET and outside of the vmalloc area?
I have not tested it. However existing in-kernel users that use different areas (and we have quite a few of those) have not been changed at all. I think the arm32 module loader (like various other module loaders) falls into that category.
linaro-mm-sig@lists.linaro.org