- Linux-stable-mirror - lists.linaro.org

[nacked] mm-add-sys-kernel-slab-cache-cache_dma32.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm: add /sys/kernel/slab/cache/cache_dma32 has been removed from the -mm tree. Its filename was mm-add-sys-kernel-slab-cache-cache_dma32.patch This patch was dropped because it was nacked ------------------------------------------------------ From: Nicolas Boichat <drinkcat(a)chromium.org> Subject: mm: add /sys/kernel/slab/cache/cache_dma32 The patch "mm: add support for kmem caches in DMA32 zone" added support for SLAB_CACHE_DMA32 kmem caches. This patch adds the corresponding /sys/kernel/slab/cache/cache_dma32 entries, and updates the slabinfo tool. Link: http://lkml.kernel.org/r/20181210011504.122604-4-drinkcat@chromium.org Signed-off-by: Nicolas Boichat <drinkcat(a)chromium.org> Cc: Christoph Hellwig <hch(a)infradead.org> Cc: Christoph Lameter <cl(a)linux.com> Cc: David Rientjes <rientjes(a)google.com> Cc: Hsin-Yi Wang <hsinyi(a)chromium.org> Cc: Huaisheng Ye <yehs1(a)lenovo.com> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Matthias Brugger <matthias.bgg(a)gmail.com> Cc: Mel Gorman <mgorman(a)techsingularity.net> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Pekka Enberg <penberg(a)kernel.org> Cc: Robin Murphy <robin.murphy(a)arm.com> Cc: Sasha Levin <Alexander.Levin(a)microsoft.com> Cc: Tomasz Figa <tfiga(a)google.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Will Deacon <will.deacon(a)arm.com> Cc: Yingjoe Chen <yingjoe.chen(a)mediatek.com> Cc: Yong Wu <yong.wu(a)mediatek.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- Documentation/ABI/testing/sysfs-kernel-slab | 9 +++++++++ mm/slub.c | 11 +++++++++++ tools/vm/slabinfo.c | 7 ++++++- 3 files changed, 26 insertions(+), 1 deletion(-) --- a/Documentation/ABI/testing/sysfs-kernel-slab~mm-add-sys-kernel-slab-cache-cache_dma32 +++ a/Documentation/ABI/testing/sysfs-kernel-slab @@ -106,6 +106,15 @@ Description: are from 
ZONE_DMA. Available when CONFIG_ZONE_DMA is enabled. +What: /sys/kernel/slab/cache/cache_dma32 +Date: December 2018 +KernelVersion: 4.21 +Contact: Nicolas Boichat <drinkcat(a)chromium.org> +Description: + The cache_dma32 file is read-only and specifies whether objects + are from ZONE_DMA32. + Available when CONFIG_ZONE_DMA32 is enabled. + What: /sys/kernel/slab/cache/cpu_slabs Date: May 2007 KernelVersion: 2.6.22 --- a/mm/slub.c~mm-add-sys-kernel-slab-cache-cache_dma32 +++ a/mm/slub.c @@ -5112,6 +5112,14 @@ static ssize_t cache_dma_show(struct kme SLAB_ATTR_RO(cache_dma); #endif +#ifdef CONFIG_ZONE_DMA32 +static ssize_t cache_dma32_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA32)); +} +SLAB_ATTR_RO(cache_dma32); +#endif + static ssize_t usersize_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%u\n", s->usersize); @@ -5452,6 +5460,9 @@ static struct attribute *slab_attrs[] = #ifdef CONFIG_ZONE_DMA &cache_dma_attr.attr, #endif +#ifdef CONFIG_ZONE_DMA32 + &cache_dma32_attr.attr, +#endif #ifdef CONFIG_NUMA &remote_node_defrag_ratio_attr.attr, #endif --- a/tools/vm/slabinfo.c~mm-add-sys-kernel-slab-cache-cache_dma32 +++ a/tools/vm/slabinfo.c @@ -29,7 +29,7 @@ struct slabinfo { char *name; int alias; int refs; - int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + int aliases, align, cache_dma, cache_dma32, cpu_slabs, destroy_by_rcu; unsigned int hwcache_align, object_size, objs_per_slab; unsigned int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; @@ -534,6 +534,8 @@ static void report(struct slabinfo *s) printf("** Hardware cacheline aligned\n"); if (s->cache_dma) printf("** Memory is allocated in a special DMA zone\n"); + if (s->cache_dma32) + printf("** Memory is allocated in a special DMA32 zone\n"); if (s->destroy_by_rcu) printf("** Slabs are destroyed via RCU\n"); if (s->reclaim_account) @@ -602,6 +604,8 @@ static void slabcache(struct slabinfo *s 
*p++ = '*'; if (s->cache_dma) *p++ = 'd'; + if (s->cache_dma32) + *p++ = 'D'; if (s->hwcache_align) *p++ = 'A'; if (s->poison) @@ -1208,6 +1212,7 @@ static void read_slab_dir(void) slab->aliases = get_obj("aliases"); slab->align = get_obj("align"); slab->cache_dma = get_obj("cache_dma"); + slab->cache_dma32 = get_obj("cache_dma32"); slab->cpu_slabs = get_obj("cpu_slabs"); slab->destroy_by_rcu = get_obj("destroy_by_rcu"); slab->hwcache_align = get_obj("hwcache_align"); _ Patches currently in -mm which might be from drinkcat(a)chromium.org are

6 years, 6 months

1
0
0 0

[PATCH AUTOSEL 5.0 01/66] arm64: dts: rockchip: fix rk3328-roc-cc gmac2io tx/rx_delay

by Sasha Levin

From: "Leonidas P. Papadakos" <papadakospan(a)gmail.com> [ Upstream commit 924726888f660b2a86382a5dd051ec9ca1b18190 ] The rk3328-roc-cc board exhibits tx stability issues with large packets, as does the rock64 board, which was fixed with this patch https://patchwork.kernel.org/patch/10178969/ A similar patch was merged for the rk3328-roc-cc here https://patchwork.kernel.org/patch/10804863/ but it doesn't include the tx/rx_delay tweaks, and I find that they help with an issue where large transfers would bring the ethernet link down, causing a link reset regularly. Signed-off-by: Leonidas P. Papadakos <papadakospan(a)gmail.com> Signed-off-by: Heiko Stuebner <heiko(a)sntech.de> Signed-off-by: Sasha Levin (Microsoft) <sashal(a)kernel.org> --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index 99d0d9912950..a91f87df662e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -107,8 +107,8 @@ snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x25>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; -- 2.19.1

6 years, 6 months

5
75
0 0

Re: [PATCH 1/2] HID: wacom: Don't set tool type until we're in range

by Jason Gerecke

I can produce a version of this patch specific to v4.14.113. Please let me know the proper process for submitting such a patch. Jason --- Now instead of four in the eights place / you’ve got three, ‘Cause you added one / (That is to say, eight) to the two, / But you can’t take seven from three, / So you look at the sixty-fours.... On Thu, Apr 25, 2019 at 5:15 AM Sasha Levin <sashal(a)kernel.org> wrote: > > Hi, > > [This is an automated email] > > This commit has been processed because it contains a "Fixes:" tag, > fixing commit: a48324de6d4d HID: wacom: Bluetooth IRQ for Intuos Pro should handle prox/range. > > The bot has tested the following trees: v5.0.9, v4.19.36, v4.14.113. > > v5.0.9: Build OK! > v4.19.36: Build OK! > v4.14.113: Failed to apply! Possible dependencies: > Unable to calculate > > > How should we proceed with this patch? > > -- > Thanks, > Sasha

6 years, 6 months

3
2
0 0

[for-linus][PATCH 3/3] trace: Fix preempt_enable_no_resched() abuse

by Steven Rostedt

From: Peter Zijlstra <peterz(a)infradead.org> Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net Cc: Waiman Long <longman(a)redhat.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Will Deacon <will.deacon(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: the arch/x86 maintainers <x86(a)kernel.org> Cc: Davidlohr Bueso <dave(a)stgolabs.net> Cc: Tim Chen <tim.c.chen(a)linux.intel.com> Cc: huang ying <huang.ying.caritas(a)gmail.com> Cc: Roman Gushchin <guro(a)fb.com> Cc: Alexei Starovoitov <ast(a)kernel.org> Cc: Daniel Borkmann <daniel(a)iogearbox.net> Cc: stable(a)vger.kernel.org Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org> --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41b6f96e5366..4ee8d8aa3d0f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } -- 2.20.1

6 years, 6 months

1
0
0 0

[for-linus][PATCH 2/3] tracing: Fix a memory leak by early error exit in trace_pid_write()

by Steven Rostedt

From: Wenwen Wang <wang6495(a)umn.edu> In trace_pid_write(), the buffer for trace parser is allocated through kmalloc() in trace_parser_get_init(). Later on, after the buffer is used, it is then freed through kfree() in trace_parser_put(). However, it is possible that trace_pid_write() is terminated due to unexpected errors, e.g., ENOMEM. In that case, the allocated buffer will not be freed, which is a memory leak bug. To fix this issue, free the allocated buffer when an error is encountered. Link: http://lkml.kernel.org/r/1555726979-15633-1-git-send-email-wang6495@umn.edu Fixes: f4d34a87e9c10 ("tracing: Use pid bitmap instead of a pid array for set_event_pid") Cc: stable(a)vger.kernel.org Signed-off-by: Wenwen Wang <wang6495(a)umn.edu> Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org> --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0cfa13a60086..46f68fad6373 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * not modified. */ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); - if (!pid_list) + if (!pid_list) { + trace_parser_put(&parser); return -ENOMEM; + } pid_list->pid_max = READ_ONCE(pid_max); @@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); if (!pid_list->pids) { + trace_parser_put(&parser); kfree(pid_list); return -ENOMEM; } -- 2.20.1

6 years, 6 months

1
0
0 0

[for-linus][PATCH 1/3] tracing: Fix buffer_ref pipe ops

by Steven Rostedt

From: Jann Horn <jannh(a)google.com> This fixes multiple issues in buffer_pipe_buf_ops: - The ->steal() handler must not return zero unless the pipe buffer has the only reference to the page. But generic_pipe_buf_steal() assumes that every reference to the pipe is tracked by the page's refcount, which isn't true for these buffers - buffer_pipe_buf_get(), which duplicates a buffer, doesn't touch the page's refcount. Fix it by using generic_pipe_buf_nosteal(), which refuses every attempted theft. It should be easy to actually support ->steal, but the only current users of pipe_buf_steal() are the virtio console and FUSE, and they also only use it as an optimization. So it's probably not worth the effort. - The ->get() and ->release() handlers can be invoked concurrently on pipe buffers backed by the same struct buffer_ref. Make them safe against concurrency by using refcount_t. - The pointers stored in ->private were only zeroed out when the last reference to the buffer_ref was dropped. As far as I know, this shouldn't be necessary anyway, but if we do it, let's always do it. 
Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: stable(a)vger.kernel.org Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Signed-off-by: Jann Horn <jannh(a)google.com> Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org> --- fs/splice.c | 4 ++-- include/linux/pipe_fs_i.h | 1 + kernel/trace/trace.c | 28 ++++++++++++++-------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 3ee7e82df48f..e75807380caa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; -static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return 1; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 787d224ff43e..a830e9a00eb9 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -174,6 +174,7 @@ void free_pipe_info(struct pipe_inode_info *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21153e64bf1c..0cfa13a60086 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7025,19 +7025,23 @@ struct buffer_ref { struct ring_buffer *buffer; void *page; int cpu; - int ref; + refcount_t refcount; }; +static void 
buffer_ref_release(struct buffer_ref *ref) +{ + if (!refcount_dec_and_test(&ref->refcount)) + return; + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); + kfree(ref); +} + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); buf->private = 0; } @@ -7046,14 +7050,14 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - ref->ref++; + refcount_inc(&ref->refcount); } /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, - .steal = generic_pipe_buf_steal, + .steal = generic_pipe_buf_nosteal, .get = buffer_pipe_buf_get, }; @@ -7066,11 +7070,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) struct buffer_ref *ref = (struct buffer_ref *)spd->partial[i].private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); spd->partial[i].private = 0; } @@ -7125,7 +7125,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - ref->ref = 1; + refcount_set(&ref->refcount, 1); ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { -- 2.20.1

6 years, 6 months

1
0
0 0

[PATCH 02/21] megaraid_sas: Fix calculation of target ID

by Shivasharan S

In megasas_get_target_prop(), driver is incorrectly calculating the target ID for devices with channel 1 and 3. Due to this, firmware will either fail the command (if there is no device with the target id sent from driver) or could return the properties for a target which was not intended. Devices could end up with the wrong queue depth due to this. Fix target id calculation for channel 1 and 3. Cc: stable(a)vger.kernel.org Signed-off-by: Shivasharan S <shivasharan.srikanteshwara(a)broadcom.com> --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index f677a84f6bc8..d2714fc833ae 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6165,7 +6165,8 @@ megasas_get_target_prop(struct megasas_instance *instance, int ret; struct megasas_cmd *cmd; struct megasas_dcmd_frame *dcmd; - u16 targetId = (sdev->channel % 2) + sdev->id; + u16 targetId = ((sdev->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + + sdev->id; cmd = megasas_get_cmd(instance); -- 2.16.1

6 years, 6 months

1
0
0 0

[PATCH v2] mm: Fix modifying of page protection by insert_pfn_pmd()

by Aneesh Kumar K.V

With some architectures like ppc64, set_pmd_at() cannot cope with a situation where there is already some (different) valid entry present. Use pmdp_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PMD entries. This is similar to commit cae85cb8add3 ("mm/memory.c: fix modifying of page protection by insert_pfn()") We also do a similar update w.r.t. insert_pfn_pud even though ppc64 doesn't support pud pfn entries now. Without this patch we also see the below message in the kernel log "BUG: non-zero pgtables_bytes on freeing mm:" CC: stable(a)vger.kernel.org Reported-by: Chandan Rajendra <chandan(a)linux.ibm.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com> --- Changes from v1: * Fix the pgtable leak mm/huge_memory.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 404acdcd0455..165ea46bf149 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pmd_lock(mm, pmd); + if (!pmd_none(*pmd)) { + if (write) { + if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); + goto out_unlock; + } + entry = pmd_mkyoung(*pmd); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + if (pmdp_set_access_flags(vma, addr, pmd, entry, 1)) + update_mmu_cache_pmd(vma, addr, pmd); + } + + goto out_unlock; + } + entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); @@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pgtable) { pgtable_trans_huge_deposit(mm, pmd, pgtable); mm_inc_nr_ptes(mm); + pgtable = NULL; } set_pmd_at(mm, addr, pmd, entry); update_mmu_cache_pmd(vma, addr, pmd); + +out_unlock: spin_unlock(ptl); + if (pgtable) + pte_free(mm, pgtable); } vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, @@ 
-821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pud_lock(mm, pud); + if (!pud_none(*pud)) { + if (write) { + if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pud(*pud)); + goto out_unlock; + } + entry = pud_mkyoung(*pud); + entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); + if (pudp_set_access_flags(vma, addr, pud, entry, 1)) + update_mmu_cache_pud(vma, addr, pud); + } + goto out_unlock; + } + entry = pud_mkhuge(pfn_t_pud(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); @@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); + +out_unlock: spin_unlock(ptl); } -- 2.20.1

6 years, 6 months

4
10
0 0

+ mm-add-sys-kernel-slab-cache-cache_dma32.patch added to -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm: add /sys/kernel/slab/cache/cache_dma32 has been added to the -mm tree. Its filename is mm-add-sys-kernel-slab-cache-cache_dma32.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-add-sys-kernel-slab-cache-cache_dma32.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-add-sys-kernel-slab-cache-cache_dma32.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Nicolas Boichat <drinkcat(a)chromium.org> Subject: mm: add /sys/kernel/slab/cache/cache_dma32 A previous patch in this series adds support for SLAB_CACHE_DMA32 kmem caches. This adds the corresponding /sys/kernel/slab/cache/cache_dma32 entries, and fixes slabinfo tool. 
Link: http://lkml.kernel.org/r/20181210011504.122604-4-drinkcat@chromium.org Signed-off-by: Nicolas Boichat <drinkcat(a)chromium.org> Cc: Christoph Hellwig <hch(a)infradead.org> Cc: Christoph Lameter <cl(a)linux.com> Cc: David Rientjes <rientjes(a)google.com> Cc: Hsin-Yi Wang <hsinyi(a)chromium.org> Cc: Huaisheng Ye <yehs1(a)lenovo.com> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Matthias Brugger <matthias.bgg(a)gmail.com> Cc: Mel Gorman <mgorman(a)techsingularity.net> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Pekka Enberg <penberg(a)kernel.org> Cc: Robin Murphy <robin.murphy(a)arm.com> Cc: Sasha Levin <Alexander.Levin(a)microsoft.com> Cc: Tomasz Figa <tfiga(a)google.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Will Deacon <will.deacon(a)arm.com> Cc: Yingjoe Chen <yingjoe.chen(a)mediatek.com> Cc: Yong Wu <yong.wu(a)mediatek.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- Documentation/ABI/testing/sysfs-kernel-slab | 9 +++++++++ mm/slub.c | 11 +++++++++++ tools/vm/slabinfo.c | 7 ++++++- 3 files changed, 26 insertions(+), 1 deletion(-) --- a/Documentation/ABI/testing/sysfs-kernel-slab~mm-add-sys-kernel-slab-cache-cache_dma32 +++ a/Documentation/ABI/testing/sysfs-kernel-slab @@ -106,6 +106,15 @@ Description: are from ZONE_DMA. Available when CONFIG_ZONE_DMA is enabled. +What: /sys/kernel/slab/cache/cache_dma32 +Date: December 2018 +KernelVersion: 4.21 +Contact: Nicolas Boichat <drinkcat(a)chromium.org> +Description: + The cache_dma32 file is read-only and specifies whether objects + are from ZONE_DMA32. + Available when CONFIG_ZONE_DMA32 is enabled. 
+ What: /sys/kernel/slab/cache/cpu_slabs Date: May 2007 KernelVersion: 2.6.22 --- a/mm/slub.c~mm-add-sys-kernel-slab-cache-cache_dma32 +++ a/mm/slub.c @@ -5112,6 +5112,14 @@ static ssize_t cache_dma_show(struct kme SLAB_ATTR_RO(cache_dma); #endif +#ifdef CONFIG_ZONE_DMA32 +static ssize_t cache_dma32_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA32)); +} +SLAB_ATTR_RO(cache_dma32); +#endif + static ssize_t usersize_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%u\n", s->usersize); @@ -5452,6 +5460,9 @@ static struct attribute *slab_attrs[] = #ifdef CONFIG_ZONE_DMA &cache_dma_attr.attr, #endif +#ifdef CONFIG_ZONE_DMA32 + &cache_dma32_attr.attr, +#endif #ifdef CONFIG_NUMA &remote_node_defrag_ratio_attr.attr, #endif --- a/tools/vm/slabinfo.c~mm-add-sys-kernel-slab-cache-cache_dma32 +++ a/tools/vm/slabinfo.c @@ -29,7 +29,7 @@ struct slabinfo { char *name; int alias; int refs; - int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + int aliases, align, cache_dma, cache_dma32, cpu_slabs, destroy_by_rcu; unsigned int hwcache_align, object_size, objs_per_slab; unsigned int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; @@ -534,6 +534,8 @@ static void report(struct slabinfo *s) printf("** Hardware cacheline aligned\n"); if (s->cache_dma) printf("** Memory is allocated in a special DMA zone\n"); + if (s->cache_dma32) + printf("** Memory is allocated in a special DMA32 zone\n"); if (s->destroy_by_rcu) printf("** Slabs are destroyed via RCU\n"); if (s->reclaim_account) @@ -602,6 +604,8 @@ static void slabcache(struct slabinfo *s *p++ = '*'; if (s->cache_dma) *p++ = 'd'; + if (s->cache_dma32) + *p++ = 'D'; if (s->hwcache_align) *p++ = 'A'; if (s->poison) @@ -1208,6 +1212,7 @@ static void read_slab_dir(void) slab->aliases = get_obj("aliases"); slab->align = get_obj("align"); slab->cache_dma = get_obj("cache_dma"); + slab->cache_dma32 = get_obj("cache_dma32"); 
slab->cpu_slabs = get_obj("cpu_slabs"); slab->destroy_by_rcu = get_obj("destroy_by_rcu"); slab->hwcache_align = get_obj("hwcache_align"); _ Patches currently in -mm which might be from drinkcat(a)chromium.org are mm-add-support-for-kmem-caches-in-dma32-zone.patch iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging.patch mm-add-sys-kernel-slab-cache-cache_dma32.patch

6 years, 6 months

5
7
0 0

[patch 5/9] mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model

by akpm＠linux-foundation.org

From: Mel Gorman <mgorman(a)techsingularity.net> Subject: mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model Mikulas Patocka reported that 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") "broke" memory management on parisc. The machine is not NUMA but the DISCONTIG model creates three pgdats even though it's a UMA machine for the following ranges 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB Mikulas reported: With the patch 1c30844d2, the kernel will incorrectly reclaim the first zone when it fills up, ignoring the fact that there are two completely free zones. Basically, it limits cache size to 1GiB. For example, if I run: # dd if=/dev/sda of=/dev/null bs=1M count=2048 - with the proper kernel, there should be "Buffers - 2GiB" when this command finishes. With the patch 1c30844d2, buffers will consume just 1GiB or slightly more, because the kernel was incorrectly reclaiming them. The page allocator and reclaim make assumptions that pgdats really represent NUMA nodes and zones represent ranges and make decisions on that basis. Watermark boosting for small pgdats leads to unexpected results even though this would have behaved reasonably on SPARSEMEM. DISCONTIG is essentially deprecated and even parisc plans to move to SPARSEMEM so there is no need to be fancy, this patch simply disables watermark boosting by default on DISCONTIGMEM. 
Link: http://lkml.kernel.org/r/20190419094335.GJ18914@techsingularity.net Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Mel Gorman <mgorman(a)techsingularity.net> Reported-by: Mikulas Patocka <mpatocka(a)redhat.com> Tested-by: Mikulas Patocka <mpatocka(a)redhat.com> Acked-by: Vlastimil Babka <vbabka(a)suse.cz> Cc: James Bottomley <James.Bottomley(a)hansenpartnership.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- Documentation/sysctl/vm.txt | 16 ++++++++-------- mm/page_alloc.c | 13 +++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) --- a/Documentation/sysctl/vm.txt~mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model +++ a/Documentation/sysctl/vm.txt @@ -866,14 +866,14 @@ The intent is that compaction has less w increase the success rate of future high-order allocations such as SLUB allocations, THP and hugetlbfs pages. -To make it sensible with respect to the watermark_scale_factor parameter, -the unit is in fractions of 10,000. The default value of 15,000 means -that up to 150% of the high watermark will be reclaimed in the event of -a pageblock being mixed due to fragmentation. The level of reclaim is -determined by the number of fragmentation events that occurred in the -recent past. If this value is smaller than a pageblock then a pageblocks -worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor -of 0 will disable the feature. +To make it sensible with respect to the watermark_scale_factor +parameter, the unit is in fractions of 10,000. The default value of +15,000 on !DISCONTIGMEM configurations means that up to 150% of the high +watermark will be reclaimed in the event of a pageblock being mixed due +to fragmentation. The level of reclaim is determined by the number of +fragmentation events that occurred in the recent past. 
If this value is +smaller than a pageblock then a pageblocks worth of pages will be reclaimed +(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature. ============================================================= --- a/mm/page_alloc.c~mm-do-not-boost-watermarks-to-avoid-fragmentation-for-the-discontig-memory-model +++ a/mm/page_alloc.c @@ -266,7 +266,20 @@ compound_page_dtor * const compound_page int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_DISCONTIGMEM +/* + * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges + * are not on separate NUMA nodes. Functionally this works but with + * watermark_boost_factor, it can reclaim prematurely as the ranges can be + * quite small. By default, do not boost watermarks on discontigmem as in + * many cases very high-order allocations like THP are likely to be + * unsupported and the premature reclaim offsets the advantage of long-term + * fragmentation avoidance. + */ +int watermark_boost_factor __read_mostly; +#else int watermark_boost_factor __read_mostly = 15000; +#endif int watermark_scale_factor = 10; static unsigned long nr_kernel_pages __initdata; _

6 years, 6 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror