- Linux-stable-mirror - lists.linaro.org

[PATCH] ocxl/afu_irq: Don't include <asm/pnv-ocxl.h>

by Greg Kurz

The AFU irq code doesn't need to reach out to the platform. Signed-off-by: Greg Kurz <groug(a)kaod.org> --- drivers/misc/ocxl/afu_irq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index e70cfa24577f..11ab996657a2 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -2,7 +2,6 @@ // Copyright 2017 IBM Corp. #include <linux/interrupt.h> #include <linux/eventfd.h> -#include <asm/pnv-ocxl.h> #include "ocxl_internal.h" #include "trace.h"

7 years

4
4
0 0

[PATCH] ocxl: Clarify error path in setup_xsl_irq()

by Greg Kurz

Implementing rollback with goto and labels is a common practice that leads to prettier and more maintainable code. FWIW, this design pattern is already being used in alloc_link() a few lines below in this file. Do the same in setup_xsl_irq(). Signed-off-by: Greg Kurz <groug(a)kaod.org> --- drivers/misc/ocxl/link.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index eed92055184d..659977a17405 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -273,9 +273,9 @@ static int setup_xsl_irq(struct pci_dev *dev, struct link *link) spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x", link->domain, link->bus, link->dev); if (!spa->irq_name) { - unmap_irq_registers(spa); dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n"); - return -ENOMEM; + rc = -ENOMEM; + goto err_xsl; } /* * At some point, we'll need to look into allowing a higher @@ -283,11 +283,10 @@ static int setup_xsl_irq(struct pci_dev *dev, struct link *link) */ spa->virq = irq_create_mapping(NULL, hwirq); if (!spa->virq) { - kfree(spa->irq_name); - unmap_irq_registers(spa); dev_err(&dev->dev, "irq_create_mapping failed for translation interrupt\n"); - return -EINVAL; + rc = -EINVAL; + goto err_name; } dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq); @@ -295,15 +294,21 @@ static int setup_xsl_irq(struct pci_dev *dev, struct link *link) rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name, link); if (rc) { - irq_dispose_mapping(spa->virq); - kfree(spa->irq_name); - unmap_irq_registers(spa); dev_err(&dev->dev, "request_irq failed for translation interrupt: %d\n", rc); - return -EINVAL; + rc = -EINVAL; + goto err_mapping; } return 0; + +err_mapping: + irq_dispose_mapping(spa->virq); +err_name: + kfree(spa->irq_name); +err_xsl: + unmap_irq_registers(spa); + return rc; } static void release_xsl_irq(struct link *link)

7 years

4
3
0 0

FAILED: patch "[PATCH] ib_srpt: Fix a use-after-free in __srpt_close_all_ch()" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 14d15c2b278011056482eb015dff89f9cbf2b841 Mon Sep 17 00:00:00 2001 From: Bart Van Assche <bart.vanassche(a)wdc.com> Date: Mon, 2 Jul 2018 14:08:45 -0700 Subject: [PATCH] ib_srpt: Fix a use-after-free in __srpt_close_all_ch() BUG: KASAN: use-after-free in srpt_set_enabled+0x1a9/0x1e0 [ib_srpt] Read of size 4 at addr ffff8801269d23f8 by task check/29726 CPU: 4 PID: 29726 Comm: check Not tainted 4.18.0-rc2-dbg+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 srpt_set_enabled+0x1a9/0x1e0 [ib_srpt] srpt_tpg_enable_store+0xb8/0x120 [ib_srpt] configfs_write_file+0x14e/0x1d0 [configfs] __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f235cfe6154 Fixes: aaf45bd83eba ("IB/srpt: Detect session shutdown reliably") Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Jason Gunthorpe <jgg(a)mellanox.com> diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 754da8d30952..e42eec20c631 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1940,8 +1940,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport) list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", - 
ch->sess_name, ch->qp->qp_num, + pr_info("Closing channel %s because target %s_%d has been disabled\n", + ch->sess_name, sport->sdev->device->name, sport->port); srpt_close_ch(ch); }

7 years

3
2
0 0

FAILED: patch "[PATCH] ubifs: Fix directory size calculation for symlinks" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 00ee8b60102862f4daf0814d12a2ea2744fc0b9b Mon Sep 17 00:00:00 2001 From: Richard Weinberger <richard(a)nod.at> Date: Mon, 11 Jun 2018 23:41:09 +0200 Subject: [PATCH] ubifs: Fix directory size calculation for symlinks We have to account the name of the symlink and not the target length. Fixes: ca7f85be8d6c ("ubifs: Add support for encrypted symlinks") Cc: <stable(a)vger.kernel.org> Signed-off-by: Richard Weinberger <richard(a)nod.at> diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 9da224d4f2da..e8616040bffc 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1123,8 +1123,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, struct ubifs_inode *ui; struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; - int err, len = strlen(symname); - int sz_change = CALC_DENT_SIZE(len); + int err, sz_change, len = strlen(symname); struct fscrypt_str disk_link; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, .new_ino_d = ALIGN(len, 8), @@ -1151,6 +1150,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto out_budg; + sz_change = CALC_DENT_SIZE(fname_len(&nm)); + inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) { err = PTR_ERR(inode);

7 years

4
3
0 0

[PATCH] iomap: Revert "fs/iomap.c: get/put the page in iomap_page_create/release()"

by Dave Chinner

From: Dave Chinner <dchinner(a)redhat.com> This reverts commit 61c6de667263184125d5ca75e894fcad632b0dd3. The reverted commit added page reference counting to iomap page structures that are used to track block size < page size state. This was supposed to align the code with page migration page accounting assumptions, but what it has done instead is break XFS filesystems. Every fstests run I've done on sub-page block size XFS filesystems since picking up this commit 2 days ago has failed with bad page state errors such as: # ./run_check.sh "-m rmapbt=1,reflink=1 -i sparse=1 -b size=1k" "generic/038" .... SECTION -- xfs FSTYP -- xfs (debug) PLATFORM -- Linux/x86_64 test1 4.20.0-rc6-dgc+ MKFS_OPTIONS -- -f -m rmapbt=1,reflink=1 -i sparse=1 -b size=1k /dev/sdc MOUNT_OPTIONS -- /dev/sdc /mnt/scratch generic/038 454s ... run fstests generic/038 at 2018-12-20 18:43:05 XFS (sdc): Unmounting Filesystem XFS (sdc): Mounting V5 Filesystem XFS (sdc): Ending clean mount BUG: Bad page state in process kswapd0 pfn:3a7fa page:ffffea0000ccbeb0 count:0 mapcount:0 mapping:ffff88800d9b6360 index:0x1 flags: 0xfffffc0000000() raw: 000fffffc0000000 dead000000000100 dead000000000200 ffff88800d9b6360 raw: 0000000000000001 0000000000000000 00000000ffffffff page dumped because: non-NULL mapping CPU: 0 PID: 676 Comm: kswapd0 Not tainted 4.20.0-rc6-dgc+ #915 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/2014 Call Trace: dump_stack+0x67/0x90 bad_page.cold.116+0x8a/0xbd free_pcppages_bulk+0x4bf/0x6a0 free_unref_page_list+0x10f/0x1f0 shrink_page_list+0x49d/0xf50 shrink_inactive_list+0x19d/0x3b0 shrink_node_memcg.constprop.77+0x398/0x690 ? shrink_slab.constprop.81+0x278/0x3f0 shrink_node+0x7a/0x2f0 kswapd+0x34b/0x6d0 ? node_reclaim+0x240/0x240 kthread+0x11f/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x24/0x30 Disabling lock debugging due to kernel taint .... 
The failures are from anything that frees pages and empties the per-cpu page magazines, so it's not a predictable failure or an easy to debug failure. generic/038 is a reliable reproducer of this problem - it has a 9 in 10 failure rate on one of my test machines. Failures on other machines have been at random points in fstests runs but every run has ended up tripping this problem. Hence generic/038 was used to bisect the failure because it was the most reliable failure. It is too close to the 4.20 release (not to mention holidays) to try to diagnose, fix and test the underlying cause of the problem, so reverting the commit is the only option we have right now. The revert has been tested against a current tot 4.20-rc7+ kernel across multiple machines running sub-page block size XFS filesystems and none of the bad page state failures have been seen. Signed-off-by: Dave Chinner <dchinner(a)redhat.com> --- fs/iomap.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 5bc172f3dfe8..d6bc98ae8d35 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -116,12 +116,6 @@ iomap_page_create(struct inode *inode, struct page *page) atomic_set(&iop->read_count, 0); atomic_set(&iop->write_count, 0); bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); - - /* - * migrate_page_move_mapping() assumes that pages with private data have - * their count elevated by 1. - */ - get_page(page); set_page_private(page, (unsigned long)iop); SetPagePrivate(page); return iop; @@ -138,7 +132,6 @@ iomap_page_release(struct page *page) WARN_ON_ONCE(atomic_read(&iop->write_count)); ClearPagePrivate(page); set_page_private(page, 0); - put_page(page); kfree(iop); } -- 2.19.1

7 years

6
13
0 0

stable/linux-3.18.y boot: 47 boots: 1 failed, 45 passed with 1 conflict (v3.18.131)

by kernelci.org bot

stable/linux-3.18.y boot: 47 boots: 1 failed, 45 passed with 1 conflict (v3.18.131) Full Boot Summary: https://kernelci.org/boot/all/job/stable/branch/linux-3.18.y/kernel/v3.18.1… Full Build Summary: https://kernelci.org/build/stable/branch/linux-3.18.y/kernel/v3.18.131/ Tree: stable Branch: linux-3.18.y Git Describe: v3.18.131 Git Commit: fa42fea0d8b49ba65b49a999331950d74827a52d Git URL: http://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git Tested: 21 unique boards, 11 SoC families, 11 builds out of 185 Boot Regressions Detected: arm: multi_v7_defconfig: omap4-panda: lab-collabora: new failure (last pass: v3.18.130) Boot Failure Detected: arm: exynos_defconfig exynos4412-odroidx2: 1 failed lab Conflicting Boot Failure Detected: (These likely are not failures as other labs are reporting PASS. Needs review.) arm: multi_v7_defconfig: omap4-panda: lab-baylibre-seattle: PASS lab-collabora: FAIL --- For more info write to <info(a)kernelci.org>

7 years

1
0
0 0

stable/linux-4.14.y boot: 101 boots: 2 failed, 98 passed with 1 offline (v4.14.90)

by kernelci.org bot

stable/linux-4.14.y boot: 101 boots: 2 failed, 98 passed with 1 offline (v4.14.90) Full Boot Summary: https://kernelci.org/boot/all/job/stable/branch/linux-4.14.y/kernel/v4.14.9… Full Build Summary: https://kernelci.org/build/stable/branch/linux-4.14.y/kernel/v4.14.90/ Tree: stable Branch: linux-4.14.y Git Describe: v4.14.90 Git Commit: 592f5569e18471c07208f74540f4e0f646b226f7 Git URL: http://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git Tested: 59 unique boards, 22 SoC families, 12 builds out of 197 Boot Regressions Detected: arm64: defconfig: rk3399-firefly: lab-baylibre-seattle: failing since 13 days (last pass: v4.14.86 - first fail: v4.14.87) Boot Failures Detected: arm64: defconfig meson-gxl-s905x-libretech-cc: 1 failed lab rk3399-firefly: 1 failed lab Offline Platforms: arm: multi_v7_defconfig: stih410-b2120: 1 offline lab --- For more info write to <info(a)kernelci.org>

7 years

1
0
0 0

[patch 4/4] mm, page_alloc: fix has_unmovable_pages for HugePages

by akpm＠linux-foundation.org

From: Oscar Salvador <osalvador(a)suse.de> Subject: mm, page_alloc: fix has_unmovable_pages for HugePages While playing with gigantic hugepages and memory_hotplug, I triggered the following #PF when "cat memoryX/removable": <--- kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 kernel: #PF error: [normal kernel read fault] kernel: PGD 0 P4D 0 kernel: Oops: 0000 [#1] SMP PTI kernel: CPU: 1 PID: 1481 Comm: cat Tainted: G E 4.20.0-rc6-mm1-1-default+ #18 kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 kernel: RIP: 0010:has_unmovable_pages+0x154/0x210 kernel: Code: 1b ff ff ff eb 32 48 8b 45 00 bf 00 10 00 00 a9 00 00 01 00 74 07 0f b6 4d 51 48 d3 e7 e8 c4 81 05 00 48 85 c0 49 89 c1 75 7e <41> 8b 41 08 83 f8 09 74 41 83 f8 1b 74 3c 4d 2b 64 24 58 49 81 ec kernel: RSP: 0018:ffffc90000a1fd30 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000009 kernel: RDX: ffffffff82aed4f0 RSI: 0000000000001000 RDI: 0000000000001000 kernel: RBP: ffffea0001800000 R08: 0000000000200000 R09: 0000000000000000 kernel: R10: 0000000000001000 R11: 0000000000000003 R12: ffff88813ffd45c0 kernel: R13: 0000000000060000 R14: 0000000000000001 R15: ffffea0000000000 kernel: FS: 00007fd71d9b3500(0000) GS:ffff88813bb00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000008 CR3: 00000001371c2002 CR4: 00000000003606e0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: Call Trace: kernel: is_mem_section_removable+0x7d/0x100 kernel: removable_show+0x90/0xb0 kernel: dev_attr_show+0x1c/0x50 kernel: sysfs_kf_seq_show+0xca/0x1b0 kernel: seq_read+0x133/0x380 kernel: __vfs_read+0x26/0x180 kernel: vfs_read+0x89/0x140 kernel: ksys_read+0x42/0x90 kernel: do_syscall_64+0x5b/0x180 kernel: 
entry_SYSCALL_64_after_hwframe+0x44/0xa9 kernel: RIP: 0033:0x7fd71d4c8b41 kernel: Code: fe ff ff 48 8d 3d 27 9e 09 00 48 83 ec 08 e8 96 02 02 00 66 0f 1f 44 00 00 8b 05 ea fc 2c 00 48 63 ff 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 55 53 48 89 d5 48 89 kernel: RSP: 002b:00007ffeab5f6448 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 kernel: RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fd71d4c8b41 kernel: RDX: 0000000000020000 RSI: 00007fd71d809000 RDI: 0000000000000003 kernel: RBP: 0000000000020000 R08: ffffffffffffffff R09: 0000000000000000 kernel: R10: 000000000000038b R11: 0000000000000246 R12: 00007fd71d809000 kernel: R13: 0000000000000003 R14: 00007fd71d80900f R15: 0000000000020000 kernel: Modules linked in: af_packet(E) xt_tcpudp(E) ipt_REJECT(E) xt_conntrack(E) nf_conntrack(E) nf_defrag_ipv4(E) ip_set(E) nfnetlink(E) ebtable_nat(E) ebtable_broute(E) bridge(E) stp(E) llc(E) iptable_mangle(E) iptable_raw(E) iptable_security(E) ebtable_filter(E) ebtables(E) iptable_filter(E) ip_tables(E) x_tables(E) kvm_intel(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) bochs_drm(E) ttm(E) drm_kms_helper(E) drm(E) aesni_intel(E) virtio_net(E) syscopyarea(E) net_failover(E) sysfillrect(E) failover(E) aes_x86_64(E) crypto_simd(E) sysimgblt(E) cryptd(E) pcspkr(E) glue_helper(E) parport_pc(E) fb_sys_fops(E) i2c_piix4(E) parport(E) button(E) btrfs(E) libcrc32c(E) xor(E) zstd_decompress(E) zstd_compress(E) raid6_pq(E) sd_mod(E) ata_generic(E) ata_piix(E) ahci(E) libahci(E) serio_raw(E) crc32c_intel(E) virtio_pci(E) virtio_ring(E) virtio(E) libata(E) sg(E) scsi_mod(E) autofs4(E) kernel: CR2: 0000000000000008 kernel: ---[ end trace 49cade81474e40e7 ]--- kernel: RIP: 0010:has_unmovable_pages+0x154/0x210 kernel: Code: 1b ff ff ff eb 32 48 8b 45 00 bf 00 10 00 00 a9 00 00 01 00 74 07 0f b6 4d 51 48 d3 e7 e8 c4 81 05 00 48 85 c0 49 89 c1 75 7e <41> 8b 41 08 83 f8 09 74 41 83 f8 1b 74 3c 4d 2b 64 24 58 49 81 ec kernel: 
RSP: 0018:ffffc90000a1fd30 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000009 kernel: RDX: ffffffff82aed4f0 RSI: 0000000000001000 RDI: 0000000000001000 kernel: RBP: ffffea0001800000 R08: 0000000000200000 R09: 0000000000000000 kernel: R10: 0000000000001000 R11: 0000000000000003 R12: ffff88813ffd45c0 kernel: R13: 0000000000060000 R14: 0000000000000001 R15: ffffea0000000000 kernel: FS: 00007fd71d9b3500(0000) GS:ffff88813bb00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000008 CR3: 00000001371c2002 CR4: 00000000003606e0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 ---> The reason is we do not pass the Head to page_hstate(), and so, the call to compound_order() in page_hstate() returns 0, so we end up checking all hstates' sizes to match PAGE_SIZE. Obviously, we do not find any hstate matching that size, and we return NULL. Then, we dereference that NULL pointer in hugepage_migration_supported() and we got the #PF from above. Fix that by getting the head page before calling page_hstate(). Also, since gigantic pages span several pageblocks, re-adjust the logic for skipping pages. While at it, we can also get rid of the round_up(). 
[osalvador(a)suse.de: remove round_up(), adjust skip pages logic per Michal] Link: http://lkml.kernel.org/r/20181221062809.31771-1-osalvador@suse.de Link: http://lkml.kernel.org/r/20181217225113.17864-1-osalvador@suse.de Signed-off-by: Oscar Salvador <osalvador(a)suse.de> Acked-by: Michal Hocko <mhocko(a)suse.com> Reviewed-by: David Hildenbrand <david(a)redhat.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Pavel Tatashin <pavel.tatashin(a)microsoft.com> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/page_alloc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) --- a/mm/page_alloc.c~mm-page_alloc-fix-has_unmovable_pages-for-hugepages +++ a/mm/page_alloc.c @@ -7814,11 +7814,14 @@ bool has_unmovable_pages(struct zone *zo * handle each tail page individually in migration. */ if (PageHuge(page)) { + struct page *head = compound_head(page); + unsigned int skip_pages; - if (!hugepage_migration_supported(page_hstate(page))) + if (!hugepage_migration_supported(page_hstate(head))) goto unmovable; - iter = round_up(iter + 1, 1<<compound_order(page)) - 1; + skip_pages = (1 << compound_order(head)) - (page - head); + iter += skip_pages - 1; continue; } _

7 years

1
0
0 0

[patch 3/4] fork,memcg: fix crash in free_thread_stack on memcg charge fail

by akpm＠linux-foundation.org

From: Rik van Riel <riel(a)surriel.com> Subject: fork,memcg: fix crash in free_thread_stack on memcg charge fail Changeset 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting") will result in fork failing if allocating a kernel stack for a task in dup_task_struct exceeds the kernel memory allowance for that cgroup. Unfortunately, it also results in a crash. This is due to the code jumping to free_stack and calling free_thread_stack when the memcg kernel stack charge fails, but without tsk->stack pointing at the freshly allocated stack. This in turn results in the vfree_atomic in free_thread_stack oopsing with a backtrace like this: #5 [ffffc900244efc88] die at ffffffff8101f0ab #6 [ffffc900244efcb8] do_general_protection at ffffffff8101cb86 #7 [ffffc900244efce0] general_protection at ffffffff818ff082 [exception RIP: llist_add_batch+7] RIP: ffffffff8150d487 RSP: ffffc900244efd98 RFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88085ef55980 RCX: 0000000000000000 RDX: ffff88085ef55980 RSI: 343834343531203a RDI: 343834343531203a RBP: ffffc900244efd98 R8: 0000000000000001 R9: ffff8808578c3600 R10: 0000000000000000 R11: 0000000000000001 R12: ffff88029f6c21c0 R13: 0000000000000286 R14: ffff880147759b00 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffffc900244efda0] vfree_atomic at ffffffff811df2c7 #9 [ffffc900244efdb8] copy_process at ffffffff81086e37 #10 [ffffc900244efe98] _do_fork at ffffffff810884e0 #11 [ffffc900244eff10] sys_vfork at ffffffff810887ff #12 [ffffc900244eff20] do_syscall_64 at ffffffff81002a43 RIP: 000000000049b948 RSP: 00007ffcdb307830 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 0000000000896030 RCX: 000000000049b948 RDX: 0000000000000000 RSI: 00007ffcdb307790 RDI: 00000000005d7421 RBP: 000000000067370f R8: 00007ffcdb3077b0 R9: 000000000001ed00 R10: 0000000000000008 R11: 0000000000000246 R12: 0000000000000040 R13: 000000000000000f R14: 0000000000000000 R15: 000000000088d018 ORIG_RAX: 000000000000003a CS: 0033 SS: 002b The 
simplest fix is to assign tsk->stack right where it is allocated. Link: http://lkml.kernel.org/r/20181214231726.7ee4843c@imladris.surriel.com Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting") Signed-off-by: Rik van Riel <riel(a)surriel.com> Acked-by: Roman Gushchin <guro(a)fb.com> Acked-by: Michal Hocko <mhocko(a)suse.com> Cc: Shakeel Butt <shakeelb(a)google.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- kernel/fork.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) --- a/kernel/fork.c~forkmemcg-fix-crash-in-free_thread_stack-on-memcg-charge-fail +++ a/kernel/fork.c @@ -240,8 +240,10 @@ static unsigned long *alloc_thread_stack * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ - if (stack) + if (stack) { tsk->stack_vm_area = find_vm_area(stack); + tsk->stack = stack; + } return stack; #else struct page *page = alloc_pages_node(node, THREADINFO_GFP, @@ -288,7 +290,10 @@ static struct kmem_cache *thread_stack_c static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); + unsigned long *stack; + stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); + tsk->stack = stack; + return stack; } static void free_thread_stack(struct task_struct *tsk) _

7 years

1
0
0 0

[patch 2/4] mm: thp: fix flags for pmd migration when split

by akpm＠linux-foundation.org

From: Peter Xu <peterx(a)redhat.com> Subject: mm: thp: fix flags for pmd migration when split When splitting a huge migrating PMD, we'll transfer all the existing PMD bits and apply them again onto the small PTEs. However we are fetching the bits unconditionally via pmd_soft_dirty(), pmd_write() or pmd_young() while actually they don't make sense at all when it's a migration entry. Fix them up. While at it, drop the ifdef together as not needed. Note that if my understanding is correct about the problem then if without the patch there is a chance to lose some of the dirty bits in the migrating pmd pages (on x86_64 we're fetching bit 11 which is part of swap offset instead of bit 2) and it could potentially corrupt the memory of a userspace program which depends on the dirty bit. Link: http://lkml.kernel.org/r/20181213051510.20306-1-peterx@redhat.com Signed-off-by: Peter Xu <peterx(a)redhat.com> Reviewed-by: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru> Reviewed-by: William Kucharski <william.kucharski(a)oracle.com> Acked-by: Kirill A. 
Shutemov <kirill.shutemov(a)linux.intel.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Dave Jiang <dave.jiang(a)intel.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.vnet.ibm.com> Cc: Souptick Joarder <jrdr.linux(a)gmail.com> Cc: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru> Cc: Zi Yan <zi.yan(a)cs.rutgers.edu> Cc: <stable(a)vger.kernel.org> [4.14+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/huge_memory.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) --- a/mm/huge_memory.c~mm-thp-fix-flags-for-pmd-migration-when-split +++ a/mm/huge_memory.c @@ -2144,23 +2144,25 @@ static void __split_huge_pmd_locked(stru */ old_pmd = pmdp_invalidate(vma, haddr, pmd); -#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION pmd_migration = is_pmd_migration_entry(old_pmd); - if (pmd_migration) { + if (unlikely(pmd_migration)) { swp_entry_t entry; entry = pmd_to_swp_entry(old_pmd); page = pfn_to_page(swp_offset(entry)); - } else -#endif + write = is_write_migration_entry(entry); + young = false; + soft_dirty = pmd_swp_soft_dirty(old_pmd); + } else { page = pmd_page(old_pmd); + if (pmd_dirty(old_pmd)) + SetPageDirty(page); + write = pmd_write(old_pmd); + young = pmd_young(old_pmd); + soft_dirty = pmd_soft_dirty(old_pmd); + } VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); - if (pmd_dirty(old_pmd)) - SetPageDirty(page); - write = pmd_write(old_pmd); - young = pmd_young(old_pmd); - soft_dirty = pmd_soft_dirty(old_pmd); /* * Withdraw the table only after we mark the pmd entry invalid. _

7 years

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror