From: Alexander Gordeev agordeev@linux.ibm.com
[ Upstream commit 65f8780e2d70257200547b5a7654974aa7c37ce1 ]
The size of the vmalloc area depends on various factors at boot and can be set to:
1. Default size as determined by VMALLOC_DEFAULT_SIZE macro; 2. One half of the virtual address space not occupied by modules and fixed mappings; 3. The size provided by user with vmalloc= kernel command line parameter;
In cases [1] and [2] the vmalloc area base address is aligned on a Region3 table type boundary, while in case [3] it might get aligned on a page boundary.
Limit the waste of page tables and always align vmalloc area size and base address on segment boundary.
Acked-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/boot/ipl_parm.c | 2 +- arch/s390/boot/startup.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 7b7521762633..4230144645bc 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -272,7 +272,7 @@ void parse_boot_command_line(void) memory_limit = round_down(memparse(val, NULL), PAGE_SIZE);
if (!strcmp(param, "vmalloc") && val) { - vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + vmalloc_size = round_up(memparse(val, NULL), _SEGMENT_SIZE); vmalloc_size_set = 1; }
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index d3e48bd9c394..d08db5df6091 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -212,7 +212,8 @@ static unsigned long setup_kernel_memory_layout(void) VMALLOC_END = MODULES_VADDR;
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ - vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE)); + vsize = round_down(VMALLOC_END / 2, _SEGMENT_SIZE); + vmalloc_size = min(vmalloc_size, vsize); VMALLOC_START = VMALLOC_END - vmalloc_size;
/* split remaining virtual space between 1:1 mapping & vmemmap array */
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 64bac5ea17d527872121adddfee869c7a0618f8f ]
The prototype was hidden in an #ifdef on x86, which causes a warning:
kernel/irq_work.c:72:13: error: no previous prototype for 'arch_irq_work_raise' [-Werror=missing-prototypes]
Some architectures have a working prototype, while others don't. Fix this by providing it in only one place that is always visible.
Reviewed-by: Alexander Gordeev agordeev@linux.ibm.com Acked-by: Catalin Marinas catalin.marinas@arm.com Acked-by: Palmer Dabbelt palmer@rivosinc.com Acked-by: Guo Ren guoren@kernel.org Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/include/asm/irq_work.h | 2 -- arch/arm64/include/asm/irq_work.h | 2 -- arch/csky/include/asm/irq_work.h | 2 +- arch/powerpc/include/asm/irq_work.h | 1 - arch/riscv/include/asm/irq_work.h | 2 +- arch/s390/include/asm/irq_work.h | 2 -- arch/x86/include/asm/irq_work.h | 1 - include/linux/irq_work.h | 3 +++ 8 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/arch/arm/include/asm/irq_work.h b/arch/arm/include/asm/irq_work.h index 3149e4dc1b54..8895999834cc 100644 --- a/arch/arm/include/asm/irq_work.h +++ b/arch/arm/include/asm/irq_work.h @@ -9,6 +9,4 @@ static inline bool arch_irq_work_has_interrupt(void) return is_smp(); }
-extern void arch_irq_work_raise(void); - #endif /* _ASM_ARM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 81bbfa3a035b..a1020285ea75 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,8 +2,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H
-extern void arch_irq_work_raise(void); - static inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/csky/include/asm/irq_work.h b/arch/csky/include/asm/irq_work.h index 33aaf39d6f94..d39fcc1f5395 100644 --- a/arch/csky/include/asm/irq_work.h +++ b/arch/csky/include/asm/irq_work.h @@ -7,5 +7,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } -extern void arch_irq_work_raise(void); + #endif /* __ASM_CSKY_IRQ_WORK_H */ diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h index b8b0be8f1a07..c6d3078bd8c3 100644 --- a/arch/powerpc/include/asm/irq_work.h +++ b/arch/powerpc/include/asm/irq_work.h @@ -6,6 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } -extern void arch_irq_work_raise(void);
#endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index b53891964ae0..b27a4d64fc6a 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -6,5 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return IS_ENABLED(CONFIG_SMP); } -extern void arch_irq_work_raise(void); + #endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h index 603783766d0a..f00c9f610d5a 100644 --- a/arch/s390/include/asm/irq_work.h +++ b/arch/s390/include/asm/irq_work.h @@ -7,6 +7,4 @@ static inline bool arch_irq_work_has_interrupt(void) return true; }
-void arch_irq_work_raise(void); - #endif /* _ASM_S390_IRQ_WORK_H */ diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 800ffce0db29..6b4d36c95165 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -9,7 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return boot_cpu_has(X86_FEATURE_APIC); } -extern void arch_irq_work_raise(void); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 8cd11a223260..136f2980cba3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -66,6 +66,9 @@ void irq_work_sync(struct irq_work *work); void irq_work_run(void); bool irq_work_needs_cpu(void); void irq_work_single(void *arg); + +void arch_irq_work_raise(void); + #else static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { }
From: Tony Krowiak akrowiak@linux.ibm.com
[ Upstream commit a0d8f4eeb7c4ffaee21702bcc91a09b3988c5b7a ]
The 'status' attribute for AP queue devices bound to the vfio_ap device driver displays incorrect status when the mediated device is attached to a guest, but the queue device is not passed through. In the current implementation, the status displayed is 'in_use' which is not correct; it should be 'assigned'. This can happen if one of the queue devices associated with a given adapter is not bound to the vfio_ap device driver. For example:
Queues listed in /sys/bus/ap/drivers/vfio_ap: 14.0005 14.0006 14.000d 16.0006 16.000d
Queues listed in /sys/devices/vfio_ap/matrix/$UUID/matrix 14.0005 14.0006 14.000d 16.0005 16.0006 16.000d
Queues listed in /sys/devices/vfio_ap/matrix/$UUID/guest_matrix 14.0005 14.0006 14.000d
The reason no queues for adapter 0x16 are listed in the guest_matrix is because queue 16.0005 is not bound to the vfio_ap device driver, so no queue associated with the adapter is passed through to the guest; therefore, each queue device for adapter 0x16 should display 'assigned' instead of 'in_use', because those queues are not in use by a guest, but only assigned to the mediated device.
Let's check the AP configuration for the guest to determine whether a queue device is passed through before displaying a status of 'in_use'.
Signed-off-by: Tony Krowiak akrowiak@linux.ibm.com Acked-by: Halil Pasic pasic@linux.ibm.com Acked-by: Harald Freudenberger freude@linux.ibm.com Link: https://lore.kernel.org/r/20231108201135.351419-1-akrowiak@linux.ibm.com Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/s390/crypto/vfio_ap_ops.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 4db538a55192..6e0a79086656 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1976,6 +1976,7 @@ static ssize_t status_show(struct device *dev, { ssize_t nchars = 0; struct vfio_ap_queue *q; + unsigned long apid, apqi; struct ap_matrix_mdev *matrix_mdev; struct ap_device *apdev = to_ap_dev(dev);
@@ -1983,8 +1984,21 @@ static ssize_t status_show(struct device *dev, q = dev_get_drvdata(&apdev->device); matrix_mdev = vfio_ap_mdev_for_queue(q);
+ /* If the queue is assigned to the matrix mediated device, then + * determine whether it is passed through to a guest; otherwise, + * indicate that it is unassigned. + */ if (matrix_mdev) { - if (matrix_mdev->kvm) + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); + /* + * If the queue is passed through to the guest, then indicate + * that it is in use; otherwise, indicate that it is + * merely assigned to a matrix mediated device. + */ + if (matrix_mdev->kvm && + test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && + test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) nchars = scnprintf(buf, PAGE_SIZE, "%s\n", AP_QUEUE_IN_USE); else
From: Linus Walleij linus.walleij@linaro.org
[ Upstream commit d6e81532b10d8deb2bc30f7b44f09534876893e3 ]
Making virt_to_pfn() a static inline taking a strongly typed (const void *) makes the contract of passing a pointer of that type to the function explicit and exposes any misuse of the macro virt_to_pfn() acting polymorphic and accepting many types such as (void *), (uintptr_t) or (unsigned long) as arguments without warnings.
For symmetry do the same with pfn_to_virt().
For compile-time resolution of __pa() we need PAGE_OFFSET, which was not available to __pa() and was resolved by the preprocessor wherever __pa() was used. Fix this by explicitly including <asm/mem-layout.h> where required, following the pattern of the architecture's page.h file.
Acked-by: Brian Cain bcain@quicinc.com Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/hexagon/include/asm/page.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index 9c03b9965f07..10f1bc07423c 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -78,6 +78,9 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) })
+/* Needed for PAGE_OFFSET used in the macro right below */ +#include <asm/mem-layout.h> + /* * We need a __pa and a __va routine for kernel space. * MIPS says they're only used during mem_init. @@ -125,8 +128,16 @@ static inline void clear_page(void *page) */ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __pa(kaddr) >> PAGE_SHIFT; +} + +static inline void *pfn_to_virt(unsigned long pfn) +{ + return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT); +} +
#define page_to_virt(page) __va(page_to_phys(page))
From: Heiko Carstens hca@linux.ibm.com
[ Upstream commit 8b13601d19c541158a6e18b278c00ba69ae37829 ]
If the content of the floating point control (fpc) register of a traced process is modified with the ptrace interface the new value is tested for validity by temporarily loading it into the fpc register.
This may lead to corruption of the fpc register of the tracing process: if an interrupt happens while the value is temporarily loaded into the fpc register, and within interrupt context floating point or vector registers are used, the current fp/vx registers are saved with save_fpu_regs() assuming they belong to user space and will be loaded into fp/vx registers when returning to user space.
test_fp_ctl() restores the original user space fpc register value, however it will be discarded, when returning to user space.
As a result, the tracer will incorrectly continue to run with the value that was supposed to be used for the traced process.
Fix this by saving fpu register contents with save_fpu_regs() before using test_fp_ctl().
Reviewed-by: Claudio Imbrenda imbrenda@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kernel/ptrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ea244a73efad..512b81473759 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -385,6 +385,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if ((unsigned int) data != 0 || test_fp_ctl(data >> (BITS_PER_LONG - 32))) return -EINVAL; @@ -741,6 +742,7 @@ static int __poke_user_compat(struct task_struct *child, /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if (test_fp_ctl(tmp)) return -EINVAL; child->thread.fpu.fpc = data; @@ -904,9 +906,7 @@ static int s390_fpregs_set(struct task_struct *target, int rc = 0; freg_t fprs[__NUM_FPRS];
- if (target == current) - save_fpu_regs(); - + save_fpu_regs(); if (MACHINE_HAS_VX) convert_vx_to_fp(fprs, target->thread.fpu.vxrs); else
From: Heiko Carstens hca@linux.ibm.com
[ Upstream commit b988b1bb0053c0dcd26187d29ef07566a565cf55 ]
kvm_arch_vcpu_ioctl_set_fpu() allows setting the floating point control (fpc) register of a guest cpu. The new value is tested for validity by temporarily loading it into the fpc register.
This may lead to corruption of the fpc register of the host process: if an interrupt happens while the value is temporarily loaded into the fpc register, and within interrupt context floating point or vector registers are used, the current fp/vx registers are saved with save_fpu_regs() assuming they belong to user space and will be loaded into fp/vx registers when returning to user space.
test_fp_ctl() restores the original user space / host process fpc register value, however it will be discarded, when returning to user space.
As a result, the host process will incorrectly continue to run with the value that was supposed to be used for a guest cpu.
Fix this by simply removing the test. There is another test right before the SIE context is entered which will handle invalid values.
This results in a change of behaviour: invalid values will now be accepted instead of the ioctl failing with -EINVAL. This seems to be acceptable, given that this interface is most likely not used anymore, and it is, in addition, the same behaviour as implemented with the memory mapped interface (replace invalid values with zero) - see sync_regs() in kvm-s390.c.
Reviewed-by: Christian Borntraeger borntraeger@linux.ibm.com Reviewed-by: Claudio Imbrenda imbrenda@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/kvm/kvm-s390.c | 5 ----- 1 file changed, 5 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b3f17e014cab..49cce436444e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4307,10 +4307,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
vcpu_load(vcpu);
- if (test_fp_ctl(fpu->fpc)) { - ret = -EINVAL; - goto out; - } vcpu->run->s.regs.fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, @@ -4318,7 +4314,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) else memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-out: vcpu_put(vcpu); return ret; }
From: Kent Overstreet kent.overstreet@linux.dev
[ Upstream commit 04bc786d663543512d08f1b86c7bcefb5144afe3 ]
Replace linux/percpu.h include with asm/percpu.h to avoid circular dependency.
Signed-off-by: Kent Overstreet kent.overstreet@linux.dev Signed-off-by: Suren Baghdasaryan surenb@google.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/spectre.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 9cc501450486..75e837753772 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -13,8 +13,8 @@ #define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
#ifndef __ASSEMBLY__ - -#include <linux/percpu.h> +#include <linux/smp.h> +#include <asm/percpu.h>
#include <asm/cpufeature.h> #include <asm/virt.h>
From: Joel Granados j.granados@samsung.com
[ Upstream commit 315552310c7de92baea4e570967066569937a843 ]
When registering tables to the sysctl subsystem there is a check to see if header is a permanently empty directory (used for mounts). This check evaluates the first element of the ctl_table. This results in an out of bounds evaluation when registering empty directories.
The function register_sysctl_mount_point now passes a ctl_table of size 1 instead of size 0. It now relies solely on the type to identify a permanently empty register.
Make sure that the ctl_table has at least one element before testing for permanent emptiness.
Signed-off-by: Joel Granados j.granados@samsung.com Reported-by: kernel test robot oliver.sang@intel.com Closes: https://lore.kernel.org/oe-lkp/202311201431.57aae8f3-oliver.sang@intel.com Signed-off-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/proc/proc_sysctl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index de484195f49f..5b5cdc747cef 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -44,7 +44,7 @@ static struct ctl_table sysctl_mount_point[] = { */ struct ctl_table_header *register_sysctl_mount_point(const char *path) { - return register_sysctl_sz(path, sysctl_mount_point, 0); + return register_sysctl(path, sysctl_mount_point); } EXPORT_SYMBOL(register_sysctl_mount_point);
@@ -233,7 +233,8 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) return -EROFS;
/* Am I creating a permanently empty directory? */ - if (sysctl_is_perm_empty_ctl_table(header->ctl_table)) { + if (header->ctl_table_size > 0 && + sysctl_is_perm_empty_ctl_table(header->ctl_table)) { if (!RB_EMPTY_ROOT(&dir->root)) return -EINVAL; sysctl_set_perm_empty_ctl_header(dir_h); @@ -1213,6 +1214,10 @@ static bool get_links(struct ctl_dir *dir, struct ctl_table_header *tmp_head; struct ctl_table *entry, *link;
+ if (header->ctl_table_size == 0 || + sysctl_is_perm_empty_ctl_table(header->ctl_table)) + return true; + /* Are there links available for every entry in table? */ list_for_each_table_entry(entry, header) { const char *procname = entry->procname;
From: Anna Schumaker Anna.Schumaker@Netapp.com
[ Upstream commit 31b62908693c90d4d07db597e685d9f25a120073 ]
I received the following warning while running cthon against an ontap server running pNFS:
[ 57.202521] ============================= [ 57.202522] WARNING: suspicious RCU usage [ 57.202523] 6.7.0-rc3-g2cc14f52aeb7 #41492 Not tainted [ 57.202525] ----------------------------- [ 57.202525] net/sunrpc/xprtmultipath.c:349 RCU-list traversed in non-reader section!! [ 57.202527] other info that might help us debug this:
[ 57.202528] rcu_scheduler_active = 2, debug_locks = 1 [ 57.202529] no locks held by test5/3567. [ 57.202530] stack backtrace: [ 57.202532] CPU: 0 PID: 3567 Comm: test5 Not tainted 6.7.0-rc3-g2cc14f52aeb7 #41492 5b09971b4965c0aceba19f3eea324a4a806e227e [ 57.202534] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022 [ 57.202536] Call Trace: [ 57.202537] <TASK> [ 57.202540] dump_stack_lvl+0x77/0xb0 [ 57.202551] lockdep_rcu_suspicious+0x154/0x1a0 [ 57.202556] rpc_xprt_switch_has_addr+0x17c/0x190 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202596] rpc_clnt_setup_test_and_add_xprt+0x50/0x180 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202621] ? rpc_clnt_add_xprt+0x254/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202646] rpc_clnt_add_xprt+0x27a/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202671] ? __pfx_rpc_clnt_setup_test_and_add_xprt+0x10/0x10 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202696] nfs4_pnfs_ds_connect+0x345/0x760 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202728] ? __pfx_nfs4_test_session_trunk+0x10/0x10 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202754] nfs4_fl_prepare_ds+0x75/0xc0 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a] [ 57.202760] filelayout_write_pagelist+0x4a/0x200 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a] [ 57.202765] pnfs_generic_pg_writepages+0xbe/0x230 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202788] __nfs_pageio_add_request+0x3fd/0x520 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202813] nfs_pageio_add_request+0x18b/0x390 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202831] nfs_do_writepage+0x116/0x1e0 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202849] nfs_writepages_callback+0x13/0x30 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202866] write_cache_pages+0x265/0x450 [ 57.202870] ? 
__pfx_nfs_writepages_callback+0x10/0x10 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202891] nfs_writepages+0x141/0x230 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202913] do_writepages+0xd2/0x230 [ 57.202917] ? filemap_fdatawrite_wbc+0x5c/0x80 [ 57.202921] filemap_fdatawrite_wbc+0x67/0x80 [ 57.202924] filemap_write_and_wait_range+0xd9/0x170 [ 57.202930] nfs_wb_all+0x49/0x180 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202947] nfs4_file_flush+0x72/0xb0 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202969] __se_sys_close+0x46/0xd0 [ 57.202972] do_syscall_64+0x68/0x100 [ 57.202975] ? do_syscall_64+0x77/0x100 [ 57.202976] ? do_syscall_64+0x77/0x100 [ 57.202979] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 57.202982] RIP: 0033:0x7fe2b12e4a94 [ 57.202985] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 18 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 c3 0f 1f 00 48 83 ec 18 89 7c 24 0c e8 c3 [ 57.202987] RSP: 002b:00007ffe857ddb38 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 [ 57.202989] RAX: ffffffffffffffda RBX: 00007ffe857dfd68 RCX: 00007fe2b12e4a94 [ 57.202991] RDX: 0000000000002000 RSI: 00007ffe857ddc40 RDI: 0000000000000003 [ 57.202992] RBP: 00007ffe857dfc50 R08: 7fffffffffffffff R09: 0000000065650f49 [ 57.202993] R10: 00007fe2b11f8300 R11: 0000000000000202 R12: 0000000000000000 [ 57.202994] R13: 00007ffe857dfd80 R14: 00007fe2b1445000 R15: 0000000000000000 [ 57.202999] </TASK>
The problem seems to be that two out of three callers aren't taking the rcu_read_lock() before calling the list_for_each_entry_rcu() function in rpc_xprt_switch_has_addr(). I fix this by having rpc_xprt_switch_has_addr() unconditionally take the rcu_read_lock(), which is okay to do recursively in the case that the lock has already been taken by a caller.
Reviewed-by: Jeff Layton jlayton@kernel.org Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/sunrpc/xprtmultipath.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 701250b305db..0706575d9392 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) xprt_switch_find_current_entry_offline); }
-bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, - const struct sockaddr *sap) +static +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; @@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, return false; }
+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + bool res; + + rcu_read_lock(); + res = __rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + + return res; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active)
From: Ojaswin Mujoo ojaswin@linux.ibm.com
[ Upstream commit e89fdcc425b6feea4dfb33877e9256757905d763 ]
dioread_nolock was originally disabled as a default option for bs < ps scenarios due to a data corruption issue. Since then, we've had some fixes in this area which address such issues. Enable dioread_nolock by default and remove the experimental warning message for bs < ps path.
dioread for bs < ps has been tested on a 64k pagesize machine using:
kvm-xfstest -C 3 -g auto
with the following configs:
64k adv bigalloc_4k bigalloc_64k data_journal encrypt dioread_nolock dioread_nolock_4k ext3 ext3conv nojournal
And no new regressions were seen compared to baseline kernel.
Suggested-by: Ritesh Harjani (IBM) ritesh.list@gmail.com Signed-off-by: Ojaswin Mujoo ojaswin@linux.ibm.com Reviewed-by: Ritesh Harjani (IBM) ritesh.list@gmail.com Link: https://lore.kernel.org/r/20231101154717.531865-1-ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/super.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d062383ea50e..99c8ae97112c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2785,15 +2785,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc, return -EINVAL; }
- if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) { - int blocksize = - BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (blocksize < PAGE_SIZE) - ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an " - "experimental mount option 'dioread_nolock' " - "for blocksize < PAGE_SIZE"); - } - err = ext4_check_test_dummy_encryption(fc, sb); if (err) return err; @@ -4402,7 +4393,7 @@ static void ext4_set_def_opts(struct super_block *sb, ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) set_opt(sb, DELALLOC);
- if (sb->s_blocksize == PAGE_SIZE) + if (sb->s_blocksize <= PAGE_SIZE) set_opt(sb, DIOREAD_NOLOCK); }
From: Ojaswin Mujoo ojaswin@linux.ibm.com
[ Upstream commit 92573369144f40397e8514440afdf59f24905b40 ]
The call to filemap_write_and_wait_range() assumes the range passed to be inclusive, so fix the call to make sure we follow that.
Signed-off-by: Ojaswin Mujoo ojaswin@linux.ibm.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/e503107a7c73a2b68dec645c5ad798c437717c45.169885630... Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/extents.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4d8496d1a8ac..4c3e2f38349d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4522,7 +4522,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, * Round up offset. This is not fallocate, we need to zero out * blocks, so convert interior block aligned part of the range to * unwritten and possibly manually zero out unaligned parts of the - * range. + * range. Here, start and partial_begin are inclusive, end and + * partial_end are exclusive. */ start = round_up(offset, 1 << blkbits); end = round_down((offset + len), 1 << blkbits); @@ -4608,7 +4609,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, * disk in case of crash before zeroing trans is committed. */ if (ext4_should_journal_data(inode)) { - ret = filemap_write_and_wait_range(mapping, start, end); + ret = filemap_write_and_wait_range(mapping, start, + end - 1); if (ret) { filemap_invalidate_unlock(mapping); goto out_mutex;
From: Pierre Mariani pierre.mariani@gmail.com
[ Upstream commit 0108ce08aed195d200ffbad74c1948bbaefe6625 ]
Fixes no-op checkpatch errors and warnings.
Signed-off-by: Pierre Mariani pierre.mariani@gmail.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/smb/client/connect.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 76ccbdba5855..a763097f0af3 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -455,6 +455,7 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_ static int reconnect_dfs_server(struct TCP_Server_Info *server) { struct dfs_cache_tgt_iterator *target_hint = NULL; + DFS_CACHE_TGT_LIST(tl); int num_targets = 0; int rc = 0; @@ -717,6 +718,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, { struct msghdr smb_msg = {}; struct kvec iov = {.iov_base = buf, .iov_len = to_read}; + iov_iter_kvec(&smb_msg.msg_iter, ITER_DEST, &iov, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg); @@ -1372,11 +1374,13 @@ cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs) case AF_INET: { struct sockaddr_in *saddr4 = (struct sockaddr_in *)srcaddr; struct sockaddr_in *vaddr4 = (struct sockaddr_in *)rhs; + return (saddr4->sin_addr.s_addr == vaddr4->sin_addr.s_addr); } case AF_INET6: { struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs; + return (ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr) && saddr6->sin6_scope_id == vaddr6->sin6_scope_id); } @@ -2576,8 +2580,8 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) rc = -EOPNOTSUPP; goto out_fail; } else { - cifs_dbg(VFS, "Check vers= mount option. SMB3.11 " - "disabled but required for POSIX extensions\n"); + cifs_dbg(VFS, + "Check vers= mount option. SMB3.11 disabled but required for POSIX extensions\n"); rc = -EOPNOTSUPP; goto out_fail; } @@ -2720,7 +2724,6 @@ cifs_put_tlink(struct tcon_link *tlink) if (!IS_ERR(tlink_tcon(tlink))) cifs_put_tcon(tlink_tcon(tlink)); kfree(tlink); - return; }
static int @@ -2861,6 +2864,7 @@ static inline void cifs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; + BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, "slock-AF_INET-CIFS", &cifs_slock_key[0], "sk_lock-AF_INET-CIFS", &cifs_key[0]); @@ -2870,6 +2874,7 @@ static inline void cifs_reclassify_socket6(struct socket *sock) { struct sock *sk = sock->sk; + BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, "slock-AF_INET6-CIFS", &cifs_slock_key[1], "sk_lock-AF_INET6-CIFS", &cifs_key[1]); @@ -2904,15 +2909,18 @@ static int bind_socket(struct TCP_Server_Info *server) { int rc = 0; + if (server->srcaddr.ss_family != AF_UNSPEC) { /* Bind to the specified local IP address */ struct socket *socket = server->ssocket; + rc = kernel_bind(socket, (struct sockaddr *) &server->srcaddr, sizeof(server->srcaddr)); if (rc < 0) { struct sockaddr_in *saddr4; struct sockaddr_in6 *saddr6; + saddr4 = (struct sockaddr_in *)&server->srcaddr; saddr6 = (struct sockaddr_in6 *)&server->srcaddr; if (saddr6->sin6_family == AF_INET6) @@ -3142,6 +3150,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
if (!CIFSSMBQFSUnixInfo(xid, tcon)) { __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); + cifs_dbg(FYI, "unix caps which server supports %lld\n", cap); /* * check for reconnect case in which we do not @@ -3645,7 +3654,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, smb_buffer_response = smb_buffer;
header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, - NULL /*no tid */ , 4 /*wct */ ); + NULL /*no tid */, 4 /*wct */);
smb_buffer->Mid = get_next_mid(ses->server); smb_buffer->Uid = ses->Suid; @@ -3664,12 +3673,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, if (ses->server->sign) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
- if (ses->capabilities & CAP_STATUS32) { + if (ses->capabilities & CAP_STATUS32) smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS; - } - if (ses->capabilities & CAP_DFS) { + + if (ses->capabilities & CAP_DFS) smb_buffer->Flags2 |= SMBFLG2_DFS; - } + if (ses->capabilities & CAP_UNICODE) { smb_buffer->Flags2 |= SMBFLG2_UNICODE; length =
From: Paulo Alcantara pc@manguebit.com
[ Upstream commit 7435d51b7ea2ab7801279c43ecd72063e9d5c92f ]
The client was sending an SMB2_CREATE request without setting OPEN_REPARSE_POINT flag thus failing the entire rename operation.
Fix this by setting OPEN_REPARSE_POINT in create options for SMB2_CREATE request when the source inode is a reparse point.
Signed-off-by: Paulo Alcantara (SUSE) pc@manguebit.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/smb/client/cifsglob.h | 22 +++++++++++++++++----- fs/smb/client/cifsproto.h | 7 ++++--- fs/smb/client/cifssmb.c | 8 ++++---- fs/smb/client/inode.c | 3 ++- fs/smb/client/smb2inode.c | 38 +++++++++++++++++++++++--------------- fs/smb/client/smb2proto.h | 8 +++++--- 6 files changed, 55 insertions(+), 31 deletions(-)
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 4eac7dcb82f9..3650094590b3 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -205,9 +205,18 @@ struct cifs_open_info_data { }; };
-#define cifs_open_data_reparse(d) \ - ((d)->reparse_point || \ - (le32_to_cpu((d)->fi.Attributes) & ATTR_REPARSE)) +static inline bool cifs_open_data_reparse(struct cifs_open_info_data *data) +{ + struct smb2_file_all_info *fi = &data->fi; + u32 attrs = le32_to_cpu(fi->Attributes); + bool ret; + + ret = data->reparse_point || (attrs & ATTR_REPARSE); + if (ret) + attrs |= ATTR_REPARSE; + fi->Attributes = cpu_to_le32(attrs); + return ret; +}
static inline void cifs_free_open_info(struct cifs_open_info_data *data) { @@ -390,8 +399,11 @@ struct smb_version_operations { int (*rename_pending_delete)(const char *, struct dentry *, const unsigned int); /* send rename request */ - int (*rename)(const unsigned int, struct cifs_tcon *, const char *, - const char *, struct cifs_sb_info *); + int (*rename)(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); /* send create hardlink request */ int (*create_hardlink)(const unsigned int, struct cifs_tcon *, const char *, const char *, diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c858feaf4f92..35b58c2d81dd 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -434,9 +434,10 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, int remap_special_chars); extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); -extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb); +int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, int netfid, const char *target_name, const struct nls_table *nls_codepage, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index bad91ba6c3a9..43a90e646a7a 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2147,10 +2147,10 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) return rc; }
-int -CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb) +int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { int rc = 0; RENAME_REQ *pSMB = NULL; diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 09c5c0f5c96e..eb54e4893777 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2219,7 +2219,8 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, return -ENOSYS;
/* try path-based rename first */ - rc = server->ops->rename(xid, tcon, from_path, to_path, cifs_sb); + rc = server->ops->rename(xid, tcon, from_dentry, + from_path, to_path, cifs_sb);
/* * Don't bother with rename by filehandle unless file is busy and diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index c94940af5d4b..c3e28673e0cd 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -781,11 +781,11 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL, NULL, NULL); }
-static int -smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb, __u32 access, int command, - struct cifsFileInfo *cfile) +static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb, + __u32 create_options, __u32 access, + int command, struct cifsFileInfo *cfile) { __le16 *smb2_to_name = NULL; int rc; @@ -796,25 +796,33 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, goto smb2_rename_path; } rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, - FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name, + FILE_OPEN, create_options, ACL_NO_MODE, smb2_to_name, command, cfile, NULL, NULL, NULL, NULL); smb2_rename_path: kfree(smb2_to_name); return rc; }
-int -smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb) +int smb2_rename_path(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { + struct cifsInodeInfo *ci; struct cifsFileInfo *cfile; + __u32 co = 0;
+ if (source_dentry) { + ci = CIFS_I(d_inode(source_dentry)); + if (ci->cifsAttrs & ATTR_REPARSE) + co |= OPEN_REPARSE_POINT; + } drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb); cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile);
- return smb2_set_path_attr(xid, tcon, from_name, to_name, - cifs_sb, DELETE, SMB2_OP_RENAME, cfile); + return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, + co, DELETE, SMB2_OP_RENAME, cfile); }
int @@ -822,9 +830,9 @@ smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb) { - return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, - FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK, - NULL); + return smb2_set_path_attr(xid, tcon, from_name, to_name, + cifs_sb, 0, FILE_READ_ATTRIBUTES, + SMB2_OP_HARDLINK, NULL); }
int diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 0e371f7e2854..7cbf1a76b42d 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -80,9 +80,11 @@ extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); -extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb); +int smb2_rename_path(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb);
From: Paulo Alcantara pc@manguebit.com
[ Upstream commit 5408990aa662bcfd6ba894734023a023a16e8729 ]
The client was sending an SMB2_CREATE request without setting OPEN_REPARSE_POINT flag thus failing the entire hardlink operation.
Fix this by setting OPEN_REPARSE_POINT in create options for SMB2_CREATE request when the source inode is a reparse point.
Signed-off-by: Paulo Alcantara (SUSE) pc@manguebit.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/smb/client/cifsglob.h | 8 +++++--- fs/smb/client/cifsproto.h | 8 +++++--- fs/smb/client/cifssmb.c | 9 +++++---- fs/smb/client/link.c | 4 ++-- fs/smb/client/smb2inode.c | 33 +++++++++++++++++++++------------ fs/smb/client/smb2proto.h | 8 +++++--- 6 files changed, 43 insertions(+), 27 deletions(-)
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 3650094590b3..50bed92f01a4 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -405,9 +405,11 @@ struct smb_version_operations { const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* send create hardlink request */ - int (*create_hardlink)(const unsigned int, struct cifs_tcon *, - const char *, const char *, - struct cifs_sb_info *); + int (*create_hardlink)(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); /* query symlink target */ int (*query_symlink)(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 35b58c2d81dd..a5ebcc310874 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -442,9 +442,11 @@ extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, int netfid, const char *target_name, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb); +int CIFSCreateHardLink(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 43a90e646a7a..5331fda8b013 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2528,10 +2528,11 @@ CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, return rc; }
-int -CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb) +int CIFSCreateHardLink(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { int rc = 0; NT_RENAME_REQ *pSMB = NULL; diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index c66be4904e1f..6c4ae52ddc04 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -522,8 +522,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, rc = -ENOSYS; goto cifs_hl_exit; } - rc = server->ops->create_hardlink(xid, tcon, from_name, to_name, - cifs_sb); + rc = server->ops->create_hardlink(xid, tcon, old_file, + from_name, to_name, cifs_sb); if ((rc == -EIO) || (rc == -EINVAL)) rc = -EOPNOTSUPP; } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index c3e28673e0cd..6cac0b107a2d 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -35,6 +35,18 @@ free_set_inf_compound(struct smb_rqst *rqst) SMB2_close_free(&rqst[2]); }
+static inline __u32 file_create_options(struct dentry *dentry) +{ + struct cifsInodeInfo *ci; + + if (dentry) { + ci = CIFS_I(d_inode(dentry)); + if (ci->cifsAttrs & ATTR_REPARSE) + return OPEN_REPARSE_POINT; + } + return 0; +} + /* * note: If cfile is passed, the reference to it is dropped here. * So make sure that you do not reuse cfile after return from this func. @@ -809,15 +821,9 @@ int smb2_rename_path(const unsigned int xid, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb) { - struct cifsInodeInfo *ci; struct cifsFileInfo *cfile; - __u32 co = 0; + __u32 co = file_create_options(source_dentry);
- if (source_dentry) { - ci = CIFS_I(d_inode(source_dentry)); - if (ci->cifsAttrs & ATTR_REPARSE) - co |= OPEN_REPARSE_POINT; - } drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb); cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile);
@@ -825,13 +831,16 @@ int smb2_rename_path(const unsigned int xid, co, DELETE, SMB2_OP_RENAME, cfile); }
-int -smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb) +int smb2_create_hardlink(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb) { + __u32 co = file_create_options(source_dentry); + return smb2_set_path_attr(xid, tcon, from_name, to_name, - cifs_sb, 0, FILE_READ_ATTRIBUTES, + cifs_sb, co, FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK, NULL); }
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 7cbf1a76b42d..a8084ce7fcbd 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -85,9 +85,11 @@ int smb2_rename_path(const unsigned int xid, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); -extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, - const char *from_name, const char *to_name, - struct cifs_sb_info *cifs_sb); +int smb2_create_hardlink(const unsigned int xid, + struct cifs_tcon *tcon, + struct dentry *source_dentry, + const char *from_name, const char *to_name, + struct cifs_sb_info *cifs_sb); extern int smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_written);
From: Gabriel Krisman Bertazi krisman@suse.de
[ Upstream commit cd72c7ef5fed44272272a105b1da22810c91be69 ]
Even though it seems to be able to resolve some names of case-insensitive directories, the lack of d_hash and d_compare means we end up with a broken state in the d_cache. Considering it was never a goal to support these two together, and we are preparing to use d_revalidate in case-insensitive filesystems, which would make the combination even more broken, reject any attempt to get a casefolded inode from ecryptfs.
Signed-off-by: Gabriel Krisman Bertazi krisman@suse.de Reviewed-by: Eric Biggers ebiggers@google.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ecryptfs/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5ab4b87888a7..795e9fe2f721 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -78,6 +78,14 @@ static struct inode *__ecryptfs_get_inode(struct inode *lower_inode,
if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) return ERR_PTR(-EXDEV); + + /* Reject dealing with casefold directories. */ + if (IS_CASEFOLDED(lower_inode)) { + pr_err_ratelimited("%s: Can't handle casefolded directory.\n", + __func__); + return ERR_PTR(-EREMOTE); + } + if (!igrab(lower_inode)) return ERR_PTR(-ESTALE); inode = iget5_locked(sb, (unsigned long)lower_inode,
From: Ye Bin yebin10@huawei.com
[ Upstream commit 68da4c44b994aea797eb9821acb3a4a36015293e ]
Suppose we issue two FITRIM ioctls for ranges [0,15] and [16,31] with minimum length of trimmed range set to 8 blocks. If we have, say, a range of blocks 10-22 free, this range will not be trimmed because it straddles the boundary of the two FITRIM ranges and neither part is big enough. This is a bit surprising to some users that call FITRIM on smaller ranges of blocks to limit impact on the system. Also XFS trims all free space extents that overlap with the specified range so we are inconsistent among filesystems. Let's change ext4_try_to_trim_range() to consider for trimming the whole free space extent that straddles the end of specified range, not just the part of it within the range.
Signed-off-by: Ye Bin yebin10@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20231216010919.1995851-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/mballoc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d70f7a06bab4..7de7e6bea292 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6905,13 +6905,15 @@ static int ext4_try_to_trim_range(struct super_block *sb, __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { - ext4_grpblk_t next, count, free_count; + ext4_grpblk_t next, count, free_count, last, origin_start; bool set_trimmed = false; void *bitmap;
+ last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; - if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + if (start == 0 && max >= last) set_trimmed = true; + origin_start = start; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -6920,7 +6922,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) start = mb_find_next_zero_bit(bitmap, max + 1, start); if (start > max) break; - next = mb_find_next_bit(bitmap, max + 1, start); + + next = mb_find_next_bit(bitmap, last + 1, start); + if (origin_start == 0 && next >= last) + set_trimmed = true;
if ((next - start) >= minblocks) { int ret = ext4_trim_extent(sb, start, next - start, e4b);
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 658a52344fb139f9531e7543a6e0015b630feb38 ]
The maximum value of flexbg_size is 2^31, but the maximum value of int is (2^31 - 1), so overflow may occur when the type of flexbg_size is declared as int.
For example, when uninit_mask is initialized in ext4_alloc_group_tables(), if flexbg_size == 2^31, the initialized uninit_mask is incorrect, and this may cause set_flexbg_block_bitmap() to trigger a BUG_ON().
Therefore, the flexbg_size type is declared as unsigned int to avoid overflow and memory waste.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20231023013057.2117948-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/resize.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 667381180b26..f3a9b97bb7e7 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -241,7 +241,7 @@ struct ext4_new_flex_group_data { * * Returns NULL on failure otherwise address of the allocated structure. */ -static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) +static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) { struct ext4_new_flex_group_data *flex_gd;
@@ -296,7 +296,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) */ static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - int flexbg_size) + unsigned int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t start_blk; @@ -397,12 +397,12 @@ static int ext4_alloc_group_tables(struct super_block *sb, group = group_data[0].group;
printk(KERN_DEBUG "EXT4-fs: adding a flex group with " - "%d groups, flexbg size is %d:\n", flex_gd->count, + "%u groups, flexbg size is %u:\n", flex_gd->count, flexbg_size);
for (i = 0; i < flex_gd->count; i++) { ext4_debug( - "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n", + "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n", ext4_bg_has_super(sb, group + i) ? "normal" : "no-super", group + i, group_data[i].blocks_count, @@ -1623,7 +1623,7 @@ static int ext4_flex_group_add(struct super_block *sb, static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, ext4_fsblk_t n_blocks_count, - unsigned long flexbg_size) + unsigned int flexbg_size) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -2007,8 +2007,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; unsigned long last_update_time = 0; - int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int err = 0; int meta_bg; + unsigned int flexbg_size = ext4_flex_bg_size(sbi);
/* See if the device is actually as big as what was requested */ bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
From: Baokun Li libaokun1@huawei.com
[ Upstream commit b099eb87de105cf07cad731ded6fb40b2675108b ]
In commit 967ac8af4475 ("ext4: fix potential integer overflow in alloc_flex_gd()"), an overflow check is added to alloc_flex_gd() to prevent the allocated memory from being smaller than expected due to the overflow. However, after kmalloc() is replaced with kmalloc_array() in commit 6da2ec56059c ("treewide: kmalloc() -> kmalloc_array()"), the kmalloc_array() function has an overflow check, so the above problem will not occur. Therefore, the extra check is removed.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20231023013057.2117948-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/resize.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f3a9b97bb7e7..df47b269efce 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -249,10 +249,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) if (flex_gd == NULL) goto out3;
- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) - goto out2; flex_gd->count = flexbg_size; - flex_gd->groups = kmalloc_array(flexbg_size, sizeof(struct ext4_new_group_data), GFP_NOFS);
From: Baokun Li libaokun1@huawei.com
[ Upstream commit 5d1935ac02ca5aee364a449a35e2977ea84509b0 ]
When we online resize an ext4 filesystem with an oversized flexbg_size,
mkfs.ext4 -F -G 67108864 $dev -b 4096 100M mount $dev $dir resize2fs $dev 16G
the following WARN_ON is triggered: ================================================================== WARNING: CPU: 0 PID: 427 at mm/page_alloc.c:4402 __alloc_pages+0x411/0x550 Modules linked in: sg(E) CPU: 0 PID: 427 Comm: resize2fs Tainted: G E 6.6.0-rc5+ #314 RIP: 0010:__alloc_pages+0x411/0x550 Call Trace: <TASK> __kmalloc_large_node+0xa2/0x200 __kmalloc+0x16e/0x290 ext4_resize_fs+0x481/0xd80 __ext4_ioctl+0x1616/0x1d90 ext4_ioctl+0x12/0x20 __x64_sys_ioctl+0xf0/0x150 do_syscall_64+0x3b/0x90 ==================================================================
This is because flexbg_size is too large and the size of the new_group_data array to be allocated exceeds MAX_ORDER. Currently, the minimum value of MAX_ORDER is 8, the minimum value of PAGE_SIZE is 4096, the corresponding maximum number of groups that can be allocated is:
(PAGE_SIZE << MAX_ORDER) / sizeof(struct ext4_new_group_data) ≈ 21845
And the value that is down-aligned to the power of 2 is 16384. Therefore, this value is defined as MAX_RESIZE_BG, and the number of groups added each time does not exceed this value during resizing, and is added multiple times to complete the online resizing. The difference is that the metadata in a flex_bg may be more dispersed.
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20231023013057.2117948-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ext4/resize.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index df47b269efce..be280268da9f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -231,10 +231,17 @@ struct ext4_new_flex_group_data { in the flex group */ __u16 *bg_flags; /* block group flags of groups in @groups */ + ext4_group_t resize_bg; /* number of allocated + new_group_data */ ext4_group_t count; /* number of groups in @groups */ };
+/* + * Avoiding memory allocation failures due to too many groups added each time. + */ +#define MAX_RESIZE_BG 16384 + /* * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of * @flexbg_size. @@ -249,14 +256,18 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) if (flex_gd == NULL) goto out3;
- flex_gd->count = flexbg_size; - flex_gd->groups = kmalloc_array(flexbg_size, + if (unlikely(flexbg_size > MAX_RESIZE_BG)) + flex_gd->resize_bg = MAX_RESIZE_BG; + else + flex_gd->resize_bg = flexbg_size; + + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); if (flex_gd->groups == NULL) goto out2;
- flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16), + flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16), GFP_NOFS); if (flex_gd->bg_flags == NULL) goto out1; @@ -1619,8 +1630,7 @@ static int ext4_flex_group_add(struct super_block *sb,
static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - ext4_fsblk_t n_blocks_count, - unsigned int flexbg_size) + ext4_fsblk_t n_blocks_count) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -1644,7 +1654,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, BUG_ON(last); ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
- last_group = group | (flexbg_size - 1); + last_group = group | (flex_gd->resize_bg - 1); if (last_group > n_group) last_group = n_group;
@@ -2147,8 +2157,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) /* Add flex groups. Note that a regular group is a * flex group with 1 group. */ - while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, - flexbg_size)) { + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) { if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO,
linux-stable-mirror@lists.linaro.org