The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 73b42dc69be8564d4951a14d00f827929fe5ef79
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024100123-unreached-enrage-2cb1@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
73b42dc69be8 ("KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)")
d33234342f8b ("KVM: x86: Move x2APIC ICR helper above kvm_apic_write_nodecode()")
71bf395a276f ("KVM: x86: Enforce x2APIC's must-be-zero reserved ICR bits")
4b7c3f6d04bd ("KVM: x86: Make x2APIC ID 100% readonly")
c7d4c5f01961 ("KVM: x86: Drop unused check_apicv_inhibit_reasons() callback definition")
5f18c642ff7e ("KVM: VMX: Move out vmx_x86_ops to 'main.c' to dispatch VMX and TDX")
0ec3d6d1f169 ("KVM: x86: Fully defer to vendor code to decide how to force immediate exit")
bf1a49436ea3 ("KVM: x86: Move handling of is_guest_mode() into fastpath exit handlers")
11776aa0cfa7 ("KVM: VMX: Handle forced exit due to preemption timer in fastpath")
e6b5d16bbd2d ("KVM: VMX: Re-enter guest in fastpath for "spurious" preemption timer exits")
9c9025ea003a ("KVM: x86: Plumb "force_immediate_exit" into kvm_entry() tracepoint")
8ecb10bcbfa3 ("Merge tag 'kvm-x86-lam-6.8' of https://github.com/kvm-x86/linux into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 73b42dc69be8564d4951a14d00f827929fe5ef79 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 19 Jul 2024 16:51:00 -0700
Subject: [PATCH] KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)
Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's
IPI virtualization support, but only for AMD. While not stated anywhere
in the APM, despite stating the ICR is a single 64-bit register, AMD CPUs
store the 64-bit ICR as two separate 32-bit values in ICR and ICR2. When
IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled,
KVM needs to match CPU behavior as some ICR ICR writes will be handled by
the CPU, not by KVM.
Add a kvm_x86_ops knob to control the underlying format used by the CPU to
store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether
or not x2AVIC is enabled. If KVM is handling all ICR writes, the storage
format for x2APIC mode doesn't matter, and having the behavior follow AMD
versus Intel will provide better test coverage and ease debugging.
Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
Cc: stable(a)vger.kernel.org
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 95396e4cb3da..f9dfb2d62053 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1727,6 +1727,8 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+
+ const bool x2apic_icr_is_split;
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 63be07d7c782..c7180cb5f464 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2471,11 +2471,25 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
data &= ~APIC_ICR_BUSY;
kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
- kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ if (kvm_x86_ops.x2apic_icr_is_split) {
+ kvm_lapic_set_reg(apic, APIC_ICR, data);
+ kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
+ } else {
+ kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ }
trace_kvm_apic_write(APIC_ICR, data);
return 0;
}
+static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
+{
+ if (kvm_x86_ops.x2apic_icr_is_split)
+ return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
+ (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
+
+ return kvm_lapic_get_reg64(apic, APIC_ICR);
+}
+
/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
@@ -2493,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
* maybe-unecessary write, and both are in the noise anyways.
*/
if (apic_x2apic_mode(apic) && offset == APIC_ICR)
- WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)));
+ WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
else
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
}
@@ -3013,18 +3027,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
/*
* In x2APIC mode, the LDR is fixed and based on the id. And
- * ICR is internally a single 64-bit register, but needs to be
- * split to ICR+ICR2 in userspace for backwards compatibility.
+ * if the ICR is _not_ split, ICR is internally a single 64-bit
+ * register, but needs to be split to ICR+ICR2 in userspace for
+ * backwards compatibility.
*/
- if (set) {
+ if (set)
*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
- icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
- (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
- __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
- } else {
- icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
- __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ if (!kvm_x86_ops.x2apic_icr_is_split) {
+ if (set) {
+ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+ } else {
+ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ }
}
}
@@ -3222,7 +3240,7 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
u32 low;
if (reg == APIC_ICR) {
- *data = kvm_lapic_get_reg64(apic, APIC_ICR);
+ *data = kvm_x2apic_icr_read(apic);
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d8cfe8f23327..eb3de01602b9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5053,6 +5053,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept,
+
+ .x2apic_icr_is_split = true,
.set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
.apicv_post_state_restore = avic_apicv_post_state_restore,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 4f6023a0deb3..0a094ebad4b1 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -89,6 +89,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.enable_nmi_window = vmx_enable_nmi_window,
.enable_irq_window = vmx_enable_irq_window,
.update_cr8_intercept = vmx_update_cr8_intercept,
+
+ .x2apic_icr_is_split = false,
.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
Jann reported [1] possible issue when trampoline_check_ip returns
address near the bottom of the address space that is allowed to
call into the syscall if uretprobes are not set up.
Though the mmap minimum address restrictions will typically prevent
creating mappings there, let's make sure uretprobe syscall checks
for that.
[1] https://lore.kernel.org/bpf/202502081235.5A6F352985@keescook/T/#m9d416df341…
Cc: Kees Cook <kees(a)kernel.org>
Cc: Eyal Birger <eyal.birger(a)gmail.com>
Cc: stable(a)vger.kernel.org
Reported-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Jiri Olsa <jolsa(a)kernel.org>
---
arch/x86/kernel/uprobes.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5a952c5ea66b..109d6641a1b3 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,19 +357,23 @@ void *arch_uprobe_trampoline(unsigned long *psize)
return &insn;
}
-static unsigned long trampoline_check_ip(void)
+static unsigned long trampoline_check_ip(unsigned long tramp)
{
- unsigned long tramp = uprobe_get_trampoline_vaddr();
-
return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry);
}
SYSCALL_DEFINE0(uretprobe)
{
struct pt_regs *regs = task_pt_regs(current);
- unsigned long err, ip, sp, r11_cx_ax[3];
+ unsigned long err, ip, sp, r11_cx_ax[3], tramp;
+
+ /* If there's no trampoline, we are called from wrong place. */
+ tramp = uprobe_get_trampoline_vaddr();
+ if (tramp == -1)
+ goto sigill;
- if (regs->ip != trampoline_check_ip())
+ /* Make sure the ip matches the only allowed sys_uretprobe caller. */
+ if (regs->ip != trampoline_check_ip(tramp))
goto sigill;
err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax));
--
2.48.1
The patch titled
Subject: mm: pgtable: fix incorrect reclaim of non-empty PTE pages
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-pgtable-fix-incorrect-reclaim-of-non-empty-pte-pages.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Qi Zheng <zhengqi.arch(a)bytedance.com>
Subject: mm: pgtable: fix incorrect reclaim of non-empty PTE pages
Date: Tue, 11 Feb 2025 15:26:25 +0800
In zap_pte_range(), if the pte lock was released midway, the pte entries
may be refilled with physical pages by another thread, which may cause a
non-empty PTE page to be reclaimed and eventually cause the system to
crash.
To fix it, fall back to the slow path in this case to recheck if all pte
entries are still none.
Link: https://lkml.kernel.org/r/20250211072625.89188-1-zhengqi.arch@bytedance.com
Fixes: 6375e95f381e ("mm: pgtable: reclaim empty PTE page in madvise(MADV_DONTNEED)")
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Reported-by: Christian Brauner <brauner(a)kernel.org>
Closes: https://lore.kernel.org/all/20250207-anbot-bankfilialen-acce9d79a2c7@braune…
Reported-by: Qu Wenruo <quwenruo.btrfs(a)gmx.com>
Closes: https://lore.kernel.org/all/152296f3-5c81-4a94-97f3-004108fba7be@gmx.com/
Tested-by: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Zi Yan <ziy(a)nvidia.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
--- a/mm/memory.c~mm-pgtable-fix-incorrect-reclaim-of-non-empty-pte-pages
+++ a/mm/memory.c
@@ -1719,7 +1719,7 @@ static unsigned long zap_pte_range(struc
pmd_t pmdval;
unsigned long start = addr;
bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details);
- bool direct_reclaim = false;
+ bool direct_reclaim = true;
int nr;
retry:
@@ -1734,8 +1734,10 @@ retry:
do {
bool any_skipped = false;
- if (need_resched())
+ if (need_resched()) {
+ direct_reclaim = false;
break;
+ }
nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss,
&force_flush, &force_break, &any_skipped);
@@ -1743,11 +1745,20 @@ retry:
can_reclaim_pt = false;
if (unlikely(force_break)) {
addr += nr * PAGE_SIZE;
+ direct_reclaim = false;
break;
}
} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
- if (can_reclaim_pt && addr == end)
+ /*
+ * Fast path: try to hold the pmd lock and unmap the PTE page.
+ *
+ * If the pte lock was released midway (retry case), or if the attempt
+ * to hold the pmd lock failed, then we need to recheck all pte entries
+ * to ensure they are still none, thereby preventing the pte entries
+ * from being repopulated by another thread.
+ */
+ if (can_reclaim_pt && direct_reclaim && addr == end)
direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
add_mm_rss_vec(mm, rss);
_
Patches currently in -mm which might be from zhengqi.arch(a)bytedance.com are
mm-pgtable-fix-incorrect-reclaim-of-non-empty-pte-pages.patch
From: Lu Baolu <baolu.lu(a)linux.intel.com>
commit 89e8a2366e3bce584b6c01549d5019c5cda1205e upstream.
iommu_sva_bind_device() should return either a sva bond handle or an
ERR_PTR value in error cases. Existing drivers (idxd and uacce) only
check the return value with IS_ERR(). This could potentially lead to
a kernel NULL pointer dereference issue if the function returns NULL
instead of an error pointer.
In reality, this doesn't cause any problems because iommu_sva_bind_device()
only returns NULL when the kernel is not configured with CONFIG_IOMMU_SVA.
In this case, iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) will
return an error, and the device drivers won't call iommu_sva_bind_device()
at all.
Fixes: 26b25a2b98e4 ("iommu: Bind process address spaces to devices")
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe(a)linaro.org>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Link: https://lore.kernel.org/r/20240528042528.71396-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
Signed-off-by: Bin Lan <lanbincn(a)qq.com>
---
include/linux/iommu.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9d87090953bc..2bfa9611be67 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -999,7 +999,7 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
static inline struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
{
- return NULL;
+ return ERR_PTR(-ENODEV);
}
static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
--
2.43.0
The mapping table for the rk3328 is missing the entry for -25C which is
found in the TRM section 9.5.2 "Temperature-to-code mapping".
NOTE: the kernel uses the tsadc_q_sel=1'b1 mode which is defined as:
4096-<code in table>. Whereas the table in the TRM gives the code
"3774" for -25C, the kernel uses 4096-3774=322.
Link: https://opensource.rock-chips.com/images/9/97/Rockchip_RK3328TRM_V1.1-Part1…
Cc: stable(a)vger.kernel.org
Fixes: eda519d5f73e ("thermal: rockchip: Support the RK3328 SOC in thermal driver")
Signed-off-by: Trevor Woerner <twoerner(a)gmail.com>
---
changes in v2:
- remove non-ascii characters in commit message
- remove dangling [1] reference in commit message
- include "Fixes:"
- add request for stable backport
---
drivers/thermal/rockchip_thermal.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index f551df48eef9..a8ad85feb68f 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -386,6 +386,7 @@ static const struct tsadc_table rk3328_code_table[] = {
{296, -40000},
{304, -35000},
{313, -30000},
+ {322, -25000},
{331, -20000},
{340, -15000},
{349, -10000},
--
2.44.0.501.g19981daefd7c
In zap_pte_range(), if the pte lock was released midway, the pte entries
may be refilled with physical pages by another thread, which may cause a
non-empty PTE page to be reclaimed and eventually cause the system to
crash.
To fix it, fall back to the slow path in this case to recheck if all pte
entries are still none.
Fixes: 6375e95f381e ("mm: pgtable: reclaim empty PTE page in madvise(MADV_DONTNEED)")
Reported-by: Christian Brauner <brauner(a)kernel.org>
Closes: https://lore.kernel.org/all/20250207-anbot-bankfilialen-acce9d79a2c7@braune…
Reported-by: Qu Wenruo <quwenruo.btrfs(a)gmx.com>
Closes: https://lore.kernel.org/all/152296f3-5c81-4a94-97f3-004108fba7be@gmx.com/
Tested-by: Zi Yan <ziy(a)nvidia.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
---
mm/memory.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index a8196ae72e9ae..7c7193cb21248 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1721,7 +1721,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pmd_t pmdval;
unsigned long start = addr;
bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details);
- bool direct_reclaim = false;
+ bool direct_reclaim = true;
int nr;
retry:
@@ -1736,8 +1736,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
do {
bool any_skipped = false;
- if (need_resched())
+ if (need_resched()) {
+ direct_reclaim = false;
break;
+ }
nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss,
&force_flush, &force_break, &any_skipped);
@@ -1745,11 +1747,20 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
can_reclaim_pt = false;
if (unlikely(force_break)) {
addr += nr * PAGE_SIZE;
+ direct_reclaim = false;
break;
}
} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
- if (can_reclaim_pt && addr == end)
+ /*
+ * Fast path: try to hold the pmd lock and unmap the PTE page.
+ *
+ * If the pte lock was released midway (retry case), or if the attempt
+ * to hold the pmd lock failed, then we need to recheck all pte entries
+ * to ensure they are still none, thereby preventing the pte entries
+ * from being repopulated by another thread.
+ */
+ if (can_reclaim_pt && direct_reclaim && addr == end)
direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
add_mm_rss_vec(mm, rss);
--
2.20.1
From: Xu Kuohai <xukuohai(a)huawei.com>
[ Upstream commit 28ead3eaabc16ecc907cfb71876da028080f6356 ]
bpf progs can be attached to kernel functions, and the attached functions
can take different parameters or return different return values. If
prog attached to one kernel function tail calls prog attached to another
kernel function, the ctx access or return value verification could be
bypassed.
For example, if prog1 is attached to func1 which takes only 1 parameter
and prog2 is attached to func2 which takes two parameters. Since verifier
assumes the bpf ctx passed to prog2 is constructed based on func2's
prototype, verifier allows prog2 to access the second parameter from
the bpf ctx passed to it. The problem is that verifier does not prevent
prog1 from passing its bpf ctx to prog2 via tail call. In this case,
the bpf ctx passed to prog2 is constructed from func1 instead of func2,
that is, the assumption for ctx access verification is bypassed.
Another example, if BPF LSM prog1 is attached to hook file_alloc_security,
and BPF LSM prog2 is attached to hook bpf_lsm_audit_rule_known. Verifier
knows the return value rules for these two hooks, e.g. it is legal for
bpf_lsm_audit_rule_known to return positive number 1, and it is illegal
for file_alloc_security to return positive number. So verifier allows
prog2 to return positive number 1, but does not allow prog1 to return
positive number. The problem is that verifier does not prevent prog1
from calling prog2 via tail call. In this case, prog2's return value 1
will be used as the return value for prog1's hook file_alloc_security.
That is, the return value rule is bypassed.
This patch adds restriction for tail call to prevent such bypasses.
Signed-off-by: Xu Kuohai <xukuohai(a)huawei.com>
Link: https://lore.kernel.org/r/20240719110059.797546-4-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: Andrii Nakryiko <andrii(a)kernel.org>
Signed-off-by: BRUNO VERNAY <bruno.vernay(a)se.com>
Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource(a)witekio.com>
---
include/linux/bpf.h | 1 +
kernel/bpf/core.c | 19 +++++++++++++++++--
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7f4ce183dcb0..39291ec48374 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -250,6 +250,7 @@ struct bpf_map {
* same prog type, JITed flag and xdp_has_frags flag.
*/
struct {
+ const struct btf_type *attach_func_proto;
spinlock_t lock;
enum bpf_prog_type type;
bool jited;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 83b416af4da1..c281f5b8705e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2121,6 +2121,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
{
enum bpf_prog_type prog_type = resolve_prog_type(fp);
bool ret;
+ struct bpf_prog_aux *aux = fp->aux;
if (fp->kprobe_override)
return false;
@@ -2132,12 +2133,26 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
*/
map->owner.type = prog_type;
map->owner.jited = fp->jited;
- map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
+ map->owner.xdp_has_frags = aux->xdp_has_frags;
+ map->owner.attach_func_proto = aux->attach_func_proto;
ret = true;
} else {
ret = map->owner.type == prog_type &&
map->owner.jited == fp->jited &&
- map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
+ map->owner.xdp_has_frags == aux->xdp_has_frags;
+ if (ret &&
+ map->owner.attach_func_proto != aux->attach_func_proto) {
+ switch (prog_type) {
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ ret = false;
+ break;
+ default:
+ break;
+ }
+ }
}
spin_unlock(&map->owner.lock);
--
2.43.0
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using tasklet_hi_schedule() to schedule the inner
f_midi_transmit() via a tasklet from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
---
drivers/usb/gadget/function/f_midi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 837fcdfa3840..37d438e5d451 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ tasklet_hi_schedule(&midi->tasklet);
return;
}
break;
--
2.25.1