No upstream commit exists for this commit.
The issue was introduced with commit 63fac3343b99 ("Bluetooth: btbcm:
Support per-board firmware variants").
In btbcm_get_board_name() devm_kstrdup() can return NULL due to memory
allocation failure.
Add NULL return check to prevent NULL dereference.
Upstream branch code has been significantly refactored and can't be
backported directly.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 63fac3343b99 ("Bluetooth: btbcm: Support per-board firmware variants")
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
---
drivers/bluetooth/btbcm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index de2ea589aa49..6191fd74ab3d 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -551,6 +551,8 @@ static const char *btbcm_get_board_name(struct device *dev)
/* get rid of any '/' in the compatible string */
len = strlen(tmp) + 1;
board_type = devm_kzalloc(dev, len, GFP_KERNEL);
+ if (!board_type)
+ return NULL;
strscpy(board_type, tmp, len);
for (i = 0; i < len; i++) {
if (board_type[i] == '/')
--
2.30.2
From: Zijun Hu <quic_zijuhu(a)quicinc.com>
Remove macro list_for_each_reverse due to below reasons:
- it is same as list_for_each_prev.
- it is not used by current kernel tree.
Fixes: 8bf0cdfac7f8 ("<linux/list.h>: Introduce the list_for_each_reverse() method")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
include/linux/list.h | 8 --------
1 file changed, 8 deletions(-)
diff --git a/include/linux/list.h b/include/linux/list.h
index 5f4b0a39cf46..29a375889fb8 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -686,14 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_for_each(pos, head) \
for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
-/**
- * list_for_each_reverse - iterate backwards over a list
- * @pos: the &struct list_head to use as a loop cursor.
- * @head: the head for your list.
- */
-#define list_for_each_reverse(pos, head) \
- for (pos = (head)->prev; pos != (head); pos = pos->prev)
-
/**
* list_for_each_rcu - Iterate over a list in an RCU-safe fashion
* @pos: the &struct list_head to use as a loop cursor.
---
base-commit: 6a36d828bdef0e02b1e6c12e2160f5b83be6aab5
change-id: 20240916-fix_list-553c447bde0f
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
From: Schspa Shi <schspa(a)gmail.com>
commit a5201d42e2f8a8e8062103170027840ee372742f upstream.
When num_reg_defaults > 0 but reg_defaults is NULL, there will be a
NULL pointer exception.
Current code has no such usage, but as additional hardening, also
check this to prevent any chance of crashing.
Signed-off-by: Schspa Shi <schspa(a)gmail.com>
Link: https://lore.kernel.org/r/20220629130951.63040-1-schspa@gmail.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Roman Smirnov <r.smirnov(a)omp.ru>
---
drivers/base/regmap/regcache.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 7fdd702e564a..5ff79ba665ad 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -133,6 +133,12 @@ int regcache_init(struct regmap *map, const struct regmap_config *config)
return -EINVAL;
}
+ if (config->num_reg_defaults && !config->reg_defaults) {
+ dev_err(map->dev,
+ "Register defaults number are set without the reg!\n");
+ return -EINVAL;
+ }
+
for (i = 0; i < config->num_reg_defaults; i++)
if (config->reg_defaults[i].reg % map->reg_stride)
return -EINVAL;
--
2.34.1
From: Dandan Zhang <zhangdandan(a)uniontech.com>
[ Upstream commit 494b0792d962e8efac72b3a5b6d9bcd4e6fa8cf0 ]
The kvm_hypercall() set for LoongArch is limited to a1-a5. So the
mention of a6 in the comment is undefined that needs to be rectified.
Reviewed-by: Bibo Mao <maobibo(a)loongson.cn>
Signed-off-by: Wentao Guan <guanwentao(a)uniontech.com>
Signed-off-by: Dandan Zhang <zhangdandan(a)uniontech.com>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
Signed-off-by: WangYuli <wangyuli(a)uniontech.com>
--
Changlog:
*v1 -> v2: Correct the commit-msg format.
---
arch/loongarch/include/asm/kvm_para.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 4ba2312e5f8c..6d5e9b6c5714 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -28,9 +28,9 @@
* Hypercall interface for KVM hypervisor
*
* a0: function identifier
- * a1-a6: args
+ * a1-a5: args
* Return value will be placed in a0.
- * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ * Up to 5 arguments are passed in a1, a2, a3, a4, a5.
*/
static __always_inline long kvm_hypercall0(u64 fid)
{
--
2.43.0
The quilt patch titled
Subject: zram: free secondary algorithms names
has been removed from the -mm tree. Its filename was
zram-free-secondary-algorithms-names.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Subject: zram: free secondary algorithms names
Date: Wed, 11 Sep 2024 11:54:56 +0900
We need to kfree() secondary algorithms names when reset zram device that
had multi-streams, otherwise we leak memory.
[senozhatsky(a)chromium.org: kfree(NULL) is legal]
Link: https://lkml.kernel.org/r/20240917013021.868769-1-senozhatsky@chromium.org
Link: https://lkml.kernel.org/r/20240911025600.3681789-1-senozhatsky@chromium.org
Fixes: 001d92735701 ("zram: add recompression algorithm sysfs knob")
Signed-off-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/block/zram/zram_drv.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/drivers/block/zram/zram_drv.c~zram-free-secondary-algorithms-names
+++ a/drivers/block/zram/zram_drv.c
@@ -2112,6 +2112,11 @@ static void zram_destroy_comps(struct zr
zram->num_active_comps--;
}
+ for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+ kfree(zram->comp_algs[prio]);
+ zram->comp_algs[prio] = NULL;
+ }
+
zram_comp_params_reset(zram);
}
_
Patches currently in -mm which might be from senozhatsky(a)chromium.org are
The quilt patch titled
Subject: mm: z3fold: deprecate CONFIG_Z3FOLD
has been removed from the -mm tree. Its filename was
mm-z3fold-deprecate-config_z3fold.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yosry Ahmed <yosryahmed(a)google.com>
Subject: mm: z3fold: deprecate CONFIG_Z3FOLD
Date: Wed, 4 Sep 2024 23:33:43 +0000
The z3fold compressed pages allocator is rarely used, most users use
zsmalloc. The only disadvantage of zsmalloc in comparison is the
dependency on MMU, and zbud is a more common option for !MMU as it was the
default zswap allocator for a long time.
Historically, zsmalloc had worse latency than zbud and z3fold but offered
better memory savings. This is no longer the case as shown by a simple
recent analysis [1]. That analysis showed that z3fold does not have any
advantage over zsmalloc or zbud considering both performance and memory
usage. In a kernel build test on tmpfs in a limited cgroup, z3fold took
3% more time and used 1.8% more memory. The latency of zswap_load() was
7% higher, and that of zswap_store() was 10% higher. Zsmalloc is better
in all metrics.
Moreover, z3fold apparently has latent bugs, which was made noticeable by
a recent soft lockup bug report with z3fold [2]. Switching to zsmalloc
not only fixed the problem, but also reduced the swap usage from 6~8G to
1~2G. Other users have also reported being bitten by mistakenly enabling
z3fold.
Other than hurting users, z3fold is repeatedly causing wasted engineering
effort. Apart from investigating the above bug, it came up in multiple
development discussions (e.g. [3]) as something we need to handle, when
there aren't any legit users (at least not intentionally).
The natural course of action is to deprecate z3fold, and remove in a few
cycles if no objections are raised from active users. Next on the list
should be zbud, as it offers marginal latency gains at the cost of huge
memory waste when compared to zsmalloc. That one will need to wait until
zsmalloc does not depend on MMU.
Rename the user-visible config option from CONFIG_Z3FOLD to
CONFIG_Z3FOLD_DEPRECATED so that users with CONFIG_Z3FOLD=y get a new
prompt with explanation during make oldconfig. Also, remove
CONFIG_Z3FOLD=y from defconfigs.
[1]https://lore.kernel.org/lkml/CAJD7tkbRF6od-2x_L8-A1QL3=2Ww13sCj4S3i4bNndq…
[2]https://lore.kernel.org/lkml/EF0ABD3E-A239-4111-A8AB-5C442E759CF3@gmail.c…
[3]https://lore.kernel.org/lkml/CAJD7tkbnmeVugfunffSovJf9FAgy9rhBVt_tx=nxUve…
[arnd(a)arndb.de: deprecate ZSWAP_ZPOOL_DEFAULT_Z3FOLD as well]
Link: https://lkml.kernel.org/r/20240909202625.1054880-1-arnd@kernel.org
Link: https://lkml.kernel.org/r/20240904233343.933462-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Acked-by: Chris Down <chris(a)chrisdown.name>
Acked-by: Nhat Pham <nphamcs(a)gmail.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Vitaly Wool <vitaly.wool(a)konsulko.com>
Acked-by: Christoph Hellwig <hch(a)lst.de>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)kernel.org>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Huacai Chen <chenhuacai(a)kernel.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Naveen N. Rao <naveen.n.rao(a)linux.ibm.com>
Cc: Nicholas Piggin <npiggin(a)gmail.com>
Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: WANG Xuerui <kernel(a)xen0n.name>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/loongarch/configs/loongson3_defconfig | 1
arch/powerpc/configs/ppc64_defconfig | 1
mm/Kconfig | 25 ++++++++++++++-----
3 files changed, 19 insertions(+), 8 deletions(-)
--- a/arch/loongarch/configs/loongson3_defconfig~mm-z3fold-deprecate-config_z3fold
+++ a/arch/loongarch/configs/loongson3_defconfig
@@ -96,7 +96,6 @@ CONFIG_ZPOOL=y
CONFIG_ZSWAP=y
CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y
CONFIG_ZBUD=y
-CONFIG_Z3FOLD=y
CONFIG_ZSMALLOC=m
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
--- a/arch/powerpc/configs/ppc64_defconfig~mm-z3fold-deprecate-config_z3fold
+++ a/arch/powerpc/configs/ppc64_defconfig
@@ -81,7 +81,6 @@ CONFIG_MODULE_SIG_SHA512=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
-CONFIG_Z3FOLD=y
CONFIG_ZSMALLOC=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
CONFIG_SLAB_FREELIST_RANDOM=y
--- a/mm/Kconfig~mm-z3fold-deprecate-config_z3fold
+++ a/mm/Kconfig
@@ -146,12 +146,15 @@ config ZSWAP_ZPOOL_DEFAULT_ZBUD
help
Use the zbud allocator as the default allocator.
-config ZSWAP_ZPOOL_DEFAULT_Z3FOLD
- bool "z3fold"
- select Z3FOLD
+config ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED
+ bool "z3foldi (DEPRECATED)"
+ select Z3FOLD_DEPRECATED
help
Use the z3fold allocator as the default allocator.
+ Deprecated and scheduled for removal in a few cycles,
+ see CONFIG_Z3FOLD_DEPRECATED.
+
config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
bool "zsmalloc"
select ZSMALLOC
@@ -163,7 +166,7 @@ config ZSWAP_ZPOOL_DEFAULT
string
depends on ZSWAP
default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
- default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
+ default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED
default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
default ""
@@ -177,15 +180,25 @@ config ZBUD
deterministic reclaim properties that make it preferable to a higher
density approach when reclaim will be used.
-config Z3FOLD
- tristate "3:1 compression allocator (z3fold)"
+config Z3FOLD_DEPRECATED
+ tristate "3:1 compression allocator (z3fold) (DEPRECATED)"
depends on ZSWAP
help
+ Deprecated and scheduled for removal in a few cycles. If you have
+ a good reason for using Z3FOLD over ZSMALLOC, please contact
+ linux-mm(a)kvack.org and the zswap maintainers.
+
A special purpose allocator for storing compressed pages.
It is designed to store up to three compressed pages per physical
page. It is a ZBUD derivative so the simplicity and determinism are
still there.
+config Z3FOLD
+ tristate
+ default y if Z3FOLD_DEPRECATED=y
+ default m if Z3FOLD_DEPRECATED=m
+ depends on Z3FOLD_DEPRECATED
+
config ZSMALLOC
tristate
prompt "N:1 compression allocator (zsmalloc)" if (ZSWAP || ZRAM)
_
Patches currently in -mm which might be from yosryahmed(a)google.com are
The quilt patch titled
Subject: mm/huge_memory: ensure huge_zero_folio won't have large_rmappable flag set
has been removed from the -mm tree. Its filename was
mm-huge_memory-ensure-huge_zero_folio-wont-have-large_rmappable-flag-set.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm/huge_memory: ensure huge_zero_folio won't have large_rmappable flag set
Date: Sat, 14 Sep 2024 09:53:06 +0800
Ensure huge_zero_folio won't have large_rmappable flag set. So it can be
reported as thp,zero correctly through stable_page_flags().
Link: https://lkml.kernel.org/r/20240914015306.3656791-1-linmiaohe@huawei.com
Fixes: 5691753d73a2 ("mm: convert huge_zero_page to huge_zero_folio")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 2 ++
1 file changed, 2 insertions(+)
--- a/mm/huge_memory.c~mm-huge_memory-ensure-huge_zero_folio-wont-have-large_rmappable-flag-set
+++ a/mm/huge_memory.c
@@ -220,6 +220,8 @@ retry:
count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
return false;
}
+ /* Ensure zero folio won't have large_rmappable flag set. */
+ folio_clear_large_rmappable(zero_folio);
preempt_disable();
if (cmpxchg(&huge_zero_folio, NULL, zero_folio)) {
preempt_enable();
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-memory-failure-fix-vm_bug_on_pagepagepoisonedpage-when-unpoison-memory.patch
The quilt patch titled
Subject: mm/hugetlb.c: fix UAF of vma in hugetlb fault pathway
has been removed from the -mm tree. Its filename was
mm-hugetlbc-fix-uaf-of-vma-in-hugetlb-fault-pathway.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Vishal Moola (Oracle)" <vishal.moola(a)gmail.com>
Subject: mm/hugetlb.c: fix UAF of vma in hugetlb fault pathway
Date: Sat, 14 Sep 2024 12:41:19 -0700
Syzbot reports a UAF in hugetlb_fault(). This happens because
vmf_anon_prepare() could drop the per-VMA lock and allow the current VMA
to be freed before hugetlb_vma_unlock_read() is called.
We can fix this by using a modified version of vmf_anon_prepare() that
doesn't release the VMA lock on failure, and then release it ourselves
after hugetlb_vma_unlock_read().
Link: https://lkml.kernel.org/r/20240914194243.245-2-vishal.moola@gmail.com
Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
Reported-by: syzbot+2dab93857ee95f2eeb08(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Signed-off-by: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlbc-fix-uaf-of-vma-in-hugetlb-fault-pathway
+++ a/mm/hugetlb.c
@@ -6048,7 +6048,7 @@ retry_avoidcopy:
* When the original hugepage is shared one, it does not have
* anon_vma prepared.
*/
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out_release_all;
@@ -6247,7 +6247,7 @@ static vm_fault_t hugetlb_no_page(struct
}
if (!(vma->vm_flags & VM_MAYSHARE)) {
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out;
}
@@ -6378,6 +6378,14 @@ static vm_fault_t hugetlb_no_page(struct
folio_unlock(folio);
out:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() is
+ * the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return ret;
@@ -6599,6 +6607,14 @@ out_ptl:
}
out_mutex:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() in
+ * hugetlb_wp() is the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
* Generally it's safe to hold refcount during waiting page lock. But
_
Patches currently in -mm which might be from vishal.moola(a)gmail.com are
The quilt patch titled
Subject: mm: change vmf_anon_prepare() to __vmf_anon_prepare()
has been removed from the -mm tree. Its filename was
mm-change-vmf_anon_prepare-to-__vmf_anon_prepare.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Vishal Moola (Oracle)" <vishal.moola(a)gmail.com>
Subject: mm: change vmf_anon_prepare() to __vmf_anon_prepare()
Date: Sat, 14 Sep 2024 12:41:18 -0700
Some callers of vmf_anon_prepare() may not want us to release the per-VMA
lock ourselves. Rename vmf_anon_prepare() to __vmf_anon_prepare() and let
the callers drop the lock when desired.
Also, make vmf_anon_prepare() a wrapper that releases the per-VMA lock
itself for any callers that don't care.
This is in preparation to fix this bug reported by syzbot:
https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Link: https://lkml.kernel.org/r/20240914194243.245-1-vishal.moola@gmail.com
Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
Reported-by: syzbot+2dab93857ee95f2eeb08(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Signed-off-by: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/internal.h | 11 ++++++++++-
mm/memory.c | 8 +++-----
2 files changed, 13 insertions(+), 6 deletions(-)
--- a/mm/internal.h~mm-change-vmf_anon_prepare-to-__vmf_anon_prepare
+++ a/mm/internal.h
@@ -310,7 +310,16 @@ static inline void wake_throttle_isolate
wake_up(wqh);
}
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf);
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf);
+static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+{
+ vm_fault_t ret = __vmf_anon_prepare(vmf);
+
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vmf->vma);
+ return ret;
+}
+
vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
--- a/mm/memory.c~mm-change-vmf_anon_prepare-to-__vmf_anon_prepare
+++ a/mm/memory.c
@@ -3259,7 +3259,7 @@ static inline vm_fault_t vmf_can_call_fa
}
/**
- * vmf_anon_prepare - Prepare to handle an anonymous fault.
+ * __vmf_anon_prepare - Prepare to handle an anonymous fault.
* @vmf: The vm_fault descriptor passed from the fault handler.
*
* When preparing to insert an anonymous page into a VMA from a
@@ -3273,7 +3273,7 @@ static inline vm_fault_t vmf_can_call_fa
* Return: 0 if fault handling can proceed. Any other value should be
* returned to the caller.
*/
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret = 0;
@@ -3281,10 +3281,8 @@ vm_fault_t vmf_anon_prepare(struct vm_fa
if (likely(vma->anon_vma))
return 0;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
- if (!mmap_read_trylock(vma->vm_mm)) {
- vma_end_read(vma);
+ if (!mmap_read_trylock(vma->vm_mm))
return VM_FAULT_RETRY;
- }
}
if (__anon_vma_prepare(vma))
ret = VM_FAULT_OOM;
_
Patches currently in -mm which might be from vishal.moola(a)gmail.com are
The quilt patch titled
Subject: resource: fix region_intersects() vs add_memory_driver_managed()
has been removed from the -mm tree. Its filename was
resource-fix-region_intersects-vs-add_memory_driver_managed.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Huang Ying <ying.huang(a)intel.com>
Subject: resource: fix region_intersects() vs add_memory_driver_managed()
Date: Fri, 6 Sep 2024 11:07:11 +0800
On a system with CXL memory, the resource tree (/proc/iomem) related to
CXL memory may look like something as follows.
490000000-50fffffff : CXL Window 0
490000000-50fffffff : region0
490000000-50fffffff : dax0.0
490000000-50fffffff : System RAM (kmem)
Because drivers/dax/kmem.c calls add_memory_driver_managed() during
onlining CXL memory, which makes "System RAM (kmem)" a descendant of "CXL
Window X". This confuses region_intersects(), which expects all "System
RAM" resources to be at the top level of iomem_resource. This can lead to
bugs.
For example, when the following command line is executed to write some
memory in CXL memory range via /dev/mem,
$ dd if=data of=/dev/mem bs=$((1 << 10)) seek=$((0x490000000 >> 10)) count=1
dd: error writing '/dev/mem': Bad address
1+0 records in
0+0 records out
0 bytes copied, 0.0283507 s, 0.0 kB/s
the command fails as expected. However, the error code is wrong. It
should be "Operation not permitted" instead of "Bad address". More
seriously, the /dev/mem permission checking in devmem_is_allowed() passes
incorrectly. Although the accessing is prevented later because ioremap()
isn't allowed to map system RAM, it is a potential security issue. During
command executing, the following warning is reported in the kernel log for
calling ioremap() on system RAM.
ioremap on RAM at 0x0000000490000000 - 0x0000000490000fff
WARNING: CPU: 2 PID: 416 at arch/x86/mm/ioremap.c:216 __ioremap_caller.constprop.0+0x131/0x35d
Call Trace:
memremap+0xcb/0x184
xlate_dev_mem_ptr+0x25/0x2f
write_mem+0x94/0xfb
vfs_write+0x128/0x26d
ksys_write+0xac/0xfe
do_syscall_64+0x9a/0xfd
entry_SYSCALL_64_after_hwframe+0x4b/0x53
The details of command execution process are as follows. In the above
resource tree, "System RAM" is a descendant of "CXL Window 0" instead of a
top level resource. So, region_intersects() will report no System RAM
resources in the CXL memory region incorrectly, because it only checks the
top level resources. Consequently, devmem_is_allowed() will return 1
(allow access via /dev/mem) for CXL memory region incorrectly.
Fortunately, ioremap() doesn't allow to map System RAM and reject the
access.
So, region_intersects() needs to be fixed to work correctly with the
resource tree with "System RAM" not at top level as above. To fix it, if
we found a unmatched resource in the top level, we will continue to search
matched resources in its descendant resources. So, we will not miss any
matched resources in resource tree anymore.
In the new implementation, an example resource tree
|------------- "CXL Window 0" ------------|
|-- "System RAM" --|
will behave similar as the following fake resource tree for
region_intersects(, IORESOURCE_SYSTEM_RAM, ),
|-- "System RAM" --||-- "CXL Window 0a" --|
Where "CXL Window 0a" is part of the original "CXL Window 0" that
isn't covered by "System RAM".
Link: https://lkml.kernel.org/r/20240906030713.204292-2-ying.huang@intel.com
Fixes: c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like normal RAM")
Signed-off-by: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Jonathan Cameron <jonathan.cameron(a)huawei.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Alison Schofield <alison.schofield(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/resource.c | 58 +++++++++++++++++++++++++++++++++++++-------
1 file changed, 50 insertions(+), 8 deletions(-)
--- a/kernel/resource.c~resource-fix-region_intersects-vs-add_memory_driver_managed
+++ a/kernel/resource.c
@@ -540,20 +540,62 @@ static int __region_intersects(struct re
size_t size, unsigned long flags,
unsigned long desc)
{
- struct resource res;
+ resource_size_t ostart, oend;
int type = 0; int other = 0;
- struct resource *p;
+ struct resource *p, *dp;
+ bool is_type, covered;
+ struct resource res;
res.start = start;
res.end = start + size - 1;
for (p = parent->child; p ; p = p->sibling) {
- bool is_type = (((p->flags & flags) == flags) &&
- ((desc == IORES_DESC_NONE) ||
- (desc == p->desc)));
-
- if (resource_overlaps(p, &res))
- is_type ? type++ : other++;
+ if (!resource_overlaps(p, &res))
+ continue;
+ is_type = (p->flags & flags) == flags &&
+ (desc == IORES_DESC_NONE || desc == p->desc);
+ if (is_type) {
+ type++;
+ continue;
+ }
+ /*
+ * Continue to search in descendant resources as if the
+ * matched descendant resources cover some ranges of 'p'.
+ *
+ * |------------- "CXL Window 0" ------------|
+ * |-- "System RAM" --|
+ *
+ * will behave similar as the following fake resource
+ * tree when searching "System RAM".
+ *
+ * |-- "System RAM" --||-- "CXL Window 0a" --|
+ */
+ covered = false;
+ ostart = max(res.start, p->start);
+ oend = min(res.end, p->end);
+ for_each_resource(p, dp, false) {
+ if (!resource_overlaps(dp, &res))
+ continue;
+ is_type = (dp->flags & flags) == flags &&
+ (desc == IORES_DESC_NONE || desc == dp->desc);
+ if (is_type) {
+ type++;
+ /*
+ * Range from 'ostart' to 'dp->start'
+ * isn't covered by matched resource.
+ */
+ if (dp->start > ostart)
+ break;
+ if (dp->end >= oend) {
+ covered = true;
+ break;
+ }
+ /* Remove covered range */
+ ostart = max(ostart, dp->end + 1);
+ }
+ }
+ if (!covered)
+ other++;
}
if (type == 0)
_
Patches currently in -mm which might be from ying.huang(a)intel.com are
resource-make-alloc_free_mem_region-works-for-iomem_resource.patch
resource-kunit-add-test-case-for-region_intersects.patch
> I think this patch just hides the real problem.
> How could putcs have become NULL ?
>
> Helge
Oh, you are right!
I will figure it out.
Best,
Qianqiang Liu
When dwc3_resume_common() returns an error, runtime pm is left in
suspended and disabled state in dwc3_resume(). Since the device
is suspended, its parent devices (like the power domain or glue
driver) could also be suspended and may have released resources
that dwc requires. Consequently, calling dwc3_suspend_common() in
this situation could result in attempts to access unclocked or
unpowered registers.
To prevent these problems, runtime PM should always be re-enabled,
even after failed resume attempts. This ensures that
dwc3_suspend_common() is skipped in such cases.
Fixes: 68c26fe58182 ("usb: dwc3: set pm runtime active before resume common")
Cc: stable(a)vger.kernel.org
Signed-off-by: Roy Luo <royluo(a)google.com>
---
drivers/usb/dwc3/core.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index ccc3895dbd7f..4bd73b5fe41b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2537,7 +2537,7 @@ static int dwc3_suspend(struct device *dev)
static int dwc3_resume(struct device *dev)
{
struct dwc3 *dwc = dev_get_drvdata(dev);
- int ret;
+ int ret = 0;
pinctrl_pm_select_default_state(dev);
@@ -2545,14 +2545,12 @@ static int dwc3_resume(struct device *dev)
pm_runtime_set_active(dev);
ret = dwc3_resume_common(dwc, PMSG_RESUME);
- if (ret) {
+ if (ret)
pm_runtime_set_suspended(dev);
- return ret;
- }
pm_runtime_enable(dev);
- return 0;
+ return ret;
}
static void dwc3_complete(struct device *dev)
base-commit: ad618736883b8970f66af799e34007475fe33a68
--
2.46.0.662.g92d0881bb0-goog
Spec says SW is expected to round up to the nearest 128K, if not already
aligned for the CC unit view of CCS. We are seeing the assert sometimes
pop on BMG to tell us that there is a hole between GSM and CCS, as well
as popping other asserts with having a vram size with strange alignment,
which is likely caused by misaligned offset here.
v2 (Shuicheng):
- Do the round_up() on final SW address.
BSpec: 68023
Fixes: b5c2ca0372dc ("drm/xe/xe2hpg: Determine flat ccs offset for vram")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com>
Cc: Akshata Jahagirdar <akshata.jahagirdar(a)intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: Shuicheng Lin <shuicheng.lin(a)intel.com>
Cc: Matt Roper <matthew.d.roper(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.10+
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com>
Tested-by: Shuicheng Lin <shuicheng.lin(a)intel.com>
---
drivers/gpu/drm/xe/xe_vram.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 7e765b1499b1..2a623bfcda7e 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -182,6 +182,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = offset_hi << 32; /* HW view bits 39:32 */
offset |= offset_lo << 6; /* HW view bits 31:6 */
offset *= num_enabled; /* convert to SW view */
+ offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
/* We don't expect any holes */
xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(>_to_tile(gt)->mmio, GSMBASE) -
--
2.46.0
The rpl sr tunnel code contains calls to dst_cache_*() which are
only present when the dst cache is built.
Select DST_CACHE to build the dst cache, similar to other kconfig
options in the same file.
Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel")
Cc: stable(a)vger.kernel.org
---
Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
---
net/ipv6/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 08d4b7132d4c..1c9c686d9522 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -323,6 +323,7 @@ config IPV6_RPL_LWTUNNEL
bool "IPv6: RPL Source Routing Header support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
help
Support for RFC6554 RPL Source Routing Header using the lightweight
tunnels mechanism.
---
base-commit: ad060dbbcfcfcba624ef1a75e1d71365a98b86d8
change-id: 20240916-ipv6_rpl_lwtunnel-dst_cache-22561978f35f
Best regards,
--
Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Good Morning.
I am Mrs. Rosella Thomas. My dear, would you be able to handle a project
for us? Please contact me by email for more details. My email is this
rosellathomas4(a)gmail.com
Thanks
Mrs. Rosella Thomas
The receiver should no be enabled until the port is opened so drop the
bogus call to start tx from the setup code which is shared with the
console implementation.
This was added for some confused implementation of hibernation support,
but the receiver must not be started unconditionally as the port may not
have been open when hibernating the system.
Fixes: 35781d8356a2 ("tty: serial: qcom-geni-serial: Add support for Hibernation feature")
Cc: stable(a)vger.kernel.org # 6.2
Cc: Aniket Randive <quic_arandive(a)quicinc.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/tty/serial/qcom_geni_serial.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 6f0db310cf69..9ea6bd09e665 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1152,7 +1152,6 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
false, true, true);
geni_se_init(&port->se, UART_RX_WM, port->rx_fifo_depth - 2);
geni_se_select_mode(&port->se, port->dev_data->mode);
- qcom_geni_serial_start_rx(uport);
port->setup = true;
return 0;
--
2.44.2
The quilt patch titled
Subject: ocfs2: reserve space for inline xattr before attaching reflink tree
has been removed from the -mm tree. Its filename was
ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree.patch
This patch was dropped because it had testing failures
------------------------------------------------------
From: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Subject: ocfs2: reserve space for inline xattr before attaching reflink tree
Date: Thu, 12 Sep 2024 06:47:20 +0000
One of our customers reported a crash and a corrupted ocfs2 filesystem.
The crash was due to the detection of corruption. Upon troubleshooting,
the fsck -fn output showed the below corruption
[EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record,
but fsck believes the largest valid value is 227. Clamp the next record value? n
The stat output from the debugfs.ocfs2 showed the following corruption
where the "Next Free Rec:" had overshot the "Count:" in the root metadata
block.
Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856)
FS Generation: 904309833 (0x35e6ac49)
CRC32: 00000000 ECC: 0000
Type: Regular Attr: 0x0 Flags: Valid
Dynamic Features: (0x16) HasXattr InlineXattr Refcounted
Extended Attributes Block: 0 Extended Attributes Inline Size: 256
User: 0 (root) Group: 0 (root) Size: 281320357888
Links: 1 Clusters: 141738
ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024
atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024
mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024
dtime: 0x0 -- Wed Dec 31 17:00:00 1969
Refcount Block: 2777346
Last Extblk: 2886943 Orphan Slot: 0
Sub Alloc Slot: 0 Sub Alloc Bit: 14
Tree Depth: 1 Count: 227 Next Free Rec: 230
## Offset Clusters Block#
0 0 2310 2776351
1 2310 2139 2777375
2 4449 1221 2778399
3 5670 731 2779423
4 6401 566 2780447
....... .... .......
....... .... .......
The issue was in the reflink workfow while reserving space for inline
xattr. The problematic function is ocfs2_reflink_xattr_inline(). By the
time this function is called the reflink tree is already recreated at the
destination inode from the source inode. At this point, this function
reserves space for inline xattrs at the destination inode without even
checking if there is space at the root metadata block. It simply reduces
the l_count from 243 to 227 thereby making space of 256 bytes for inline
xattr whereas the inode already has extents beyond this index (in this
case upto 230), thereby causing corruption.
The fix for this is to reserve space for inline metadata at the
destination inode before the reflink tree gets recreated. The customer
has verified the fix.
Link: https://lkml.kernel.org/r/20240912064720.898600-1-gautham.ananthakrishna@or…
Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink")
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++--
fs/ocfs2/xattr.c | 11 +----------
2 files changed, 25 insertions(+), 12 deletions(-)
--- a/fs/ocfs2/refcounttree.c~ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree
+++ a/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4155,8 +4156,9 @@ static int __ocfs2_reflink(struct dentry
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4184,26 @@ static int __ocfs2_reflink(struct dentry
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = new_bh->b_data;
+ struct ocfs2_dinode *old_di = old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4211,7 @@ static int __ocfs2_reflink(struct dentry
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
--- a/fs/ocfs2/xattr.c~ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree
+++ a/fs/ocfs2/xattr.c
@@ -6520,16 +6520,7 @@ static int ocfs2_reflink_xattr_inline(st
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
_
Patches currently in -mm which might be from gautham.ananthakrishna(a)oracle.com are
One of our customers reported a crash and a corrupted ocfs2 filesystem.
The crash was due to the detection of corruption. Upon troubleshooting,
the fsck -fn output showed the below corruption
[EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record,
but fsck believes the largest valid value is 227. Clamp the next record value? n
The stat output from the debugfs.ocfs2 showed the following corruption
where the "Next Free Rec:" had overshot the "Count:" in the root metadata
block.
Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856)
FS Generation: 904309833 (0x35e6ac49)
CRC32: 00000000 ECC: 0000
Type: Regular Attr: 0x0 Flags: Valid
Dynamic Features: (0x16) HasXattr InlineXattr Refcounted
Extended Attributes Block: 0 Extended Attributes Inline Size: 256
User: 0 (root) Group: 0 (root) Size: 281320357888
Links: 1 Clusters: 141738
ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024
atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024
mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024
dtime: 0x0 -- Wed Dec 31 17:00:00 1969
Refcount Block: 2777346
Last Extblk: 2886943 Orphan Slot: 0
Sub Alloc Slot: 0 Sub Alloc Bit: 14
Tree Depth: 1 Count: 227 Next Free Rec: 230
## Offset Clusters Block#
0 0 2310 2776351
1 2310 2139 2777375
2 4449 1221 2778399
3 5670 731 2779423
4 6401 566 2780447
....... .... .......
....... .... .......
The issue was in the reflink workfow while reserving space for inline xattr.
The problematic function is ocfs2_reflink_xattr_inline(). By the time this
function is called the reflink tree is already recreated at the destination
inode from the source inode. At this point, this function reserves space
for inline xattrs at the destination inode without even checking if there
is space at the root metadata block. It simply reduces the l_count from 243
to 227 thereby making space of 256 bytes for inline xattr whereas the inode
already has extents beyond this index (in this case upto 230), thereby causing
corruption.
The fix for this is to reserve space for inline metadata at the destination
inode before the reflink tree gets recreated. The customer has verified the
fix.
Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
---
fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++--
fs/ocfs2/xattr.c | 11 +----------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3f80a56d0d60..05105d271fc8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4155,8 +4156,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4184,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = new_bh->b_data;
+ struct ocfs2_dinode *old_di = old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4211,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..a9f716ec89e2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6511,16 +6511,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
--
2.39.3
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 49ac6f05ace5bb0070c68a0193aa05d3c25d4c83
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091343-excusably-laborer-3bef@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
49ac6f05ace5 ("selftests: mptcp: join: restrict fullmesh endp on 1st sf")
4878f9f8421f ("selftests: mptcp: join: validate fullmesh endp on 1st sf")
e571fb09c893 ("selftests: mptcp: add speed env var")
4aadde088a58 ("selftests: mptcp: add fullmesh env var")
080b7f5733fd ("selftests: mptcp: add fastclose env var")
662aa22d7dcd ("selftests: mptcp: set all env vars as local ones")
9e9d176df8e9 ("selftests: mptcp: add pm_nl_set_endpoint helper")
1534f87ee0dc ("selftests: mptcp: drop sflags parameter")
595ef566a2ef ("selftests: mptcp: drop addr_nr_ns1/2 parameters")
0c93af1f8907 ("selftests: mptcp: drop test_linkfail parameter")
be7e9786c915 ("selftests: mptcp: set FAILING_LINKS in run_tests")
4369c198e599 ("selftests: mptcp: test userspace pm out of transfer")
ae947bb2c253 ("selftests: mptcp: join: skip Fastclose tests if not supported")
d4c81bbb8600 ("selftests: mptcp: join: support local endpoint being tracked or not")
4a0b866a3f7d ("selftests: mptcp: join: skip test if iptables/tc cmds fail")
0c4cd3f86a40 ("selftests: mptcp: join: use 'iptables-legacy' if available")
6c160b636c91 ("selftests: mptcp: update userspace pm subflow tests")
48d73f609dcc ("selftests: mptcp: update userspace pm addr tests")
8697a258ae24 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 49ac6f05ace5bb0070c68a0193aa05d3c25d4c83 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Tue, 10 Sep 2024 21:06:36 +0200
Subject: [PATCH] selftests: mptcp: join: restrict fullmesh endp on 1st sf
A new endpoint using the IP of the initial subflow has been recently
added to increase the code coverage. But it breaks the test when using
old kernels not having commit 86e39e04482b ("mptcp: keep track of local
endpoint still available for each msk"), e.g. on v5.15.
Similar to commit d4c81bbb8600 ("selftests: mptcp: join: support local
endpoint being tracked or not"), it is possible to add the new endpoint
conditionally, by checking if "mptcp_pm_subflow_check_next" is present
in kallsyms: this is not directly linked to the commit introducing this
symbol but for the parent one which is linked anyway. So we can know in
advance what will be the expected behaviour, and add the new endpoint
only when it makes sense to do so.
Fixes: 4878f9f8421f ("selftests: mptcp: join: validate fullmesh endp on 1st sf")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20240910-net-selftests-mptcp-fix-install-v1-1-8f12…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index a4762c49a878..cde041c93906 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3064,7 +3064,9 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ fi
fullmesh=1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
The `impl Sync for LockedBy` implementation has insufficient trait
bounds, as it only requires `T: Send`. However, `T: Sync` is also
required for soundness because the `LockedBy::access` method could be
used to provide shared access to the inner value from several threads in
parallel.
Cc: stable(a)vger.kernel.org
Fixes: 7b1f55e3a984 ("rust: sync: introduce `LockedBy`")
Signed-off-by: Alice Ryhl <aliceryhl(a)google.com>
---
rust/kernel/sync/locked_by.rs | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs
index babc731bd5f6..153ba4edcb03 100644
--- a/rust/kernel/sync/locked_by.rs
+++ b/rust/kernel/sync/locked_by.rs
@@ -83,9 +83,10 @@ pub struct LockedBy<T: ?Sized, U: ?Sized> {
// SAFETY: `LockedBy` can be transferred across thread boundaries iff the data it protects can.
unsafe impl<T: ?Sized + Send, U: ?Sized> Send for LockedBy<T, U> {}
-// SAFETY: `LockedBy` serialises the interior mutability it provides, so it is `Sync` as long as the
-// data it protects is `Send`.
-unsafe impl<T: ?Sized + Send, U: ?Sized> Sync for LockedBy<T, U> {}
+// SAFETY: Shared access to the `LockedBy` can provide both `&mut T` references in a synchronized
+// manner, or `&T` access in an unsynchronized manner. The `Send` trait is sufficient for the first
+// case, and `Sync` is sufficient for the second case.
+unsafe impl<T: ?Sized + Send + Sync, U: ?Sized> Sync for LockedBy<T, U> {}
impl<T, U> LockedBy<T, U> {
/// Constructs a new instance of [`LockedBy`].
@@ -127,7 +128,7 @@ pub fn access<'a>(&'a self, owner: &'a U) -> &'a T {
panic!("mismatched owners");
}
- // SAFETY: `owner` is evidence that the owner is locked.
+ // SAFETY: `owner` is evidence that there are only shared references to the owner.
unsafe { &*self.data.get() }
}
---
base-commit: 93dc3be19450447a3a7090bd1dfb9f3daac3e8d2
change-id: 20240912-locked-by-sync-fix-07193df52f98
Best regards,
--
Alice Ryhl <aliceryhl(a)google.com>
The patch titled
Subject: zram: free secondary algorithms names
has been added to the -mm mm-unstable branch. Its filename is
zram-free-secondary-algorithms-names.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Subject: zram: free secondary algorithms names
Date: Wed, 11 Sep 2024 11:54:56 +0900
We need to kfree() secondary algorithms names when reset zram device that
had multi-streams, otherwise we leak memory.
Link: https://lkml.kernel.org/r/20240911025600.3681789-1-senozhatsky@chromium.org
Fixes: 001d92735701 ("zram: add recompression algorithm sysfs knob")
Signed-off-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/block/zram/zram_drv.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/block/zram/zram_drv.c~zram-free-secondary-algorithms-names
+++ a/drivers/block/zram/zram_drv.c
@@ -2112,6 +2112,13 @@ static void zram_destroy_comps(struct zr
zram->num_active_comps--;
}
+ for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+ if (!zram->comp_algs[prio])
+ continue;
+ kfree(zram->comp_algs[prio]);
+ zram->comp_algs[prio] = NULL;
+ }
+
zram_comp_params_reset(zram);
}
_
Patches currently in -mm which might be from senozhatsky(a)chromium.org are
zsmalloc-use-unique-zsmalloc-caches-names.patch
zram-free-secondary-algorithms-names.patch
Code expects array only with 2 items which should be checked.
But also item checking is not working as it should likely because of
incorrect items description.
Fixes: d50f974c4f7f ("dt-bindings: serial: Convert rs485 bindings to json-schema")
Signed-off-by: Michal Simek <michal.simek(a)amd.com>
Cc: <stable(a)vger.kernel.org>
---
Changes in v3:
- Remove incorrectly assigned value for the first item 50/100 because of
my testing
Changes in v2:
- Remove maxItems properties which are not needed
- Add stable ML to CC
.../devicetree/bindings/serial/rs485.yaml | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/Documentation/devicetree/bindings/serial/rs485.yaml b/Documentation/devicetree/bindings/serial/rs485.yaml
index 9418fd66a8e9..b93254ad2a28 100644
--- a/Documentation/devicetree/bindings/serial/rs485.yaml
+++ b/Documentation/devicetree/bindings/serial/rs485.yaml
@@ -18,16 +18,15 @@ properties:
description: prop-encoded-array <a b>
$ref: /schemas/types.yaml#/definitions/uint32-array
items:
- items:
- - description: Delay between rts signal and beginning of data sent in
- milliseconds. It corresponds to the delay before sending data.
- default: 0
- maximum: 100
- - description: Delay between end of data sent and rts signal in milliseconds.
- It corresponds to the delay after sending data and actual release
- of the line.
- default: 0
- maximum: 100
+ - description: Delay between rts signal and beginning of data sent in
+ milliseconds. It corresponds to the delay before sending data.
+ default: 0
+ maximum: 100
+ - description: Delay between end of data sent and rts signal in milliseconds.
+ It corresponds to the delay after sending data and actual release
+ of the line.
+ default: 0
+ maximum: 100
rs485-rts-active-high:
description: drive RTS high when sending (this is the default).
--
2.43.0
The patch titled
Subject: ocfs2: reserve space for inline xattr before attaching reflink tree
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Subject: ocfs2: reserve space for inline xattr before attaching reflink tree
Date: Thu, 12 Sep 2024 06:47:20 +0000
One of our customers reported a crash and a corrupted ocfs2 filesystem.
The crash was due to the detection of corruption. Upon troubleshooting,
the fsck -fn output showed the below corruption
[EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record,
but fsck believes the largest valid value is 227. Clamp the next record value? n
The stat output from the debugfs.ocfs2 showed the following corruption
where the "Next Free Rec:" had overshot the "Count:" in the root metadata
block.
Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856)
FS Generation: 904309833 (0x35e6ac49)
CRC32: 00000000 ECC: 0000
Type: Regular Attr: 0x0 Flags: Valid
Dynamic Features: (0x16) HasXattr InlineXattr Refcounted
Extended Attributes Block: 0 Extended Attributes Inline Size: 256
User: 0 (root) Group: 0 (root) Size: 281320357888
Links: 1 Clusters: 141738
ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024
atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024
mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024
dtime: 0x0 -- Wed Dec 31 17:00:00 1969
Refcount Block: 2777346
Last Extblk: 2886943 Orphan Slot: 0
Sub Alloc Slot: 0 Sub Alloc Bit: 14
Tree Depth: 1 Count: 227 Next Free Rec: 230
## Offset Clusters Block#
0 0 2310 2776351
1 2310 2139 2777375
2 4449 1221 2778399
3 5670 731 2779423
4 6401 566 2780447
....... .... .......
....... .... .......
The issue was in the reflink workfow while reserving space for inline
xattr. The problematic function is ocfs2_reflink_xattr_inline(). By the
time this function is called the reflink tree is already recreated at the
destination inode from the source inode. At this point, this function
reserves space for inline xattrs at the destination inode without even
checking if there is space at the root metadata block. It simply reduces
the l_count from 243 to 227 thereby making space of 256 bytes for inline
xattr whereas the inode already has extents beyond this index (in this
case upto 230), thereby causing corruption.
The fix for this is to reserve space for inline metadata at the
destination inode before the reflink tree gets recreated. The customer
has verified the fix.
Link: https://lkml.kernel.org/r/20240912064720.898600-1-gautham.ananthakrishna@or…
Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink")
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++--
fs/ocfs2/xattr.c | 11 +----------
2 files changed, 25 insertions(+), 12 deletions(-)
--- a/fs/ocfs2/refcounttree.c~ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree
+++ a/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4155,8 +4156,9 @@ static int __ocfs2_reflink(struct dentry
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4184,26 @@ static int __ocfs2_reflink(struct dentry
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = new_bh->b_data;
+ struct ocfs2_dinode *old_di = old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4211,7 @@ static int __ocfs2_reflink(struct dentry
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
--- a/fs/ocfs2/xattr.c~ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree
+++ a/fs/ocfs2/xattr.c
@@ -6520,16 +6520,7 @@ static int ocfs2_reflink_xattr_inline(st
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
_
Patches currently in -mm which might be from gautham.ananthakrishna(a)oracle.com are
ocfs2-reserve-space-for-inline-xattr-before-attaching-reflink-tree.patch
From: MrRurikov <grurikovsherbakov(a)yandex.ru>
After having been compared to a NULL value at algif_aead.c:191, pointer
'tsgl_src' is passed as 2nd parameter in call to function
'crypto_aead_copy_sgl' at algif_aead.c:244, where it is dereferenced at
algif_aead.c:85.
Change logical operator from && to || because pointer 'tsgl_src' is NULL,
then 'proccessed' will still be non-null
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Cc: stable(a)vger.kernel.org
Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
Signed-off-by: MrRurikov <grurikovsherbakov(a)yandex.ru>
---
crypto/algif_aead.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 7d58cbbce4af..135f09a4b3f8 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -191,7 +191,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
if (tsgl_src)
break;
}
- if (processed && !tsgl_src) {
+ if (processed || !tsgl_src) {
err = -EFAULT;
goto free;
}
--
2.34.1
Заявление о конфиденциальности
Данное электронное письмо и любые приложения к нему являются конфиденциальными и предназначены исключительно для адресата. Если Вы не являетесь адресатом данного письма, пожалуйста, уведомите немедленно отправителя, не раскрывайте содержание другим лицам, не используйте его в каких-либо целях, не храните и не копируйте информацию любым способом.
From: Hagar Hemdan <hagarhem(a)amazon.com>
commit d795848ecce24a75dfd46481aee066ae6fe39775 upstream.
Userspace may trigger a speculative read of an address outside the gpio
descriptor array.
Users can do that by calling gpio_ioctl() with an offset out of range.
Offset is copied from user and then used as an array index to get
the gpio descriptor without sanitization in gpio_device_get_desc().
This change ensures that the offset is sanitized by using
array_index_nospec() to mitigate any possibility of speculative
information leaks.
This bug was discovered and resolved using Coverity Static Analysis
Security Testing (SAST) by Synopsys, Inc.
Signed-off-by: Hagar Hemdan <hagarhem(a)amazon.com>
Link: https://lore.kernel.org/r/20240523085332.1801-1-hagarhem@amazon.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource(a)witekio.com>
---
drivers/gpio/gpiolib.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index abdf448b11a3..f83b2214d704 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -4,6 +4,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/nospec.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/device.h>
@@ -147,7 +148,7 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip,
if (hwnum >= gdev->ngpio)
return ERR_PTR(-EINVAL);
- return &gdev->descs[hwnum];
+ return &gdev->descs[array_index_nospec(hwnum, gdev->ngpio)];
}
/**
--
2.43.0
From: Hagar Hemdan <hagarhem(a)amazon.com>
commit d795848ecce24a75dfd46481aee066ae6fe39775 upstream.
Userspace may trigger a speculative read of an address outside the gpio
descriptor array.
Users can do that by calling gpio_ioctl() with an offset out of range.
Offset is copied from user and then used as an array index to get
the gpio descriptor without sanitization in gpio_device_get_desc().
This change ensures that the offset is sanitized by using
array_index_nospec() to mitigate any possibility of speculative
information leaks.
This bug was discovered and resolved using Coverity Static Analysis
Security Testing (SAST) by Synopsys, Inc.
Signed-off-by: Hagar Hemdan <hagarhem(a)amazon.com>
Link: https://lore.kernel.org/r/20240523085332.1801-1-hagarhem@amazon.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource(a)witekio.com>
---
drivers/gpio/gpiolib.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9d8c78312403..a0c1dabd2939 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/nospec.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/device.h>
@@ -146,7 +147,7 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc,
if (hwnum >= gdev->ngpio)
return ERR_PTR(-EINVAL);
- return &gdev->descs[hwnum];
+ return &gdev->descs[array_index_nospec(hwnum, gdev->ngpio)];
}
EXPORT_SYMBOL_GPL(gpiochip_get_desc);
--
2.43.0
tpm2_load_null() ignores the return value of tpm2_create_primary().
Further, it does not heal from the situation when memcmp() returns zero.
Address this by returning on failure and saving the null key if there
was no detected interference in the bus.
Cc: stable(a)vger.kernel.org # v6.11+
Fixes: eb24c9788cd9 ("tpm: disable the TPM if NULL name changes")
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
v2:
- Refined the commit message.
- Reverted tpm2_create_primary() changes. They are not required if
tmp_null_key is used as the parameter.
---
drivers/char/tpm/tpm2-sessions.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index d63510ad44ab..9c0356d7ce5e 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -850,22 +850,32 @@ static int tpm2_parse_start_auth_session(struct tpm2_auth *auth,
static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
{
- int rc;
unsigned int offset = 0; /* dummy offset for null seed context */
u8 name[SHA256_DIGEST_SIZE + 2];
+ u32 tmp_null_key;
+ int rc;
rc = tpm2_load_context(chip, chip->null_key_context, &offset,
- null_key);
- if (rc != -EINVAL)
+ &tmp_null_key);
+ if (rc != -EINVAL) {
+ if (!rc)
+ *null_key = tmp_null_key;
return rc;
+ }
/* an integrity failure may mean the TPM has been reset */
dev_err(&chip->dev, "NULL key integrity failure!\n");
- /* check the null name against what we know */
- tpm2_create_primary(chip, TPM2_RH_NULL, NULL, name);
- if (memcmp(name, chip->null_key_name, sizeof(name)) == 0)
- /* name unchanged, assume transient integrity failure */
+
+ rc = tpm2_create_primary(chip, TPM2_RH_NULL, &tmp_null_key, name);
+ if (rc)
return rc;
+
+ /* Return the null key if the name has not been changed: */
+ if (memcmp(name, chip->null_key_name, sizeof(name)) == 0) {
+ *null_key = tmp_null_key;
+ return 0;
+ }
+
/*
* Fatal TPM failure: the NULL seed has actually changed, so
* the TPM must have been illegally reset. All in-kernel TPM
@@ -874,6 +884,7 @@ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
* userspace programmes can't be compromised by it.
*/
dev_err(&chip->dev, "NULL name has changed, disabling TPM due to interference\n");
+ tpm2_flush_context(chip, tmp_null_key);
chip->flags |= TPM_CHIP_FLAG_DISABLE;
return rc;
--
2.46.0
From: Alvin Lee <alvin.lee2(a)amd.com>
commit 8a0f02b7beed7b2b768dbdf3b79960de68f460c5 upstream.
[Why]
There is some logic error where the wrong variable was used to check for
OTG_MASTER and DPP_PIPE.
[How]
Add booleans to confirm that the expected pipes were found before
validating schedulability.
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira(a)amd.com>
Reviewed-by: Samson Tam <samson.tam(a)amd.com>
Reviewed-by: Chaitanya Dhere <chaitanya.dhere(a)amd.com>
Signed-off-by: Alvin Lee <alvin.lee2(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
[m.masimov(a)maxima.ru: In order to adapt this patch to branch 6.1
only changes related to finding the SubVP pipe were applied
as in 6.1 drr_pipe is passed as a function argument.]
Signed-off-by: Murad Masimov <m.masimov(a)maxima.ru>
---
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 85e0d1c2a908..4b0719392d28 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -862,6 +862,7 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struc
int16_t stretched_drr_us = 0;
int16_t drr_stretched_vblank_us = 0;
int16_t max_vblank_mallregion = 0;
+ bool subvp_found = false;
// Find SubVP pipe
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -873,10 +874,15 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struc
continue;
// Find the SubVP pipe
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ subvp_found = true;
break;
+ }
}
+ if (!subvp_found)
+ return false;
+
main_timing = &pipe->stream->timing;
phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
drr_timing = &drr_pipe->stream->timing;
--
2.39.2
From: Dandan Zhang <zhangdandan(a)uniontech.com>
The kvm_hypercall() set for LoongArch is limited to a1-a5. So the
mention of a6 in the comment is undefined that needs to be rectified.
Reviewed-by: Bibo Mao <maobibo(a)loongson.cn>
Signed-off-by: Wentao Guan <guanwentao(a)uniontech.com>
Signed-off-by: Dandan Zhang <zhangdandan(a)uniontech.com>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
Signed-off-by: WangYuli <wangyuli(a)uniontech.com>
---
arch/loongarch/include/asm/kvm_para.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 4ba2312e5f8c..6d5e9b6c5714 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -28,9 +28,9 @@
* Hypercall interface for KVM hypervisor
*
* a0: function identifier
- * a1-a6: args
+ * a1-a5: args
* Return value will be placed in a0.
- * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ * Up to 5 arguments are passed in a1, a2, a3, a4, a5.
*/
static __always_inline long kvm_hypercall0(u64 fid)
{
--
2.43.0
Spec says SW is expected to round up to the nearest 128K, if not already
aligned for the CC unit view of CCS. We are seeing the assert sometimes
pop on BMG to tell us that there is a hole between GSM and CCS, as well
as popping other asserts with having a vram size with strange alignment,
which is likely caused by misaligned offset here.
BSpec: 68023
Fixes: b5c2ca0372dc ("drm/xe/xe2hpg: Determine flat ccs offset for vram")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com>
Cc: Akshata Jahagirdar <akshata.jahagirdar(a)intel.com>
Cc: Shuicheng Lin <shuicheng.lin(a)intel.com>
Cc: Matt Roper <matthew.d.roper(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.10+
---
drivers/gpu/drm/xe/xe_vram.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 7e765b1499b1..8e65cb4cc477 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -181,6 +181,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = offset_hi << 32; /* HW view bits 39:32 */
offset |= offset_lo << 6; /* HW view bits 31:6 */
+ offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
offset *= num_enabled; /* convert to SW view */
/* We don't expect any holes */
--
2.46.0
The prop->src_dpn_prop and prop.sink_dpn_prop is allocated for the _number_
of ports and it is forced as 0 index based.
The original code was correct while the change to walk the bits and use
their position as index into the arrays is not correct.
For exmple we can have the prop.source_ports=0x2, which means we have one
port, but the prop.src_dpn_prop[1] is accessing outside of the allocated
memory.
This reverts commit 6fa78e9c41471fe43052cd6feba6eae1b0277ae3.
Cc: stable(a)vger.kernel.org # 6.10.y
Signed-off-by: Peter Ujfalusi <peter.ujfalusi(a)linux.intel.com>
---
Hi,
The reverted patch causes major regression on soundwire causing all audio
to fail.
Interestingly the patch is only in 6.10.8 and 6.10.9, not in mainline or linux-next.
soundwire sdw-master-0-1: Program transport params failed: -22
soundwire sdw-master-0-1: Program params failed: -22
SDW1-Playback: ASoC: error at snd_soc_link_prepare on SDW1-Playback: -22
Regards,
Peter
drivers/soundwire/stream.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index 00191b1d2260..4e9e7d2a942d 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -1286,18 +1286,18 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
unsigned int port_num)
{
struct sdw_dpn_prop *dpn_prop;
- unsigned long mask;
+ u8 num_ports;
int i;
if (direction == SDW_DATA_DIR_TX) {
- mask = slave->prop.source_ports;
+ num_ports = hweight32(slave->prop.source_ports);
dpn_prop = slave->prop.src_dpn_prop;
} else {
- mask = slave->prop.sink_ports;
+ num_ports = hweight32(slave->prop.sink_ports);
dpn_prop = slave->prop.sink_dpn_prop;
}
- for_each_set_bit(i, &mask, 32) {
+ for (i = 0; i < num_ports; i++) {
if (dpn_prop[i].num == port_num)
return &dpn_prop[i];
}
--
2.46.0
commit ea5ff5d351b520524019f7ff7f9ce418de2dad87 upstream.
Until VM_DONTEXPAND was added in commit 1c1914d6e8c6 ("dma-buf: heaps:
Don't track CMA dma-buf pages under RssFile") it was possible to obtain
a mapping larger than the buffer size via mremap and bypass the overflow
check in dma_buf_mmap_internal. When using such a mapping to attempt to
fault past the end of the buffer, the CMA heap fault handler also checks
the fault offset against the buffer size, but gets the boundary wrong by
1. Fix the boundary check so that we don't read off the end of the pages
array and insert an arbitrary page in the mapping.
Reported-by: Xingyu Jin <xingyuj(a)google.com>
Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation")
Cc: stable(a)vger.kernel.org # Applicable >= 5.10. Needs adjustments only for 5.10.
Signed-off-by: T.J. Mercier <tjmercier(a)google.com>
Acked-by: John Stultz <jstultz(a)google.com>
Signed-off-by: Sumit Semwal <sumit.semwal(a)linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240830192627.2546033-1-tjme…
[TJ: Backport to 5.10. On this kernel the bug is located in
dma_heap_vm_fault which is used by both the CMA and system heaps.]
Signed-off-by: T.J. Mercier <tjmercier(a)google.com>
---
drivers/dma-buf/heaps/heap-helpers.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma-buf/heaps/heap-helpers.c b/drivers/dma-buf/heaps/heap-helpers.c
index d0696cf937af..a852b5e8122f 100644
--- a/drivers/dma-buf/heaps/heap-helpers.c
+++ b/drivers/dma-buf/heaps/heap-helpers.c
@@ -161,7 +161,7 @@ static vm_fault_t dma_heap_vm_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct heap_helper_buffer *buffer = vma->vm_private_data;
- if (vmf->pgoff > buffer->pagecount)
+ if (vmf->pgoff >= buffer->pagecount)
return VM_FAULT_SIGBUS;
vmf->page = buffer->pages[vmf->pgoff];
--
2.46.0.662.g92d0881bb0-goog
The patch titled
Subject: mm/huge_memory: ensure huge_zero_folio won't have large_rmappable flag set
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-huge_memory-ensure-huge_zero_folio-wont-have-large_rmappable-flag-set.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm/huge_memory: ensure huge_zero_folio won't have large_rmappable flag set
Date: Sat, 14 Sep 2024 09:53:06 +0800
Ensure huge_zero_folio won't have large_rmappable flag set. So it can be
reported as thp,zero correctly through stable_page_flags().
Link: https://lkml.kernel.org/r/20240914015306.3656791-1-linmiaohe@huawei.com
Fixes: 5691753d73a2 ("mm: convert huge_zero_page to huge_zero_folio")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 2 ++
1 file changed, 2 insertions(+)
--- a/mm/huge_memory.c~mm-huge_memory-ensure-huge_zero_folio-wont-have-large_rmappable-flag-set
+++ a/mm/huge_memory.c
@@ -220,6 +220,8 @@ retry:
count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
return false;
}
+ /* Ensure zero folio won't have large_rmappable flag set. */
+ folio_clear_large_rmappable(zero_folio);
preempt_disable();
if (cmpxchg(&huge_zero_folio, NULL, zero_folio)) {
preempt_enable();
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-huge_memory-ensure-huge_zero_folio-wont-have-large_rmappable-flag-set.patch
mm-memory-failure-fix-vm_bug_on_pagepagepoisonedpage-when-unpoison-memory.patch
The patch titled
Subject: mm/hugetlb.c: fix UAF of vma in hugetlb fault pathway
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlbc-fix-uaf-of-vma-in-hugetlb-fault-pathway.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Vishal Moola (Oracle)" <vishal.moola(a)gmail.com>
Subject: mm/hugetlb.c: fix UAF of vma in hugetlb fault pathway
Date: Sat, 14 Sep 2024 12:41:19 -0700
Syzbot reports a UAF in hugetlb_fault(). This happens because
vmf_anon_prepare() could drop the per-VMA lock and allow the current VMA
to be freed before hugetlb_vma_unlock_read() is called.
We can fix this by using a modified version of vmf_anon_prepare() that
doesn't release the VMA lock on failure, and then release it ourselves
after hugetlb_vma_unlock_read().
Link: https://lkml.kernel.org/r/20240914194243.245-2-vishal.moola@gmail.com
Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
Reported-by: syzbot+2dab93857ee95f2eeb08(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Signed-off-by: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlbc-fix-uaf-of-vma-in-hugetlb-fault-pathway
+++ a/mm/hugetlb.c
@@ -6065,7 +6065,7 @@ retry_avoidcopy:
* When the original hugepage is shared one, it does not have
* anon_vma prepared.
*/
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out_release_all;
@@ -6264,7 +6264,7 @@ static vm_fault_t hugetlb_no_page(struct
}
if (!(vma->vm_flags & VM_MAYSHARE)) {
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out;
}
@@ -6395,6 +6395,14 @@ static vm_fault_t hugetlb_no_page(struct
folio_unlock(folio);
out:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() is
+ * the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return ret;
@@ -6616,6 +6624,14 @@ out_ptl:
}
out_mutex:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() in
+ * hugetlb_wp() is the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
* Generally it's safe to hold refcount during waiting page lock. But
_
Patches currently in -mm which might be from vishal.moola(a)gmail.com are
mm-change-vmf_anon_prepare-to-__vmf_anon_prepare.patch
mm-hugetlbc-fix-uaf-of-vma-in-hugetlb-fault-pathway.patch
The patch titled
Subject: mm: change vmf_anon_prepare() to __vmf_anon_prepare()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-change-vmf_anon_prepare-to-__vmf_anon_prepare.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Vishal Moola (Oracle)" <vishal.moola(a)gmail.com>
Subject: mm: change vmf_anon_prepare() to __vmf_anon_prepare()
Date: Sat, 14 Sep 2024 12:41:18 -0700
Some callers of vmf_anon_prepare() may not want us to release the per-VMA
lock ourselves. Rename vmf_anon_prepare() to __vmf_anon_prepare() and let
the callers drop the lock when desired.
Also, make vmf_anon_prepare() a wrapper that releases the per-VMA lock
itself for any callers that don't care.
This is in preparation to fix this bug reported by syzbot:
https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Link: https://lkml.kernel.org/r/20240914194243.245-1-vishal.moola@gmail.com
Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
Reported-by: syzbot+2dab93857ee95f2eeb08(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Signed-off-by: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/internal.h | 11 ++++++++++-
mm/memory.c | 8 +++-----
2 files changed, 13 insertions(+), 6 deletions(-)
--- a/mm/internal.h~mm-change-vmf_anon_prepare-to-__vmf_anon_prepare
+++ a/mm/internal.h
@@ -310,7 +310,16 @@ static inline void wake_throttle_isolate
wake_up(wqh);
}
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf);
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf);
+static inline vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+{
+ vm_fault_t ret = __vmf_anon_prepare(vmf);
+
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vmf->vma);
+ return ret;
+}
+
vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
--- a/mm/memory.c~mm-change-vmf_anon_prepare-to-__vmf_anon_prepare
+++ a/mm/memory.c
@@ -3259,7 +3259,7 @@ static inline vm_fault_t vmf_can_call_fa
}
/**
- * vmf_anon_prepare - Prepare to handle an anonymous fault.
+ * __vmf_anon_prepare - Prepare to handle an anonymous fault.
* @vmf: The vm_fault descriptor passed from the fault handler.
*
* When preparing to insert an anonymous page into a VMA from a
@@ -3273,7 +3273,7 @@ static inline vm_fault_t vmf_can_call_fa
* Return: 0 if fault handling can proceed. Any other value should be
* returned to the caller.
*/
-vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret = 0;
@@ -3281,10 +3281,8 @@ vm_fault_t vmf_anon_prepare(struct vm_fa
if (likely(vma->anon_vma))
return 0;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
- if (!mmap_read_trylock(vma->vm_mm)) {
- vma_end_read(vma);
+ if (!mmap_read_trylock(vma->vm_mm))
return VM_FAULT_RETRY;
- }
}
if (__anon_vma_prepare(vma))
ret = VM_FAULT_OOM;
_
Patches currently in -mm which might be from vishal.moola(a)gmail.com are
mm-change-vmf_anon_prepare-to-__vmf_anon_prepare.patch
mm-hugetlbc-fix-uaf-of-vma-in-hugetlb-fault-pathway.patch
Code expects array only with 2 items which should be checked.
But also item checking is not working as it should likely because of
incorrect items description.
Fixes: d50f974c4f7f ("dt-bindings: serial: Convert rs485 bindings to json-schema")
Signed-off-by: Michal Simek <michal.simek(a)amd.com>
Cc: <stable(a)vger.kernel.org>
---
Changes in v2:
- Remove maxItems properties which are not needed
- Add stable ML to CC
.../devicetree/bindings/serial/rs485.yaml | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/Documentation/devicetree/bindings/serial/rs485.yaml b/Documentation/devicetree/bindings/serial/rs485.yaml
index 9418fd66a8e9..9665de41762e 100644
--- a/Documentation/devicetree/bindings/serial/rs485.yaml
+++ b/Documentation/devicetree/bindings/serial/rs485.yaml
@@ -18,16 +18,15 @@ properties:
description: prop-encoded-array <a b>
$ref: /schemas/types.yaml#/definitions/uint32-array
items:
- items:
- - description: Delay between rts signal and beginning of data sent in
- milliseconds. It corresponds to the delay before sending data.
- default: 0
- maximum: 100
- - description: Delay between end of data sent and rts signal in milliseconds.
- It corresponds to the delay after sending data and actual release
- of the line.
- default: 0
- maximum: 100
+ - description: Delay between rts signal and beginning of data sent in
+ milliseconds. It corresponds to the delay before sending data.
+ default: 0
+ maximum: 50
+ - description: Delay between end of data sent and rts signal in milliseconds.
+ It corresponds to the delay after sending data and actual release
+ of the line.
+ default: 0
+ maximum: 100
rs485-rts-active-high:
description: drive RTS high when sending (this is the default).
--
2.43.0
power_supply_property_is_writeable() gets called from the is_visible()
callback for the sysfs attributes of power_supply class devices and for
the sysfs attributes of power_supply core instantiated hwmon class devices.
These sysfs attributes get registered by the device_add() respectively
power_supply_add_hwmon_sysfs() calls in power_supply_register().
use_cnt gets initialized to 0 and is incremented only after these calls.
So when power_supply_property_is_writeable() gets called it always return
-ENODEV because of use_cnt == 0.
This causes all the attributes to have permissions of 444 even those which
should be writable. This used to be a problem only for hwmon sysfs
attributes but since commit be6299c6e55e ("power: supply: sysfs: use
power_supply_property_is_writeable()") this now also impacts power_supply
class sysfs attributes.
Fixes: be6299c6e55e ("power: supply: sysfs: use power_supply_property_is_writeable()")
Fixes: e67d4dfc9ff1 ("power: supply: Add HWMON compatibility layer")
Cc: stable(a)vger.kernel.org
Cc: Thomas Weißschuh <linux(a)weissschuh.net>
Cc: Andrey Smirnov <andrew.smirnov(a)gmail.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
The fixing of be6299c6e55e is a 6.11 regression fix, the fixing of
e67d4dfc9ff1 hwmon attr never being writable is a stable series fix.
This supersedes the "power: supply: sysfs: Revert use
power_supply_property_is_writeable()" patch.
---
drivers/power/supply/power_supply_core.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 0417fb34e846..49534458a9f7 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -1231,11 +1231,7 @@ EXPORT_SYMBOL_GPL(power_supply_set_property);
int power_supply_property_is_writeable(struct power_supply *psy,
enum power_supply_property psp)
{
- if (atomic_read(&psy->use_cnt) <= 0 ||
- !psy->desc->property_is_writeable)
- return -ENODEV;
-
- return psy->desc->property_is_writeable(psy, psp);
+ return psy->desc->property_is_writeable && psy->desc->property_is_writeable(psy, psp);
}
EXPORT_SYMBOL_GPL(power_supply_property_is_writeable);
--
2.46.0
Syzbot reports a UAF in hugetlb_fault(). This happens because
vmf_anon_prepare() could drop the per-VMA lock and allow the current VMA
to be freed before hugetlb_vma_unlock_read() is called.
We can fix this by using a modified version of vmf_anon_prepare() that
doesn't release the VMA lock on failure, and then release it ourselves
after hugetlb_vma_unlock_read().
Reported-by: syzbot+2dab93857ee95f2eeb08(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/00000000000067c20b06219fbc26@google.com/
Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
Signed-off-by: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
---
mm/hugetlb.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5c77defad295..190fa05635f4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5915,7 +5915,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
* When the original hugepage is shared one, it does not have
* anon_vma prepared.
*/
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out_release_all;
@@ -6114,7 +6114,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
}
if (!(vma->vm_flags & VM_MAYSHARE)) {
- ret = vmf_anon_prepare(vmf);
+ ret = __vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out;
}
@@ -6245,6 +6245,14 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
folio_unlock(folio);
out:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() is
+ * the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return ret;
@@ -6466,6 +6474,14 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
out_mutex:
hugetlb_vma_unlock_read(vma);
+
+ /*
+ * We must check to release the per-VMA lock. __vmf_anon_prepare() in
+ * hugetlb_wp() is the only way ret can be set to VM_FAULT_RETRY.
+ */
+ if (unlikely(ret & VM_FAULT_RETRY))
+ vma_end_read(vma);
+
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
* Generally it's safe to hold refcount during waiting page lock. But
--
2.45.0
In the svc_i3c_master_probe function, &master->hj_work is bound with
svc_i3c_master_hj_work, &master->ibi_work is bound with
svc_i3c_master_ibi_work. And svc_i3c_master_ibi_work can start the
hj_work, svc_i3c_master_irq_handler can start the ibi_work.
If we remove the module which will call svc_i3c_master_remove to
make cleanup, it will free master->base through i3c_master_unregister
while the work mentioned above will be used. The sequence of operations
that may lead to a UAF bug is as follows:
CPU0 CPU1
| svc_i3c_master_hj_work
svc_i3c_master_remove |
i3c_master_unregister(&master->base)|
device_unregister(&master->dev) |
device_release |
//free master->base |
| i3c_master_do_daa(&master->base)
| //use master->base
Fix it by ensuring that the work is canceled before proceeding with the
cleanup in svc_i3c_master_remove.
Fixes: 0f74f8b6675c ("i3c: Make i3c_master_unregister() return void")
Signed-off-by: Kaixin Wang <kxwang23(a)m.fudan.edu.cn>
Reviewed-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
v2:
- add fixes tag and cc stable, suggested by Frank
- add Reviewed-by label from Miquel
- Link to v1: https://lore.kernel.org/r/20240911150135.839946-1-kxwang23@m.fudan.edu.cn
---
drivers/i3c/master/svc-i3c-master.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c
index 0a68fd1b81d4..e084ba648b4a 100644
--- a/drivers/i3c/master/svc-i3c-master.c
+++ b/drivers/i3c/master/svc-i3c-master.c
@@ -1775,6 +1775,7 @@ static void svc_i3c_master_remove(struct platform_device *pdev)
{
struct svc_i3c_master *master = platform_get_drvdata(pdev);
+ cancel_work_sync(&master->hj_work);
i3c_master_unregister(&master->base);
pm_runtime_dont_use_autosuspend(&pdev->dev);
--
2.39.1.windows.1
This series updates the driver for the veml6030 ALS and adds support for
the veml6035, which shares most of its functionality with the former.
The most relevant updates for the veml6030 are the resolution correction
to meet the datasheet update that took place with Rev 1.7, 28-Nov-2023,
and a fix to avoid a segmentation fault when reading the
in_illuminance_period_available attribute.
Vishay does not host the Product Information Notification where the
resolution correction was introduced, but it can still be found
online[1], and the corrected value is the one listed on the latest
version of the datasheet[2] (Rev. 1.7, 28-Nov-2023) and application
note[3] (Rev. 17-Jan-2024).
Link: https://www.farnell.com/datasheets/4379688.pdf [1]
Link: https://www.vishay.com/docs/84366/veml6030.pdf [2]
Link: https://www.vishay.com/docs/84367/designingveml6030.pdf [3]
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
---
Javier Carrasco (7):
dt-bindings: iio: light: veml6030: rename to add manufacturer
dt-bindings: iio: light: veml6030: add veml6035
iio: light: veml6030: fix IIO device retrieval from embedded device
iio: light: veml6030: make use of regmap_set_bits()
iio: light: veml6030: update sensor resolution
iio: light: veml6030: add set up delay after any power on sequence
iio: light: veml6030: add support for veml6035
.../light/{veml6030.yaml => vishay,veml6030.yaml} | 42 ++-
drivers/iio/light/veml6030.c | 323 ++++++++++++++++++---
2 files changed, 319 insertions(+), 46 deletions(-)
---
base-commit: 5acd9952f95fb4b7da6d09a3be39195a80845eb6
change-id: 20240903-veml6035-7a91bc088c6f
Best regards,
--
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
The files in sysfs are created during device_add(). psy->use_cnt
is not incremented yet. So attributes are created readonly
without checking desc->property_is_writeable() and writeable
files are readonly.
To fix this, revert back to calling desc->property_is_writeable()
directly without using the helper.
Fixes: be6299c6e55e ("power: supply: sysfs: use power_supply_property_is_writeable()")
Signed-off-by: Andreas Kemnade <andreas(a)kemnade.info>
Cc: stable(a)vger.kernel.org # 6.11
---
drivers/power/supply/power_supply_sysfs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index 3e63d165b2f70..b86e11bdc07ef 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -379,7 +379,8 @@ static umode_t power_supply_attr_is_visible(struct kobject *kobj,
int property = psy->desc->properties[i];
if (property == attrno) {
- if (power_supply_property_is_writeable(psy, property) > 0)
+ if (psy->desc->property_is_writeable &&
+ psy->desc->property_is_writeable(psy, property) > 0)
mode |= S_IWUSR;
return mode;
--
2.39.2
Supposing first scenario with a virtio_blk driver.
CPU0 CPU1
blk_mq_try_issue_directly()
__blk_mq_issue_directly()
q->mq_ops->queue_rq()
virtio_queue_rq()
blk_mq_stop_hw_queue()
virtblk_done()
blk_mq_request_bypass_insert() 1) store
blk_mq_start_stopped_hw_queue()
clear_bit(BLK_MQ_S_STOPPED) 3) store
blk_mq_run_hw_queue()
if (!blk_mq_hctx_has_pending()) 4) load
return
blk_mq_sched_dispatch_requests()
blk_mq_run_hw_queue()
if (!blk_mq_hctx_has_pending())
return
blk_mq_sched_dispatch_requests()
if (blk_mq_hctx_stopped()) 2) load
return
__blk_mq_sched_dispatch_requests()
Supposing another scenario.
CPU0 CPU1
blk_mq_requeue_work()
blk_mq_insert_request() 1) store
virtblk_done()
blk_mq_start_stopped_hw_queue()
blk_mq_run_hw_queues() clear_bit(BLK_MQ_S_STOPPED) 3) store
blk_mq_run_hw_queue()
if (!blk_mq_hctx_has_pending()) 4) load
return
blk_mq_sched_dispatch_requests()
if (blk_mq_hctx_stopped()) 2) load
continue
blk_mq_run_hw_queue()
Both scenarios are similar, the full memory barrier should be inserted
between 1) and 2), as well as between 3) and 4) to make sure that either
CPU0 sees BLK_MQ_S_STOPPED is cleared or CPU1 sees dispatch list.
Otherwise, either CPU will not rerun the hardware queue causing starvation
of the request.
The easy way to fix it is to add the essential full memory barrier into
helper of blk_mq_hctx_stopped(). In order to not affect the fast path
(hardware queue is not stopped most of the time), we only insert the
barrier into the slow path. Actually, only slow path needs to care about
missing of dispatching the request to the low-level device driver.
Fixes: 320ae51feed5 ("blk-mq: new multi-queue block IO queueing mechanism")
Cc: stable(a)vger.kernel.org
Cc: Muchun Song <muchun.song(a)linux.dev>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-mq.c | 6 ++++++
block/blk-mq.h | 13 +++++++++++++
2 files changed, 19 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ff6df6c7eeb25..b90c1680cb780 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2413,6 +2413,12 @@ void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
return;
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+ /*
+ * Pairs with the smp_mb() in blk_mq_hctx_stopped() to order the
+ * clearing of BLK_MQ_S_STOPPED above and the checking of dispatch
+ * list in the subsequent routine.
+ */
+ smp_mb__after_atomic();
blk_mq_run_hw_queue(hctx, async);
}
EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 260beea8e332c..f36f3bff70d86 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -228,6 +228,19 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
+ /* Fast path: hardware queue is not stopped most of the time. */
+ if (likely(!test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+ return false;
+
+ /*
+ * This barrier is used to order adding of dispatch list before and
+ * the test of BLK_MQ_S_STOPPED below. Pairs with the memory barrier
+ * in blk_mq_start_stopped_hw_queue() so that dispatch code could
+ * either see BLK_MQ_S_STOPPED is cleared or dispatch list is not
+ * empty to avoid missing dispatching requests.
+ */
+ smp_mb();
+
return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
--
2.20.1
Supposing the following scenario.
CPU0 CPU1
blk_mq_insert_request() 1) store
blk_mq_unquiesce_queue()
blk_queue_flag_clear() 3) store
blk_mq_run_hw_queues()
blk_mq_run_hw_queue()
if (!blk_mq_hctx_has_pending()) 4) load
return
blk_mq_run_hw_queue()
if (blk_queue_quiesced()) 2) load
return
blk_mq_sched_dispatch_requests()
The full memory barrier should be inserted between 1) and 2), as well as
between 3) and 4) to make sure that either CPU0 sees QUEUE_FLAG_QUIESCED is
cleared or CPU1 sees dispatch list or setting of bitmap of software queue.
Otherwise, either CPU will not rerun the hardware queue causing starvation.
So the first solution is to 1) add a pair of memory barrier to fix the
problem, another solution is to 2) use hctx->queue->queue_lock to
synchronize QUEUE_FLAG_QUIESCED. Here, we chose 2) to fix it since memory
barrier is not easy to be maintained.
Fixes: f4560ffe8cec ("blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue")
Cc: stable(a)vger.kernel.org
Cc: Muchun Song <muchun.song(a)linux.dev>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-mq.c | 47 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 13 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2d0f22de0c7f..ff6df6c7eeb25 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2202,6 +2202,24 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
+static inline bool blk_mq_hw_queue_need_run(struct blk_mq_hw_ctx *hctx)
+{
+ bool need_run;
+
+ /*
+ * When queue is quiesced, we may be switching io scheduler, or
+ * updating nr_hw_queues, or other things, and we can't run queue
+ * any more, even blk_mq_hctx_has_pending() can't be called safely.
+ *
+ * And queue will be rerun in blk_mq_unquiesce_queue() if it is
+ * quiesced.
+ */
+ __blk_mq_run_dispatch_ops(hctx->queue, false,
+ need_run = !blk_queue_quiesced(hctx->queue) &&
+ blk_mq_hctx_has_pending(hctx));
+ return need_run;
+}
+
/**
* blk_mq_run_hw_queue - Start to run a hardware queue.
* @hctx: Pointer to the hardware queue to run.
@@ -2222,20 +2240,23 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
- /*
- * When queue is quiesced, we may be switching io scheduler, or
- * updating nr_hw_queues, or other things, and we can't run queue
- * any more, even __blk_mq_hctx_has_pending() can't be called safely.
- *
- * And queue will be rerun in blk_mq_unquiesce_queue() if it is
- * quiesced.
- */
- __blk_mq_run_dispatch_ops(hctx->queue, false,
- need_run = !blk_queue_quiesced(hctx->queue) &&
- blk_mq_hctx_has_pending(hctx));
+ need_run = blk_mq_hw_queue_need_run(hctx);
+ if (!need_run) {
+ unsigned long flags;
- if (!need_run)
- return;
+ /*
+ * Synchronize with blk_mq_unquiesce_queue(), because we check
+ * if hw queue is quiesced locklessly above, we need the use
+ * ->queue_lock to make sure we see the up-to-date status to
+ * not miss rerunning the hw queue.
+ */
+ spin_lock_irqsave(&hctx->queue->queue_lock, flags);
+ need_run = blk_mq_hw_queue_need_run(hctx);
+ spin_unlock_irqrestore(&hctx->queue->queue_lock, flags);
+
+ if (!need_run)
+ return;
+ }
if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
blk_mq_delay_run_hw_queue(hctx, 0);
--
2.20.1
Supposing the following scenario with a virtio_blk driver.
CPU0 CPU1 CPU2
blk_mq_try_issue_directly()
__blk_mq_issue_directly()
q->mq_ops->queue_rq()
virtio_queue_rq()
blk_mq_stop_hw_queue()
virtblk_done()
blk_mq_try_issue_directly()
if (blk_mq_hctx_stopped())
blk_mq_request_bypass_insert() blk_mq_run_hw_queue()
blk_mq_run_hw_queue() blk_mq_run_hw_queue()
blk_mq_insert_request()
return
After CPU0 has marked the queue as stopped, CPU1 will see the queue is
stopped. But before CPU1 puts the request on the dispatch list, CPU2
receives the interrupt of completion of request, so it will run the
hardware queue and marks the queue as non-stopped. Meanwhile, CPU1 also
runs the same hardware queue. After both CPU1 and CPU2 complete
blk_mq_run_hw_queue(), CPU1 just puts the request to the same hardware
queue and returns. It misses dispatching a request. Fix it by running
the hardware queue explicitly. And blk_mq_request_issue_directly()
should handle a similar situation. Fix it as well.
Fixes: d964f04a8fde ("blk-mq: fix direct issue")
Cc: stable(a)vger.kernel.org
Cc: Muchun Song <muchun.song(a)linux.dev>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-mq.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e3c3c0c21b553..b2d0f22de0c7f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2619,6 +2619,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
blk_mq_insert_request(rq, 0);
+ blk_mq_run_hw_queue(hctx, false);
return;
}
@@ -2649,6 +2650,7 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
blk_mq_insert_request(rq, 0);
+ blk_mq_run_hw_queue(hctx, false);
return BLK_STS_OK;
}
--
2.20.1
Hi Greg! I noticed a bug report in bz:
https://bugzilla.kernel.org/show_bug.cgi?id=219269
> Fair enough, you get a compiler warning:
>
> kernel/trace/trace_kprobe.c: In function ‘validate_probe_symbol’:
> kernel/trace/trace_kprobe.c:810:23: error: implicit declaration of function ‘find_module’; did you mean init_module’? [-Wimplicit-function-declaration]
> 810 | mod = find_module(modname);
> | ^~~~~~~~~~~
> | init_module
> kernel/trace/trace_kprobe.c:810:21: error: assignment to ‘struct module *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion]
> 810 | mod = find_module(modname);
> |
>
> but there is no find_module symbol when CONFIG_MODULES is disabled.
I *very briefly* looked into this. I might be wrong, but looked a bit
like "tracing/kprobes: Add symbol counting check when module loads"
caused this and backporting b10545b6b86b7a ("tracing/kprobes: Fix build
error when find_module() is not available") [v6.11-rc1] would fix this
(which applies cleanly).
Shall I ask the reporter to confirm or is that already enough for you?
Ciao, Thorsten
The driver calls netif_napi_set_irq() and then calls netif_napi_add(),
which calls netif_napi_add_weight(). At the end of
netif_napi_add_weight() is a call to netif_napi_set_irq(napi, -1), which
clears the previously set napi->irq value. Fix this by calling
netif_napi_set_irq() after calling netif_napi_add().
This was found when reviewing another patch and I have no way to test
this, but the fix seemed relatively straight forward.
Cc: stable(a)vger.kernel.org
Fixes: bc6107771bb4 ("eth: fbnic: Allocate a netdevice and napi vectors with queues")
Signed-off-by: Brett Creeley <brett.creeley(a)amd.com>
---
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 0ed4c9fff5d8..72f88ae7815f 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -1012,14 +1012,14 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
nv->fbd = fbd;
nv->v_idx = v_idx;
- /* Record IRQ to NAPI struct */
- netif_napi_set_irq(&nv->napi,
- pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));
-
/* Tie napi to netdev */
list_add(&nv->napis, &fbn->napis);
netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll);
+ /* Record IRQ to NAPI struct */
+ netif_napi_set_irq(&nv->napi,
+ pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx));
+
/* Tie nv back to PCIe dev */
nv->dev = fbd->dev;
--
2.17.1
Callers can pass null in filter (i.e. from returned from the function
dpp1_dscl_get_filter_coeffs_64p) and a null check is added to ensure that
is not the case.
Cc: stable(a)vger.kernel.org
Fixes: 5e9a81b2c465 ("drm/amd/display: separate scl functions out from dcn10_dpp")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c
index 808bca9fb804..bcafeb7b5b79 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c
@@ -248,6 +248,9 @@ static void dpp1_dscl_set_scaler_filter(
int pair;
uint16_t odd_coef, even_coef;
+ if (!filter)
+ return;
+
REG_SET_3(SCL_COEF_RAM_TAP_SELECT, 0,
SCL_COEF_RAM_TAP_PAIR_IDX, 0,
SCL_COEF_RAM_PHASE, 0,
--
2.25.1
The test if a table is a permanently empty one, inspects the address of
the registered ctl_table argument.
However as sysctl_mount_point is an empty array and does not occupy and
space it can end up sharing an address with another object in memory.
If that other object itself is a "struct ctl_table" then registering
that table will fail as it's incorrectly recognized as permanently empty.
Avoid this issue by adding a dummy element to the array so that the
array is not empty anymore and the potential address sharing is avoided.
Explicitly register the table with zero elements as otherwise the dummy
element would be recognized as a sentinel element which would lead to a
runtime warning from the sysctl core.
While the issue seems unlikely to be encountered at this time, this
seems mostly be due to luck.
Also a future change, constifying sysctl_mount_point and root_table, can
reliably trigger this issue on clang 18.
Given that empty arrays are non-standard in the first place,
avoid them if possible.
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202408051453.f638857e-lkp@intel.com
Fixes: 4a7b29f65094 ("sysctl: move sysctl type to ctl_table_header")
Fixes: a35dd3a786f5 ("sysctl: drop now unnecessary out-of-bounds check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
This was originally part of a feature series [0], but is resubmitted on
its own to make it into v6.11To.
[0] https://lore.kernel.org/lkml/20240805-sysctl-const-api-v2-0-52c85f02ee5e@we…
---
fs/proc/proc_sysctl.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 9553e77c9d31..d11ebc055ce0 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -29,8 +29,13 @@ static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
-/* Support for permanently empty directories */
-static struct ctl_table sysctl_mount_point[] = { };
+/*
+ * Support for permanently empty directories.
+ * Must be non-empty to avoid sharing an address with other tables.
+ */
+static struct ctl_table sysctl_mount_point[] = {
+ { }
+};
/**
* register_sysctl_mount_point() - registers a sysctl mount point
@@ -42,7 +47,7 @@ static struct ctl_table sysctl_mount_point[] = { };
*/
struct ctl_table_header *register_sysctl_mount_point(const char *path)
{
- return register_sysctl(path, sysctl_mount_point);
+ return register_sysctl_sz(path, sysctl_mount_point, 0);
}
EXPORT_SYMBOL(register_sysctl_mount_point);
---
base-commit: 3e9bff3bbe1355805de919f688bef4baefbfd436
change-id: 20240827-sysctl-const-shared-identity-9ab816e5fdfb
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
When dwc3_resume_common() returns an error, runtime pm is left in
disabled state in dwc3_resume(). The next dwc3_suspend_common()
should be skipped as the device is already in suspended state but
it's not because power.disable_depth is non-zero.
Ensures runtime PM is always re-enabled even after failed resume
attempts.
Fixes: 68c26fe58182 ("usb: dwc3: set pm runtime active before resume common")
Cc: stable(a)vger.kernel.org
Signed-off-by: Roy Luo <royluo(a)google.com>
---
drivers/usb/dwc3/core.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index ccc3895dbd7f..1928b074b2df 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2537,7 +2537,7 @@ static int dwc3_suspend(struct device *dev)
static int dwc3_resume(struct device *dev)
{
struct dwc3 *dwc = dev_get_drvdata(dev);
- int ret;
+ int ret = 0;
pinctrl_pm_select_default_state(dev);
@@ -2547,12 +2547,11 @@ static int dwc3_resume(struct device *dev)
ret = dwc3_resume_common(dwc, PMSG_RESUME);
if (ret) {
pm_runtime_set_suspended(dev);
- return ret;
}
pm_runtime_enable(dev);
- return 0;
+ return ret;
}
static void dwc3_complete(struct device *dev)
base-commit: ad618736883b8970f66af799e34007475fe33a68
--
2.46.0.469.g59c65b2a67-goog
Hello,
This series is based on commit
3e9bff3bbe13 Merge tag 'vfs-6.11-rc6.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs
of Mainline Linux.
The first patch fixes conversion to "devm_request_threaded_irq()" where
the IRQF_ONESHOT flag should have been added since the handler is NULL.
The second patch fixes the error handling when IRQ request fails in the
probe function. The existing error handling doesn't cleanup the changes
performed prior to the IRQ request invocation.
Regards,
Siddharth.
Siddharth Vadapalli (2):
PCI: dra7xx: Fix threaded IRQ request for "dra7xx-pcie-main" IRQ
PCI: dra7xx: Fix error handling when IRQ request fails in probe
drivers/pci/controller/dwc/pci-dra7xx.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
--
2.40.1
In the linux-4.19.y kernel kcmp_test and compaction_test kselftest fails to compile.
kcmp_test.c: In function ���main���:
kcmp_test.c:92:17: warning: implicit declaration of function ���ksft_set_plan��� [-Wimplicit-function-declaration]
92 | ksft_set_plan(3);
|
^~~~~~~~~~~~~
and
compaction_test.c: In function ���main���:
compaction_test.c:186:9: warning: implicit declaration of function ���ksft_set_plan��� [-Wimplicit-function-declaration]
186 | ksft_set_plan(1);
| ^~~~~~~~~~~~~
The function definition for ksft_set_plan() is introduced by commit
5821ba969511 ("selftests: Add test plan API to kselftest.h and adjust
callers") which is present from linux-5.2.y onwards.
Samasth Norway Ananda (2):
selftests/vm: remove call to ksft_set_plan()
selftests/kcmp: remove call to ksft_set_plan()
tools/testing/selftests/kcmp/kcmp_test.c | 1 -
tools/testing/selftests/vm/compaction_test.c | 2 --
2 files changed, 3 deletions(-)
--
2.45.2
Hi,
I have discovered a 100% reliable soft lockup on boot on my laptop:
Purism Librem 14, Intel Core i7-10710U, 48Gb RAM, Samsung Evo Plus 970
SSD, CoreBoot BIOS, grub bootloader, Arch Linux.
The last working release is kernel 6.9.10, every release from 6.10
onwards reliably exhibit the issue, which, based on journalctl logs,
seems to be triggered somewhere in systemd-udev:
https://gitlab.archlinux.org/-/project/42594/uploads/04583baf22189a0a8bb2f8…
Bisect points to commit 5186ba33234c9a90833f7c93ce7de80e25fac6f5
At a glance, I see two potentially problematic changes in this diff.
Specifically, in the refactoring to move the call to rdt_ctrl_update
inside the loop that walks over r->domains :
1. the change from on_each_cpu_mask to smp_call_function_any means
that preemption is no longer disabled around the call to
rdt_ctrl_update, which could plausibly be a problem
2. there's now a race condition on the msr_params struct: afaict
there's no write barrier, so if the call to rdt_ctrl_update is
executed on a different CPU, it could plausibly read an outdated value
of the dom field, which prior to this series of patches wasn't passed
as an explicit parameter, but derived inside rdt_ctrl_update
For initial report to Arch Linux bugtracker and bisect log see:
https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/74
Best
Hugues
From: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
dsa_switch_shutdown() doesn't bring down any ports, but only disconnects
slaves from master. Packets still come afterwards into master port and the
ports are being polled for link status. This leads to crashes:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
CPU: 0 PID: 442 Comm: kworker/0:3 Tainted: G O 6.1.99+ #1
Workqueue: events_power_efficient phy_state_machine
pc : lan9303_mdio_phy_read
lr : lan9303_phy_read
Call trace:
lan9303_mdio_phy_read
lan9303_phy_read
dsa_slave_phy_read
__mdiobus_read
mdiobus_read
genphy_update_link
genphy_read_status
phy_check_link_status
phy_state_machine
process_one_work
worker_thread
Call lan9303_remove() instead to really unregister all ports before zeroing
drvdata and dsa_ptr.
Fixes: 0650bf52b31f ("net: dsa: be compatible with masters which unregister on shutdown")
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
---
drivers/net/dsa/lan9303-core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 268949939636..ecd507355f51 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1477,7 +1477,7 @@ EXPORT_SYMBOL(lan9303_remove);
void lan9303_shutdown(struct lan9303 *chip)
{
- dsa_switch_shutdown(chip->ds);
+ lan9303_remove(chip);
}
EXPORT_SYMBOL(lan9303_shutdown);
--
2.46.0
From: "Alexey Gladkov (Intel)" <legion(a)kernel.org>
TDX only supports kernel-initiated MMIO operations. The handle_mmio()
function checks if the #VE exception occurred in the kernel and rejects
the operation if it did not.
However, userspace can deceive the kernel into performing MMIO on its
behalf. For example, if userspace can point a syscall to an MMIO address,
syscall does get_user() or put_user() on it, triggering MMIO #VE. The
kernel will treat the #VE as in-kernel MMIO.
Ensure that the target MMIO address is within the kernel before decoding
instruction.
Cc: stable(a)vger.kernel.org
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Signed-off-by: Alexey Gladkov (Intel) <legion(a)kernel.org>
---
arch/x86/coco/tdx/tdx.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 078e2bac2553..d6e6407e3999 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -16,6 +16,7 @@
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/traps.h>
/* MMIO direction */
#define EPT_READ 0
@@ -434,6 +435,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EINVAL;
}
+ if (!fault_in_kernel_space(ve->gla)) {
+ WARN_ONCE(1, "Access to userspace address is not supported");
+ return -EINVAL;
+ }
+
/*
* Reject EPT violation #VEs that split pages.
*
--
2.46.0
From: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Since drm_sched_entity_modify_sched() can modify the entities run queue,
lets make sure to only dereference the pointer once so both adding and
waking up are guaranteed to be consistent.
Alternative of moving the spin_unlock to after the wake up would for now
be more problematic since the same lock is taken inside
drm_sched_rq_update_fifo().
v2:
* Improve commit message. (Philipp)
* Cache the scheduler pointer directly. (Christian)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Fixes: b37aced31eb0 ("drm/scheduler: implement a function to modify sched list")
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Luben Tuikov <ltuikov89(a)gmail.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Philipp Stanner <pstanner(a)redhat.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v5.7+
Reviewed-by: Christian König <christian.koenig(a)amd.com>
---
drivers/gpu/drm/scheduler/sched_entity.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index ae8be30472cd..76e422548d40 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -599,6 +599,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
/* first job wakes up scheduler */
if (first) {
+ struct drm_gpu_scheduler *sched;
+ struct drm_sched_rq *rq;
+
/* Add the entity to the run queue */
spin_lock(&entity->rq_lock);
if (entity->stopped) {
@@ -608,13 +611,16 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
return;
}
- drm_sched_rq_add_entity(entity->rq, entity);
+ rq = entity->rq;
+ sched = rq->sched;
+
+ drm_sched_rq_add_entity(rq, entity);
spin_unlock(&entity->rq_lock);
if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
drm_sched_rq_update_fifo(entity, submit_ts);
- drm_sched_wakeup(entity->rq->sched, entity);
+ drm_sched_wakeup(sched, entity);
}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
--
2.46.0
Disabling CRYPTO_AES_GCM_P10 in Kconfig first so that we can apply the
subsequent patches to fix data mismatch over ipsec tunnel.
Signed-off-by: Danny Tsen <dtsen(a)linux.ibm.com>
---
arch/powerpc/crypto/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 09ebcbdfb34f..46a4c85e85e2 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -107,6 +107,7 @@ config CRYPTO_AES_PPC_SPE
config CRYPTO_AES_GCM_P10
tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)"
+ depends on BROKEN
depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
select CRYPTO_LIB_AES
select CRYPTO_ALGAPI
--
2.43.0
[ Upstream commit 858430db28a5f5a11f8faa3a6fa805438e6f0851 ]
axienet_dma_err_handler can race with axienet_stop in the following
manner:
CPU 1 CPU 2
====================== ==================
axienet_stop()
napi_disable()
axienet_dma_stop()
axienet_dma_err_handler()
napi_disable()
axienet_dma_stop()
axienet_dma_start()
napi_enable()
cancel_work_sync()
free_irq()
Fix this by setting a flag in axienet_stop telling
axienet_dma_err_handler not to bother doing anything. I chose not to use
disable_work_sync to allow for easier backporting.
Signed-off-by: Sean Anderson <sean.anderson(a)linux.dev>
Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Link: https://patch.msgid.link/20240903175141.4132898-1-sean.anderson@linux.dev
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ Adjusted to apply before dmaengine support ]
Signed-off-by: Sean Anderson <sean.anderson(a)linux.dev>
---
This patch is adjusted to apply to v6.6 kernels and earlier, which do
not contain commit 6a91b846af85 ("net: axienet: Introduce dmaengine
support").
drivers/net/ethernet/xilinx/xilinx_axienet.h | 3 +++
drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++++++++
2 files changed, 11 insertions(+)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index f09f10f17d7e..2facbdfbb319 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -419,6 +419,8 @@ struct axidma_bd {
* @tx_bytes: TX byte count for statistics
* @tx_stat_sync: Synchronization object for TX stats
* @dma_err_task: Work structure to process Axi DMA errors
+ * @stopping: Set when @dma_err_task shouldn't do anything because we are
+ * about to stop the device.
* @tx_irq: Axidma TX IRQ number
* @rx_irq: Axidma RX IRQ number
* @eth_irq: Ethernet core IRQ number
@@ -481,6 +483,7 @@ struct axienet_local {
struct u64_stats_sync tx_stat_sync;
struct work_struct dma_err_task;
+ bool stopping;
int tx_irq;
int rx_irq;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 144feb7a2fda..65d7aaad43fe 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1162,6 +1162,7 @@ static int axienet_open(struct net_device *ndev)
phylink_start(lp->phylink);
/* Enable worker thread for Axi DMA error handling */
+ lp->stopping = false;
INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler);
napi_enable(&lp->napi_rx);
@@ -1217,6 +1218,9 @@ static int axienet_stop(struct net_device *ndev)
dev_dbg(&ndev->dev, "axienet_close()\n");
+ WRITE_ONCE(lp->stopping, true);
+ flush_work(&lp->dma_err_task);
+
napi_disable(&lp->napi_tx);
napi_disable(&lp->napi_rx);
@@ -1761,6 +1765,10 @@ static void axienet_dma_err_handler(struct work_struct *work)
dma_err_task);
struct net_device *ndev = lp->ndev;
+ /* Don't bother if we are going to stop anyway */
+ if (READ_ONCE(lp->stopping))
+ return;
+
napi_disable(&lp->napi_tx);
napi_disable(&lp->napi_rx);
--
2.35.1.1320.gc452695387.dirty
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7f828d5fff7d24752e1ecf6bebb6617a81f97b93
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091354-stock-suspend-e1ee@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
7f828d5fff7d ("clocksource: hyper-v: Use lapic timer in a TDX VM without paravisor")
b967df629351 ("hyperv-tlfs: Rename some HV_REGISTER_* defines for consistency")
0e3f7d120086 ("hyperv-tlfs: Change prefix of generic HV_REGISTER_* MSRs to HV_MSR_*")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f828d5fff7d24752e1ecf6bebb6617a81f97b93 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui(a)microsoft.com>
Date: Thu, 20 Jun 2024 23:16:14 -0700
Subject: [PATCH] clocksource: hyper-v: Use lapic timer in a TDX VM without
paravisor
In a TDX VM without paravisor, currently the default timer is the Hyper-V
timer, which depends on the slow VM Reference Counter MSR: the Hyper-V TSC
page is not enabled in such a VM because the VM uses Invariant TSC as a
better clocksource and it's challenging to mark the Hyper-V TSC page shared
in very early boot.
Lower the rating of the Hyper-V timer so the local APIC timer becomes the
the default timer in such a VM, and print a warning in case Invariant TSC
is unavailable in such a VM. This change should cause no perceivable
performance difference.
Cc: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Roman Kisel <romank(a)linux.microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Reviewed-by: Michael Kelley <mhklinux(a)outlook.com>
Link: https://lore.kernel.org/r/20240621061614.8339-1-decui@microsoft.com
Signed-off-by: Wei Liu <wei.liu(a)kernel.org>
Message-ID: <20240621061614.8339-1-decui(a)microsoft.com>
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..954b7cbfa2f0 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -449,9 +449,23 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
if (!ms_hyperv.paravisor_present) {
- /* To be supported: more work is required. */
+ /*
+ * Mark the Hyper-V TSC page feature as disabled
+ * in a TDX VM without paravisor so that the
+ * Invariant TSC, which is a better clocksource
+ * anyway, is used instead.
+ */
ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
+ /*
+ * The Invariant TSC is expected to be available
+ * in a TDX VM without paravisor, but if not,
+ * print a warning message. The slower Hyper-V MSR-based
+ * Ref Counter should end up being the clocksource.
+ */
+ if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
+ pr_warn("Hyper-V: Invariant TSC is unavailable\n");
+
/* HV_MSR_CRASH_CTL is unsupported. */
ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index b2a080647e41..99177835cade 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -137,7 +137,21 @@ static int hv_stimer_init(unsigned int cpu)
ce->name = "Hyper-V clockevent";
ce->features = CLOCK_EVT_FEAT_ONESHOT;
ce->cpumask = cpumask_of(cpu);
- ce->rating = 1000;
+
+ /*
+ * Lower the rating of the Hyper-V timer in a TDX VM without paravisor,
+ * so the local APIC timer (lapic_clockevent) is the default timer in
+ * such a VM. The Hyper-V timer is not preferred in such a VM because
+ * it depends on the slow VM Reference Counter MSR (the Hyper-V TSC
+ * page is not enbled in such a VM because the VM uses Invariant TSC
+ * as a better clocksource and it's challenging to mark the Hyper-V
+ * TSC page shared in very early boot).
+ */
+ if (!ms_hyperv.paravisor_present && hv_isolation_type_tdx())
+ ce->rating = 90;
+ else
+ ce->rating = 1000;
+
ce->set_state_shutdown = hv_ce_shutdown;
ce->set_state_oneshot = hv_ce_set_oneshot;
ce->set_next_event = hv_ce_set_next_event;
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 111fc9f517cb293c4213673733b980123c3b0209
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091336-family-daffodil-541d@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
111fc9f517cb ("virtio_net: disable premapped mode by default")
dc4547fbba87 ("Revert "virtio_net: rx remove premapped failover code"")
e9f3962441c0 ("virtio_net: xsk: rx: support fill with xsk buffer")
19a5a7710ee1 ("virtio_net: xsk: support wakeup")
09d2b3182c8e ("virtio_net: xsk: bind/unbind xsk for rx")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 111fc9f517cb293c4213673733b980123c3b0209 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Date: Fri, 6 Sep 2024 20:31:37 +0800
Subject: [PATCH] virtio_net: disable premapped mode by default
Now, the premapped mode encounters some problem.
http://lore.kernel.org/all/8b20cc28-45a9-4643-8e87-ba164a540c0a@oracle.com
So we disable the premapped mode by default.
We can re-enable it in the future.
Fixes: f9dac92ba908 ("virtio_ring: enable premapped mode whatever use_dma_api")
Reported-by: "Si-Wei Liu" <si-wei.liu(a)oracle.com>
Closes: http://lore.kernel.org/all/8b20cc28-45a9-4643-8e87-ba164a540c0a@oracle.com
Signed-off-by: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Tested-by: Takero Funaki <flintglass(a)gmail.com>
Link: https://patch.msgid.link/20240906123137.108741-4-xuanzhuo@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1cf80648f82a..5a1c1ec5a64b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -977,22 +977,6 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
return buf;
}
-static void virtnet_rq_set_premapped(struct virtnet_info *vi)
-{
- int i;
-
- /* disable for big mode */
- if (!vi->mergeable_rx_bufs && vi->big_packets)
- return;
-
- for (i = 0; i < vi->max_queue_pairs; i++) {
- if (virtqueue_set_dma_premapped(vi->rq[i].vq))
- continue;
-
- vi->rq[i].do_dma = true;
- }
-}
-
static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
struct virtnet_info *vi = vq->vdev->priv;
@@ -6105,8 +6089,6 @@ static int init_vqs(struct virtnet_info *vi)
if (ret)
goto err_free;
- virtnet_rq_set_premapped(vi);
-
cpus_read_lock();
virtnet_set_affinity(vi);
cpus_read_unlock();
The patch below does not apply to the 6.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y
git checkout FETCH_HEAD
git cherry-pick -x 38eef112a8e547b8c207b2a521ad4b077d792100
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091308-deceit-dingy-d575@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^..
Possible dependencies:
38eef112a8e5 ("Revert "virtio_net: big mode skip the unmap check"")
99c861b44eb1 ("virtio_net: xsk: rx: support recv merge mode")
a4e7ba702701 ("virtio_net: xsk: rx: support recv small mode")
e9f3962441c0 ("virtio_net: xsk: rx: support fill with xsk buffer")
19a5a7710ee1 ("virtio_net: xsk: support wakeup")
09d2b3182c8e ("virtio_net: xsk: bind/unbind xsk for rx")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 38eef112a8e547b8c207b2a521ad4b077d792100 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Date: Fri, 6 Sep 2024 20:31:36 +0800
Subject: [PATCH] Revert "virtio_net: big mode skip the unmap check"
This reverts commit a377ae542d8d0a20a3173da3bbba72e045bea7a9.
Signed-off-by: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Tested-by: Takero Funaki <flintglass(a)gmail.com>
Link: https://patch.msgid.link/20240906123137.108741-3-xuanzhuo@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fa8aab18484..1cf80648f82a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1006,7 +1006,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
return;
}
- if (!vi->big_packets || vi->mergeable_rx_bufs)
+ if (rq->do_dma)
virtnet_rq_unmap(rq, buf, 0);
virtnet_rq_free_buf(vi, rq, buf);
@@ -2716,7 +2716,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi,
}
} else {
while (packets < budget &&
- (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+ (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
packets++;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 33297cef3101d950cec0033a0dce0a2d2bd59999
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091305-rephrase-pastime-be42@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
33297cef3101 ("platform/x86: panasonic-laptop: Allocate 1 entry extra in the sinf array")
f1aaf914654a ("platform/x86: panasonic-laptop: Replace ACPI prints with pr_*() macros")
d5a81d8e864b ("platform/x86: panasonic-laptop: Add support for optical driver power in Y and W series")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33297cef3101d950cec0033a0dce0a2d2bd59999 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Mon, 9 Sep 2024 13:32:26 +0200
Subject: [PATCH] platform/x86: panasonic-laptop: Allocate 1 entry extra in the
sinf array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some DSDT-s have an off-by-one bug where the SINF package count is
one higher than the SQTY reported value, allocate 1 entry extra.
Also make the SQTY <-> SINF package count mismatch error more verbose
to help debugging similar issues in the future.
This fixes the panasonic-laptop driver failing to probe() on some
devices with the following errors:
[ 3.958887] SQTY reports bad SINF length SQTY: 37 SINF-pkg-count: 38
[ 3.958892] Couldn't retrieve BIOS data
[ 3.983685] Panasonic Laptop Support - With Macros: probe of MAT0019:00 failed with error -5
Fixes: 709ee531c153 ("panasonic-laptop: add Panasonic Let's Note laptop extras driver v0.94")
Cc: stable(a)vger.kernel.org
Tested-by: James Harmison <jharmison(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://lore.kernel.org/r/20240909113227.254470-2-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 39044119d2a6..ebd81846e2d5 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -337,7 +337,8 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc)
}
if (pcc->num_sifr < hkey->package.count) {
- pr_err("SQTY reports bad SINF length\n");
+ pr_err("SQTY reports bad SINF length SQTY: %lu SINF-pkg-count: %u\n",
+ pcc->num_sifr, hkey->package.count);
status = AE_ERROR;
goto end;
}
@@ -994,6 +995,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
return -ENODEV;
}
+ /*
+ * Some DSDT-s have an off-by-one bug where the SINF package count is
+ * one higher than the SQTY reported value, allocate 1 entry extra.
+ */
+ num_sifr++;
+
pcc = kzalloc(sizeof(struct pcc_acpi), GFP_KERNEL);
if (!pcc) {
pr_err("Couldn't allocate mem for pcc");
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 33297cef3101d950cec0033a0dce0a2d2bd59999
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091304-unwary-translate-85c3@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
33297cef3101 ("platform/x86: panasonic-laptop: Allocate 1 entry extra in the sinf array")
f1aaf914654a ("platform/x86: panasonic-laptop: Replace ACPI prints with pr_*() macros")
d5a81d8e864b ("platform/x86: panasonic-laptop: Add support for optical driver power in Y and W series")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33297cef3101d950cec0033a0dce0a2d2bd59999 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Mon, 9 Sep 2024 13:32:26 +0200
Subject: [PATCH] platform/x86: panasonic-laptop: Allocate 1 entry extra in the
sinf array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some DSDT-s have an off-by-one bug where the SINF package count is
one higher than the SQTY reported value, allocate 1 entry extra.
Also make the SQTY <-> SINF package count mismatch error more verbose
to help debugging similar issues in the future.
This fixes the panasonic-laptop driver failing to probe() on some
devices with the following errors:
[ 3.958887] SQTY reports bad SINF length SQTY: 37 SINF-pkg-count: 38
[ 3.958892] Couldn't retrieve BIOS data
[ 3.983685] Panasonic Laptop Support - With Macros: probe of MAT0019:00 failed with error -5
Fixes: 709ee531c153 ("panasonic-laptop: add Panasonic Let's Note laptop extras driver v0.94")
Cc: stable(a)vger.kernel.org
Tested-by: James Harmison <jharmison(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://lore.kernel.org/r/20240909113227.254470-2-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 39044119d2a6..ebd81846e2d5 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -337,7 +337,8 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc)
}
if (pcc->num_sifr < hkey->package.count) {
- pr_err("SQTY reports bad SINF length\n");
+ pr_err("SQTY reports bad SINF length SQTY: %lu SINF-pkg-count: %u\n",
+ pcc->num_sifr, hkey->package.count);
status = AE_ERROR;
goto end;
}
@@ -994,6 +995,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
return -ENODEV;
}
+ /*
+ * Some DSDT-s have an off-by-one bug where the SINF package count is
+ * one higher than the SQTY reported value, allocate 1 entry extra.
+ */
+ num_sifr++;
+
pcc = kzalloc(sizeof(struct pcc_acpi), GFP_KERNEL);
if (!pcc) {
pr_err("Couldn't allocate mem for pcc");
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 33297cef3101d950cec0033a0dce0a2d2bd59999
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091303-dash-sensitive-4aee@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
33297cef3101 ("platform/x86: panasonic-laptop: Allocate 1 entry extra in the sinf array")
f1aaf914654a ("platform/x86: panasonic-laptop: Replace ACPI prints with pr_*() macros")
d5a81d8e864b ("platform/x86: panasonic-laptop: Add support for optical driver power in Y and W series")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33297cef3101d950cec0033a0dce0a2d2bd59999 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Mon, 9 Sep 2024 13:32:26 +0200
Subject: [PATCH] platform/x86: panasonic-laptop: Allocate 1 entry extra in the
sinf array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some DSDT-s have an off-by-one bug where the SINF package count is
one higher than the SQTY reported value, allocate 1 entry extra.
Also make the SQTY <-> SINF package count mismatch error more verbose
to help debugging similar issues in the future.
This fixes the panasonic-laptop driver failing to probe() on some
devices with the following errors:
[ 3.958887] SQTY reports bad SINF length SQTY: 37 SINF-pkg-count: 38
[ 3.958892] Couldn't retrieve BIOS data
[ 3.983685] Panasonic Laptop Support - With Macros: probe of MAT0019:00 failed with error -5
Fixes: 709ee531c153 ("panasonic-laptop: add Panasonic Let's Note laptop extras driver v0.94")
Cc: stable(a)vger.kernel.org
Tested-by: James Harmison <jharmison(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://lore.kernel.org/r/20240909113227.254470-2-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 39044119d2a6..ebd81846e2d5 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -337,7 +337,8 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc)
}
if (pcc->num_sifr < hkey->package.count) {
- pr_err("SQTY reports bad SINF length\n");
+ pr_err("SQTY reports bad SINF length SQTY: %lu SINF-pkg-count: %u\n",
+ pcc->num_sifr, hkey->package.count);
status = AE_ERROR;
goto end;
}
@@ -994,6 +995,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
return -ENODEV;
}
+ /*
+ * Some DSDT-s have an off-by-one bug where the SINF package count is
+ * one higher than the SQTY reported value, allocate 1 entry extra.
+ */
+ num_sifr++;
+
pcc = kzalloc(sizeof(struct pcc_acpi), GFP_KERNEL);
if (!pcc) {
pr_err("Couldn't allocate mem for pcc");
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f52e98d16e9bd7dd2b3aef8e38db5cbc9899d6a4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091346-football-rely-87b8@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
f52e98d16e9b ("platform/x86: panasonic-laptop: Fix SINF array out of bounds accesses")
f1fba0860962 ("platform/x86: panasonic-laptop: remove redundant assignment of variable result")
25dd390c6206 ("platform/x86: panasonic-laptop: Add sysfs attributes for firmware brightness registers")
468f96bfa3a0 ("platform/x86: panasonic-laptop: Add support for battery charging threshold (eco mode)")
ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug")
e3a9afbbc309 ("platform/x86: panasonic-laptop: Add write support to mute")
008563513348 ("platform/x86: panasonic-laptop: Fix sticky key init bug")
80373ad0edb5 ("platform/x86: panasonic-laptop: Fix naming of platform files for consistency with other modules")
0119fbc0215a ("platform/x86: panasonic-laptop: Split MODULE_AUTHOR() by one author per macro call")
f1aaf914654a ("platform/x86: panasonic-laptop: Replace ACPI prints with pr_*() macros")
d5a81d8e864b ("platform/x86: panasonic-laptop: Add support for optical driver power in Y and W series")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f52e98d16e9bd7dd2b3aef8e38db5cbc9899d6a4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Mon, 9 Sep 2024 13:32:25 +0200
Subject: [PATCH] platform/x86: panasonic-laptop: Fix SINF array out of bounds
accesses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The panasonic laptop code in various places uses the SINF array with index
values of 0 - SINF_CUR_BRIGHT(0x0d) without checking that the SINF array
is big enough.
Not all panasonic laptops have this many SINF array entries, for example
the Toughbook CF-18 model only has 10 SINF array entries. So it only
supports the AC+DC brightness entries and mute.
Check that the SINF array has a minimum size which covers all AC+DC
brightness entries and refuse to load if the SINF array is smaller.
For higher SINF indexes hide the sysfs attributes when the SINF array
does not contain an entry for that attribute, avoiding show()/store()
accessing the array out of bounds and add bounds checking to the probe()
and resume() code accessing these.
Fixes: e424fb8cc4e6 ("panasonic-laptop: avoid overflow in acpi_pcc_hotkey_add()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://lore.kernel.org/r/20240909113227.254470-1-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index cf845ee1c7b1..39044119d2a6 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -773,6 +773,24 @@ static DEVICE_ATTR_RW(dc_brightness);
static DEVICE_ATTR_RW(current_brightness);
static DEVICE_ATTR_RW(cdpower);
+static umode_t pcc_sysfs_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct acpi_device *acpi = to_acpi_device(dev);
+ struct pcc_acpi *pcc = acpi_driver_data(acpi);
+
+ if (attr == &dev_attr_mute.attr)
+ return (pcc->num_sifr > SINF_MUTE) ? attr->mode : 0;
+
+ if (attr == &dev_attr_eco_mode.attr)
+ return (pcc->num_sifr > SINF_ECO_MODE) ? attr->mode : 0;
+
+ if (attr == &dev_attr_current_brightness.attr)
+ return (pcc->num_sifr > SINF_CUR_BRIGHT) ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static struct attribute *pcc_sysfs_entries[] = {
&dev_attr_numbatt.attr,
&dev_attr_lcdtype.attr,
@@ -787,8 +805,9 @@ static struct attribute *pcc_sysfs_entries[] = {
};
static const struct attribute_group pcc_attr_group = {
- .name = NULL, /* put in device directory */
- .attrs = pcc_sysfs_entries,
+ .name = NULL, /* put in device directory */
+ .attrs = pcc_sysfs_entries,
+ .is_visible = pcc_sysfs_is_visible,
};
@@ -941,12 +960,15 @@ static int acpi_pcc_hotkey_resume(struct device *dev)
if (!pcc)
return -EINVAL;
- acpi_pcc_write_sset(pcc, SINF_MUTE, pcc->mute);
- acpi_pcc_write_sset(pcc, SINF_ECO_MODE, pcc->eco_mode);
+ if (pcc->num_sifr > SINF_MUTE)
+ acpi_pcc_write_sset(pcc, SINF_MUTE, pcc->mute);
+ if (pcc->num_sifr > SINF_ECO_MODE)
+ acpi_pcc_write_sset(pcc, SINF_ECO_MODE, pcc->eco_mode);
acpi_pcc_write_sset(pcc, SINF_STICKY_KEY, pcc->sticky_key);
acpi_pcc_write_sset(pcc, SINF_AC_CUR_BRIGHT, pcc->ac_brightness);
acpi_pcc_write_sset(pcc, SINF_DC_CUR_BRIGHT, pcc->dc_brightness);
- acpi_pcc_write_sset(pcc, SINF_CUR_BRIGHT, pcc->current_brightness);
+ if (pcc->num_sifr > SINF_CUR_BRIGHT)
+ acpi_pcc_write_sset(pcc, SINF_CUR_BRIGHT, pcc->current_brightness);
return 0;
}
@@ -963,8 +985,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
num_sifr = acpi_pcc_get_sqty(device);
- if (num_sifr < 0 || num_sifr > 255) {
- pr_err("num_sifr out of range");
+ /*
+ * pcc->sinf is expected to at least have the AC+DC brightness entries.
+ * Accesses to higher SINF entries are checked against num_sifr.
+ */
+ if (num_sifr <= SINF_DC_CUR_BRIGHT || num_sifr > 255) {
+ pr_err("num_sifr %d out of range %d - 255\n", num_sifr, SINF_DC_CUR_BRIGHT + 1);
return -ENODEV;
}
@@ -1020,11 +1046,14 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
acpi_pcc_write_sset(pcc, SINF_STICKY_KEY, 0);
pcc->sticky_key = 0;
- pcc->eco_mode = pcc->sinf[SINF_ECO_MODE];
- pcc->mute = pcc->sinf[SINF_MUTE];
pcc->ac_brightness = pcc->sinf[SINF_AC_CUR_BRIGHT];
pcc->dc_brightness = pcc->sinf[SINF_DC_CUR_BRIGHT];
- pcc->current_brightness = pcc->sinf[SINF_CUR_BRIGHT];
+ if (pcc->num_sifr > SINF_MUTE)
+ pcc->mute = pcc->sinf[SINF_MUTE];
+ if (pcc->num_sifr > SINF_ECO_MODE)
+ pcc->eco_mode = pcc->sinf[SINF_ECO_MODE];
+ if (pcc->num_sifr > SINF_CUR_BRIGHT)
+ pcc->current_brightness = pcc->sinf[SINF_CUR_BRIGHT];
/* add sysfs attributes */
result = sysfs_create_group(&device->dev.kobj, &pcc_attr_group);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f52e98d16e9bd7dd2b3aef8e38db5cbc9899d6a4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091345-sweep-flashback-a85d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f52e98d16e9b ("platform/x86: panasonic-laptop: Fix SINF array out of bounds accesses")
f1fba0860962 ("platform/x86: panasonic-laptop: remove redundant assignment of variable result")
25dd390c6206 ("platform/x86: panasonic-laptop: Add sysfs attributes for firmware brightness registers")
468f96bfa3a0 ("platform/x86: panasonic-laptop: Add support for battery charging threshold (eco mode)")
ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug")
e3a9afbbc309 ("platform/x86: panasonic-laptop: Add write support to mute")
008563513348 ("platform/x86: panasonic-laptop: Fix sticky key init bug")
80373ad0edb5 ("platform/x86: panasonic-laptop: Fix naming of platform files for consistency with other modules")
0119fbc0215a ("platform/x86: panasonic-laptop: Split MODULE_AUTHOR() by one author per macro call")
f1aaf914654a ("platform/x86: panasonic-laptop: Replace ACPI prints with pr_*() macros")
d5a81d8e864b ("platform/x86: panasonic-laptop: Add support for optical driver power in Y and W series")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f52e98d16e9bd7dd2b3aef8e38db5cbc9899d6a4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Mon, 9 Sep 2024 13:32:25 +0200
Subject: [PATCH] platform/x86: panasonic-laptop: Fix SINF array out of bounds
accesses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The panasonic laptop code in various places uses the SINF array with index
values of 0 - SINF_CUR_BRIGHT(0x0d) without checking that the SINF array
is big enough.
Not all panasonic laptops have this many SINF array entries, for example
the Toughbook CF-18 model only has 10 SINF array entries. So it only
supports the AC+DC brightness entries and mute.
Check that the SINF array has a minimum size which covers all AC+DC
brightness entries and refuse to load if the SINF array is smaller.
For higher SINF indexes hide the sysfs attributes when the SINF array
does not contain an entry for that attribute, avoiding show()/store()
accessing the array out of bounds and add bounds checking to the probe()
and resume() code accessing these.
Fixes: e424fb8cc4e6 ("panasonic-laptop: avoid overflow in acpi_pcc_hotkey_add()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://lore.kernel.org/r/20240909113227.254470-1-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index cf845ee1c7b1..39044119d2a6 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -773,6 +773,24 @@ static DEVICE_ATTR_RW(dc_brightness);
static DEVICE_ATTR_RW(current_brightness);
static DEVICE_ATTR_RW(cdpower);
+static umode_t pcc_sysfs_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct acpi_device *acpi = to_acpi_device(dev);
+ struct pcc_acpi *pcc = acpi_driver_data(acpi);
+
+ if (attr == &dev_attr_mute.attr)
+ return (pcc->num_sifr > SINF_MUTE) ? attr->mode : 0;
+
+ if (attr == &dev_attr_eco_mode.attr)
+ return (pcc->num_sifr > SINF_ECO_MODE) ? attr->mode : 0;
+
+ if (attr == &dev_attr_current_brightness.attr)
+ return (pcc->num_sifr > SINF_CUR_BRIGHT) ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static struct attribute *pcc_sysfs_entries[] = {
&dev_attr_numbatt.attr,
&dev_attr_lcdtype.attr,
@@ -787,8 +805,9 @@ static struct attribute *pcc_sysfs_entries[] = {
};
static const struct attribute_group pcc_attr_group = {
- .name = NULL, /* put in device directory */
- .attrs = pcc_sysfs_entries,
+ .name = NULL, /* put in device directory */
+ .attrs = pcc_sysfs_entries,
+ .is_visible = pcc_sysfs_is_visible,
};
@@ -941,12 +960,15 @@ static int acpi_pcc_hotkey_resume(struct device *dev)
if (!pcc)
return -EINVAL;
- acpi_pcc_write_sset(pcc, SINF_MUTE, pcc->mute);
- acpi_pcc_write_sset(pcc, SINF_ECO_MODE, pcc->eco_mode);
+ if (pcc->num_sifr > SINF_MUTE)
+ acpi_pcc_write_sset(pcc, SINF_MUTE, pcc->mute);
+ if (pcc->num_sifr > SINF_ECO_MODE)
+ acpi_pcc_write_sset(pcc, SINF_ECO_MODE, pcc->eco_mode);
acpi_pcc_write_sset(pcc, SINF_STICKY_KEY, pcc->sticky_key);
acpi_pcc_write_sset(pcc, SINF_AC_CUR_BRIGHT, pcc->ac_brightness);
acpi_pcc_write_sset(pcc, SINF_DC_CUR_BRIGHT, pcc->dc_brightness);
- acpi_pcc_write_sset(pcc, SINF_CUR_BRIGHT, pcc->current_brightness);
+ if (pcc->num_sifr > SINF_CUR_BRIGHT)
+ acpi_pcc_write_sset(pcc, SINF_CUR_BRIGHT, pcc->current_brightness);
return 0;
}
@@ -963,8 +985,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
num_sifr = acpi_pcc_get_sqty(device);
- if (num_sifr < 0 || num_sifr > 255) {
- pr_err("num_sifr out of range");
+ /*
+ * pcc->sinf is expected to at least have the AC+DC brightness entries.
+ * Accesses to higher SINF entries are checked against num_sifr.
+ */
+ if (num_sifr <= SINF_DC_CUR_BRIGHT || num_sifr > 255) {
+ pr_err("num_sifr %d out of range %d - 255\n", num_sifr, SINF_DC_CUR_BRIGHT + 1);
return -ENODEV;
}
@@ -1020,11 +1046,14 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
acpi_pcc_write_sset(pcc, SINF_STICKY_KEY, 0);
pcc->sticky_key = 0;
- pcc->eco_mode = pcc->sinf[SINF_ECO_MODE];
- pcc->mute = pcc->sinf[SINF_MUTE];
pcc->ac_brightness = pcc->sinf[SINF_AC_CUR_BRIGHT];
pcc->dc_brightness = pcc->sinf[SINF_DC_CUR_BRIGHT];
- pcc->current_brightness = pcc->sinf[SINF_CUR_BRIGHT];
+ if (pcc->num_sifr > SINF_MUTE)
+ pcc->mute = pcc->sinf[SINF_MUTE];
+ if (pcc->num_sifr > SINF_ECO_MODE)
+ pcc->eco_mode = pcc->sinf[SINF_ECO_MODE];
+ if (pcc->num_sifr > SINF_CUR_BRIGHT)
+ pcc->current_brightness = pcc->sinf[SINF_CUR_BRIGHT];
/* add sysfs attributes */
result = sysfs_create_group(&device->dev.kobj, &pcc_attr_group);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f52e98d16e9bd7dd2b3aef8e38db5cbc9899d6a4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024091343-editor-sulfite-f306@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
f52e98d16e9b ("platform/x86: panasonic-laptop: Fix SINF array out of bounds accesses")
f1fba0860962 ("platform/x86: panasonic-laptop: remove redundant assignment of variable result")
25dd390c6206 ("platform/x86: panasonic-laptop: Add sysfs attributes for firmware brightness registers")
468f96bfa3a0 ("platform/x86: panasonic-laptop: Add support for battery charging threshold (eco mode)")
ed83c9171829 ("platform/x86: panasonic-laptop: Resolve hotkey double trigger bug")
e3a9afbbc309 ("platform/x86: panasonic-laptop: Add write support to mute")
008563513348 ("platform/x86: panasonic-laptop: Fix sticky key init bug")
80373ad0edb5 ("platform/x86: panasonic-laptop: Fix naming of platform files for consistency with other modules")
0119fbc0215a ("platform/x86: panasonic-laptop: Split MODULE_AUTHOR() by one author per macro call")
f1aaf914654a ("platform/x86: panasonic-laptop: Replace ACPI prints with pr_*() macros")
d5a81d8e864b ("platform/x86: panasonic-laptop: Add support for optical driver power in Y and W series")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f52e98d16e9bd7dd2b3aef8e38db5cbc9899d6a4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Mon, 9 Sep 2024 13:32:25 +0200
Subject: [PATCH] platform/x86: panasonic-laptop: Fix SINF array out of bounds
accesses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The panasonic laptop code in various places uses the SINF array with index
values of 0 - SINF_CUR_BRIGHT(0x0d) without checking that the SINF array
is big enough.
Not all panasonic laptops have this many SINF array entries, for example
the Toughbook CF-18 model only has 10 SINF array entries. So it only
supports the AC+DC brightness entries and mute.
Check that the SINF array has a minimum size which covers all AC+DC
brightness entries and refuse to load if the SINF array is smaller.
For higher SINF indexes hide the sysfs attributes when the SINF array
does not contain an entry for that attribute, avoiding show()/store()
accessing the array out of bounds and add bounds checking to the probe()
and resume() code accessing these.
Fixes: e424fb8cc4e6 ("panasonic-laptop: avoid overflow in acpi_pcc_hotkey_add()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://lore.kernel.org/r/20240909113227.254470-1-hdegoede@redhat.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index cf845ee1c7b1..39044119d2a6 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -773,6 +773,24 @@ static DEVICE_ATTR_RW(dc_brightness);
static DEVICE_ATTR_RW(current_brightness);
static DEVICE_ATTR_RW(cdpower);
+static umode_t pcc_sysfs_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct acpi_device *acpi = to_acpi_device(dev);
+ struct pcc_acpi *pcc = acpi_driver_data(acpi);
+
+ if (attr == &dev_attr_mute.attr)
+ return (pcc->num_sifr > SINF_MUTE) ? attr->mode : 0;
+
+ if (attr == &dev_attr_eco_mode.attr)
+ return (pcc->num_sifr > SINF_ECO_MODE) ? attr->mode : 0;
+
+ if (attr == &dev_attr_current_brightness.attr)
+ return (pcc->num_sifr > SINF_CUR_BRIGHT) ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static struct attribute *pcc_sysfs_entries[] = {
&dev_attr_numbatt.attr,
&dev_attr_lcdtype.attr,
@@ -787,8 +805,9 @@ static struct attribute *pcc_sysfs_entries[] = {
};
static const struct attribute_group pcc_attr_group = {
- .name = NULL, /* put in device directory */
- .attrs = pcc_sysfs_entries,
+ .name = NULL, /* put in device directory */
+ .attrs = pcc_sysfs_entries,
+ .is_visible = pcc_sysfs_is_visible,
};
@@ -941,12 +960,15 @@ static int acpi_pcc_hotkey_resume(struct device *dev)
if (!pcc)
return -EINVAL;
- acpi_pcc_write_sset(pcc, SINF_MUTE, pcc->mute);
- acpi_pcc_write_sset(pcc, SINF_ECO_MODE, pcc->eco_mode);
+ if (pcc->num_sifr > SINF_MUTE)
+ acpi_pcc_write_sset(pcc, SINF_MUTE, pcc->mute);
+ if (pcc->num_sifr > SINF_ECO_MODE)
+ acpi_pcc_write_sset(pcc, SINF_ECO_MODE, pcc->eco_mode);
acpi_pcc_write_sset(pcc, SINF_STICKY_KEY, pcc->sticky_key);
acpi_pcc_write_sset(pcc, SINF_AC_CUR_BRIGHT, pcc->ac_brightness);
acpi_pcc_write_sset(pcc, SINF_DC_CUR_BRIGHT, pcc->dc_brightness);
- acpi_pcc_write_sset(pcc, SINF_CUR_BRIGHT, pcc->current_brightness);
+ if (pcc->num_sifr > SINF_CUR_BRIGHT)
+ acpi_pcc_write_sset(pcc, SINF_CUR_BRIGHT, pcc->current_brightness);
return 0;
}
@@ -963,8 +985,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
num_sifr = acpi_pcc_get_sqty(device);
- if (num_sifr < 0 || num_sifr > 255) {
- pr_err("num_sifr out of range");
+ /*
+ * pcc->sinf is expected to at least have the AC+DC brightness entries.
+ * Accesses to higher SINF entries are checked against num_sifr.
+ */
+ if (num_sifr <= SINF_DC_CUR_BRIGHT || num_sifr > 255) {
+ pr_err("num_sifr %d out of range %d - 255\n", num_sifr, SINF_DC_CUR_BRIGHT + 1);
return -ENODEV;
}
@@ -1020,11 +1046,14 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
acpi_pcc_write_sset(pcc, SINF_STICKY_KEY, 0);
pcc->sticky_key = 0;
- pcc->eco_mode = pcc->sinf[SINF_ECO_MODE];
- pcc->mute = pcc->sinf[SINF_MUTE];
pcc->ac_brightness = pcc->sinf[SINF_AC_CUR_BRIGHT];
pcc->dc_brightness = pcc->sinf[SINF_DC_CUR_BRIGHT];
- pcc->current_brightness = pcc->sinf[SINF_CUR_BRIGHT];
+ if (pcc->num_sifr > SINF_MUTE)
+ pcc->mute = pcc->sinf[SINF_MUTE];
+ if (pcc->num_sifr > SINF_ECO_MODE)
+ pcc->eco_mode = pcc->sinf[SINF_ECO_MODE];
+ if (pcc->num_sifr > SINF_CUR_BRIGHT)
+ pcc->current_brightness = pcc->sinf[SINF_CUR_BRIGHT];
/* add sysfs attributes */
result = sysfs_create_group(&device->dev.kobj, &pcc_attr_group);
Slackware 15.0 64-bit compiles and runs fine.
Slackware 15.0 32-bit fails to build and gives the "out of memory" error:
cc1: out of memory allocating 180705472 bytes after a total of 284454912
bytes
...
make[4]: *** [scripts/Makefile.build:289:
drivers/staging/media/atomisp/pci/isp/kernels/ynr/ynr_1.0/ia_css_ynr.ho
st.o] Error 1
Patching it with help from Lorenzo Stoakes allows the build to
run:
https://lore.kernel.org/lkml/5882b96e-1287-4390-8174-3316d39038ef@lucifer.l…
And then 32-bit runs fine too.
Richard Narron
Spec says SW is expected to round up to the nearest 128K, if not already
aligned. We are seeing the assert sometimes pop on BMG to tell us that
there is a hole between GSM and CCS, as well as popping other asserts
with having a vram size with strange alignment, which is likely caused
by misaligned offset here.
BSpec: 68023
Fixes: b5c2ca0372dc ("drm/xe/xe2hpg: Determine flat ccs offset for vram")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com>
Cc: Akshata Jahagirdar <akshata.jahagirdar(a)intel.com>
Cc: Shuicheng Lin <shuicheng.lin(a)intel.com>
Cc: Matt Roper <matthew.d.roper(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.10+
---
drivers/gpu/drm/xe/xe_vram.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 7e765b1499b1..8e65cb4cc477 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -181,6 +181,7 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = offset_hi << 32; /* HW view bits 39:32 */
offset |= offset_lo << 6; /* HW view bits 31:6 */
+ offset = round_up(offset, SZ_128K); /* SW must round up to nearest 128K */
offset *= num_enabled; /* convert to SW view */
/* We don't expect any holes */
--
2.46.0
Registering a gadget driver is expected to complete synchronously and
immediately after calling driver_register() this function checks that
the driver has bound so as to return an error.
Set PROBE_FORCE_SYNCHRONOUS to ensure this is the case even when
asynchronous probing is set as the default.
Fixes: fc274c1e99731 ("USB: gadget: Add a new bus for gadgets")
Cc: stable(a)vger.kernel.org
Signed-off-by: John Keeping <jkeeping(a)inmusicbrands.com>
---
v3:
- Add cc: stable
v2:
- Add "Fixes" trailer
drivers/usb/gadget/udc/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index cf6478f97f4a..a6f46364be65 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1696,6 +1696,7 @@ int usb_gadget_register_driver_owner(struct usb_gadget_driver *driver,
driver->driver.bus = &gadget_bus_type;
driver->driver.owner = owner;
driver->driver.mod_name = mod_name;
+ driver->driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
ret = driver_register(&driver->driver);
if (ret) {
pr_warn("%s: driver registration failed: %d\n",
--
2.46.0
Streams should flush their TRB cache, re-read TRBs, and start executing
TRBs from the beginning of the new dequeue pointer after a 'Set TR Dequeue
Pointer' command.
Cadence controllers may fail to start from the beginning of the dequeue
TRB as it doesn't clear the Opaque 'RsvdO' field of the stream context
during 'Set TR Dequeue' command. This stream context area is where xHC
stores information about the last partially executed TD when a stream
is stopped. xHC uses this information to resume the transfer where it left
mid TD, when the stream is restarted.
Patch fixes this by clearing out all RsvdO fields before initializing new
Stream transfer using a 'Set TR Dequeue Pointer' command.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
Changelog:
v3:
- changed patch to patch Cadence specific
v2:
- removed restoring of EDTLA field
drivers/usb/cdns3/host.c | 4 +++-
drivers/usb/host/xhci-pci.c | 7 +++++++
drivers/usb/host/xhci-ring.c | 14 ++++++++++++++
drivers/usb/host/xhci.h | 1 +
4 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
index ceca4d839dfd..7ba760ee62e3 100644
--- a/drivers/usb/cdns3/host.c
+++ b/drivers/usb/cdns3/host.c
@@ -62,7 +62,9 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = {
.resume_quirk = xhci_cdns3_resume_quirk,
};
-static const struct xhci_plat_priv xhci_plat_cdnsp_xhci;
+static const struct xhci_plat_priv xhci_plat_cdnsp_xhci = {
+ .quirks = XHCI_CDNS_SCTX_QUIRK,
+};
static int __cdns_host_init(struct cdns *cdns)
{
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index b9ae5c2a2527..9199dbfcea07 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -74,6 +74,9 @@
#define PCI_DEVICE_ID_ASMEDIA_2142_XHCI 0x2142
#define PCI_DEVICE_ID_ASMEDIA_3242_XHCI 0x3242
+#define PCI_DEVICE_ID_CADENCE 0x17CD
+#define PCI_DEVICE_ID_CADENCE_SSP 0x0200
+
static const char hcd_name[] = "xhci_hcd";
static struct hc_driver __read_mostly xhci_pci_hc_driver;
@@ -532,6 +535,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH;
}
+ if (pdev->vendor == PCI_DEVICE_ID_CADENCE &&
+ pdev->device == PCI_DEVICE_ID_CADENCE_SSP)
+ xhci->quirks |= XHCI_CDNS_SCTX_QUIRK;
+
/* xHC spec requires PCI devices to support D3hot and D3cold */
if (xhci->hci_version >= 0x120)
xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1dde53f6eb31..a1ad2658c0c7 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1386,6 +1386,20 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
struct xhci_stream_ctx *ctx =
&ep->stream_info->stream_ctx_array[stream_id];
deq = le64_to_cpu(ctx->stream_ring) & SCTX_DEQ_MASK;
+
+ /*
+ * Cadence xHCI controllers store some endpoint state
+ * information within Rsvd0 fields of Stream Endpoint
+ * context. This field is not cleared during Set TR
+ * Dequeue Pointer command which causes XDMA to skip
+ * over transfer ring and leads to data loss on stream
+ * pipe.
+ * To fix this issue driver must clear Rsvd0 field.
+ */
+ if (xhci->quirks & XHCI_CDNS_SCTX_QUIRK) {
+ ctx->reserved[0] = 0;
+ ctx->reserved[1] = 0;
+ }
} else {
deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 101e74c9060f..4cbd58eed214 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1907,6 +1907,7 @@ struct xhci_hcd {
#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45)
#define XHCI_ZHAOXIN_HOST BIT_ULL(46)
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
+#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
unsigned int num_active_eps;
unsigned int limit_active_eps;
--
2.43.0
The return value of drm_atomic_get_crtc_state() needs to be
checked. To avoid use of error pointer 'crtc_state' in case
of the failure.
Cc: stable(a)vger.kernel.org
Fixes: dec92020671c ("drm: Use the state pointer directly in planes atomic_check")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/gpu/drm/sti/sti_cursor.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index db0a1eb53532..e460f5ba2d87 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -200,6 +200,8 @@ static int sti_cursor_atomic_check(struct drm_plane *drm_plane,
return 0;
crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
mode = &crtc_state->mode;
dst_x = new_plane_state->crtc_x;
dst_y = new_plane_state->crtc_y;
--
2.25.1
Call work_on_cpu(cpu, fn, arg) in pci_call_probe() while the argument
@cpu is a offline cpu would cause system stuck forever.
This can be happen if a node is online while all its CPUs are
offline (We can use "maxcpus=1" without "nr_cpus=1" to reproduce it).
So, in the above case, let pci_call_probe() call local_pci_probe()
instead of work_on_cpu() when the best selected cpu is offline.
Fixes: 69a18b18699b ("PCI: Restrict probe functions to housekeeping CPUs")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
Signed-off-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
---
v2 -> v3: Modify commit message according to Markus's suggestion
v1 -> v2: Add a method to reproduce the problem
---
drivers/pci/pci-driver.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index af2996d0d17f..32a99828e6a3 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -386,7 +386,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
free_cpumask_var(wq_domain_mask);
}
- if (cpu < nr_cpu_ids)
+ if ((cpu < nr_cpu_ids) && cpu_online(cpu))
error = work_on_cpu(cpu, local_pci_probe, &ddi);
else
error = local_pci_probe(&ddi);
--
2.33.0
From: Peter Wang <peter.wang(a)mediatek.com>
ufshcd_abort_all forcibly aborts all on-going commands and the host
controller will automatically fill in the OCS field of the corresponding
response with OCS_ABORTED based on different working modes.
MCQ mode: aborts a command using SQ cleanup, The host controller
will post a Completion Queue entry with OCS = ABORTED.
SDB mode: aborts a command using UTRLCLR. Task Management response
which means a Transfer Request was aborted.
For these two cases, set a flag to notify SCSI to requeue the
command after receiving response with OCS_ABORTED.
Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode")
Cc: stable(a)vger.kernel.org
Signed-off-by: Peter Wang <peter.wang(a)mediatek.com>
---
drivers/ufs/core/ufshcd.c | 35 ++++++++++++++++++++---------------
include/ufs/ufshcd.h | 3 +++
2 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index a6f818cdef0e..a0548a495f30 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3006,6 +3006,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp);
lrbp->req_abort_skip = false;
+ lrbp->abort_initiated_by_eh = false;
ufshcd_comp_scsi_upiu(hba, lrbp);
@@ -5404,7 +5405,10 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
}
break;
case OCS_ABORTED:
- result |= DID_ABORT << 16;
+ if (lrbp->abort_initiated_by_eh)
+ result |= DID_REQUEUE << 16;
+ else
+ result |= DID_ABORT << 16;
break;
case OCS_INVALID_COMMAND_STATUS:
result |= DID_REQUEUE << 16;
@@ -6471,26 +6475,12 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
struct scsi_device *sdev = cmd->device;
struct Scsi_Host *shost = sdev->host;
struct ufs_hba *hba = shost_priv(shost);
- struct ufshcd_lrb *lrbp = &hba->lrb[tag];
- struct ufs_hw_queue *hwq;
- unsigned long flags;
*ret = ufshcd_try_to_abort_task(hba, tag);
dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
*ret ? "failed" : "succeeded");
- /* Release cmd in MCQ mode if abort succeeds */
- if (hba->mcq_enabled && (*ret == 0)) {
- hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
- if (!hwq)
- return 0;
- spin_lock_irqsave(&hwq->cq_lock, flags);
- if (ufshcd_cmd_inflight(lrbp->cmd))
- ufshcd_release_scsi_cmd(hba, lrbp);
- spin_unlock_irqrestore(&hwq->cq_lock, flags);
- }
-
return *ret == 0;
}
@@ -7561,6 +7551,21 @@ int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
goto out;
}
+ /*
+ * When the host software receives a "FUNCTION COMPLETE", set flag
+ * to requeue command after receive response with OCS_ABORTED
+ * SDB mode: UTRLCLR Task Management response which means a Transfer
+ * Request was aborted.
+ * MCQ mode: Host will post to CQ with OCS_ABORTED after SQ cleanup
+ *
+ * This flag is set because error handler ufshcd_abort_all forcibly
+ * aborts all commands, and the host controller will automatically
+ * fill in the OCS field of the corresponding response with OCS_ABORTED.
+ * Therefore, upon receiving this response, it needs to be requeued.
+ */
+ if (!err && ufshcd_eh_in_progress(hba))
+ lrbp->abort_initiated_by_eh = true;
+
err = ufshcd_clear_cmd(hba, tag);
if (err)
dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 0fd2aebac728..07b5e60e6c44 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -173,6 +173,8 @@ struct ufs_pm_lvl_states {
* @crypto_key_slot: the key slot to use for inline crypto (-1 if none)
* @data_unit_num: the data unit number for the first block for inline crypto
* @req_abort_skip: skip request abort task flag
+ * @abort_initiated_by_eh: The flag is specifically used to handle
+ * ufshcd_err_handler forcibly aborts.
*/
struct ufshcd_lrb {
struct utp_transfer_req_desc *utr_descriptor_ptr;
@@ -202,6 +204,7 @@ struct ufshcd_lrb {
#endif
bool req_abort_skip;
+ bool abort_initiated_by_eh;
};
/**
--
2.45.2
devm_kasprintf() can return a NULL pointer on failure but this returned
value is not checked. Fix this lack and check the returned value.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: a0f160ffcb83 ("pinctrl: add pinctrl/GPIO driver for Apple SoCs")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/pinctrl/pinctrl-apple-gpio.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/pinctrl/pinctrl-apple-gpio.c b/drivers/pinctrl/pinctrl-apple-gpio.c
index 3751c7de37aa..f861e63f4115 100644
--- a/drivers/pinctrl/pinctrl-apple-gpio.c
+++ b/drivers/pinctrl/pinctrl-apple-gpio.c
@@ -474,6 +474,9 @@ static int apple_gpio_pinctrl_probe(struct platform_device *pdev)
for (i = 0; i < npins; i++) {
pins[i].number = i;
pins[i].name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "PIN%u", i);
+ if (!pins[i].name)
+ return -ENOMEM;
+
pins[i].drv_data = pctl;
pin_names[i] = pins[i].name;
pin_nums[i] = i;
--
2.25.1
read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is
bigger than the variable hv_sched_clock_offset, which is cached during
early boot, but depending on the timing this assumption may be false
when a hibernated VM starts again (the clock counter starts from 0
again) and is resuming back (Note: hv_init_tsc_clocksource() is not
called during hibernation/resume); consequently,
read_hv_sched_clock_tsc() may return a negative integer (which is
interpreted as a huge positive integer since the return type is u64)
and new kernel messages are prefixed with huge timestamps before
read_hv_sched_clock_tsc() grows big enough (which typically takes
several seconds).
Fix the issue by saving the Hyper-V clock counter just before the
suspend, and using it to correct the hv_sched_clock_offset in
resume. Override x86_platform.save_sched_clock_state and
x86_platform.restore_sched_clock_state.
Note: if Invariant TSC is available, the issue doesn't happen because
1) we don't register read_hv_sched_clock_tsc() for sched clock:
See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework
clocksource and sched clock setup");
2) the common x86 code adjusts TSC similarly: see
__restore_processor_state() -> tsc_verify_tsc_adjust(true) and
x86_platform.restore_sched_clock_state().
Cc: stable(a)vger.kernel.org
Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation")
Co-developed-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Naman Jain <namjain(a)linux.microsoft.com>
---
Changes from v1:
https://lore.kernel.org/all/20240909053923.8512-1-namjain@linux.microsoft.c…
* Reorganized code as per Michael's comment, and moved the logic to x86
specific files, to keep hyperv_timer.c arch independent.
---
arch/x86/kernel/cpu/mshyperv.c | 70 ++++++++++++++++++++++++++++++
drivers/clocksource/hyperv_timer.c | 8 +++-
include/clocksource/hyperv_timer.h | 8 ++++
3 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..d83a694e387c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -224,6 +224,75 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
hyperv_cleanup();
}
#endif /* CONFIG_CRASH_DUMP */
+
+static u64 hv_sched_clock_offset_saved;
+static void (*old_save_sched_clock_state)(void);
+static void (*old_restore_sched_clock_state)(void);
+
+/*
+ * Hyper-V clock counter resets during hibernation. Save and restore clock
+ * offset during suspend/resume, while also considering the time passed
+ * before suspend. This is to make sure that sched_clock using hv tsc page
+ * based clocksource, proceeds from where it left off during suspend and
+ * it shows correct time for the timestamps of kernel messages after resume.
+ */
+static void save_hv_clock_tsc_state(void)
+{
+ hv_sched_clock_offset_saved = hv_read_reference_counter();
+}
+
+static void restore_hv_clock_tsc_state(void)
+{
+ /*
+ * hv_sched_clock_offset = offset that is used by hyperv_timer clocksource driver
+ * to get time.
+ * Time passed before suspend = hv_sched_clock_offset_saved
+ * - hv_sched_clock_offset (old)
+ *
+ * After Hyper-V clock counter resets, hv_sched_clock_offset needs a correction.
+ *
+ * New time = hv_read_reference_counter() (future) - hv_sched_clock_offset (new)
+ * New time = Time passed before suspend + hv_read_reference_counter() (future)
+ * - hv_read_reference_counter() (now)
+ *
+ * Solving the above two equations gives:
+ *
+ * hv_sched_clock_offset (new) = hv_sched_clock_offset (old)
+ * - hv_sched_clock_offset_saved
+ * + hv_read_reference_counter() (now))
+ */
+ hv_adj_sched_clock_offset(hv_sched_clock_offset_saved - hv_read_reference_counter());
+}
+
+/*
+ * Functions to override save_sched_clock_state and restore_sched_clock_state
+ * functions of x86_platform. The Hyper-V clock counter is reset during
+ * suspend-resume and the offset used to measure time needs to be
+ * corrected, post resume.
+ */
+static void hv_save_sched_clock_state(void)
+{
+ old_save_sched_clock_state();
+ save_hv_clock_tsc_state();
+}
+
+static void hv_restore_sched_clock_state(void)
+{
+ restore_hv_clock_tsc_state();
+ old_restore_sched_clock_state();
+}
+
+static void __init x86_setup_ops_for_tsc_pg_clock(void)
+{
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return;
+
+ old_save_sched_clock_state = x86_platform.save_sched_clock_state;
+ x86_platform.save_sched_clock_state = hv_save_sched_clock_state;
+
+ old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
+ x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
+}
#endif /* CONFIG_HYPERV */
static uint32_t __init ms_hyperv_platform(void)
@@ -575,6 +644,7 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
+ x86_setup_ops_for_tsc_pg_clock();
hv_vtl_init_platform();
#endif
/*
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index b2a080647e41..e424892444ed 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -27,7 +27,8 @@
#include <asm/mshyperv.h>
static struct clock_event_device __percpu *hv_clock_event;
-static u64 hv_sched_clock_offset __ro_after_init;
+/* Note: offset can hold negative values after hibernation. */
+static u64 hv_sched_clock_offset __read_mostly;
/*
* If false, we're using the old mechanism for stimer0 interrupts
@@ -456,6 +457,11 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
hv_set_msr(HV_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
}
+void hv_adj_sched_clock_offset(u64 offset)
+{
+ hv_sched_clock_offset -= offset;
+}
+
#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
static int hv_cs_enable(struct clocksource *cs)
{
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 6cdc873ac907..62e2bad754c0 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -38,6 +38,14 @@ extern void hv_remap_tsc_clocksource(void);
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+/*
+ * Called during resume from hibernation, from overridden
+ * x86_platform.restore_sched_clock_state routine. This is to adjust offsets
+ * used to calculate time for hv tsc page based sched_clock, to account for
+ * time spent before hibernation.
+ */
+extern void hv_adj_sched_clock_offset(u64 offset);
+
static __always_inline bool
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc, u64 *time)
base-commit: da3ea35007d0af457a0afc87e84fddaebc4e0b63
--
2.25.1
From: Willem de Bruijn <willemb(a)google.com>
The referenced commit drops bad input, but has false positives.
Tighten the check to avoid these.
The check detects illegal checksum offload requests, which produce
csum_start/csum_off beyond end of packet after segmentation.
But it is based on two incorrect assumptions:
1. virtio_net_hdr_to_skb with VIRTIO_NET_HDR_GSO_TCP[46] implies GSO.
True in callers that inject into the tx path, such as tap.
But false in callers that inject into rx, like virtio-net.
Here, the flags indicate GRO, and CHECKSUM_UNNECESSARY or
CHECKSUM_NONE without VIRTIO_NET_HDR_F_NEEDS_CSUM is normal.
2. TSO requires checksum offload, i.e., ip_summed == CHECKSUM_PARTIAL.
False, as tcp[46]_gso_segment will fix up csum_start and offset for
all other ip_summed by calling __tcp_v4_send_check.
Because of 2, we can limit the scope of the fix to virtio_net_hdr
that do try to set these fields, with a bogus value.
Link: https://lore.kernel.org/netdev/20240909094527.GA3048202@port70.net/
Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr")
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Cc: stable(a)vger.kernel.org
---
Changes v1->v2:
- Fix Cc:
- Add Acks from v1
---
include/linux/virtio_net.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 6c395a2600e8d..276ca543ef44d 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -173,7 +173,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
break;
case SKB_GSO_TCPV4:
case SKB_GSO_TCPV6:
- if (skb->csum_offset != offsetof(struct tcphdr, check))
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb->csum_offset != offsetof(struct tcphdr, check))
return -EINVAL;
break;
}
--
2.46.0.598.g6f2099f65c-goog
From: Lai Jiangshan <jiangshan.ljs(a)antgroup.com>
Marc Hartmayer reported:
[ 23.133876] Unable to handle kernel pointer dereference in virtual kernel address space
[ 23.133950] Failing address: 0000000000000000 TEID: 0000000000000483
[ 23.133954] Fault in home space mode while using kernel ASCE.
[ 23.133957] AS:000000001b8f0007 R3:0000000056cf4007 S:0000000056cf3800 P:000000000000003d
[ 23.134207] Oops: 0004 ilc:2 [#1] SMP
(snip)
[ 23.134516] Call Trace:
[ 23.134520] [<0000024e326caf28>] worker_thread+0x48/0x430
[ 23.134525] ([<0000024e326caf18>] worker_thread+0x38/0x430)
[ 23.134528] [<0000024e326d3a3e>] kthread+0x11e/0x130
[ 23.134533] [<0000024e3264b0dc>] __ret_from_fork+0x3c/0x60
[ 23.134536] [<0000024e333fb37a>] ret_from_fork+0xa/0x38
[ 23.134552] Last Breaking-Event-Address:
[ 23.134553] [<0000024e333f4c04>] mutex_unlock+0x24/0x30
[ 23.134562] Kernel panic - not syncing: Fatal exception: panic_on_oops
With debuging and analysis, worker_thread() accesses to the nullified
worker->pool when the newly created worker is destroyed before being
waken-up, in which case worker_thread() can see the result detach_worker()
reseting worker->pool to NULL at the begining.
Move the code "worker->pool = NULL;" out from detach_worker() to fix the
problem.
worker->pool had been designed to be constant for regular workers and
changeable for rescuer. To share attaching/detaching code for regular
and rescuer workers and to avoid worker->pool being accessed inadvertently
when the worker has been detached, worker->pool is reset to NULL when
detached no matter the worker is rescuer or not.
To maintain worker->pool being reset after detached, move the code
"worker->pool = NULL;" in the worker thread context after detached.
It is either be in the regular worker thread context after PF_WQ_WORKER
is cleared or in rescuer worker thread context with wq_pool_attach_mutex
held. So it is safe to do so.
Cc: Marc Hartmayer <mhartmay(a)linux.ibm.com>
Link: https://lore.kernel.org/lkml/87wmjj971b.fsf@linux.ibm.com/
Reported-by: Marc Hartmayer <mhartmay(a)linux.ibm.com>
Fixes: f4b7b53c94af ("workqueue: Detach workers directly in idle_cull_fn()")
Cc: stable(a)vger.kernel.org # v6.11+
Signed-off-by: Lai Jiangshan <jiangshan.ljs(a)antgroup.com>
---
kernel/workqueue.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e7b005ff3750..6f2545037e57 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2709,7 +2709,6 @@ static void detach_worker(struct worker *worker)
unbind_worker(worker);
list_del(&worker->node);
- worker->pool = NULL;
}
/**
@@ -2729,6 +2728,7 @@ static void worker_detach_from_pool(struct worker *worker)
mutex_lock(&wq_pool_attach_mutex);
detach_worker(worker);
+ worker->pool = NULL;
mutex_unlock(&wq_pool_attach_mutex);
/* clear leftover flags without pool->lock after it is detached */
@@ -3349,7 +3349,11 @@ static int worker_thread(void *__worker)
if (unlikely(worker->flags & WORKER_DIE)) {
raw_spin_unlock_irq(&pool->lock);
set_pf_worker(false);
-
+ /*
+ * The worker is dead and PF_WQ_WORKER is cleared, worker->pool
+ * shouldn't be accessed, reset it to NULL in case otherwise.
+ */
+ worker->pool = NULL;
ida_free(&pool->worker_ida, worker->id);
return 0;
}
--
2.19.1.6.gb485710b
The violation of atomicity occurs when the brbd_uuid_set_bm function is
executed simultaneously with modifying the value of
device->ldev->md.uuid[UI_BITMAP]. Consider a scenario where, while
device->ldev->md.uuid[UI_BITMAP] passes the validity check when its value
is not zero, the value of device->ldev->md.uuid[UI_BITMAP] is written to
zero. In this case, the check in brbd_uuid_set_bm might refer to the old
value of device->ldev->md.uuid[UI_BITMAP] (before locking), which allows
an invalid value to pass the validity check, resulting in inconsistency.
To address this issue, it is recommended to include the data validity check
within the locked section of the function. This modification ensures that
the value of device->ldev->md.uuid[UI_BITMAP] does not change during the
validation process, thereby maintaining its integrity.
This possible bug is found by an experimental static analysis tool
developed by our team. This tool analyzes the locking APIs to extract
function pairs that can be concurrently executed, and then analyzes the
instructions in the paired functions to identify possible concurrency bugs
including data races and atomicity violations.
Fixes: 9f2247bb9b75 ("drbd: Protect accesses to the uuid set with a spinlock")
Cc: stable(a)vger.kernel.org
Signed-off-by: Qiu-ji Chen <chenqiuji666(a)gmail.com>
---
drivers/block/drbd/drbd_main.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a9e49b212341..abafc4edf9ed 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3399,10 +3399,12 @@ void drbd_uuid_new_current(struct drbd_device *device) __must_hold(local)
void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
{
unsigned long flags;
- if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
+ spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
+ if (device->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) {
+ spin_unlock_irqrestore(&device->ldev->md.uuid_lock, flags);
return;
+ }
- spin_lock_irqsave(&device->ldev->md.uuid_lock, flags);
if (val == 0) {
drbd_uuid_move_history(device);
device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
--
2.34.1
As we process the second byte of a control transfer, transfers
of less than 2 bytes must be discarded.
This bug is as old as the driver.
SIgned-off-by: Oliver Neukum <oneukum(a)suse.com>
CC: stable(a)vger.kernel.org
---
drivers/usb/misc/cypress_cy7c63.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/usb/misc/cypress_cy7c63.c b/drivers/usb/misc/cypress_cy7c63.c
index cecd7693b741..75f5a740cba3 100644
--- a/drivers/usb/misc/cypress_cy7c63.c
+++ b/drivers/usb/misc/cypress_cy7c63.c
@@ -88,6 +88,9 @@ static int vendor_command(struct cypress *dev, unsigned char request,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER,
address, data, iobuf, CYPRESS_MAX_REQSIZE,
USB_CTRL_GET_TIMEOUT);
+ /* we must not process garbage */
+ if (retval < 2)
+ goto err_buf;
/* store returned data (more READs to be added) */
switch (request) {
@@ -107,6 +110,7 @@ static int vendor_command(struct cypress *dev, unsigned char request,
break;
}
+err_buf:
kfree(iobuf);
error:
return retval;
--
2.45.2
There is a small window during probing when IO is running
but the backlight is not registered. Processing events
during that time will crash. The completion handler
needs to check for a backlight before scheduling work.
The bug is as old as the driver.
Signed-off-by: Oliver Neukum <oneukum(a)suse.com>
CC: stable(a)vger.kernel.org
---
drivers/usb/misc/appledisplay.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c
index c8098e9b432e..62b5a30edc42 100644
--- a/drivers/usb/misc/appledisplay.c
+++ b/drivers/usb/misc/appledisplay.c
@@ -107,7 +107,12 @@ static void appledisplay_complete(struct urb *urb)
case ACD_BTN_BRIGHT_UP:
case ACD_BTN_BRIGHT_DOWN:
pdata->button_pressed = 1;
- schedule_delayed_work(&pdata->work, 0);
+ /*
+ * there is a window during which no device
+ * is registered
+ */
+ if (pdata->bd )
+ schedule_delayed_work(&pdata->work, 0);
break;
case ACD_BTN_NONE:
default:
@@ -202,6 +207,7 @@ static int appledisplay_probe(struct usb_interface *iface,
const struct usb_device_id *id)
{
struct backlight_properties props;
+ struct backlight_device *backlight;
struct appledisplay *pdata;
struct usb_device *udev = interface_to_usbdev(iface);
struct usb_endpoint_descriptor *endpoint;
@@ -272,13 +278,14 @@ static int appledisplay_probe(struct usb_interface *iface,
memset(&props, 0, sizeof(struct backlight_properties));
props.type = BACKLIGHT_RAW;
props.max_brightness = 0xff;
- pdata->bd = backlight_device_register(bl_name, NULL, pdata,
+ backlight = backlight_device_register(bl_name, NULL, pdata,
&appledisplay_bl_data, &props);
- if (IS_ERR(pdata->bd)) {
+ if (IS_ERR(backlight)) {
dev_err(&iface->dev, "Backlight registration failed\n");
- retval = PTR_ERR(pdata->bd);
+ retval = PTR_ERR(backlight);
goto error;
}
+ pdata->bd = backlight;
/* Try to get brightness */
brightness = appledisplay_bl_get_brightness(pdata->bd);
--
2.45.2
Virtual machines under Xen Hypervisor (DomU) running in Xen PV mode use a
special, nonstandard synthetized CPU topology which "just works" under
kernels 6.9.x while newer kernels wrongly assuming a "crash kernel" and
disable SMP (reducing to one CPU core) because the newer topology
implementation produces a wrong error "[Firmware Bug]: APIC enumeration
order not specification compliant" after new topology checks which are
improper for Xen PV platform. As a result, the kernel disables SMT and
activates just one CPU core within the VM (DomU).
The patch disables the regarding checks if it is running in Xen PV
mode (only) and bring back SMP / all CPUs as in the past to such DomU
VMs.
Signed-off-by: Niels Dettenbach <nd(a)syndicat.com>
---
The current behaviour leads all of our production Xen Host platforms
(amd64 - HPE proliant) unusable after updating to newer linux kernels
(with just one CPU available/activated per VM) while older kernels and
other OS (current NetBSD PV DomU) still work fully (and stable since many
years on the platform).
Xen PV mode is still provided by current Xen and widely used - even
if less wide as the newer Xen PVH mode today. So a solution probably
will be required.
So we assume that bug affects stable(a)vger.kernel.org as well.
dmesg from affected kernel:
-- snip --
[ 0.640364] CPU topo: Enumerated BSP APIC 0 is not marked in APICBASE MSR
[ 0.640367] CPU topo: Assuming crash kernel. Limiting to one CPU to prevent machine INIT
[ 0.640368] CPU topo: [Firmware Bug]: APIC enumeration order not specification compliant
[ 0.640376] CPU topo: Max. logical packages: 1
[ 0.640378] CPU topo: Max. logical dies: 1
[ 0.640379] CPU topo: Max. dies per package: 1
[ 0.640386] CPU topo: Max. threads per core: 1
[ 0.640388] CPU topo: Num. cores per package: 1
[ 0.640389] CPU topo: Num. threads per package: 1
[ 0.640390] CPU topo: Allowing 1 present CPUs plus 0 hotplug CPUs
[ 0.640402] Cannot find an available gap in the 32-bit address range
-- snap --
after patch applied:
-- snip --
[ 0.369439] CPU topo: Max. logical packages: 1
[ 0.369441] CPU topo: Max. logical dies: 1
[ 0.369442] CPU topo: Max. dies per package: 1
[ 0.369450] CPU topo: Max. threads per core: 2
[ 0.369452] CPU topo: Num. cores per package: 3
[ 0.369453] CPU topo: Num. threads per package: 6
[ 0.369453] CPU topo: Allowing 6 present CPUs plus 0 hotplug CPUs
-- snap --
We tested the patch intensely under productive / high load since 2 days now with no issues.
references:
arch/x86/kernel/cpu/topology.c
[line 448]
-- snip --
/*
* XEN PV is special as it does not advertise the local APIC
* properly, but provides a fake topology for it so that the
* infrastructure works. So don't apply the restrictions vs. APIC
* here.
*/
--snap --
--- linux/arch/x86/kernel/cpu/topology.c 2024-09-11 17:42:42.699278317 +0200
+++ linux/arch/x86/kernel/cpu/topology.c.orig 2024-09-11 09:53:16.194095250 +0200
@@ -132,14 +132,6 @@
u64 msr;
/*
- * assume Xen PV has a working (special) topology
- */
- if (xen_pv_domain()) {
- topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
- return false;
- }
-
- /*
* There is no real good way to detect whether this a kdump()
* kernel, but except on the Voyager SMP monstrosity which is not
* longer supported, the real BSP APIC ID is the first one which is
Witam,
Pomagamy pozyskiwać nowych klientów B2B.
Czy interesuje Państwa dotarcie do nowych potencjalnych partnerów oraz uruchomienie z nimi rozmów handlowch ?
Pozdrawiam
Szymon Jankowski
From: Eliav Bar-ilan <eliavb(a)nvidia.com>
An incorrect argument order calling amd_iommu_dev_flush_pasid_pages()
causes improper flushing of the IOMMU, leaving the old value of GCR3 from
a previous process attached to the same PASID.
The function has the signature:
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
ioasid_t pasid, u64 address, size_t size)
Correct the argument order.
Cc: stable(a)vger.kernel.org
Fixes: 474bf01ed9f0 ("iommu/amd: Add support for device based TLB invalidation")
Signed-off-by: Eliav Bar-ilan <eliavb(a)nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
---
drivers/iommu/amd/iommu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
This was discovered while testing SVA, but I suppose it is probably a bigger issue.
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b19e8c0f48fa25..6bc4030a6ba8ed 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1552,8 +1552,8 @@ void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
ioasid_t pasid)
{
- amd_iommu_dev_flush_pasid_pages(dev_data, 0,
- CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid);
+ amd_iommu_dev_flush_pasid_pages(dev_data, pasid, 0,
+ CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
}
void amd_iommu_domain_flush_complete(struct protection_domain *domain)
base-commit: cf2840f59119f41de3d9641a8b18a5da1b2cf6bf
--
2.46.0
One of our customers reported a crash and a corrupted ocfs2 filesystem.
The crash was due to the detection of corruption. Upon troubleshooting,
the fsck -fn output showed the below corruption
[EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record,
but fsck believes the largest valid value is 227. Clamp the next record value? n
The stat output from the debugfs.ocfs2 showed the following corruption
where the "Next Free Rec:" had overshot the "Count:" in the root metadata
block.
Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856)
FS Generation: 904309833 (0x35e6ac49)
CRC32: 00000000 ECC: 0000
Type: Regular Attr: 0x0 Flags: Valid
Dynamic Features: (0x16) HasXattr InlineXattr Refcounted
Extended Attributes Block: 0 Extended Attributes Inline Size: 256
User: 0 (root) Group: 0 (root) Size: 281320357888
Links: 1 Clusters: 141738
ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024
atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024
mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024
dtime: 0x0 -- Wed Dec 31 17:00:00 1969
Refcount Block: 2777346
Last Extblk: 2886943 Orphan Slot: 0
Sub Alloc Slot: 0 Sub Alloc Bit: 14
Tree Depth: 1 Count: 227 Next Free Rec: 230
## Offset Clusters Block#
0 0 2310 2776351
1 2310 2139 2777375
2 4449 1221 2778399
3 5670 731 2779423
4 6401 566 2780447
....... .... .......
....... .... .......
The issue was in the reflink workfow while reserving space for inline xattr.
The problematic function is ocfs2_reflink_xattr_inline(). By the time this
function is called the reflink tree is already recreated at the destination
inode from the source inode. At this point, this function reserves space
space inline xattrs at the destination inode without even checking if there
is space at the root metadata block. It simply reduces the l_count from 243
to 227 thereby making space of 256 bytes for inline xattr whereas the inode
already has extents beyond this index (in this case upto 230), thereby causing
corruption.
The fix for this is to reserve space for inline metadata before the at the
destination inode before the reflink tree gets recreated. The customer has
verified the fix.
Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
---
fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++--
fs/ocfs2/xattr.c | 11 +----------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3f80a56d0d60..05105d271fc8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4155,8 +4156,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4184,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = new_bh->b_data;
+ struct ocfs2_dinode *old_di = old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4211,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..a9f716ec89e2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6511,16 +6511,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
--
2.39.3
Supposing the following scenario.
CPU0 CPU1
blk_mq_insert_request() 1) store blk_mq_unquiesce_queue()
blk_mq_run_hw_queue() blk_queue_flag_clear(QUEUE_FLAG_QUIESCED) 3) store
if (blk_queue_quiesced()) 2) load blk_mq_run_hw_queues()
return blk_mq_run_hw_queue()
blk_mq_sched_dispatch_requests() if (!blk_mq_hctx_has_pending()) 4) load
return
The full memory barrier should be inserted between 1) and 2), as well as
between 3) and 4) to make sure that either CPU0 sees QUEUE_FLAG_QUIESCED is
cleared or CPU1 sees dispatch list or setting of bitmap of software queue.
Otherwise, either CPU will not re-run the hardware queue causing starvation.
So the first solution is to 1) add a pair of memory barrier to fix the
problem, another solution is to 2) use hctx->queue->queue_lock to synchronize
QUEUE_FLAG_QUIESCED. Here, we chose 2) to fix it since memory barrier is not
easy to be maintained.
Fixes: f4560ffe8cec1 ("blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue")
Cc: stable(a)vger.kernel.org
Cc: Muchun Song <muchun.song(a)linux.dev>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
---
block/blk-mq.c | 47 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 13 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2d0f22de0c7f..ac39f2a346a52 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2202,6 +2202,24 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
+static inline bool blk_mq_hw_queue_need_run(struct blk_mq_hw_ctx *hctx)
+{
+ bool need_run;
+
+ /*
+ * When queue is quiesced, we may be switching io scheduler, or
+ * updating nr_hw_queues, or other things, and we can't run queue
+ * any more, even blk_mq_hctx_has_pending() can't be called safely.
+ *
+ * And queue will be rerun in blk_mq_unquiesce_queue() if it is
+ * quiesced.
+ */
+ __blk_mq_run_dispatch_ops(hctx->queue, false,
+ need_run = !blk_queue_quiesced(hctx->queue) &&
+ blk_mq_hctx_has_pending(hctx));
+ return need_run;
+}
+
/**
* blk_mq_run_hw_queue - Start to run a hardware queue.
* @hctx: Pointer to the hardware queue to run.
@@ -2222,20 +2240,23 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
- /*
- * When queue is quiesced, we may be switching io scheduler, or
- * updating nr_hw_queues, or other things, and we can't run queue
- * any more, even __blk_mq_hctx_has_pending() can't be called safely.
- *
- * And queue will be rerun in blk_mq_unquiesce_queue() if it is
- * quiesced.
- */
- __blk_mq_run_dispatch_ops(hctx->queue, false,
- need_run = !blk_queue_quiesced(hctx->queue) &&
- blk_mq_hctx_has_pending(hctx));
+ need_run = blk_mq_hw_queue_need_run(hctx);
+ if (!need_run) {
+ unsigned long flags;
- if (!need_run)
- return;
+ /*
+ * Synchronize with blk_mq_unquiesce_queue(), becuase we check
+ * if hw queue is quiesced locklessly above, we need the use
+ * ->queue_lock to make sure we see the up-to-date status to
+ * not miss rerunning the hw queue.
+ */
+ spin_lock_irqsave(&hctx->queue->queue_lock, flags);
+ need_run = blk_mq_hw_queue_need_run(hctx);
+ spin_unlock_irqrestore(&hctx->queue->queue_lock, flags);
+
+ if (!need_run)
+ return;
+ }
if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
blk_mq_delay_run_hw_queue(hctx, 0);
--
2.20.1
virtual machines under Xen Hypervisor (DomU) running in Xen PV mode use a
special, nonstandard synthetized CPU topology which "just works" under
kernels 6.9.x while newer kernels assuming a "crash kernel" and disable
SMT (reducing to one CPU core) because the newer topology implementation
produces a wrong error "[Firmware Bug]: APIC enumeration order not
specification compliant" after new topology checks which are improper for
Xen PV platform. As a result, the kernel disables SMT and activates just
one CPU core within the VM (DomU).
The patch disables the regarding checks if it is running in Xen PV
mode (only) and bring back SMT / all CPUs as in the past to such DomU
VMs.
Signed-off-by: Niels Dettenbach <nd(a)syndicat.com>
---
The current behaviour leads all of our production Xen Host platforms
unusable after updating to newer linux kernels (with just one CPU
available/activated per VM) while older kernels other OS still work
fully (and stable since many years on the platform).
Xen PV mode is still provided by current Xen and widely used - even
if less wide as the newer Xen PVH mode today. So a solution probably
will be required.
So we assume that bug affects stable(a)vger.kernel.org as well.
dmesg from affected kernel:
-- snip --
Sep 10 14:35:50 ffm1 kernel: [ 0.640364] CPU topo: Enumerated BSP APIC 0 is not marked in APICBASE MSR
Sep 10 14:35:50 ffm1 kernel: [ 0.640367] CPU topo: Assuming crash kernel. Limiting to one CPU to prevent machine INIT
Sep 10 14:35:50 ffm1 kernel: [ 0.640368] CPU topo: [Firmware Bug]: APIC enumeration order not specification compliant
Sep 10 14:35:50 ffm1 kernel: [ 0.640376] CPU topo: Max. logical packages: 1
Sep 10 14:35:50 ffm1 kernel: [ 0.640378] CPU topo: Max. logical dies: 1
Sep 10 14:35:50 ffm1 kernel: [ 0.640379] CPU topo: Max. dies per package: 1
Sep 10 14:35:50 ffm1 kernel: [ 0.640386] CPU topo: Max. threads per core: 1
Sep 10 14:35:50 ffm1 kernel: [ 0.640388] CPU topo: Num. cores per package: 1
Sep 10 14:35:50 ffm1 kernel: [ 0.640389] CPU topo: Num. threads per package: 1
Sep 10 14:35:50 ffm1 kernel: [ 0.640390] CPU topo: Allowing 1 present CPUs plus 0 hotplug CPUs
Sep 10 14:35:50 ffm1 kernel: [ 0.640402] Cannot find an available gap in the 32-bit address range
-- snap --
We tested the patch intensely under productive / high load since 2 days now with no issues.
references:
arch/x86/kernel/cpu/topology.c
[line 448]
--- snip ---
/*
* XEN PV is special as it does not advertise the local APIC
* properly, but provides a fake topology for it so that the
* infrastructure works. So don't apply the restrictions vs. APIC
* here.
*/
---snap ---
Related errors / tickets:
https://forum.qubes-os.org/t/fedora-sees-only-1-cpu-core-after-updating-the…
--- linux/arch/x86/kernel/cpu/topology.c.orig 2024-09-11 09:53:16.194095250 +0200
+++ linux/arch/x86/kernel/cpu/topology.c 2024-09-11 09:55:17.338448094 +0200
@@ -158,7 +158,7 @@ static __init bool check_for_real_bsp(u3
is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
}
- if (apic_id == topo_info.boot_cpu_apic_id) {
+ if (!xen_pv_domain() && apic_id == topo_info.boot_cpu_apic_id) {
/*
* If the boot CPU has the APIC BSP bit set then the
* firmware enumeration is agreeing. If the CPU does not
@@ -185,7 +185,7 @@ static __init bool check_for_real_bsp(u3
pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
topo_info.boot_cpu_apic_id, apic_id);
- if (is_bsp) {
+ if (!xen_pv_domain() && is_bsp) {
/*
* The boot CPU has the APIC BSP bit set. Use it and complain
* about the broken firmware enumeration.
One of our customers reported a crash and a corrupted ocfs2 filesystem.
The crash was due to the detection of corruption. Upon troubleshooting,
the fsck -fn output showed the below corruption
[EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record,
but fsck believes the largest valid value is 227. Clamp the next record value? n
The stat output from the debugfs.ocfs2 showed the following corruption
where the "Next Free Rec:" had overshot the "Count:" in the root metadata
block.
Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856)
FS Generation: 904309833 (0x35e6ac49)
CRC32: 00000000 ECC: 0000
Type: Regular Attr: 0x0 Flags: Valid
Dynamic Features: (0x16) HasXattr InlineXattr Refcounted
Extended Attributes Block: 0 Extended Attributes Inline Size: 256
User: 0 (root) Group: 0 (root) Size: 281320357888
Links: 1 Clusters: 141738
ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024
atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024
mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024
dtime: 0x0 -- Wed Dec 31 17:00:00 1969
Refcount Block: 2777346
Last Extblk: 2886943 Orphan Slot: 0
Sub Alloc Slot: 0 Sub Alloc Bit: 14
Tree Depth: 1 Count: 227 Next Free Rec: 230
## Offset Clusters Block#
0 0 2310 2776351
1 2310 2139 2777375
2 4449 1221 2778399
3 5670 731 2779423
4 6401 566 2780447
....... .... .......
....... .... .......
The issue was in the reflink workfow while reserving space for inline xattr.
The problematic function is ocfs2_reflink_xattr_inline(). By the time this
function is called the reflink tree is already recreated at the destination
inode from the source inode. At this point, this function reserves space
space inline xattrs at the destination inode without even checking if there
is space at the root metadata block. It simply reduces the l_count from 243
to 227 thereby making space of 256 bytes for inline xattr whereas the inode
already has extents beyond this index (in this case upto 230), thereby causing
corruption.
The fix for this is to reserve space for inline metadata before the at the
destination inode before the reflink tree gets recreated. The customer has
verified the fix.
Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
---
fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++--
fs/ocfs2/xattr.c | 11 +----------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3f80a56d0d60..1d427da06bee 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4155,8 +4156,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4184,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *ni = OCFS2_I(new_inode);
+
+ if (!(ni->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = new_bh->b_data;
+ struct ocfs2_dinode *old_di = old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4211,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..a9f716ec89e2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6511,16 +6511,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
--
2.39.3
One of our customers reported a crash and a corrupted ocfs2 filesystem.
The crash was due to the detection of corruption. Upon troubleshooting,
the fsck -fn output showed the below corruption
[EXTENT_LIST_FREE] Extent list in owner 33080590 claims 230 as the next free chain record,
but fsck believes the largest valid value is 227. Clamp the next record value? n
The stat output from the debugfs.ocfs2 showed the following corruption
where the "Next Free Rec:" had overshot the "Count:" in the root metadata
block.
Inode: 33080590 Mode: 0640 Generation: 2619713622 (0x9c25a856)
FS Generation: 904309833 (0x35e6ac49)
CRC32: 00000000 ECC: 0000
Type: Regular Attr: 0x0 Flags: Valid
Dynamic Features: (0x16) HasXattr InlineXattr Refcounted
Extended Attributes Block: 0 Extended Attributes Inline Size: 256
User: 0 (root) Group: 0 (root) Size: 281320357888
Links: 1 Clusters: 141738
ctime: 0x66911b56 0x316edcb8 -- Fri Jul 12 06:02:30.829349048 2024
atime: 0x66911d6b 0x7f7a28d -- Fri Jul 12 06:11:23.133669517 2024
mtime: 0x66911b56 0x12ed75d7 -- Fri Jul 12 06:02:30.317552087 2024
dtime: 0x0 -- Wed Dec 31 17:00:00 1969
Refcount Block: 2777346
Last Extblk: 2886943 Orphan Slot: 0
Sub Alloc Slot: 0 Sub Alloc Bit: 14
Tree Depth: 1 Count: 227 Next Free Rec: 230
## Offset Clusters Block#
0 0 2310 2776351
1 2310 2139 2777375
2 4449 1221 2778399
3 5670 731 2779423
4 6401 566 2780447
....... .... .......
....... .... .......
The issue was in the reflink workfow while reserving space for inline xattr.
The problematic function is ocfs2_reflink_xattr_inline(). By the time this
function is called the reflink tree is already recreated at the destination
inode from the source inode. At this point, this function reserves space
space inline xattrs at the destination inode without even checking if there
is space at the root metadata block. It simply reduces the l_count from 243
to 227 thereby making space of 256 bytes for inline xattr whereas the inode
already has extents beyond this index (in this case upto 230), thereby causing
corruption.
The fix for this is to reserve space for inline metadata before the at the
destination inode before the reflink tree gets recreated. The customer has
verified the fix.
Fixes: ef962df057aa ("ocfs2: xattr: fix inlined xattr reflink")
Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna(a)oracle.com>
---
fs/ocfs2/refcounttree.c | 26 ++++++++++++++++++++++++--
fs/ocfs2/xattr.c | 11 +----------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3f80a56d0d60..1d427da06bee 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -4155,8 +4156,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4184,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *ni = OCFS2_I(new_inode);
+
+ if (!(ni->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = new_bh->b_data;
+ struct ocfs2_dinode *old_di = old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4211,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..a9f716ec89e2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6511,16 +6511,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
--
2.39.3
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
commit 61cbfb5368dd50ed0d65ce21d305aa923581db2b upstream
The cfg pointer is set before reading the channel number that the
configuration should point to. This causes configurations to be shifted
by one channel.
For example setting bipolar to the first channel defined in the DT will
cause bipolar mode to be active on the second defined channel.
Fix by moving the cfg pointer setting after reading the channel number.
Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240806085133.114547-1-dumitru.ceclan@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Signed-off-by: He Lugang <helugang(a)uniontech.com>
---
drivers/iio/adc/ad7124.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index e7b1d517d3de..089398a7664a 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -837,8 +837,6 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels = channels;
device_for_each_child_node_scoped(dev, child) {
- cfg = &st->channels[channel].cfg;
-
ret = fwnode_property_read_u32(child, "reg", &channel);
if (ret)
return ret;
@@ -856,6 +854,7 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels[channel].ain = AD7124_CHANNEL_AINP(ain[0]) |
AD7124_CHANNEL_AINM(ain[1]);
+ cfg = &st->channels[channel].cfg;
cfg->bipolar = fwnode_property_read_bool(child, "bipolar");
ret = fwnode_property_read_u32(child, "adi,reference-select", &tmp);
--
2.45.2
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 61cbfb5368dd50ed0d65ce21d305aa923581db2b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024090914-province-underdone-1eea@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
61cbfb5368dd ("iio: adc: ad7124: fix DT configuration parsing")
a6eaf02b8274 ("iio: adc: ad7124: Switch from of specific to fwnode based property handling")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 61cbfb5368dd50ed0d65ce21d305aa923581db2b Mon Sep 17 00:00:00 2001
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
Date: Tue, 6 Aug 2024 11:51:33 +0300
Subject: [PATCH] iio: adc: ad7124: fix DT configuration parsing
The cfg pointer is set before reading the channel number that the
configuration should point to. This causes configurations to be shifted
by one channel.
For example setting bipolar to the first channel defined in the DT will
cause bipolar mode to be active on the second defined channel.
Fix by moving the cfg pointer setting after reading the channel number.
Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240806085133.114547-1-dumitru.ceclan@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index afb5f4d741e6..108e9ccab1ef 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -844,8 +844,6 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels = channels;
device_for_each_child_node_scoped(dev, child) {
- cfg = &st->channels[channel].cfg;
-
ret = fwnode_property_read_u32(child, "reg", &channel);
if (ret)
return ret;
@@ -863,6 +861,7 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels[channel].ain = AD7124_CHANNEL_AINP(ain[0]) |
AD7124_CHANNEL_AINM(ain[1]);
+ cfg = &st->channels[channel].cfg;
cfg->bipolar = fwnode_property_read_bool(child, "bipolar");
ret = fwnode_property_read_u32(child, "adi,reference-select", &tmp);
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
From: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
[ Upstream commit 61cbfb5368dd50ed0d65ce21d305aa923581db2b ]
The cfg pointer is set before reading the channel number that the
configuration should point to. This causes configurations to be shifted
by one channel.
For example setting bipolar to the first channel defined in the DT will
cause bipolar mode to be active on the second defined channel.
Fix by moving the cfg pointer setting after reading the channel number.
Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240806085133.114547-1-dumitru.ceclan@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Signed-off-by: He Lugang <helugang(a)uniontech.com>
---
drivers/iio/adc/ad7124.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index e7b1d517d3de..089398a7664a 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -837,8 +837,6 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels = channels;
device_for_each_child_node_scoped(dev, child) {
- cfg = &st->channels[channel].cfg;
-
ret = fwnode_property_read_u32(child, "reg", &channel);
if (ret)
return ret;
@@ -856,6 +854,7 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels[channel].ain = AD7124_CHANNEL_AINP(ain[0]) |
AD7124_CHANNEL_AINM(ain[1]);
+ cfg = &st->channels[channel].cfg;
cfg->bipolar = fwnode_property_read_bool(child, "bipolar");
ret = fwnode_property_read_u32(child, "adi,reference-select", &tmp);
--
2.45.2
The number of transmit and receive descriptors must be a multiple of 128
due to the hardware limitation. If it is set to a multiple of 8 instead of
a multiple 128, the queues will easily be hung.
Cc: stable(a)vger.kernel.org
Fixes: 883b5984a5d2 ("net: wangxun: add ethtool_ops for ring parameters")
Signed-off-by: Jiawen Wu <jiawenwu(a)trustnetic.com>
---
drivers/net/ethernet/wangxun/libwx/wx_type.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 1d57b047817b..b54bffda027b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -426,9 +426,9 @@ enum WX_MSCA_CMD_value {
#define WX_MIN_RXD 128
#define WX_MIN_TXD 128
-/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 8
-#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 8
+/* Number of Transmit and Receive Descriptors must be a multiple of 128 */
+#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 128
+#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 128
#define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
#define VMDQ_P(p) p
--
2.27.0
Here are some various fixes for the MPTCP selftests.
Patch 1 fixes a recently modified test to continue to work as expected
on older kernels. This is a fix for a recent fix that can be backported
up to v5.15.
Patch 2 and 3 include dependences when exporting or installing the
tests. Two fixes for v6.11-rc1.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Matthieu Baerts (NGI0) (3):
selftests: mptcp: join: restrict fullmesh endp on 1st sf
selftests: mptcp: include lib.sh file
selftests: mptcp: include net_helper.sh file
tools/testing/selftests/net/mptcp/Makefile | 2 ++
tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 +++-
2 files changed, 5 insertions(+), 1 deletion(-)
---
base-commit: 48aa361c5db0b380c2b75c24984c0d3e7c1e8c09
change-id: 20240910-net-selftests-mptcp-fix-install-2b82ae5a99c8
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
It is possible that the usb power_supply is registered after the probe
of dwc3. In this case, trying to get the usb power_supply during the
probe will fail and there is no chance to try again. Also the usb
power_supply might be unregistered at anytime so that the handle of it
in dwc3 would become invalid. To fix this, get the handle right before
calling to power_supply functions and put it afterward.
Fixes: 6f0764b5adea ("usb: dwc3: add a power supply for current control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kyle Tso <kyletso(a)google.com>
---
drivers/usb/dwc3/core.c | 25 +++++--------------------
drivers/usb/dwc3/core.h | 4 ++--
drivers/usb/dwc3/gadget.c | 19 ++++++++++++++-----
3 files changed, 21 insertions(+), 27 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 734de2a8bd21..ab563edd9b4c 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1631,8 +1631,6 @@ static void dwc3_get_properties(struct dwc3 *dwc)
u8 tx_thr_num_pkt_prd = 0;
u8 tx_max_burst_prd = 0;
u8 tx_fifo_resize_max_num;
- const char *usb_psy_name;
- int ret;
/* default to highest possible threshold */
lpm_nyet_threshold = 0xf;
@@ -1667,12 +1665,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
- ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
- if (ret >= 0) {
- dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
- if (!dwc->usb_psy)
- dev_err(dev, "couldn't get usb power supply\n");
- }
+ device_property_read_string(dev, "usb-psy-name", &dwc->usb_psy_name);
dwc->has_lpm_erratum = device_property_read_bool(dev,
"snps,has-lpm-erratum");
@@ -2133,18 +2126,16 @@ static int dwc3_probe(struct platform_device *pdev)
dwc3_get_software_properties(dwc);
dwc->reset = devm_reset_control_array_get_optional_shared(dev);
- if (IS_ERR(dwc->reset)) {
- ret = PTR_ERR(dwc->reset);
- goto err_put_psy;
- }
+ if (IS_ERR(dwc->reset))
+ return PTR_ERR(dwc->reset);
ret = dwc3_get_clocks(dwc);
if (ret)
- goto err_put_psy;
+ return ret;
ret = reset_control_deassert(dwc->reset);
if (ret)
- goto err_put_psy;
+ return ret;
ret = dwc3_clk_enable(dwc);
if (ret)
@@ -2245,9 +2236,6 @@ static int dwc3_probe(struct platform_device *pdev)
dwc3_clk_disable(dwc);
err_assert_reset:
reset_control_assert(dwc->reset);
-err_put_psy:
- if (dwc->usb_psy)
- power_supply_put(dwc->usb_psy);
return ret;
}
@@ -2276,9 +2264,6 @@ static void dwc3_remove(struct platform_device *pdev)
pm_runtime_set_suspended(&pdev->dev);
dwc3_free_event_buffers(dwc);
-
- if (dwc->usb_psy)
- power_supply_put(dwc->usb_psy);
}
#ifdef CONFIG_PM
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 1e561fd8b86e..ecfe2cc224f7 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1045,7 +1045,7 @@ struct dwc3_scratchpad_array {
* @role_sw: usb_role_switch handle
* @role_switch_default_mode: default operation mode of controller while
* usb role is USB_ROLE_NONE.
- * @usb_psy: pointer to power supply interface.
+ * @usb_psy_name: name of the usb power supply interface.
* @usb2_phy: pointer to USB2 PHY
* @usb3_phy: pointer to USB3 PHY
* @usb2_generic_phy: pointer to array of USB2 PHYs
@@ -1223,7 +1223,7 @@ struct dwc3 {
struct usb_role_switch *role_sw;
enum usb_dr_mode role_switch_default_mode;
- struct power_supply *usb_psy;
+ const char *usb_psy_name;
u32 fladj;
u32 ref_clk_per;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 89fc690fdf34..c89b5b5a64cf 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3049,20 +3049,29 @@ static void dwc3_gadget_set_ssp_rate(struct usb_gadget *g,
static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
{
- struct dwc3 *dwc = gadget_to_dwc(g);
+ struct dwc3 *dwc = gadget_to_dwc(g);
+ struct power_supply *usb_psy;
union power_supply_propval val = {0};
int ret;
if (dwc->usb2_phy)
return usb_phy_set_power(dwc->usb2_phy, mA);
- if (!dwc->usb_psy)
+ if (!dwc->usb_psy_name)
return -EOPNOTSUPP;
- val.intval = 1000 * mA;
- ret = power_supply_set_property(dwc->usb_psy, POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT, &val);
+ usb_psy = power_supply_get_by_name(dwc->usb_psy_name);
+ if (usb_psy) {
+ val.intval = 1000 * mA;
+ ret = power_supply_set_property(usb_psy, POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
+ &val);
+ power_supply_put(usb_psy);
+ return ret;
+ }
- return ret;
+ dev_err(dwc->dev, "couldn't get usb power supply\n");
+
+ return -EOPNOTSUPP;
}
/**
--
2.45.2.993.g49e7a77208-goog
The qcom_geni_serial_poll_bit() can be used to wait for events like
command completion and is supposed to wait for the time it takes to
clear a full fifo before timing out.
As noted by Doug, the current implementation does not account for start,
stop and parity bits when determining the timeout. The helper also does
not currently account for the shift register and the two-word
intermediate transfer register.
A too short timeout can specifically lead to lost characters when
waiting for a transfer to complete as the transfer is cancelled on
timeout.
Instead of determining the poll timeout on every call, store the fifo
timeout when updating it in set_termios() and make sure to take the
shift and intermediate registers into account. Note that serial core has
already added a 20 ms margin to the fifo timeout.
Also note that the current uart_fifo_timeout() interface does
unnecessary calculations on every call and did not exist in earlier
kernels so only store its result once. This facilitates backports too as
earlier kernels can derive the timeout from uport->timeout, which has
since been removed.
Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP")
Cc: stable(a)vger.kernel.org # 4.17
Reported-by: Douglas Anderson <dianders(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/tty/serial/qcom_geni_serial.c | 31 +++++++++++++++------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 69a632fefc41..309c0bddf26a 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -124,7 +124,7 @@ struct qcom_geni_serial_port {
dma_addr_t tx_dma_addr;
dma_addr_t rx_dma_addr;
bool setup;
- unsigned int baud;
+ unsigned long poll_timeout_us;
unsigned long clk_rate;
void *rx_buf;
u32 loopback;
@@ -270,22 +270,13 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
{
u32 reg;
struct qcom_geni_serial_port *port;
- unsigned int baud;
- unsigned int fifo_bits;
unsigned long timeout_us = 20000;
struct qcom_geni_private_data *private_data = uport->private_data;
if (private_data->drv) {
port = to_dev_port(uport);
- baud = port->baud;
- if (!baud)
- baud = 115200;
- fifo_bits = port->tx_fifo_depth * port->tx_fifo_width;
- /*
- * Total polling iterations based on FIFO worth of bytes to be
- * sent at current baud. Add a little fluff to the wait.
- */
- timeout_us = ((fifo_bits * USEC_PER_SEC) / baud) + 500;
+ if (port->poll_timeout_us)
+ timeout_us = port->poll_timeout_us;
}
/*
@@ -1244,11 +1235,11 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
unsigned long clk_rate;
u32 ver, sampling_rate;
unsigned int avg_bw_core;
+ unsigned long timeout;
qcom_geni_serial_stop_rx(uport);
/* baud rate */
baud = uart_get_baud_rate(uport, termios, old, 300, 4000000);
- port->baud = baud;
sampling_rate = UART_OVERSAMPLING;
/* Sampling rate is halved for IP versions >= 2.5 */
@@ -1326,9 +1317,21 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
else
tx_trans_cfg |= UART_CTS_MASK;
- if (baud)
+ if (baud) {
uart_update_timeout(uport, termios->c_cflag, baud);
+ /*
+ * Make sure that qcom_geni_serial_poll_bitfield() waits for
+ * the FIFO, two-word intermediate transfer register and shift
+ * register to clear.
+ *
+ * Note that uart_fifo_timeout() also adds a 20 ms margin.
+ */
+ timeout = jiffies_to_usecs(uart_fifo_timeout(uport));
+ timeout += 3 * timeout / port->tx_fifo_depth;
+ WRITE_ONCE(port->poll_timeout_us, timeout);
+ }
+
if (!uart_console(uport))
writel(port->loopback,
uport->membase + SE_UART_LOOPBACK_CFG);
--
2.44.2
Avoid unnecessary nested min()/max() which results in egregious macro
expansion. Use clamp_t() as this introduces the least possible expansion.
Not doing so results in an impact on build times.
This resolves an issue with slackware 15.0 32-bit compilation as reported
by Richard Narron.
Presumably the min/max fixups would be difficult to backport, this patch
should be easier and fix's Richard's problem in 5.15.
Reported-by: Richard Narron <richard(a)aaazen.com>
Closes: https://lore.kernel.org/all/4a5321bd-b1f-1832-f0c-cea8694dc5aa@aaazen.com/
Fixes: 867046cc7027 ("minmax: relax check to allow comparison between unsigned arguments and signed constants")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
---
.../staging/media/atomisp/pci/sh_css_frac.h | 26 ++++++++++++++-----
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/drivers/staging/media/atomisp/pci/sh_css_frac.h b/drivers/staging/media/atomisp/pci/sh_css_frac.h
index b90b5b330dfa..8ba65161f7a9 100644
--- a/drivers/staging/media/atomisp/pci/sh_css_frac.h
+++ b/drivers/staging/media/atomisp/pci/sh_css_frac.h
@@ -32,12 +32,24 @@
#define uISP_VAL_MAX ((unsigned int)((1 << uISP_REG_BIT) - 1))
/* a:fraction bits for 16bit precision, b:fraction bits for ISP precision */
-#define sDIGIT_FITTING(v, a, b) \
- min_t(int, max_t(int, (((v) >> sSHIFT) >> max(sFRACTION_BITS_FITTING(a) - (b), 0)), \
- sISP_VAL_MIN), sISP_VAL_MAX)
-#define uDIGIT_FITTING(v, a, b) \
- min((unsigned int)max((unsigned)(((v) >> uSHIFT) \
- >> max((int)(uFRACTION_BITS_FITTING(a) - (b)), 0)), \
- uISP_VAL_MIN), uISP_VAL_MAX)
+static inline int sDIGIT_FITTING(int v, int a, int b)
+{
+ int fit_shift = sFRACTION_BITS_FITTING(a) - b;
+
+ v >>= sSHIFT;
+ v >>= fit_shift > 0 ? fit_shift : 0;
+
+ return clamp_t(int, v, sISP_VAL_MIN, sISP_VAL_MAX);
+}
+
+static inline unsigned int uDIGIT_FITTING(unsigned int v, int a, int b)
+{
+ int fit_shift = uFRACTION_BITS_FITTING(a) - b;
+
+ v >>= uSHIFT;
+ v >>= fit_shift > 0 ? fit_shift : 0;
+
+ return clamp_t(unsigned int, v, uISP_VAL_MIN, uISP_VAL_MAX);
+}
#endif /* __SH_CSS_FRAC_H */
--
2.46.0
Avoid unnecessary nested min()/max() which reults in egregious macro
expansion.
The nesting occurs when NET_SKB_PAD is used in a min() or max() invocation,
for instance, various ethernet drivers wrap it in a max().
Not doing so results in an impact on build times.
Cc: stable(a)vger.kernel.org
Fixes: 867046cc7027 ("minmax: relax check to allow comparison between unsigned arguments and signed constants")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
---
include/linux/skbuff.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 29c3ea5b6e93..d53b296df504 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3164,7 +3164,11 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
* NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
*/
#ifndef NET_SKB_PAD
-#define NET_SKB_PAD max(32, L1_CACHE_BYTES)
+#if L1_CACHE_BYTES < 32
+#define NET_SKB_PAD 32
+#else
+#define NET_SKB_PAD L1_CACHE_BYTES
+#endif
#endif
int ___pskb_trim(struct sk_buff *skb, unsigned int len);
--
2.46.0
Avoid unnecessary nested min()/max() which reults in egregious macro
expansion. Use clamp_t() as this introduces the least possible expansion.
Not doing so results in an impact on build times.
Cc: stable(a)vger.kernel.org
Fixes: 867046cc7027 ("minmax: relax check to allow comparison between unsigned arguments and signed constants")
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index e809f91c08fb..8b431f90efc3 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -23,7 +23,7 @@
/* The PacketOffset field is measured in units of 32 bytes and is 3 bits wide,
* so the maximum offset is 7 * 32 = 224
*/
-#define MVPP2_SKB_HEADROOM min(max(XDP_PACKET_HEADROOM, NET_SKB_PAD), 224)
+#define MVPP2_SKB_HEADROOM clamp_t(int, XDP_PACKET_HEADROOM, NET_SKB_PAD, 224)
#define MVPP2_XDP_PASS 0
#define MVPP2_XDP_DROPPED BIT(0)
--
2.46.0
Avoid nested min()/max() which results in egregious macro expansion.
This issue was introduced by commit 867046cc7027 ("minmax: relax check to
allow comparison between unsigned arguments and signed constants") [2].
Work has been done to address the issue of egregious min()/max() macro
expansion in commit 22f546873149 ("minmax: improve macro expansion and type
checking") and related, however it appears that some issues remain on more
tightly constrained systems.
Adjust a few known-bad cases of deeply nested macros to avoid doing so to
mitigate this. Porting the patch first proposed in [1] to Linus's tree.
Running an allmodconfig build using the methodology described in [2] we
observe a 35 MiB reduction in generated code.
The difference is much more significant prior to recent minmax fixes which
were not backported. As per [1] prior these the reduction is more like 200
MiB.
This resolves an issue with slackware 15.0 32-bit compilation as reported
by Richard Narron.
Presumably the min/max fixups would be difficult to backport, this patch
should be easier and fix's Richard's problem in 5.15.
[0]:https://lore.kernel.org/all/b97faef60ad24922b530241c5d7c933c@AcuMS.acula…
[1]:https://lore.kernel.org/lkml/5882b96e-1287-4390-8174-3316d39038ef@lucife…
[2]:https://lore.kernel.org/linux-mm/36aa2cad-1db1-4abf-8dd2-fb20484aabc3@lu…
Reported-by: Richard Narron <richard(a)aaazen.com>
Closes: https://lore.kernel.org/all/4a5321bd-b1f-1832-f0c-cea8694dc5aa@aaazen.com/
Fixes: 867046cc7027 ("minmax: relax check to allow comparison between unsigned arguments and signed constants")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 2 +-
.../staging/media/atomisp/pci/sh_css_frac.h | 26 ++++++++++++++-----
include/linux/skbuff.h | 6 ++++-
3 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index e809f91c08fb..8b431f90efc3 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -23,7 +23,7 @@
/* The PacketOffset field is measured in units of 32 bytes and is 3 bits wide,
* so the maximum offset is 7 * 32 = 224
*/
-#define MVPP2_SKB_HEADROOM min(max(XDP_PACKET_HEADROOM, NET_SKB_PAD), 224)
+#define MVPP2_SKB_HEADROOM clamp_t(int, XDP_PACKET_HEADROOM, NET_SKB_PAD, 224)
#define MVPP2_XDP_PASS 0
#define MVPP2_XDP_DROPPED BIT(0)
diff --git a/drivers/staging/media/atomisp/pci/sh_css_frac.h b/drivers/staging/media/atomisp/pci/sh_css_frac.h
index b90b5b330dfa..a973394c5bc0 100644
--- a/drivers/staging/media/atomisp/pci/sh_css_frac.h
+++ b/drivers/staging/media/atomisp/pci/sh_css_frac.h
@@ -32,12 +32,24 @@
#define uISP_VAL_MAX ((unsigned int)((1 << uISP_REG_BIT) - 1))
/* a:fraction bits for 16bit precision, b:fraction bits for ISP precision */
-#define sDIGIT_FITTING(v, a, b) \
- min_t(int, max_t(int, (((v) >> sSHIFT) >> max(sFRACTION_BITS_FITTING(a) - (b), 0)), \
- sISP_VAL_MIN), sISP_VAL_MAX)
-#define uDIGIT_FITTING(v, a, b) \
- min((unsigned int)max((unsigned)(((v) >> uSHIFT) \
- >> max((int)(uFRACTION_BITS_FITTING(a) - (b)), 0)), \
- uISP_VAL_MIN), uISP_VAL_MAX)
+static inline int sDIGIT_FITTING(short v, int a, int b)
+{
+ int fit_shift = sFRACTION_BITS_FITTING(a) - b;
+
+ v >>= sSHIFT;
+ v >>= fit_shift > 0 ? fit_shift : 0;
+
+ return clamp_t(int, v, sISP_VAL_MIN, sISP_VAL_MAX);
+}
+
+static inline unsigned int uDIGIT_FITTING(unsigned int v, int a, int b)
+{
+ int fit_shift = uFRACTION_BITS_FITTING(a) - b;
+
+ v >>= uSHIFT;
+ v >>= fit_shift > 0 ? fit_shift : 0;
+
+ return clamp_t(unsigned int, v, uISP_VAL_MIN, uISP_VAL_MAX);
+}
#endif /* __SH_CSS_FRAC_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 29c3ea5b6e93..d53b296df504 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3164,7 +3164,11 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
* NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
*/
#ifndef NET_SKB_PAD
-#define NET_SKB_PAD max(32, L1_CACHE_BYTES)
+#if L1_CACHE_BYTES < 32
+#define NET_SKB_PAD 32
+#else
+#define NET_SKB_PAD L1_CACHE_BYTES
+#endif
#endif
int ___pskb_trim(struct sk_buff *skb, unsigned int len);
--
2.46.0
This patch addresses a reference count handling issue in the
ice_dpll_init_rclk_pins() function. The function calls ice_dpll_get_pins(),
which increments the reference count of the relevant resources. However,
if the condition WARN_ON((!vsi || !vsi->netdev)) is met, the function
currently returns an error without properly releasing the resources
acquired by ice_dpll_get_pins(), leading to a reference count leak.
To resolve this, the check has been moved to the top of the function. This
ensures that the function verifies the state before any resources are
acquired, avoiding the need for additional resource management in the
error path.
This bug was identified by an experimental static analysis tool developed
by our team. The tool specializes in analyzing reference count operations
and detecting potential issues where resources are not properly managed.
In this case, the tool flagged the missing release operation as a
potential problem, which led to the development of this patch.
Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)outlook.com>
---
v2:
* In this patch v2, the check for vsi and vsi->netdev has been moved to
the top of the function to simplify error handling and avoid the need for
resource unwinding.
Thanks to Simon Horman for suggesting this improvement.
---
drivers/net/ethernet/intel/ice/ice_dpll.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index e92be6f130a3..b403d55303b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -1626,6 +1626,8 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
struct dpll_pin *parent;
int ret, i;
+ if (WARN_ON((!vsi || !vsi->netdev)))
+ return -EINVAL;
ret = ice_dpll_get_pins(pf, pin, start_idx, ICE_DPLL_RCLK_NUM_PER_PF,
pf->dplls.clock_id);
if (ret)
@@ -1641,8 +1643,6 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
if (ret)
goto unregister_pins;
}
- if (WARN_ON((!vsi || !vsi->netdev)))
- return -EINVAL;
dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin);
return 0;
--
2.25.1
From: Roman Li <Roman.Li(a)amd.com>
[WHY]
RCG state of IPX in idle is more stable for DCN351 and some variants of
DCN35 than IPS2.
[HOW]
Rework dm_get_default_ips_mode() to specify default per ASIC and update
DCN35/DCN351 defaults accordingly.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Sun peng Li <sunpeng.li(a)amd.com>
Signed-off-by: Roman Li <Roman.Li(a)amd.com>
Signed-off-by: Alex Hung <alex.hung(a)amd.com>
---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 50 ++++++++++++-------
1 file changed, 33 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c77982245f60..c50880422502 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1771,25 +1771,41 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *
static enum dmub_ips_disable_type dm_get_default_ips_mode(
struct amdgpu_device *adev)
{
- /*
- * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to
- * cause a hard hang. A fix exists for newer PMFW.
- *
- * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest
- * IPS state in all cases, except for s0ix and all displays off (DPMS),
- * where IPS2 is allowed.
- *
- * When checking pmfw version, use the major and minor only.
- */
- if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) &&
- (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300)
- return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE;
- if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
- return DMUB_IPS_ENABLE;
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 5, 0):
+ /*
+ * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to
+ * cause a hard hang. A fix exists for newer PMFW.
+ *
+ * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest
+ * IPS state in all cases, except for s0ix and all displays off (DPMS),
+ * where IPS2 is allowed.
+ *
+ * When checking pmfw version, use the major and minor only.
+ */
+ if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300)
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0))
+ /*
+ * Other ASICs with DCN35 that have residency issues with
+ * IPS2 in idle.
+ * We want them to use IPS2 only in display off cases.
+ */
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ break;
+ case IP_VERSION(3, 5, 1):
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ break;
+ default:
+ /* ASICs older than DCN35 do not have IPSs */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0))
+ ret = DMUB_IPS_DISABLE_ALL;
+ break;
+ }
- /* ASICs older than DCN35 do not have IPSs */
- return DMUB_IPS_DISABLE_ALL;
+ return ret;
}
static int amdgpu_dm_init(struct amdgpu_device *adev)
--
2.34.1
From: Charlene Liu <Charlene.Liu(a)amd.com>
[WHY & HOW]
1) We did linear/non linear transition properly long ago
2) We used that path to handle SystemDisplayEnable
3) We fixed a SystemDisplayEnable inability to fallback to passive by
impacting the transition flow generically
4) AFMF later relied on the generic transition behavior
Separating the two flows to make (3) non-generic is the best immediate
coarse of action.
DC can discern SSAMPO3 very easily from SDE.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Chris Park <chris.park(a)amd.com>
Signed-off-by: Charlene Liu <Charlene.Liu(a)amd.com>
Signed-off-by: Alex Hung <alex.hung(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 6 +++---
drivers/gpu/drm/amd/display/dc/dc.h | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 67812fbbb006..a1652130e4be 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2376,7 +2376,7 @@ static bool is_surface_in_context(
return false;
}
-static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
+static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
enum surface_update_type update_type = UPDATE_TYPE_FAST;
@@ -2455,7 +2455,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
/* todo: below are HW dependent, we should add a hook to
* DCE/N resource and validated there.
*/
- if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
+ if (!dc->debug.skip_full_updated_if_possible) {
/* swizzled mode requires RQ to be setup properly,
* thus need to run DML to calculate RQ settings
*/
@@ -2547,7 +2547,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
update_flags->raw = 0; // Reset all flags
- type = get_plane_info_update_type(u);
+ type = get_plane_info_update_type(dc, u);
elevate_update_type(&overall_type, type);
type = get_scaling_info_update_type(dc, u);
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index e659f4fed19f..78ebe636389e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1060,6 +1060,7 @@ struct dc_debug_options {
bool enable_ips_visual_confirm;
unsigned int sharpen_policy;
unsigned int scale_to_sharpness_policy;
+ bool skip_full_updated_if_possible;
};
--
2.34.1
From: Zhikai Zhai <zhikai.zhai(a)amd.com>
[WHY]
It makes DSC enable when we commit the stream which need
keep power off, and then it will skip to disable DSC if
pipe reset at this situation as power has been off. It may
cause the DSC unexpected enable on the pipe with the
next new stream which doesn't support DSC.
[HOW]
Check the DSC used on current pipe status when update stream.
Skip to enable if it has been off. The operation enable
DSC should happen when set power on.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Wenjing Liu <wenjing.liu(a)amd.com>
Signed-off-by: Zhikai Zhai <zhikai.zhai(a)amd.com>
Signed-off-by: Alex Hung <alex.hung(a)amd.com>
---
.../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 14 ++++++++++++++
.../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 13 +++++++++++++
2 files changed, 27 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index a36e11606f90..2e8c9f738259 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -1032,6 +1032,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
struct dsc_config dsc_cfg;
struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 479fd3e89e5a..bd309dbdf7b2 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -334,7 +334,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
struct dsc_config dsc_cfg;
struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
--
2.34.1
From: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
[WHY & HOW]
When underscan is set through xrandr, it causes the stream destination
rect to change in a way it becomes complicated to handle the calculations
for subvp. Since this is a corner case, disable subvp when underscan is
set.
Fix the existing check that is supposed to catch this corner case by
adding a check based on the parameters in the stream
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Dillon Varone <dillon.varone(a)amd.com>
Reviewed-by: Rodrigo Siqueira <rodrigo.siqueira(a)amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Alex Hung <alex.hung(a)amd.com>
---
.../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index b0d9aed0f265..8697eac1e1f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -858,7 +858,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
plane->immediate_flip = plane_state->flip_immediate;
- plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable;
+ plane->composition.rect_out_height_spans_vactive =
+ plane_state->dst_rect.height >= stream->timing.v_addressable &&
+ stream->dst.height >= stream->timing.v_addressable;
}
//TODO : Could be possibly moved to a common helper layer.
--
2.34.1
From: Martin Tsai <martin.tsai(a)amd.com>
[WHY]
DSC on eDP could be enabled during VBIOS post. The enabled
DSC may not be disabled when enter to OS, once the system was
in second screen only mode before entering to S4. In this
case, OS will not send setTimings to reset eDP path again.
The enabled DSC HW will make a new stream without DSC cannot
output normally if it reused this pipe with enabled DSC.
[HOW]
In accelerated mode, to clean up DSC blocks if eDP is on link
but not active when we are not in fast boot and seamless boot.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Charlene Liu <charlene.liu(a)amd.com>
Signed-off-by: Martin Tsai <martin.tsai(a)amd.com>
Signed-off-by: Alex Hung <alex.hung(a)amd.com>
---
.../amd/display/dc/hwss/dce110/dce110_hwseq.c | 50 +++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index d52ce58c6a98..aa7479b31898 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -57,6 +57,7 @@
#include "panel_cntl.h"
#include "dc_state_priv.h"
#include "dpcd_defs.h"
+#include "dsc.h"
/* include DCE11 register header files */
#include "dce/dce_11_0_d.h"
#include "dce/dce_11_0_sh_mask.h"
@@ -1823,6 +1824,48 @@ static void get_edp_links_with_sink(
}
}
+static void clean_up_dsc_blocks(struct dc *dc)
+{
+ struct display_stream_compressor *dsc = NULL;
+ struct timing_generator *tg = NULL;
+ struct stream_encoder *se = NULL;
+ struct dccg *dccg = dc->res_pool->dccg;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+ int i;
+
+ if (dc->ctx->dce_version != DCN_VERSION_3_5 &&
+ dc->ctx->dce_version != DCN_VERSION_3_51)
+ return;
+
+ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
+ struct dcn_dsc_state s = {0};
+
+ dsc = dc->res_pool->dscs[i];
+ dsc->funcs->dsc_read_state(dsc, &s);
+ if (s.dsc_fw_en) {
+ /* disable DSC in OPTC */
+ if (i < dc->res_pool->timing_generator_count) {
+ tg = dc->res_pool->timing_generators[i];
+ tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0);
+ }
+ /* disable DSC in stream encoder */
+ if (i < dc->res_pool->stream_enc_count) {
+ se = dc->res_pool->stream_enc[i];
+ se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0);
+ se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true);
+ }
+ /* disable DSC block */
+ if (dccg->funcs->set_ref_dscclk)
+ dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
+ dsc->funcs->dsc_disable(dsc);
+
+ /* power down DSC */
+ if (pg_cntl != NULL)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false);
+ }
+ }
+}
+
/*
* When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need:
* 1. Power down all DC HW blocks
@@ -1927,6 +1970,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
power_down_all_hw_blocks(dc);
+
+ /* DSC could be enabled on eDP during VBIOS post.
+ * To clean up dsc blocks if eDP is in link but not active.
+ */
+ if (edp_link_with_sink && (edp_stream_num == 0))
+ clean_up_dsc_blocks(dc);
+
disable_vga_and_power_gate_all_controllers(dc);
if (edp_link_with_sink && !keep_edp_vdd_on)
dc->hwss.edp_power_control(edp_link_with_sink, false);
--
2.34.1
There is a real deadlock as well as sleeping in atomic() bug in here, if
the bo put happens to be the last ref, since bo destruction wants to
grab the same spinlock and sleeping locks. Fix that by dropping the ref
using xe_bo_put_deferred(), and moving the final commit outside of the
lock. Dropping the lock around the put is tricky since the bo can go
out of scope and delete itself from the list, making it difficult to
navigate to the next list entry.
Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2727
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay(a)intel.com>
Cc: "Thomas Hellström" <thomas.hellstrom(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
Reviewed-by: Matthew Brost <matthew.brost(a)intel.com>
Reviewed-by: Tejas Upadhyay <tejas.upadhyay(a)intel.com>
---
drivers/gpu/drm/xe/xe_drm_client.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index e64f4b645e2e..badfa045ead8 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -196,6 +196,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
struct xe_drm_client *client;
struct drm_gem_object *obj;
struct xe_bo *bo;
+ LLIST_HEAD(deferred);
unsigned int id;
u32 mem_type;
@@ -215,11 +216,14 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
list_for_each_entry(bo, &client->bos_list, client_link) {
if (!kref_get_unless_zero(&bo->ttm.base.refcount))
continue;
+
bo_meminfo(bo, stats);
- xe_bo_put(bo);
+ xe_bo_put_deferred(bo, &deferred);
}
spin_unlock(&client->bos_lock);
+ xe_bo_put_commit(&deferred);
+
for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
if (!xe_mem_type_to_name[mem_type])
continue;
--
2.46.0
From: "Alexey Gladkov (Intel)" <legion(a)kernel.org>
TDX only supports kernel-initiated MMIO operations. The handle_mmio()
function checks if the #VE exception occurred in the kernel and rejects
the operation if it did not.
However, userspace can deceive the kernel into performing MMIO on its
behalf. For example, if userspace can point a syscall to an MMIO address,
syscall does get_user() or put_user() on it, triggering MMIO #VE. The
kernel will treat the #VE as in-kernel MMIO.
Ensure that the target MMIO address is within the kernel before decoding
instruction.
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexey Gladkov (Intel) <legion(a)kernel.org>
---
arch/x86/coco/tdx/tdx.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 078e2bac2553..c90d2fdb5fc4 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -405,6 +405,11 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
EPT_WRITE, addr, val);
}
+static inline bool is_kernel_addr(unsigned long addr)
+{
+ return (long)addr < 0;
+}
+
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
unsigned long *reg, val, vaddr;
@@ -434,6 +439,11 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
return -EINVAL;
}
+ if (!user_mode(regs) && !is_kernel_addr(ve->gla)) {
+ WARN_ONCE(1, "Access to userspace address is not supported");
+ return -EINVAL;
+ }
+
/*
* Reject EPT violation #VEs that split pages.
*
--
2.46.0
Commit 413db06c05e7 ("phy: qcom-qmp-usb: clean up probe initialisation")
removed most users of the platform device driver data from the
qcom-qmp-usb driver, but mistakenly also removed the initialisation
despite the data still being used in the runtime PM callbacks. This bug
was later reproduced when the driver was copied to create the qmp-usbc
driver.
Restore the driver data initialisation at probe to avoid a NULL-pointer
dereference on runtime suspend.
Apparently no one uses runtime PM, which currently needs to be enabled
manually through sysfs, with these drivers.
Fixes: 19281571a4d5 ("phy: qcom: qmp-usb: split USB-C PHY driver")
Cc: stable(a)vger.kernel.org # 6.9
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/phy/qualcomm/phy-qcom-qmp-usbc.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c
index 5cbc5fd529eb..dea3456f88b1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c
@@ -1049,6 +1049,7 @@ static int qmp_usbc_probe(struct platform_device *pdev)
return -ENOMEM;
qmp->dev = dev;
+ dev_set_drvdata(dev, qmp);
qmp->orientation = TYPEC_ORIENTATION_NORMAL;
--
2.44.2
Commit 413db06c05e7 ("phy: qcom-qmp-usb: clean up probe initialisation")
removed most users of the platform device driver data from the
qcom-qmp-usb driver, but mistakenly also removed the initialisation
despite the data still being used in the runtime PM callbacks. This bug
was later reproduced when the driver was copied to create the
qmp-usb-legacy driver.
Restore the driver data initialisation at probe to avoid a NULL-pointer
dereference on runtime suspend.
Apparently no one uses runtime PM, which currently needs to be enabled
manually through sysfs, with these drivers.
Fixes: e464a3180a43 ("phy: qcom-qmp-usb: split off the legacy USB+dp_com support")
Cc: stable(a)vger.kernel.org # 6.6
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/phy/qualcomm/phy-qcom-qmp-usb-legacy.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb-legacy.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb-legacy.c
index 6d0ba39c1943..8bf951b0490c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb-legacy.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb-legacy.c
@@ -1248,6 +1248,7 @@ static int qmp_usb_legacy_probe(struct platform_device *pdev)
return -ENOMEM;
qmp->dev = dev;
+ dev_set_drvdata(dev, qmp);
qmp->cfg = of_device_get_match_data(dev);
if (!qmp->cfg)
--
2.44.2
Commit 413db06c05e7 ("phy: qcom-qmp-usb: clean up probe initialisation")
removed most users of the platform device driver data, but mistakenly
also removed the initialisation despite the data still being used in the
runtime PM callbacks.
Restore the driver data initialisation at probe to avoid a NULL-pointer
dereference on runtime suspend.
Apparently no one uses runtime PM, which currently needs to be enabled
manually through sysfs, with this driver.
Fixes: 413db06c05e7 ("phy: qcom-qmp-usb: clean up probe initialisation")
Cc: stable(a)vger.kernel.org # 6.2
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 49f4a53f9b2c..76068393e4ba 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2191,6 +2191,7 @@ static int qmp_usb_probe(struct platform_device *pdev)
return -ENOMEM;
qmp->dev = dev;
+ dev_set_drvdata(dev, qmp);
qmp->cfg = of_device_get_match_data(dev);
if (!qmp->cfg)
--
2.44.2
From: Kan Liang <kan.liang(a)linux.intel.com>
The BPF subsystem may capture LBR data on a counting event. However, the
current implementation assumes that LBR can/should only be used with
sampling events.
For instance, retsnoop tool ([0]) makes an extensive use of this
functionality and sets up perf event as follows:
struct perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
attr.size = sizeof(attr);
attr.type = PERF_TYPE_HARDWARE;
attr.config = PERF_COUNT_HW_CPU_CYCLES;
attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL;
To limit the LBR for a sampling event is to avoid unnecessary branch
stack setup for a counting event in the sample read. Because LBR is only
read in the sampling event's overflow.
Although in most cases LBR is used in sampling, there is no HW limit to
bind LBR to the sampling mode. Allow an LBR setup for a counting event
unless in the sample read mode.
Fixes: 85846b27072d ("perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag")
Reported-by: Andrii Nakryiko <andrii.nakryiko(a)gmail.com>
Closes: https://lore.kernel.org/lkml/20240905180055.1221620-1-andrii@kernel.org/
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/events/intel/core.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 605ed19043ed..2b5ff112d8d1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3981,8 +3981,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
x86_pmu.pebs_aliases(event);
}
- if (needs_branch_stack(event) && is_sampling_event(event))
- event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ if (needs_branch_stack(event)) {
+ /* Avoid branch stack setup for counting events in SAMPLE READ */
+ if (is_sampling_event(event) ||
+ !(event->attr.sample_type & PERF_SAMPLE_READ))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ }
if (branch_sample_counters(event)) {
struct perf_event *leader, *sibling;
--
2.38.1
There is a real deadlock as well as sleeping in atomic() bug in here, if
the bo put happens to be the last ref, since bo destruction wants to
grab the same spinlock and sleeping locks. Fix that by dropping the ref
using xe_bo_put_deferred(), and moving the final commit outside of the
lock. Dropping the lock around the put is tricky since the bo can go
out of scope and delete itself from the list, making it difficult to
navigate to the next list entry.
Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2727
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com>
Cc: Tejas Upadhyay <tejas.upadhyay(a)intel.com>
Cc: "Thomas Hellström" <thomas.hellstrom(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_drm_client.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index e64f4b645e2e..badfa045ead8 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -196,6 +196,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
struct xe_drm_client *client;
struct drm_gem_object *obj;
struct xe_bo *bo;
+ LLIST_HEAD(deferred);
unsigned int id;
u32 mem_type;
@@ -215,11 +216,14 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
list_for_each_entry(bo, &client->bos_list, client_link) {
if (!kref_get_unless_zero(&bo->ttm.base.refcount))
continue;
+
bo_meminfo(bo, stats);
- xe_bo_put(bo);
+ xe_bo_put_deferred(bo, &deferred);
}
spin_unlock(&client->bos_lock);
+ xe_bo_put_commit(&deferred);
+
for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
if (!xe_mem_type_to_name[mem_type])
continue;
--
2.46.0
This reverts commit ab8d66d132bc8f1992d3eb6cab8d32dda6733c84 because it
breaks codecs using non-continuous masks in source and sink ports. The
commit missed the point that port numbers are not used as indices for
iterating over prop.sink_ports or prop.source_ports.
Soundwire core and existing codecs expect that the array passed as
prop.sink_ports and prop.source_ports is continuous. The port mask still
might be non-continuous, but that's unrelated.
Reported-by: Bard Liao <yung-chuan.liao(a)linux.intel.com>
Closes: https://lore.kernel.org/all/b6c75eee-761d-44c8-8413-2a5b34ee2f98@linux.inte…
Fixes: ab8d66d132bc ("soundwire: stream: fix programming slave ports for non-continous port maps")
Acked-by: Bard Liao <yung-chuan.liao(a)linux.intel.com>
Reviewed-by: Charles Keepax <ckeepax(a)opensource.cirrus.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
---
Resending with Ack/Rb tags and missing Cc-stable.
---
drivers/soundwire/stream.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index f275143d7b18..7aa4900dcf31 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -1291,18 +1291,18 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
unsigned int port_num)
{
struct sdw_dpn_prop *dpn_prop;
- unsigned long mask;
+ u8 num_ports;
int i;
if (direction == SDW_DATA_DIR_TX) {
- mask = slave->prop.source_ports;
+ num_ports = hweight32(slave->prop.source_ports);
dpn_prop = slave->prop.src_dpn_prop;
} else {
- mask = slave->prop.sink_ports;
+ num_ports = hweight32(slave->prop.sink_ports);
dpn_prop = slave->prop.sink_dpn_prop;
}
- for_each_set_bit(i, &mask, 32) {
+ for (i = 0; i < num_ports; i++) {
if (dpn_prop[i].num == port_num)
return &dpn_prop[i];
}
--
2.43.0
Hi,
the issue was so far handled in https://lore.kernel.org/regressions/ZsyMzW-4ee_U8NoX@eldamar.lan/T/#m390d6e… and https://bugzilla.kernel.org/show_bug.cgi?id=219129
I haven’t seen any communication whether a backport for 5.15 is already in progress, so I thought I’d follow up here.
Today we rolled out 5.15.165 and immediately ran into the same issue. There’s at least one other person asking for a 5.15 backport in Bugzilla.
Hugs,
Christian
--
Christian Theune - A97C62CE - 0179 7808366
@theuni - christian(a)theune.cc
From: Peter Wang <peter.wang(a)mediatek.com>
ufshcd_abort_all forcibly aborts all on-going commands and the host
controller will automatically fill in the OCS field of the corresponding
response with OCS_ABORTED based on different working modes.
MCQ mode: aborts a command using SQ cleanup, The host controller
will post a Completion Queue entry with OCS = ABORTED.
SDB mode: aborts a command using UTRLCLR. Task Management response
which means a Transfer Request was aborted.
For these two cases, set a flag to notify SCSI to requeue the
command after receiving response with OCS_ABORTED.
Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode")
Cc: stable(a)vger.kernel.org
Signed-off-by: Peter Wang <peter.wang(a)mediatek.com>
---
drivers/ufs/core/ufshcd.c | 35 ++++++++++++++++++++---------------
include/ufs/ufshcd.h | 3 +++
2 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index a6f818cdef0e..8380e8861d83 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3006,6 +3006,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp);
lrbp->req_abort_skip = false;
+ lrbp->abort_initiated_by_eh = false;
ufshcd_comp_scsi_upiu(hba, lrbp);
@@ -5404,7 +5405,10 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
}
break;
case OCS_ABORTED:
- result |= DID_ABORT << 16;
+ if (lrbp->abort_initiated_by_eh)
+ result |= DID_REQUEUE << 16;
+ else
+ result |= DID_ABORT << 16;
break;
case OCS_INVALID_COMMAND_STATUS:
result |= DID_REQUEUE << 16;
@@ -6471,26 +6475,12 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
struct scsi_device *sdev = cmd->device;
struct Scsi_Host *shost = sdev->host;
struct ufs_hba *hba = shost_priv(shost);
- struct ufshcd_lrb *lrbp = &hba->lrb[tag];
- struct ufs_hw_queue *hwq;
- unsigned long flags;
*ret = ufshcd_try_to_abort_task(hba, tag);
dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
*ret ? "failed" : "succeeded");
- /* Release cmd in MCQ mode if abort succeeds */
- if (hba->mcq_enabled && (*ret == 0)) {
- hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
- if (!hwq)
- return 0;
- spin_lock_irqsave(&hwq->cq_lock, flags);
- if (ufshcd_cmd_inflight(lrbp->cmd))
- ufshcd_release_scsi_cmd(hba, lrbp);
- spin_unlock_irqrestore(&hwq->cq_lock, flags);
- }
-
return *ret == 0;
}
@@ -7561,6 +7551,21 @@ int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
goto out;
}
+ /*
+ * When the host software receives a "FUNCTION COMPLETE", set flag
+ * to requeue command after receive response with OCS_ABORTED
+ * SDB mode: UTRLCLR Task Management response which means a Transfer
+ * Request was aborted.
+ * MCQ mode: Host will post to CQ with OCS_ABORTED after SQ cleanup
+ *
+ * This flag is set because error handler ufshcd_abort_all forcibly
+ * aborts all commands, and the host controller will automatically
+ * fill in the OCS field of the corresponding response with OCS_ABORTED.
+ * Therefore, upon receiving this response, it needs to be requeued.
+ */
+ if (!err)
+ lrbp->abort_initiated_by_eh = true;
+
err = ufshcd_clear_cmd(hba, tag);
if (err)
dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 0fd2aebac728..07b5e60e6c44 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -173,6 +173,8 @@ struct ufs_pm_lvl_states {
* @crypto_key_slot: the key slot to use for inline crypto (-1 if none)
* @data_unit_num: the data unit number for the first block for inline crypto
* @req_abort_skip: skip request abort task flag
+ * @abort_initiated_by_eh: The flag is specifically used to handle
+ * ufshcd_err_handler forcibly aborts.
*/
struct ufshcd_lrb {
struct utp_transfer_req_desc *utr_descriptor_ptr;
@@ -202,6 +204,7 @@ struct ufshcd_lrb {
#endif
bool req_abort_skip;
+ bool abort_initiated_by_eh;
};
/**
--
2.45.2
From: Jason Andryuk <jason.andryuk(a)amd.com>
Probing xen-fbfront faults in video_is_primary_device(). The passed-in
struct device is NULL since xen-fbfront doesn't assign it and the
memory is kzalloc()-ed. Assign fb_info->device to avoid this.
This was exposed by the conversion of fb_is_primary_device() to
video_is_primary_device() which dropped a NULL check for struct device.
Fixes: f178e96de7f0 ("arch: Remove struct fb_info from video helpers")
Reported-by: Arthur Borsboom <arthurborsboom(a)gmail.com>
Closes: https://lore.kernel.org/xen-devel/CALUcmUncX=LkXWeiSiTKsDY-cOe8QksWhFvcCneO…
Tested-by: Arthur Borsboom <arthurborsboom(a)gmail.com>
CC: stable(a)vger.kernel.org
Signed-off-by: Jason Andryuk <jason.andryuk(a)amd.com>
---
The other option would be to re-instate the NULL check in
video_is_primary_device()
---
drivers/video/fbdev/xen-fbfront.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 66d4628a96ae..c90f48ebb15e 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -407,6 +407,7 @@ static int xenfb_probe(struct xenbus_device *dev,
/* complete the abuse: */
fb_info->pseudo_palette = fb_info->par;
fb_info->par = info;
+ fb_info->device = &dev->dev;
fb_info->screen_buffer = info->fb;
--
2.43.0
From: Willem de Bruijn <willemb(a)google.com>
The referenced commit drops bad input, but has false positives.
Tighten the check to avoid these.
The check detects illegal checksum offload requests, which produce
csum_start/csum_off beyond end of packet after segmentation.
But it is based on two incorrect assumptions:
1. virtio_net_hdr_to_skb with VIRTIO_NET_HDR_GSO_TCP[46] implies GSO.
True in callers that inject into the tx path, such as tap.
But false in callers that inject into rx, like virtio-net.
Here, the flags indicate GRO, and CHECKSUM_UNNECESSARY or
CHECKSUM_NONE without VIRTIO_NET_HDR_F_NEEDS_CSUM is normal.
2. TSO requires checksum offload, i.e., ip_summed == CHECKSUM_PARTIAL.
False, as tcp[46]_gso_segment will fix up csum_start and offset for
all other ip_summed by calling __tcp_v4_send_check.
Because of 2, we can limit the scope of the fix to virtio_net_hdr
that do try to set these fields, with a bogus value.
Link: https://lore.kernel.org/netdev/20240909094527.GA3048202@port70.net/
Fixes: 89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr")
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Cc: <stable(a)vger.kernel.net>
---
Verified that the syzbot repro is still caught.
An equivalent alternative would be to move the check for csum_offset
to where the csum_start check is in segmentation:
- if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb) ||
+ skb->csum_offset != offsetof(struct tcphdr, check)))
Cleaner, but messier stable backport.
We'll need an equivalent patch to this for VIRTIO_NET_HDR_GSO_UDP_L4.
But that csum_offset test was in a different commit, so different
Fixes tag.
---
include/linux/virtio_net.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 6c395a2600e8d..276ca543ef44d 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -173,7 +173,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
break;
case SKB_GSO_TCPV4:
case SKB_GSO_TCPV6:
- if (skb->csum_offset != offsetof(struct tcphdr, check))
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb->csum_offset != offsetof(struct tcphdr, check))
return -EINVAL;
break;
}
--
2.46.0.598.g6f2099f65c-goog
Supposing first scenario with a virtio_blk driver.
CPU0 CPU1
blk_mq_try_issue_directly()
__blk_mq_issue_directly()
q->mq_ops->queue_rq()
virtio_queue_rq()
blk_mq_stop_hw_queue()
virtblk_done()
blk_mq_request_bypass_insert() blk_mq_start_stopped_hw_queues()
/* Add IO request to dispatch list */ 1) store blk_mq_start_stopped_hw_queue()
clear_bit(BLK_MQ_S_STOPPED) 3) store
blk_mq_run_hw_queue() blk_mq_run_hw_queue()
if (!blk_mq_hctx_has_pending()) if (!blk_mq_hctx_has_pending()) 4) load
return return
blk_mq_sched_dispatch_requests() blk_mq_sched_dispatch_requests()
if (blk_mq_hctx_stopped()) 2) load if (blk_mq_hctx_stopped())
return return
__blk_mq_sched_dispatch_requests() __blk_mq_sched_dispatch_requests()
Supposing another scenario.
CPU0 CPU1
blk_mq_requeue_work()
/* Add IO request to dispatch list */ 1) store virtblk_done()
blk_mq_run_hw_queues()/blk_mq_delay_run_hw_queues() blk_mq_start_stopped_hw_queues()
if (blk_mq_hctx_stopped()) 2) load blk_mq_start_stopped_hw_queue()
continue clear_bit(BLK_MQ_S_STOPPED) 3) store
blk_mq_run_hw_queue()/blk_mq_delay_run_hw_queue() blk_mq_run_hw_queue()
if (!blk_mq_hctx_has_pending()) 4) load
return
blk_mq_sched_dispatch_requests()
Both scenarios are similar, the full memory barrier should be inserted between
1) and 2), as well as between 3) and 4) to make sure that either CPU0 sees
BLK_MQ_S_STOPPED is cleared or CPU1 sees dispatch list. Otherwise, either CPU
will not rerun the hardware queue causing starvation of the request.
The easy way to fix it is to add the essential full memory barrier into helper
of blk_mq_hctx_stopped(). In order to not affect the fast path (hardware queue
is not stopped most of the time), we only insert the barrier into the slow path.
Actually, only slow path needs to care about missing of dispatching the request
to the low-level device driver.
Fixes: 320ae51feed5c ("blk-mq: new multi-queue block IO queueing mechanism")
Cc: stable(a)vger.kernel.org
Cc: Muchun Song <muchun.song(a)linux.dev>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
---
block/blk-mq.c | 6 ++++++
block/blk-mq.h | 13 +++++++++++++
2 files changed, 19 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ac39f2a346a52..48a6a437fba5e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2413,6 +2413,12 @@ void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
return;
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+ /*
+ * Pairs with the smp_mb() in blk_mq_hctx_stopped() to order the
+ * clearing of BLK_MQ_S_STOPPED above and the checking of dispatch
+ * list in the subsequent routine.
+ */
+ smp_mb__after_atomic();
blk_mq_run_hw_queue(hctx, async);
}
EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 260beea8e332c..f36f3bff70d86 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -228,6 +228,19 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
+ /* Fast path: hardware queue is not stopped most of the time. */
+ if (likely(!test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+ return false;
+
+ /*
+ * This barrier is used to order adding of dispatch list before and
+ * the test of BLK_MQ_S_STOPPED below. Pairs with the memory barrier
+ * in blk_mq_start_stopped_hw_queue() so that dispatch code could
+ * either see BLK_MQ_S_STOPPED is cleared or dispatch list is not
+ * empty to avoid missing dispatching requests.
+ */
+ smp_mb();
+
return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
--
2.20.1
Supposing the following scenario with a virtio_blk driver.
CPU0 CPU1 CPU2
blk_mq_try_issue_directly()
__blk_mq_issue_directly()
q->mq_ops->queue_rq()
virtio_queue_rq()
blk_mq_stop_hw_queue()
blk_mq_try_issue_directly() virtblk_done()
if (blk_mq_hctx_stopped())
blk_mq_request_bypass_insert() blk_mq_start_stopped_hw_queue()
blk_mq_run_hw_queue() blk_mq_run_hw_queue()
blk_mq_insert_request()
return // Who is responsible for dispatching this IO request?
After CPU0 has marked the queue as stopped, CPU1 will see the queue is stopped.
But before CPU1 puts the request on the dispatch list, CPU2 receives the interrupt
of completion of request, so it will run the hardware queue and marks the queue
as non-stopped. Meanwhile, CPU1 also runs the same hardware queue. After both CPU1
and CPU2 complete blk_mq_run_hw_queue(), CPU1 just puts the request to the same
hardware queue and returns. It misses dispatching a request. Fix it by running
the hardware queue explicitly. And blk_mq_request_issue_directly() should handle
a similar situation. Fix it as well.
Fixes: d964f04a8fde8 ("blk-mq: fix direct issue")
Cc: stable(a)vger.kernel.org
Cc: Muchun Song <muchun.song(a)linux.dev>
Signed-off-by: Muchun Song <songmuchun(a)bytedance.com>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
---
block/blk-mq.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e3c3c0c21b553..b2d0f22de0c7f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2619,6 +2619,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
blk_mq_insert_request(rq, 0);
+ blk_mq_run_hw_queue(hctx, false);
return;
}
@@ -2649,6 +2650,7 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) {
blk_mq_insert_request(rq, 0);
+ blk_mq_run_hw_queue(hctx, false);
return BLK_STS_OK;
}
--
2.20.1
When we share memory through FF-A and the description of the buffers
exceeds the size of the mapped buffer, the fragmentation API is used.
The fragmentation API allows specifying chunks of descriptors in subsequent
FF-A fragment calls and no upper limit has been established for this.
The entire memory region transferred is identified by a handle which can be
used to reclaim the transferred memory.
To be able to reclaim the memory, the description of the buffers has to fit
in the ffa_desc_buf.
Add a bounds check on the FF-A sharing path to prevent the memory reclaim
from failing.
Also do_ffa_mem_xfer() does not need __always_inline
Fixes: 634d90cf0ac65 ("KVM: arm64: Handle FFA_MEM_LEND calls from the host")
Cc: stable(a)vger.kernel.org
Reviewed-by: Sebastian Ene <sebastianene(a)google.com>
Signed-off-by: Snehal Koukuntla <snehalreddy(a)google.com>
---
arch/arm64/kvm/hyp/nvhe/ffa.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index e715c157c2c4..637425f63fd1 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -426,7 +426,7 @@ static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
return;
}
-static __always_inline void do_ffa_mem_xfer(const u64 func_id,
+static void do_ffa_mem_xfer(const u64 func_id,
struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -461,6 +461,11 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
+ if (len > ffa_desc_buf.len) {
+ ret = FFA_RET_NO_MEMORY;
+ goto out_unlock;
+ }
+
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
--
2.46.0.598.g6f2099f65c-goog
Add explicit casting to prevent expantion of 32th bit of
u32 into highest half of u64.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
From: Andrea Parri <parri.andrea(a)gmail.com>
[ Upstream commit d6cfd1770f20392d7009ae1fdb04733794514fa9 ]
The membarrier system call requires a full memory barrier after storing
to rq->curr, before going back to user-space. The barrier is only
needed when switching between processes: the barrier is implied by
mmdrop() when switching from kernel to userspace, and it's not needed
when switching from userspace to kernel.
Rely on the feature/mechanism ARCH_HAS_MEMBARRIER_CALLBACKS and on the
primitive membarrier_arch_switch_mm(), already adopted by the PowerPC
architecture, to insert the required barrier.
Fixes: fab957c11efe2f ("RISC-V: Atomic and Locking Code")
Signed-off-by: Andrea Parri <parri.andrea(a)gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/r/20240131144936.29190-2-parri.andrea@gmail.com
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
Signed-off-by: WangYuli <wangyuli(a)uniontech.com>
---
MAINTAINERS | 2 +-
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/membarrier.h | 31 +++++++++++++++++++++++++++++
arch/riscv/mm/context.c | 2 ++
kernel/sched/core.c | 5 +++--
5 files changed, 38 insertions(+), 3 deletions(-)
create mode 100644 arch/riscv/include/asm/membarrier.h
diff --git a/MAINTAINERS b/MAINTAINERS
index f09415b2b3c5..ae4c0cec5073 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13702,7 +13702,7 @@ M: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
M: "Paul E. McKenney" <paulmck(a)kernel.org>
L: linux-kernel(a)vger.kernel.org
S: Supported
-F: arch/powerpc/include/asm/membarrier.h
+F: arch/*/include/asm/membarrier.h
F: include/uapi/linux/membarrier.h
F: kernel/sched/membarrier.c
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c785a0200573..1df4afccc381 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,6 +27,7 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MMIOWB
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h
new file mode 100644
index 000000000000..6c016ebb5020
--- /dev/null
+++ b/arch/riscv/include/asm/membarrier.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_RISCV_MEMBARRIER_H
+#define _ASM_RISCV_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ /*
+ * Only need the full barrier when switching between processes.
+ * Barrier when switching from kernel to userspace is not
+ * required here, given that it is implied by mmdrop(). Barrier
+ * when switching from userspace to kernel is not needed after
+ * store to rq->curr.
+ */
+ if (IS_ENABLED(CONFIG_SMP) &&
+ likely(!(atomic_read(&next->membarrier_state) &
+ (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+ MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+ return;
+
+ /*
+ * The membarrier system call requires a full memory barrier
+ * after storing to rq->curr, before going back to user-space.
+ * Matches a full barrier in the proximity of the membarrier
+ * system call entry.
+ */
+ smp_mb();
+}
+
+#endif /* _ASM_RISCV_MEMBARRIER_H */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 217fd4de6134..ba8eb3944687 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (unlikely(prev == next))
return;
+ membarrier_arch_switch_mm(prev, next, task);
+
/*
* Mark the current MM context as inactive, and the next as
* active. This is at least used by the icache flushing
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 97571d390f18..9b406d988654 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6679,8 +6679,9 @@ static void __sched notrace __schedule(unsigned int sched_mode)
*
* Here are the schemes providing that barrier on the
* various architectures:
- * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
- * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC,
+ * RISC-V. switch_mm() relies on membarrier_arch_switch_mm()
+ * on PowerPC and on RISC-V.
* - finish_lock_switch() for weakly-ordered
* architectures where spin_unlock is a full barrier,
* - switch_to() for arm64 (weakly-ordered, spin_unlock
--
2.43.4