The patch below does not apply to the 6.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.8.y
git checkout FETCH_HEAD
git cherry-pick -x 598c2fafc06fe5c56a1a415fb7b544b31453d637
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040137-tragedy-chapter-f8c0@gregkh' --subject-prefix 'PATCH 6.8.y' HEAD^..
Possible dependencies:
598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 598c2fafc06fe5c56a1a415fb7b544b31453d637 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:01:45 +0530
Subject: [PATCH] perf/x86/amd/lbr: Use freeze based on availability
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.
Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using a user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.17110915…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 598c2fafc06fe5c56a1a415fb7b544b31453d637
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040136-sinuous-creasing-8217@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 598c2fafc06fe5c56a1a415fb7b544b31453d637 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:01:45 +0530
Subject: [PATCH] perf/x86/amd/lbr: Use freeze based on availability
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.
Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using a user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.17110915…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 598c2fafc06fe5c56a1a415fb7b544b31453d637
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040111-friend-dispersal-bc2a@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 598c2fafc06fe5c56a1a415fb7b544b31453d637 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:01:45 +0530
Subject: [PATCH] perf/x86/amd/lbr: Use freeze based on availability
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.
Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using a user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.17110915…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 598c2fafc06fe5c56a1a415fb7b544b31453d637
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040110-spongy-stress-e02e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
598c2fafc06f ("perf/x86/amd/lbr: Use freeze based on availability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 598c2fafc06fe5c56a1a415fb7b544b31453d637 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:01:45 +0530
Subject: [PATCH] perf/x86/amd/lbr: Use freeze based on availability
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.
Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using a user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.17110915…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040100-clinking-stylized-0888@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040114-diaper-unlovable-0dab@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
When ident_pud_init() uses only gbpages to create identity maps, large
ranges of addresses not actually requested can be included in the
resulting table; a 4K request will map a full GB. On UV systems, this
ends up including regions that will cause hardware to halt the system
if accessed (these are marked "reserved" by BIOS). Even processor
speculation into these regions is enough to trigger the system halt.
And MTRRs cannot be used to restrict this speculation, because there are
not enough MTRRs to cover all the reserved regions.
The fix for that would be to only use gbpages when map creation
requests include the full GB page of space, and to fall back to using
smaller 2M pages when only portions of a GB page are included in the
request.
But on some other systems, possibly due to a buggy BIOS, that solution
leaves some areas out of the identity map that are needed for kexec to
succeed. It is believed that these areas are not marked properly for
map_acpi_tables() in arch/x86/kernel/machine_kexec_64.c to catch and
map them. The nogbpages kernel command line option also causes these
systems to fail even without these changes.
So, create kexec identity maps using full GB pages on all platforms
but UV; on UV, use narrower 2MB pages in the identity map where a full
GB page would include areas outside the region requested.
No attempt is made to coalesce mapping requests. If a request requires
a map entry at the 2M (pmd) level, subsequent mapping requests within
the same 1G region will also be at the pmd level, even if adjacent or
overlapping such requests could have been combined to map a full
gbpage. Existing usage starts with larger regions and then adds
smaller regions, so this should not have any great consequence.
Signed-off-by: Steve Wahl <steve.wahl(a)hpe.com>
Fixes: d794734c9bbf ("x86/mm/ident_map: Use gbpages only where full GB page should be mapped.")
Reported-by: Pavin Joseph <me(a)pavinjoseph.com>
Closes: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjosep…
Link: https://lore.kernel.org/all/20240322162135.3984233-1-steve.wahl@hpe.com/
Tested-by: Pavin Joseph <me(a)pavinjoseph.com>
Tested-by: Eric Hagberg <ehagberg(a)gmail.com>
Tested-by: Sarah Brofeldt <srhb(a)dbc.dk>
---
v4: Incorporate fix for regression on systems relying on gbpages
mapping more than the ranges actually requested for successful
kexec, by limiting the effects of the change to UV systems.
This patch is based on tip/x86/urgent.
v3: per Dave Hansen review, re-arrange changelog info,
refactor code to use bool variable and split out conditions.
v2: per Dave Hansen review: Additional changelog info,
moved pud_large() check earlier in the code, and
improved the comment describing the conditions
that restrict gbpage usage.
arch/x86/include/asm/init.h | 1 +
arch/x86/kernel/machine_kexec_64.c | 10 ++++++++++
arch/x86/mm/ident_map.c | 24 +++++++++++++++++++-----
3 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..371d9faea8bc 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -10,6 +10,7 @@ struct x86_mapping_info {
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
bool direct_gbpages; /* PUD level 1GB page support */
+ bool direct_gbpages_only; /* use 1GB pages exclusively */
unsigned long kernpg_flag; /* kernel pagetable flag override */
};
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..3a2f5d291a88 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -28,6 +28,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/cpu.h>
+#include <asm/uv/uv.h>
#ifdef CONFIG_ACPI
/*
@@ -212,6 +213,15 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
if (direct_gbpages)
info.direct_gbpages = true;
+ /*
+ * UV systems need restrained use of gbpages in the identity
+ * maps to avoid system halts. But some other systems rely on
+ * using gbpages to expand mappings outside the regions
+ * actually listed, to include areas required for kexec but
+ * not explicitly named by the bios.
+ */
+ if (!is_uv_system())
+ info.direct_gbpages_only = true;
for (i = 0; i < nr_pfn_mapped; i++) {
mstart = pfn_mapped[i].start << PAGE_SHIFT;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..a538a54aba5d 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,32 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_leaf(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ if (!info->direct_gbpages_only) {
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the beginning: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+ }
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
base-commit: b6540de9b5c867b4c8bc31225db181cc017d8cc7
--
2.26.2
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040106-slacking-uncanny-50dc@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040105-simply-footpath-ff09@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040104-avid-embolism-6b9b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x c7b2edd8377be983442c1344cb940cd2ac21b601
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040103-scholar-tall-0cf6@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
c7b2edd8377b ("perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later")
3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7b2edd8377be983442c1344cb940cd2ac21b601 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Mon, 25 Mar 2024 13:17:53 +0530
Subject: [PATCH] perf/x86/amd/core: Update and fix stalled-cycles-* events for
Zen 2 and later
AMD processors based on Zen 2 and later microarchitectures do not
support PMCx087 (instruction pipe stalls) which is used as the backing
event for "stalled-cycles-frontend" and "stalled-cycles-backend".
Use PMCx0A9 (cycles where micro-op queue is empty) instead to count
frontend stalls and remove the entry for backend stalls since there
is no direct replacement.
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Fixes: 3fe3331bb285 ("perf/x86/amd: Add event map for AMD Family 17h")
Link: https://lore.kernel.org/r/03d7fc8fa2a28f9be732116009025bdec1b3ec97.17113521…
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 5692e827afef..af8add6c11ea 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8e68a458bcf5b5cb9c3624598bae28f08251601f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040152-bobtail-animate-4d38@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8e68a458bcf5 ("scsi: libsas: Fix disk not being scanned in after being removed")
d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
7b27c5fe247b ("scsi: libsas: Stop hardcoding SAS address length")
15ba7806c316 ("scsi: libsas: Drop SAS_DPRINTK() and revise logs levels")
71a4a9923122 ("scsi: libsas: Drop sas_printk()")
d188e5db9d27 ("scsi: libsas: Use pr_fmt(fmt)")
32c850bf587f ("scsi: libsas: always unregister the old device if going to discover new")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e68a458bcf5b5cb9c3624598bae28f08251601f Mon Sep 17 00:00:00 2001
From: Xingui Yang <yangxingui(a)huawei.com>
Date: Thu, 7 Mar 2024 14:14:13 +0000
Subject: [PATCH] scsi: libsas: Fix disk not being scanned in after being
removed
As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to
update PHY info"), doing discovery will send a new SMP_DISCOVER and update
phy->phy_change_count. We found that if the disk is reconnected and phy
change_count changes at this time, the disk scanning process will not be
triggered.
Therefore, call sas_set_ex_phy() to update the PHY info with the results of
the last query. And because the previous phy info will be used when calling
sas_unregister_devs_sas_addr(), sas_unregister_devs_sas_addr() should be
called before sas_set_ex_phy().
Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
Signed-off-by: Xingui Yang <yangxingui(a)huawei.com>
Link: https://lore.kernel.org/r/20240307141413.48049-3-yangxingui@huawei.com
Reviewed-by: John Garry <john.g.garry(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index de9dee488277..5c261005b74e 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1945,6 +1945,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
struct expander_device *ex = &dev->ex_dev;
struct ex_phy *phy = &ex->ex_phy[phy_id];
enum sas_device_type type = SAS_PHY_UNUSED;
+ struct smp_disc_resp *disc_resp;
u8 sas_addr[SAS_ADDR_SIZE];
char msg[80] = "";
int res;
@@ -1956,33 +1957,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE);
- res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+ disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+ if (!disc_resp)
+ return -ENOMEM;
+
+ res = sas_get_phy_discover(dev, phy_id, disc_resp);
switch (res) {
case SMP_RESP_NO_PHY:
phy->phy_state = PHY_NOT_PRESENT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_PHY_VACANT:
phy->phy_state = PHY_VACANT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_FUNC_ACC:
break;
case -ECOMM:
break;
default:
- return res;
+ goto out_free_resp;
}
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type);
+
if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
phy->phy_state = PHY_EMPTY;
sas_unregister_devs_sas_addr(dev, phy_id, last);
/*
- * Even though the PHY is empty, for convenience we discover
- * the PHY to update the PHY info, like negotiated linkrate.
+ * Even though the PHY is empty, for convenience we update
+ * the PHY info, like negotiated linkrate.
*/
- sas_ex_phy_discover(dev, phy_id);
- return res;
+ if (res == 0)
+ sas_set_ex_phy(dev, phy_id, disc_resp);
+ goto out_free_resp;
} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
dev_type_flutter(type, phy->attached_dev_type)) {
struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
@@ -1994,7 +2003,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
action = ", needs recovery";
pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
SAS_ADDR(dev->sas_addr), phy_id, action);
- return res;
+ goto out_free_resp;
}
/* we always have to delete the old device when we went here */
@@ -2003,7 +2012,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return sas_discover_new(dev, phy_id);
+ res = sas_discover_new(dev, phy_id);
+out_free_resp:
+ kfree(disc_resp);
+ return res;
}
/**
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 978b63f7464abcfd364a6c95f734282c50f3decf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040118-pebble-afoot-d19f@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap")
418b09027743 ("btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given")
a1a4a9ca77f1 ("btrfs: fix race between ordered extent completion and fiemap")
b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 978b63f7464abcfd364a6c95f734282c50f3decf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Wed, 28 Feb 2024 11:37:56 +0000
Subject: [PATCH] btrfs: fix race when detecting delalloc ranges during fiemap
For fiemap we recently stopped locking the target extent range for the
whole duration of the fiemap call, in order to avoid a deadlock in a
scenario where the fiemap buffer happens to be a memory mapped range of
the same file. This use case is very unlikely to be useful in practice but
it may be triggered by fuzz testing (syzbot, etc).
This however introduced a race that makes us miss delalloc ranges for
file regions that are currently holes, so the caller of fiemap will not
be aware that there's data for some file regions. This can be quite
serious for some use cases - for example in coreutils versions before 9.0,
the cp program used fiemap to detect holes and data in the source file,
copying only regions with data (extents or delalloc) from the source file
to the destination file in order to preserve holes (see the documentation
for its --sparse command line option). This means that if cp was used
with a source file that had delalloc in a hole, the destination file could
end up without that data, which is effectively a data loss issue, if it
happened to hit the race described below.
The race happens like this:
1) Fiemap is called, without the FIEMAP_FLAG_SYNC flag, for a file that
has delalloc in the file range [64M, 65M[, which is currently a hole;
2) Fiemap locks the inode in shared mode, then starts iterating the
inode's subvolume tree searching for file extent items, without having
the whole fiemap target range locked in the inode's io tree - the
change introduced recently by commit b0ad381fa769 ("btrfs: fix
deadlock with fiemap and extent locking"). It only locks ranges in
the io tree when it finds a hole or prealloc extent since that
commit;
3) Note that fiemap clones each leaf before using it, and this is to
avoid deadlocks when locking a file range in the inode's io tree and
the fiemap buffer is memory mapped to some file, because writing
to the page with btrfs_page_mkwrite() will wait on any ordered extent
for the page's range and the ordered extent needs to lock the range
and may need to modify the same leaf, therefore leading to a deadlock
on the leaf;
4) While iterating the file extent items in the cloned leaf before
finding the hole in the range [64M, 65M[, the delalloc in that range
is flushed and its ordered extent completes - meaning the corresponding
file extent item is in the inode's subvolume tree, but not present in
the cloned leaf that fiemap is iterating over;
5) When fiemap finds the hole in the [64M, 65M[ range by seeing the gap in
the cloned leaf (or a file extent item with disk_bytenr == 0 in case
the NO_HOLES feature is not enabled), it will lock that file range in
the inode's io tree and then search for delalloc by checking for the
EXTENT_DELALLOC bit in the io tree for that range and ordered extents
(with btrfs_find_delalloc_in_range()). But it finds nothing since the
delalloc in that range was already flushed and the ordered extent
completed and is gone - as a result fiemap will not report that there's
delalloc or an extent for the range [64M, 65M[, so user space will be
misled into thinking that there's a hole in that range.
This could actually be sporadically triggered with test case generic/094
from fstests, which reports a missing extent/delalloc range like this:
generic/094 2s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad)
--- tests/generic/094.out 2020-06-10 19:29:03.830519425 +0100
+++ /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad 2024-02-28 11:00:00.381071525 +0000
@@ -1,3 +1,9 @@
QA output created by 094
fiemap run with sync
fiemap run without sync
+ERROR: couldn't find extent at 7
+map is 'HHDDHPPDPHPH'
+logical: [ 5.. 6] phys: 301517.. 301518 flags: 0x800 tot: 2
+logical: [ 8.. 8] phys: 301520.. 301520 flags: 0x800 tot: 1
...
(Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/094.out /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad' to see the entire diff)
So in order to fix this, while still avoiding deadlocks in the case where
the fiemap buffer is memory mapped to the same file, change fiemap to work
like the following:
1) Always lock the whole range in the inode's io tree before starting to
iterate the inode's subvolume tree searching for file extent items,
just like we did before commit b0ad381fa769 ("btrfs: fix deadlock with
fiemap and extent locking");
2) Now instead of writing to the fiemap buffer every time we have an extent
to report, write instead to a temporary buffer (1 page), and when that
buffer becomes full, stop iterating the file extent items, unlock the
range in the io tree, release the search path, submit all the entries
kept in that buffer to the fiemap buffer, and then resume the search
for file extent items after locking again the remainder of the range in
the io tree.
The buffer, having the size of a page, allows for 146 entries in a system
with 4K pages. This is a large enough value to have a good performance
by avoiding too many restarts of the search for file extent items.
In other words this preserves the huge performance gains made in the
last two years to fiemap, while avoiding the deadlocks in case the
fiemap buffer is memory mapped to the same file (useless in practice,
but possible and exercised by fuzz testing and syzbot).
Fixes: b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking")
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e6a2b6eb89e1..fbb05b0f7ebc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2453,12 +2453,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
return try_release_extent_state(tree, page, mask);
}
+struct btrfs_fiemap_entry {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+};
+
/*
- * To cache previous fiemap extent
+ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file
+ * range from the inode's io tree, unlock the subvolume tree search path, flush
+ * the fiemap cache and relock the file range and research the subvolume tree.
+ * The value here is something negative that can't be confused with a valid
+ * errno value and different from 1 because that's also a return value from
+ * fiemap_fill_next_extent() and also it's often used to mean some btree search
+ * did not find a key, so make it some distinct negative value.
+ */
+#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
+
+/*
+ * Used to:
*
- * Will be used for merging fiemap extent
+ * - Cache the next entry to be emitted to the fiemap buffer, so that we can
+ * merge extents that are contiguous and can be grouped as a single one;
+ *
+ * - Store extents ready to be written to the fiemap buffer in an intermediary
+ * buffer. This intermediary buffer is to ensure that in case the fiemap
+ * buffer is memory mapped to the fiemap target file, we don't deadlock
+ * during btrfs_page_mkwrite(). This is because during fiemap we are locking
+ * an extent range in order to prevent races with delalloc flushing and
+ * ordered extent completion, which is needed in order to reliably detect
+ * delalloc in holes and prealloc extents. And this can lead to a deadlock
+ * if the fiemap buffer is memory mapped to the file we are running fiemap
+ * against (a silly, useless in practice scenario, but possible) because
+ * btrfs_page_mkwrite() will try to lock the same extent range.
*/
struct fiemap_cache {
+ /* An array of ready fiemap entries. */
+ struct btrfs_fiemap_entry *entries;
+ /* Number of entries in the entries array. */
+ int entries_size;
+ /* Index of the next entry in the entries array to write to. */
+ int entries_pos;
+ /*
+ * Once the entries array is full, this indicates what's the offset for
+ * the next file extent item we must search for in the inode's subvolume
+ * tree after unlocking the extent range in the inode's io tree and
+ * releasing the search path.
+ */
+ u64 next_search_offset;
+ /*
+ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
+ * to count ourselves emitted extents and stop instead of relying on
+ * fiemap_fill_next_extent() because we buffer ready fiemap entries at
+ * the @entries array, and we want to stop as soon as we hit the max
+ * amount of extents to map, not just to save time but also to make the
+ * logic at extent_fiemap() simpler.
+ */
+ unsigned int extents_mapped;
+ /* Fields for the cached extent (unsubmitted, not ready, extent). */
u64 offset;
u64 phys;
u64 len;
@@ -2466,6 +2519,28 @@ struct fiemap_cache {
bool cached;
};
+static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ for (int i = 0; i < cache->entries_pos; i++) {
+ struct btrfs_fiemap_entry *entry = &cache->entries[i];
+ int ret;
+
+ ret = fiemap_fill_next_extent(fieinfo, entry->offset,
+ entry->phys, entry->len,
+ entry->flags);
+ /*
+ * Ignore 1 (reached max entries) because we keep track of that
+ * ourselves in emit_fiemap_extent().
+ */
+ if (ret < 0)
+ return ret;
+ }
+ cache->entries_pos = 0;
+
+ return 0;
+}
+
/*
* Helper to submit fiemap extent.
*
@@ -2480,8 +2555,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
+ struct btrfs_fiemap_entry *entry;
u64 cache_end;
- int ret = 0;
/* Set at the end of extent_fiemap(). */
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
@@ -2494,7 +2569,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
* find an extent that starts at an offset behind the end offset of the
* previous extent we processed. This happens if fiemap is called
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
+ * after we had to unlock the file range, release the search path, emit
+ * the fiemap extents stored in the buffer (cache->entries array) and
+ * the lock the remainder of the range and re-search the btree.
*
* For example we are in leaf X processing its last item, which is the
* file extent item for file range [512K, 1M[, and after
@@ -2607,11 +2684,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
emit:
/* Not mergeable, need to submit cached one */
- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
- cache->len, cache->flags);
- cache->cached = false;
- if (ret)
- return ret;
+
+ if (cache->entries_pos == cache->entries_size) {
+ /*
+ * We will need to research for the end offset of the last
+ * stored extent and not from the current offset, because after
+ * unlocking the range and releasing the path, if there's a hole
+ * between that end offset and this current offset, a new extent
+ * may have been inserted due to a new write, so we don't want
+ * to miss it.
+ */
+ entry = &cache->entries[cache->entries_size - 1];
+ cache->next_search_offset = entry->offset + entry->len;
+ cache->cached = false;
+
+ return BTRFS_FIEMAP_FLUSH_CACHE;
+ }
+
+ entry = &cache->entries[cache->entries_pos];
+ entry->offset = cache->offset;
+ entry->phys = cache->phys;
+ entry->len = cache->len;
+ entry->flags = cache->flags;
+ cache->entries_pos++;
+ cache->extents_mapped++;
+
+ if (cache->extents_mapped == fieinfo->fi_extents_max) {
+ cache->cached = false;
+ return 1;
+ }
assign:
cache->cached = true;
cache->offset = offset;
@@ -2737,8 +2838,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
* neighbour leaf).
* We also need the private clone because holding a read lock on an
* extent buffer of the subvolume's b+tree will make lockdep unhappy
- * when we call fiemap_fill_next_extent(), because that may cause a page
- * fault when filling the user space buffer with fiemap data.
+ * when we check if extents are shared, as backref walking may need to
+ * lock the same leaf we are processing.
*/
clone = btrfs_clone_extent_buffer(path->nodes[0]);
if (!clone)
@@ -2778,34 +2879,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
- u64 lockstart;
- u64 lockend;
u64 prealloc_len = 0;
bool delalloc;
- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
- lockend = round_up(end, inode->root->fs_info->sectorsize);
-
- /*
- * We are only locking for the delalloc range because that's the
- * only thing that can change here. With fiemap we have a lock
- * on the inode, so no buffered or direct writes can happen.
- *
- * However mmaps and normal page writeback will cause this to
- * change arbitrarily. We have to lock the extent lock here to
- * make sure that nobody messes with the tree while we're doing
- * btrfs_find_delalloc_in_range.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;
@@ -2973,6 +3056,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
+ struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
@@ -2985,26 +3069,33 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
bool stopped = false;
int ret;
+ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
+ cache.entries = kmalloc_array(cache.entries_size,
+ sizeof(struct btrfs_fiemap_entry),
+ GFP_KERNEL);
backref_ctx = btrfs_alloc_backref_share_check_ctx();
path = btrfs_alloc_path();
- if (!backref_ctx || !path) {
+ if (!cache.entries || !backref_ctx || !path) {
ret = -ENOMEM;
goto out;
}
+restart:
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
+ lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
- goto out;
+ goto out_unlock;
btrfs_release_path(path);
path->reada = READA_FORWARD;
ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/*
* No file extent item found, but we may have delalloc between
@@ -3051,7 +3142,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
backref_ctx, 0, 0, 0,
prev_extent_end, hole_end);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/* fiemap_fill_next_extent() told us to stop. */
stopped = true;
@@ -3107,7 +3198,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
extent_gen,
backref_ctx);
if (ret < 0)
- goto out;
+ goto out_unlock;
else if (ret > 0)
flags |= FIEMAP_EXTENT_SHARED;
}
@@ -3118,9 +3209,9 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
- /* fiemap_fill_next_extent() told us to stop. */
+ /* emit_fiemap_extent() told us to stop. */
stopped = true;
break;
}
@@ -3129,12 +3220,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
next_item:
if (fatal_signal_pending(current)) {
ret = -EINTR;
- goto out;
+ goto out_unlock;
}
ret = fiemap_next_leaf_item(inode, path);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/* No more file extent items for this inode. */
break;
@@ -3143,22 +3234,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
check_eof_delalloc:
- /*
- * Release (and free) the path before emitting any final entries to
- * fiemap_fill_next_extent() to keep lockdep happy. This is because
- * once we find no more file extent items exist, we may have a
- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page
- * faults when copying data to the user space buffer.
- */
- btrfs_free_path(path);
- path = NULL;
-
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
- goto out;
+ goto out_unlock;
prev_extent_end = range_end;
}
@@ -3166,28 +3247,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
- u64 lockstart;
- u64 lockend;
bool delalloc;
- lockstart = round_down(prev_extent_end, sectorsize);
- lockend = round_up(i_size, sectorsize);
-
- /*
- * See the comment in fiemap_process_hole as to why
- * we're doing the locking here.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
@@ -3195,9 +3264,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
}
+out_unlock:
+ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
+ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
+ btrfs_release_path(path);
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+ len -= cache.next_search_offset - start;
+ start = cache.next_search_offset;
+ goto restart;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * Must free the path before emitting to the fiemap buffer because we
+ * may have a non-cloned leaf and if the fiemap buffer is memory mapped
+ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
+ * waiting for an ordered extent that in order to complete needs to
+ * modify that leaf, therefore leading to a deadlock.
+ */
+ btrfs_free_path(path);
+ path = NULL;
+
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+
ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
free_extent_state(delalloc_cached_state);
+ kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
btrfs_free_path(path);
return ret;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 978b63f7464abcfd364a6c95f734282c50f3decf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040115-paparazzi-shortcut-137f@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap")
418b09027743 ("btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given")
a1a4a9ca77f1 ("btrfs: fix race between ordered extent completion and fiemap")
b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 978b63f7464abcfd364a6c95f734282c50f3decf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Wed, 28 Feb 2024 11:37:56 +0000
Subject: [PATCH] btrfs: fix race when detecting delalloc ranges during fiemap
For fiemap we recently stopped locking the target extent range for the
whole duration of the fiemap call, in order to avoid a deadlock in a
scenario where the fiemap buffer happens to be a memory mapped range of
the same file. This use case is very unlikely to be useful in practice but
it may be triggered by fuzz testing (syzbot, etc).
This however introduced a race that makes us miss delalloc ranges for
file regions that are currently holes, so the caller of fiemap will not
be aware that there's data for some file regions. This can be quite
serious for some use cases - for example in coreutils versions before 9.0,
the cp program used fiemap to detect holes and data in the source file,
copying only regions with data (extents or delalloc) from the source file
to the destination file in order to preserve holes (see the documentation
for its --sparse command line option). This means that if cp was used
with a source file that had delalloc in a hole, the destination file could
end up without that data, which is effectively a data loss issue, if it
happened to hit the race described below.
The race happens like this:
1) Fiemap is called, without the FIEMAP_FLAG_SYNC flag, for a file that
has delalloc in the file range [64M, 65M[, which is currently a hole;
2) Fiemap locks the inode in shared mode, then starts iterating the
inode's subvolume tree searching for file extent items, without having
the whole fiemap target range locked in the inode's io tree - the
change introduced recently by commit b0ad381fa769 ("btrfs: fix
deadlock with fiemap and extent locking"). It only locks ranges in
the io tree when it finds a hole or prealloc extent since that
commit;
3) Note that fiemap clones each leaf before using it, and this is to
avoid deadlocks when locking a file range in the inode's io tree and
the fiemap buffer is memory mapped to some file, because writing
to the page with btrfs_page_mkwrite() will wait on any ordered extent
for the page's range and the ordered extent needs to lock the range
and may need to modify the same leaf, therefore leading to a deadlock
on the leaf;
4) While iterating the file extent items in the cloned leaf before
finding the hole in the range [64M, 65M[, the delalloc in that range
is flushed and its ordered extent completes - meaning the corresponding
file extent item is in the inode's subvolume tree, but not present in
the cloned leaf that fiemap is iterating over;
5) When fiemap finds the hole in the [64M, 65M[ range by seeing the gap in
the cloned leaf (or a file extent item with disk_bytenr == 0 in case
the NO_HOLES feature is not enabled), it will lock that file range in
the inode's io tree and then search for delalloc by checking for the
EXTENT_DELALLOC bit in the io tree for that range and ordered extents
(with btrfs_find_delalloc_in_range()). But it finds nothing since the
delalloc in that range was already flushed and the ordered extent
completed and is gone - as a result fiemap will not report that there's
delalloc or an extent for the range [64M, 65M[, so user space will be
misled into thinking that there's a hole in that range.
This could actually be sporadically triggered with test case generic/094
from fstests, which reports a missing extent/delalloc range like this:
generic/094 2s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad)
--- tests/generic/094.out 2020-06-10 19:29:03.830519425 +0100
+++ /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad 2024-02-28 11:00:00.381071525 +0000
@@ -1,3 +1,9 @@
QA output created by 094
fiemap run with sync
fiemap run without sync
+ERROR: couldn't find extent at 7
+map is 'HHDDHPPDPHPH'
+logical: [ 5.. 6] phys: 301517.. 301518 flags: 0x800 tot: 2
+logical: [ 8.. 8] phys: 301520.. 301520 flags: 0x800 tot: 1
...
(Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/094.out /home/fdmanana/git/hub/xfstests/results//generic/094.out.bad' to see the entire diff)
So in order to fix this, while still avoiding deadlocks in the case where
the fiemap buffer is memory mapped to the same file, change fiemap to work
like the following:
1) Always lock the whole range in the inode's io tree before starting to
iterate the inode's subvolume tree searching for file extent items,
just like we did before commit b0ad381fa769 ("btrfs: fix deadlock with
fiemap and extent locking");
2) Now instead of writing to the fiemap buffer every time we have an extent
to report, write instead to a temporary buffer (1 page), and when that
buffer becomes full, stop iterating the file extent items, unlock the
range in the io tree, release the search path, submit all the entries
kept in that buffer to the fiemap buffer, and then resume the search
for file extent items after locking again the remainder of the range in
the io tree.
The buffer having a size of a page, allows for 146 entries in a system
with 4K pages. This is a large enough value to have a good performance
by avoiding too many restarts of the search for file extent items.
In other words this preserves the huge performance gains made in the
last two years to fiemap, while avoiding the deadlocks in case the
fiemap buffer is memory mapped to the same file (useless in practice,
but possible and exercised by fuzz testing and syzbot).
Fixes: b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking")
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e6a2b6eb89e1..fbb05b0f7ebc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2453,12 +2453,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
return try_release_extent_state(tree, page, mask);
}
+struct btrfs_fiemap_entry {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+};
+
/*
- * To cache previous fiemap extent
+ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file
+ * range from the inode's io tree, unlock the subvolume tree search path, flush
+ * the fiemap cache and relock the file range and research the subvolume tree.
+ * The value here is something negative that can't be confused with a valid
+ * errno value and different from 1 because that's also a return value from
+ * fiemap_fill_next_extent() and also it's often used to mean some btree search
+ * did not find a key, so make it some distinct negative value.
+ */
+#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
+
+/*
+ * Used to:
*
- * Will be used for merging fiemap extent
+ * - Cache the next entry to be emitted to the fiemap buffer, so that we can
+ * merge extents that are contiguous and can be grouped as a single one;
+ *
+ * - Store extents ready to be written to the fiemap buffer in an intermediary
+ * buffer. This intermediary buffer is to ensure that in case the fiemap
+ * buffer is memory mapped to the fiemap target file, we don't deadlock
+ * during btrfs_page_mkwrite(). This is because during fiemap we are locking
+ * an extent range in order to prevent races with delalloc flushing and
+ * ordered extent completion, which is needed in order to reliably detect
+ * delalloc in holes and prealloc extents. And this can lead to a deadlock
+ * if the fiemap buffer is memory mapped to the file we are running fiemap
+ * against (a silly, useless in practice scenario, but possible) because
+ * btrfs_page_mkwrite() will try to lock the same extent range.
*/
struct fiemap_cache {
+ /* An array of ready fiemap entries. */
+ struct btrfs_fiemap_entry *entries;
+ /* Number of entries in the entries array. */
+ int entries_size;
+ /* Index of the next entry in the entries array to write to. */
+ int entries_pos;
+ /*
+ * Once the entries array is full, this indicates what's the offset for
+ * the next file extent item we must search for in the inode's subvolume
+ * tree after unlocking the extent range in the inode's io tree and
+ * releasing the search path.
+ */
+ u64 next_search_offset;
+ /*
+ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
+ * to count ourselves emitted extents and stop instead of relying on
+ * fiemap_fill_next_extent() because we buffer ready fiemap entries at
+ * the @entries array, and we want to stop as soon as we hit the max
+ * amount of extents to map, not just to save time but also to make the
+ * logic at extent_fiemap() simpler.
+ */
+ unsigned int extents_mapped;
+ /* Fields for the cached extent (unsubmitted, not ready, extent). */
u64 offset;
u64 phys;
u64 len;
@@ -2466,6 +2519,28 @@ struct fiemap_cache {
bool cached;
};
+static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ for (int i = 0; i < cache->entries_pos; i++) {
+ struct btrfs_fiemap_entry *entry = &cache->entries[i];
+ int ret;
+
+ ret = fiemap_fill_next_extent(fieinfo, entry->offset,
+ entry->phys, entry->len,
+ entry->flags);
+ /*
+ * Ignore 1 (reached max entries) because we keep track of that
+ * ourselves in emit_fiemap_extent().
+ */
+ if (ret < 0)
+ return ret;
+ }
+ cache->entries_pos = 0;
+
+ return 0;
+}
+
/*
* Helper to submit fiemap extent.
*
@@ -2480,8 +2555,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
+ struct btrfs_fiemap_entry *entry;
u64 cache_end;
- int ret = 0;
/* Set at the end of extent_fiemap(). */
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
@@ -2494,7 +2569,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
* find an extent that starts at an offset behind the end offset of the
* previous extent we processed. This happens if fiemap is called
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
+ * after we had to unlock the file range, release the search path, emit
+ * the fiemap extents stored in the buffer (cache->entries array) and
+ * the lock the remainder of the range and re-search the btree.
*
* For example we are in leaf X processing its last item, which is the
* file extent item for file range [512K, 1M[, and after
@@ -2607,11 +2684,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
emit:
/* Not mergeable, need to submit cached one */
- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
- cache->len, cache->flags);
- cache->cached = false;
- if (ret)
- return ret;
+
+ if (cache->entries_pos == cache->entries_size) {
+ /*
+ * We will need to research for the end offset of the last
+ * stored extent and not from the current offset, because after
+ * unlocking the range and releasing the path, if there's a hole
+ * between that end offset and this current offset, a new extent
+ * may have been inserted due to a new write, so we don't want
+ * to miss it.
+ */
+ entry = &cache->entries[cache->entries_size - 1];
+ cache->next_search_offset = entry->offset + entry->len;
+ cache->cached = false;
+
+ return BTRFS_FIEMAP_FLUSH_CACHE;
+ }
+
+ entry = &cache->entries[cache->entries_pos];
+ entry->offset = cache->offset;
+ entry->phys = cache->phys;
+ entry->len = cache->len;
+ entry->flags = cache->flags;
+ cache->entries_pos++;
+ cache->extents_mapped++;
+
+ if (cache->extents_mapped == fieinfo->fi_extents_max) {
+ cache->cached = false;
+ return 1;
+ }
assign:
cache->cached = true;
cache->offset = offset;
@@ -2737,8 +2838,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
* neighbour leaf).
* We also need the private clone because holding a read lock on an
* extent buffer of the subvolume's b+tree will make lockdep unhappy
- * when we call fiemap_fill_next_extent(), because that may cause a page
- * fault when filling the user space buffer with fiemap data.
+ * when we check if extents are shared, as backref walking may need to
+ * lock the same leaf we are processing.
*/
clone = btrfs_clone_extent_buffer(path->nodes[0]);
if (!clone)
@@ -2778,34 +2879,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
- u64 lockstart;
- u64 lockend;
u64 prealloc_len = 0;
bool delalloc;
- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
- lockend = round_up(end, inode->root->fs_info->sectorsize);
-
- /*
- * We are only locking for the delalloc range because that's the
- * only thing that can change here. With fiemap we have a lock
- * on the inode, so no buffered or direct writes can happen.
- *
- * However mmaps and normal page writeback will cause this to
- * change arbitrarily. We have to lock the extent lock here to
- * make sure that nobody messes with the tree while we're doing
- * btrfs_find_delalloc_in_range.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;
@@ -2973,6 +3056,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
+ struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
@@ -2985,26 +3069,33 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
bool stopped = false;
int ret;
+ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
+ cache.entries = kmalloc_array(cache.entries_size,
+ sizeof(struct btrfs_fiemap_entry),
+ GFP_KERNEL);
backref_ctx = btrfs_alloc_backref_share_check_ctx();
path = btrfs_alloc_path();
- if (!backref_ctx || !path) {
+ if (!cache.entries || !backref_ctx || !path) {
ret = -ENOMEM;
goto out;
}
+restart:
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
+ lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
- goto out;
+ goto out_unlock;
btrfs_release_path(path);
path->reada = READA_FORWARD;
ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/*
* No file extent item found, but we may have delalloc between
@@ -3051,7 +3142,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
backref_ctx, 0, 0, 0,
prev_extent_end, hole_end);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/* fiemap_fill_next_extent() told us to stop. */
stopped = true;
@@ -3107,7 +3198,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
extent_gen,
backref_ctx);
if (ret < 0)
- goto out;
+ goto out_unlock;
else if (ret > 0)
flags |= FIEMAP_EXTENT_SHARED;
}
@@ -3118,9 +3209,9 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
- /* fiemap_fill_next_extent() told us to stop. */
+ /* emit_fiemap_extent() told us to stop. */
stopped = true;
break;
}
@@ -3129,12 +3220,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
next_item:
if (fatal_signal_pending(current)) {
ret = -EINTR;
- goto out;
+ goto out_unlock;
}
ret = fiemap_next_leaf_item(inode, path);
if (ret < 0) {
- goto out;
+ goto out_unlock;
} else if (ret > 0) {
/* No more file extent items for this inode. */
break;
@@ -3143,22 +3234,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
check_eof_delalloc:
- /*
- * Release (and free) the path before emitting any final entries to
- * fiemap_fill_next_extent() to keep lockdep happy. This is because
- * once we find no more file extent items exist, we may have a
- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page
- * faults when copying data to the user space buffer.
- */
- btrfs_free_path(path);
- path = NULL;
-
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
- goto out;
+ goto out_unlock;
prev_extent_end = range_end;
}
@@ -3166,28 +3247,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
- struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
- u64 lockstart;
- u64 lockend;
bool delalloc;
- lockstart = round_down(prev_extent_end, sectorsize);
- lockend = round_up(i_size, sectorsize);
-
- /*
- * See the comment in fiemap_process_hole as to why
- * we're doing the locking here.
- */
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
@@ -3195,9 +3264,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
}
+out_unlock:
+ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
+ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
+ btrfs_release_path(path);
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+ len -= cache.next_search_offset - start;
+ start = cache.next_search_offset;
+ goto restart;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * Must free the path before emitting to the fiemap buffer because we
+ * may have a non-cloned leaf and if the fiemap buffer is memory mapped
+ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
+ * waiting for an ordered extent that in order to complete needs to
+ * modify that leaf, therefore leading to a deadlock.
+ */
+ btrfs_free_path(path);
+ path = NULL;
+
+ ret = flush_fiemap_cache(fieinfo, &cache);
+ if (ret)
+ goto out;
+
ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
free_extent_state(delalloc_cached_state);
+ kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
btrfs_free_path(path);
return ret;
The patch below does not apply to the 6.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.8.y
git checkout FETCH_HEAD
git cherry-pick -x 32fbe5246582af4f611ccccee33fd6e559087252
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033005-graded-dangle-3a21@gregkh' --subject-prefix 'PATCH 6.8.y' HEAD^..
Possible dependencies:
32fbe5246582 ("crash: use macro to add crashk_res into iomem early for specific arch")
85fcde402db1 ("kexec: split crashkernel reservation code out from crash_core.c")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32fbe5246582af4f611ccccee33fd6e559087252 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe(a)redhat.com>
Date: Mon, 25 Mar 2024 09:50:50 +0800
Subject: [PATCH] crash: use macro to add crashk_res into iomem early for
specific arch
There are regression reports[1][2] that the crashkernel region on x86_64
sometimes can't be added into the iomem tree. This causes the later failure
of kdump loading.
This happened after commit 4a693ce65b18 ("kdump: defer the insertion of
crashkernel resources") was merged.
Even though these reported issues have proved to be related to other
components and were merely exposed after the above commit was applied, I
still would like to keep crashk_res and crashk_low_res being added into
iomem early as before, because the early adding has always been there on
x86_64 and has worked very well. For the safety of kdump, let's change it back.
Here, add a macro HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY so that only an
ARCH defining the macro adds crashk_res/_low_res into iomem early. Then
define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY on x86 to enable it.
Note: In reserve_crashkernel_low(), there's a remnant of crashk_low_res
handling which was mistakenly added back in commit 85fcde402db1 ("kexec:
split crashkernel reservation code out from crash_core.c").
[1]
[PATCH V2] x86/kexec: do not update E820 kexec table for setup_data
https://lore.kernel.org/all/Zfv8iCL6CT2JqLIC@darkstar.users.ipa.redhat.com/…
[2]
Question about Address Range Validation in Crash Kernel Allocation
https://lore.kernel.org/all/4eeac1f733584855965a2ea62fa4da58@huawei.com/T/#u
Link: https://lkml.kernel.org/r/ZgDYemRQ2jxjLkq+@MiWiFi-R3L-srv
Fixes: 4a693ce65b18 ("kdump: defer the insertion of crashkernel resources")
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Huacai Chen <chenhuacai(a)loongson.cn>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Li Huafei <lihuafei1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h
index 152239f95541..7835b2cdff04 100644
--- a/arch/x86/include/asm/crash_reserve.h
+++ b/arch/x86/include/asm/crash_reserve.h
@@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void)
#endif
}
+#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+
#endif /* _X86_CRASH_RESERVE_H */
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index bbb6c3cb00e4..066668799f75 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -366,7 +366,9 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
insert_resource(&iomem_resource, &crashk_low_res);
+#endif
#endif
return 0;
}
@@ -448,8 +450,12 @@ void __init reserve_crashkernel_generic(char *cmdline,
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+ insert_resource(&iomem_resource, &crashk_res);
+#endif
}
+#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
if (crashk_res.start < crashk_res.end)
@@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void)
}
early_initcall(insert_crashkernel_resources);
#endif
+#endif
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0c76106cb97548810214def8ee22700bbbb90543
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040127-defraud-ladle-60f4@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
0c76106cb975 ("scsi: sd: Fix TCG OPAL unlock on system resume")
99398d2070ab ("scsi: sd: Do not issue commands to suspended disks on shutdown")
8b4d9469d0b0 ("ata: libata-scsi: Fix delayed scsi_rescan_device() execution")
ff48b37802e5 ("scsi: Do not attempt to rescan suspended devices")
aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop")
3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
2a5a4326e583 ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c76106cb97548810214def8ee22700bbbb90543 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal(a)kernel.org>
Date: Tue, 19 Mar 2024 16:12:09 +0900
Subject: [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume
Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop
management") introduced the manage_system_start_stop scsi_device flag to
allow libata to indicate to the SCSI disk driver that nothing should be
done when resuming a disk on system resume. This change turned the
execution of sd_resume() into a no-op for ATA devices on system
resume. While this solved deadlock issues during device resume, this change
also wrongly removed the execution of opal_unlock_from_suspend(). As a
result, devices with TCG OPAL locking enabled remain locked and
inaccessible after a system resume from sleep.
To fix this issue, introduce the SCSI driver resume method and implement it
with the sd_resume() function calling opal_unlock_from_suspend(). The
former sd_resume() function is renamed to sd_resume_common() and modified
to call the new sd_resume() function. For non-ATA devices, this results in
no functional changes.
In order for libata to explicitly execute sd_resume() when a device is
resumed during system restart, the function scsi_resume_device() is
introduced. libata calls this function from the revalidation work executed
on device resume, a state that is indicated with the new device flag
ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked
on resume, allowing normal operation.
Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538
Cc: stable(a)vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org>
Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index b0d6e69c4a5b..214b935c2ced 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
ehc->saved_ncq_enabled |= 1 << devno;
/* If we are resuming, wake up the device */
- if (ap->pflags & ATA_PFLAG_RESUMING)
+ if (ap->pflags & ATA_PFLAG_RESUMING) {
+ dev->flags |= ATA_DFLAG_RESUMING;
ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+ }
}
}
@@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
return 0;
err:
+ dev->flags &= ~ATA_DFLAG_RESUMING;
*r_failed_dev = dev;
return rc;
}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0a0f483124c3..2f4c58837641 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
+ bool do_resume;
int ret = 0;
mutex_lock(&ap->scsi_scan_mutex);
@@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work)
if (scsi_device_get(sdev))
continue;
+ do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
spin_unlock_irqrestore(ap->lock, flags);
+ if (do_resume) {
+ ret = scsi_resume_device(sdev);
+ if (ret == -EWOULDBLOCK)
+ goto unlock;
+ dev->flags &= ~ATA_DFLAG_RESUMING;
+ }
ret = scsi_rescan_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 8d06475de17a..ffd7e7e72933 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
}
EXPORT_SYMBOL(scsi_add_device);
+int scsi_resume_device(struct scsi_device *sdev)
+{
+ struct device *dev = &sdev->sdev_gendev;
+ int ret = 0;
+
+ device_lock(dev);
+
+ /*
+ * Bail out if the device or its queue are not running. Otherwise,
+ * the rescan may block waiting for commands to be executed, with us
+ * holding the device lock. This can result in a potential deadlock
+ * in the power management core code when system resume is on-going.
+ */
+ if (sdev->sdev_state != SDEV_RUNNING ||
+ blk_queue_pm_only(sdev->request_queue)) {
+ ret = -EWOULDBLOCK;
+ goto unlock;
+ }
+
+ if (dev->driver && try_module_get(dev->driver->owner)) {
+ struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+ if (drv->resume)
+ ret = drv->resume(dev);
+ module_put(dev->driver->owner);
+ }
+
+unlock:
+ device_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
int scsi_rescan_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ccff8f2e2e75..3cf898670290 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4108,7 +4108,21 @@ static int sd_suspend_runtime(struct device *dev)
return sd_suspend_common(dev, true);
}
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+ if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+ sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
int ret;
@@ -4124,7 +4138,7 @@ static int sd_resume(struct device *dev, bool runtime)
sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
- opal_unlock_from_suspend(sdkp->opal_dev);
+ sd_resume(dev);
sdkp->suspended = false;
}
@@ -4143,7 +4157,7 @@ static int sd_resume_system(struct device *dev)
return 0;
}
- return sd_resume(dev, false);
+ return sd_resume_common(dev, false);
}
static int sd_resume_runtime(struct device *dev)
@@ -4170,7 +4184,7 @@ static int sd_resume_runtime(struct device *dev)
"Failed to clear sense data\n");
}
- return sd_resume(dev, true);
+ return sd_resume_common(dev, true);
}
static const struct dev_pm_ops sd_pm_ops = {
@@ -4193,6 +4207,7 @@ static struct scsi_driver sd_template = {
.pm = &sd_pm_ops,
},
.rescan = sd_rescan,
+ .resume = sd_resume,
.init_command = sd_init_command,
.uninit_command = sd_uninit_command,
.done = sd_done,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 26d68115afb8..324d792e7c78 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -107,6 +107,7 @@ enum {
ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
+ ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
ATA_DFLAG_DETACH = (1 << 24),
ATA_DFLAG_DETACHED = (1 << 25),
ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 4ce1988b2ba0..f40915d2ecee 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -12,6 +12,7 @@ struct request;
struct scsi_driver {
struct device_driver gendrv;
+ int (*resume)(struct device *);
void (*rescan)(struct device *);
blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b259d42a1e1a..129001f600fc 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
#define scsi_template_proc_dir(sht) NULL
#endif
extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
extern int scsi_rescan_device(struct scsi_device *sdev);
extern void scsi_remove_host(struct Scsi_Host *);
extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
commit 27f58c04a8f4 ("scsi: sg: Avoid sg device teardown race")
introduced an incorrect WARN_ON_ONCE() and missed a sequence where
sg_device_destroy() is called after scsi_device_put() when handling errors.
sg_device_destroy() is accessing the parent scsi_device request_queue which
will already be set to NULL when the preceding call to scsi_device_put()
removed the last reference to the parent scsi_device.
Drop the incorrect WARN_ON_ONCE() - allowing more than one concurrent
access to the sg device - and make sure sg_device_destroy() is not used
after scsi_device_put() in the error handling.
Link: https://lore.kernel.org/all/5375B275-D137-4D5F-BE25-6AF8ACAE41EF@linux.ibm.…
Fixes: 27f58c04a8f4 ("scsi: sg: Avoid sg device teardown race")
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexander Wetzel <Alexander(a)wetzel-home.de>
---
The WARN_ON_ONCE() was kind of stupid to add:
We add a reference for each sg_open(). So opening a second session and
then closing either one will trigger the warning... Nothing to warn
about here.
Alexander
---
drivers/scsi/sg.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 386981c6976a..833c9277419b 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -372,8 +372,9 @@ sg_open(struct inode *inode, struct file *filp)
error_out:
scsi_autopm_put_device(sdp->device);
sdp_put:
+ kref_put(&sdp->d_ref, sg_device_destroy);
scsi_device_put(sdp->device);
- goto sg_put;
+ return retval;
}
/* Release resources associated with a successful sg_open()
@@ -2233,7 +2234,6 @@ sg_remove_sfp_usercontext(struct work_struct *work)
"sg_remove_sfp: sfp=0x%p\n", sfp));
kfree(sfp);
- WARN_ON_ONCE(kref_read(&sdp->d_ref) != 1);
kref_put(&sdp->d_ref, sg_device_destroy);
scsi_device_put(device);
module_put(THIS_MODULE);
--
2.44.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f4d1960764d8a70318b02f15203a1be2b2554ca1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040119-ranked-doormat-088b@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
f4d1960764d8 ("USB: core: Fix deadlock in port "disable" sysfs attribute")
f061f43d7418 ("usb: hub: port: add sysfs entry to switch port power")
8c67d06f3fd9 ("usb: Link the ports to the connectors they are attached to")
b8f1ba99cea5 ("usb: hub: make wait_for_connected() take an int instead of a pointer to int")
f59f93cd1d72 ("usb: hub: avoid warm port reset during USB3 disconnect")
7142452387c7 ("USB: Verify the port status when timeout happens during port suspend")
975f94c7d6c3 ("usb: core: hub: fix race condition about TRSMRCY of resume")
355c74e55e99 ("usb: export firmware port location in sysfs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4d1960764d8a70318b02f15203a1be2b2554ca1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Fri, 15 Mar 2024 13:06:33 -0400
Subject: [PATCH] USB: core: Fix deadlock in port "disable" sysfs attribute
The show and store callback routines for the "disable" sysfs attribute
file in port.c acquire the device lock for the port's parent hub
device. This can cause problems if another process has locked the hub
to remove it or change its configuration:
Removing the hub or changing its configuration requires the
hub interface to be removed, which requires the port device
to be removed, and device_del() waits until all outstanding
sysfs attribute callbacks for the ports have returned. The
lock can't be released until then.
But the disable_show() or disable_store() routine can't return
until after it has acquired the lock.
The resulting deadlock can be avoided by calling
sysfs_break_active_protection(). This will cause the sysfs core not
to wait for the attribute's callback routine to return, allowing the
removal to proceed. The disadvantage is that after making this call,
there is no guarantee that the hub structure won't be deallocated at
any moment. To prevent this, we have to acquire a reference to it
first by calling hub_get().
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/f7a8c135-a495-4ce6-bd49-405a45e7ea9a@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 5b5e613a11e5..686c01af03e6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev,
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev,
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
@@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f4d1960764d8a70318b02f15203a1be2b2554ca1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040117-shallow-faceless-7f3d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f4d1960764d8 ("USB: core: Fix deadlock in port "disable" sysfs attribute")
f061f43d7418 ("usb: hub: port: add sysfs entry to switch port power")
8c67d06f3fd9 ("usb: Link the ports to the connectors they are attached to")
b8f1ba99cea5 ("usb: hub: make wait_for_connected() take an int instead of a pointer to int")
f59f93cd1d72 ("usb: hub: avoid warm port reset during USB3 disconnect")
7142452387c7 ("USB: Verify the port status when timeout happens during port suspend")
975f94c7d6c3 ("usb: core: hub: fix race condition about TRSMRCY of resume")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4d1960764d8a70318b02f15203a1be2b2554ca1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Fri, 15 Mar 2024 13:06:33 -0400
Subject: [PATCH] USB: core: Fix deadlock in port "disable" sysfs attribute
The show and store callback routines for the "disable" sysfs attribute
file in port.c acquire the device lock for the port's parent hub
device. This can cause problems if another process has locked the hub
to remove it or change its configuration:
Removing the hub or changing its configuration requires the
hub interface to be removed, which requires the port device
to be removed, and device_del() waits until all outstanding
sysfs attribute callbacks for the ports have returned. The
lock can't be released until then.
But the disable_show() or disable_store() routine can't return
until after it has acquired the lock.
The resulting deadlock can be avoided by calling
sysfs_break_active_protection(). This will cause the sysfs core not
to wait for the attribute's callback routine to return, allowing the
removal to proceed. The disadvantage is that after making this call,
there is no guarantee that the hub structure won't be deallocated at
any moment. To prevent this, we have to acquire a reference to it
first by calling hub_get().
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/f7a8c135-a495-4ce6-bd49-405a45e7ea9a@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 5b5e613a11e5..686c01af03e6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev,
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev,
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
@@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f4d1960764d8a70318b02f15203a1be2b2554ca1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040116-mortician-grudging-9be5@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
f4d1960764d8 ("USB: core: Fix deadlock in port "disable" sysfs attribute")
f061f43d7418 ("usb: hub: port: add sysfs entry to switch port power")
8c67d06f3fd9 ("usb: Link the ports to the connectors they are attached to")
b8f1ba99cea5 ("usb: hub: make wait_for_connected() take an int instead of a pointer to int")
f59f93cd1d72 ("usb: hub: avoid warm port reset during USB3 disconnect")
7142452387c7 ("USB: Verify the port status when timeout happens during port suspend")
975f94c7d6c3 ("usb: core: hub: fix race condition about TRSMRCY of resume")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4d1960764d8a70318b02f15203a1be2b2554ca1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Fri, 15 Mar 2024 13:06:33 -0400
Subject: [PATCH] USB: core: Fix deadlock in port "disable" sysfs attribute
The show and store callback routines for the "disable" sysfs attribute
file in port.c acquire the device lock for the port's parent hub
device. This can cause problems if another process has locked the hub
to remove it or change its configuration:
Removing the hub or changing its configuration requires the
hub interface to be removed, which requires the port device
to be removed, and device_del() waits until all outstanding
sysfs attribute callbacks for the ports have returned. The
lock can't be released until then.
But the disable_show() or disable_store() routine can't return
until after it has acquired the lock.
The resulting deadlock can be avoided by calling
sysfs_break_active_protection(). This will cause the sysfs core not
to wait for the attribute's callback routine to return, allowing the
removal to proceed. The disadvantage is that after making this call,
there is no guarantee that the hub structure won't be deallocated at
any moment. To prevent this, we have to acquire a reference to it
first by calling hub_get().
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/f7a8c135-a495-4ce6-bd49-405a45e7ea9a@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 5b5e613a11e5..686c01af03e6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev,
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev,
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
@@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f4d1960764d8a70318b02f15203a1be2b2554ca1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040114-outer-channel-e465@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
f4d1960764d8 ("USB: core: Fix deadlock in port "disable" sysfs attribute")
f061f43d7418 ("usb: hub: port: add sysfs entry to switch port power")
8c67d06f3fd9 ("usb: Link the ports to the connectors they are attached to")
b8f1ba99cea5 ("usb: hub: make wait_for_connected() take an int instead of a pointer to int")
f59f93cd1d72 ("usb: hub: avoid warm port reset during USB3 disconnect")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f4d1960764d8a70318b02f15203a1be2b2554ca1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Fri, 15 Mar 2024 13:06:33 -0400
Subject: [PATCH] USB: core: Fix deadlock in port "disable" sysfs attribute
The show and store callback routines for the "disable" sysfs attribute
file in port.c acquire the device lock for the port's parent hub
device. This can cause problems if another process has locked the hub
to remove it or change its configuration:
Removing the hub or changing its configuration requires the
hub interface to be removed, which requires the port device
to be removed, and device_del() waits until all outstanding
sysfs attribute callbacks for the ports have returned. The
lock can't be released until then.
But the disable_show() or disable_store() routine can't return
until after it has acquired the lock.
The resulting deadlock can be avoided by calling
sysfs_break_active_protection(). This will cause the sysfs core not
to wait for the attribute's callback routine to return, allowing the
removal to proceed. The disadvantage is that after making this call,
there is no guarantee that the hub structure won't be deallocated at
any moment. To prevent this, we have to acquire a reference to it
first by calling hub_get().
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/f7a8c135-a495-4ce6-bd49-405a45e7ea9a@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 5b5e613a11e5..686c01af03e6 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev,
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev,
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
@@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 80ba43e9f799cbdd83842fc27db667289b3150f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040155-shakiness-romp-690e@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
80ba43e9f799 ("USB: core: Fix deadlock in usb_deauthorize_interface()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 80ba43e9f799cbdd83842fc27db667289b3150f5 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Tue, 12 Mar 2024 11:48:23 -0400
Subject: [PATCH] USB: core: Fix deadlock in usb_deauthorize_interface()
Among the attribute file callback routines in
drivers/usb/core/sysfs.c, the interface_authorized_store() function is
the only one which acquires a device lock on an ancestor device: It
calls usb_deauthorize_interface(), which locks the interface's parent
USB device.
This will lead to deadlock if another process already owns that lock
and tries to remove the interface, whether through a configuration
change or because the device has been disconnected. As part of the
removal procedure, device_del() waits for all ongoing sysfs attribute
callbacks to complete. But usb_deauthorize_interface() can't complete
until the device lock has been released, and the lock won't be
released until the removal has finished.
The mechanism provided by sysfs to prevent this kind of deadlock is
to use the sysfs_break_active_protection() function, which tells sysfs
not to wait for the attribute callback.
Reported-and-tested-by: Yue Sun <samsun1006219(a)gmail.com>
Reported-by: xingwei lee <xrivendell7(a)gmail.com>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/CAEkJfYO6jRVC8Tfrd_R=cjO0hguhrV31fDPrLrNO…
Fixes: 310d2b4124c0 ("usb: interface authorization: SysFS part of USB interface authorization")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/1c37eea1-9f56-4534-b9d8-b443438dc869@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index f98263e21c2a..d83231d6736a 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev,
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 80ba43e9f799cbdd83842fc27db667289b3150f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040154-partner-wizard-6ead@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
80ba43e9f799 ("USB: core: Fix deadlock in usb_deauthorize_interface()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 80ba43e9f799cbdd83842fc27db667289b3150f5 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Tue, 12 Mar 2024 11:48:23 -0400
Subject: [PATCH] USB: core: Fix deadlock in usb_deauthorize_interface()
Among the attribute file callback routines in
drivers/usb/core/sysfs.c, the interface_authorized_store() function is
the only one which acquires a device lock on an ancestor device: It
calls usb_deauthorize_interface(), which locks the interface's parent
USB device.
This will lead to deadlock if another process already owns that lock
and tries to remove the interface, whether through a configuration
change or because the device has been disconnected. As part of the
removal procedure, device_del() waits for all ongoing sysfs attribute
callbacks to complete. But usb_deauthorize_interface() can't complete
until the device lock has been released, and the lock won't be
released until the removal has finished.
The mechanism provided by sysfs to prevent this kind of deadlock is
to use the sysfs_break_active_protection() function, which tells sysfs
not to wait for the attribute callback.
Reported-and-tested-by: Yue Sun <samsun1006219(a)gmail.com>
Reported-by: xingwei lee <xrivendell7(a)gmail.com>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/CAEkJfYO6jRVC8Tfrd_R=cjO0hguhrV31fDPrLrNO…
Fixes: 310d2b4124c0 ("usb: interface authorization: SysFS part of USB interface authorization")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/1c37eea1-9f56-4534-b9d8-b443438dc869@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index f98263e21c2a..d83231d6736a 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev,
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 80ba43e9f799cbdd83842fc27db667289b3150f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040153-verbalize-drum-2ff0@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
80ba43e9f799 ("USB: core: Fix deadlock in usb_deauthorize_interface()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 80ba43e9f799cbdd83842fc27db667289b3150f5 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Tue, 12 Mar 2024 11:48:23 -0400
Subject: [PATCH] USB: core: Fix deadlock in usb_deauthorize_interface()
Among the attribute file callback routines in
drivers/usb/core/sysfs.c, the interface_authorized_store() function is
the only one which acquires a device lock on an ancestor device: It
calls usb_deauthorize_interface(), which locks the interface's parent
USB device.
This will lead to deadlock if another process already owns that lock
and tries to remove the interface, whether through a configuration
change or because the device has been disconnected. As part of the
removal procedure, device_del() waits for all ongoing sysfs attribute
callbacks to complete. But usb_deauthorize_interface() can't complete
until the device lock has been released, and the lock won't be
released until the removal has finished.
The mechanism provided by sysfs to prevent this kind of deadlock is
to use the sysfs_break_active_protection() function, which tells sysfs
not to wait for the attribute callback.
Reported-and-tested-by: Yue Sun <samsun1006219(a)gmail.com>
Reported-by: xingwei lee <xrivendell7(a)gmail.com>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/CAEkJfYO6jRVC8Tfrd_R=cjO0hguhrV31fDPrLrNO…
Fixes: 310d2b4124c0 ("usb: interface authorization: SysFS part of USB interface authorization")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/1c37eea1-9f56-4534-b9d8-b443438dc869@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index f98263e21c2a..d83231d6736a 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev,
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 80ba43e9f799cbdd83842fc27db667289b3150f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040151-singular-unfunded-b5a8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
80ba43e9f799 ("USB: core: Fix deadlock in usb_deauthorize_interface()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 80ba43e9f799cbdd83842fc27db667289b3150f5 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Tue, 12 Mar 2024 11:48:23 -0400
Subject: [PATCH] USB: core: Fix deadlock in usb_deauthorize_interface()
Among the attribute file callback routines in
drivers/usb/core/sysfs.c, the interface_authorized_store() function is
the only one which acquires a device lock on an ancestor device: It
calls usb_deauthorize_interface(), which locks the interface's parent
USB device.
This will lead to deadlock if another process already owns that lock
and tries to remove the interface, whether through a configuration
change or because the device has been disconnected. As part of the
removal procedure, device_del() waits for all ongoing sysfs attribute
callbacks to complete. But usb_deauthorize_interface() can't complete
until the device lock has been released, and the lock won't be
released until the removal has finished.
The mechanism provided by sysfs to prevent this kind of deadlock is
to use the sysfs_break_active_protection() function, which tells sysfs
not to wait for the attribute callback.
Reported-and-tested-by: Yue Sun <samsun1006219(a)gmail.com>
Reported-by: xingwei lee <xrivendell7(a)gmail.com>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/linux-usb/CAEkJfYO6jRVC8Tfrd_R=cjO0hguhrV31fDPrLrNO…
Fixes: 310d2b4124c0 ("usb: interface authorization: SysFS part of USB interface authorization")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/1c37eea1-9f56-4534-b9d8-b443438dc869@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index f98263e21c2a..d83231d6736a 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev,
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f9aa41130ac69d13a53ce2a153ca79c70d43f39c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040103-preheated-anthology-d288@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
f9aa41130ac6 ("usb: dwc3: Properly set system wakeup")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
40edb52298df ("usb: dwc3: avoid NULL access of usb_gadget_driver")
c560e76319a9 ("usb: dwc3: gadget: Fix START_TRANSFER link state check")
475e8be53d04 ("usb: dwc3: gadget: Check for disabled LPM quirk")
6f0764b5adea ("usb: dwc3: add a power supply for current control")
82c46b8ed9dc ("usb: dwc3: gadget: Introduce a DWC3 VBUS draw callback")
f580170f135a ("usb: dwc3: Add splitdisable quirk for Hisilicon Kirin Soc")
e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
c5a7092f4015 ("usb: dwc3: gadget: make starting isoc transfers more robust")
9af21dd6faeb ("usb: dwc3: Add support for DWC_usb32 IP")
8bb14308a869 ("usb: dwc3: core: Use role-switch default dr_mode")
d0550cd20e52 ("usb: dwc3: gadget: Do link recovery for SS and SSP")
d94ea5319813 ("usb: dwc3: gadget: Properly set maxpacket limit")
586f4335700f ("usb: dwc3: Fix GTXFIFOSIZ.TXFDEP macro name")
5eb5afb07853 ("usb: dwc3: use proper initializers for property entries")
9ba3aca8fe82 ("usb: dwc3: Disable phy suspend after power-on reset")
a0a465569b45 ("usb: dwc3: remove generic PHY calibrate() calls")
c09b73cfac2a ("usb: dwc3: don't set gadget->is_otg flag")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f9aa41130ac69d13a53ce2a153ca79c70d43f39c Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Date: Fri, 8 Mar 2024 02:40:25 +0000
Subject: [PATCH] usb: dwc3: Properly set system wakeup
If the device is configured for system wakeup, then make sure that the
xHCI driver knows about it and make sure to permit wakeup only at the
appropriate time.
For host mode, if the controller goes through the dwc3 code path, then a
child xHCI platform device is created. Make sure the platform device
also inherits the wakeup setting for xHCI to enable remote wakeup.
For device mode, make sure to disable system wakeup if no gadget driver
is bound. We may experience unwanted system wakeup due to the wakeup
signal from the controller PMU detecting connection/disconnection when
in low power (D3). E.g. In the case of Steam Deck, the PCI PME prevents
the system staying in suspend.
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Closes: https://lore.kernel.org/linux-usb/70a7692d-647c-9be7-00a6-06fc60f77294@igal…
Fixes: d07e8819a03d ("usb: dwc3: add xHCI Host support")
Signed-off-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Tested-by: Sanath S <Sanath.S(a)amd.com>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/667cfda7009b502e08462c8fb3f65841d103cc0a.17098654…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c0001..31684cdaaae3 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f7..7e80dd3d466b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3..4df2661f6675 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946..0204787df81d 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +189,9 @@ int dwc3_host_init(struct dwc3 *dwc)
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f9aa41130ac69d13a53ce2a153ca79c70d43f39c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040102-flatterer-enslave-672e@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f9aa41130ac6 ("usb: dwc3: Properly set system wakeup")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
40edb52298df ("usb: dwc3: avoid NULL access of usb_gadget_driver")
c560e76319a9 ("usb: dwc3: gadget: Fix START_TRANSFER link state check")
475e8be53d04 ("usb: dwc3: gadget: Check for disabled LPM quirk")
6f0764b5adea ("usb: dwc3: add a power supply for current control")
82c46b8ed9dc ("usb: dwc3: gadget: Introduce a DWC3 VBUS draw callback")
f580170f135a ("usb: dwc3: Add splitdisable quirk for Hisilicon Kirin Soc")
e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
c5a7092f4015 ("usb: dwc3: gadget: make starting isoc transfers more robust")
9af21dd6faeb ("usb: dwc3: Add support for DWC_usb32 IP")
8bb14308a869 ("usb: dwc3: core: Use role-switch default dr_mode")
d0550cd20e52 ("usb: dwc3: gadget: Do link recovery for SS and SSP")
d94ea5319813 ("usb: dwc3: gadget: Properly set maxpacket limit")
586f4335700f ("usb: dwc3: Fix GTXFIFOSIZ.TXFDEP macro name")
5eb5afb07853 ("usb: dwc3: use proper initializers for property entries")
9ba3aca8fe82 ("usb: dwc3: Disable phy suspend after power-on reset")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f9aa41130ac69d13a53ce2a153ca79c70d43f39c Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Date: Fri, 8 Mar 2024 02:40:25 +0000
Subject: [PATCH] usb: dwc3: Properly set system wakeup
If the device is configured for system wakeup, then make sure that the
xHCI driver knows about it and make sure to permit wakeup only at the
appropriate time.
For host mode, if the controller goes through the dwc3 code path, then a
child xHCI platform device is created. Make sure the platform device
also inherits the wakeup setting for xHCI to enable remote wakeup.
For device mode, make sure to disable system wakeup if no gadget driver
is bound. We may experience unwanted system wakeup due to the wakeup
signal from the controller PMU detecting connection/disconnection when
in low power (D3). E.g. In the case of Steam Deck, the PCI PME prevents
the system staying in suspend.
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Closes: https://lore.kernel.org/linux-usb/70a7692d-647c-9be7-00a6-06fc60f77294@igal…
Fixes: d07e8819a03d ("usb: dwc3: add xHCI Host support")
Signed-off-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Tested-by: Sanath S <Sanath.S(a)amd.com>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/667cfda7009b502e08462c8fb3f65841d103cc0a.17098654…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c0001..31684cdaaae3 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f7..7e80dd3d466b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3..4df2661f6675 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946..0204787df81d 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +189,9 @@ int dwc3_host_init(struct dwc3 *dwc)
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f9aa41130ac69d13a53ce2a153ca79c70d43f39c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040101-uprising-avid-b607@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
f9aa41130ac6 ("usb: dwc3: Properly set system wakeup")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
40edb52298df ("usb: dwc3: avoid NULL access of usb_gadget_driver")
c560e76319a9 ("usb: dwc3: gadget: Fix START_TRANSFER link state check")
475e8be53d04 ("usb: dwc3: gadget: Check for disabled LPM quirk")
6f0764b5adea ("usb: dwc3: add a power supply for current control")
82c46b8ed9dc ("usb: dwc3: gadget: Introduce a DWC3 VBUS draw callback")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f9aa41130ac69d13a53ce2a153ca79c70d43f39c Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Date: Fri, 8 Mar 2024 02:40:25 +0000
Subject: [PATCH] usb: dwc3: Properly set system wakeup
If the device is configured for system wakeup, then make sure that the
xHCI driver knows about it and make sure to permit wakeup only at the
appropriate time.
For host mode, if the controller goes through the dwc3 code path, then a
child xHCI platform device is created. Make sure the platform device
also inherits the wakeup setting for xHCI to enable remote wakeup.
For device mode, make sure to disable system wakeup if no gadget driver
is bound. We may experience unwanted system wakeup due to the wakeup
signal from the controller PMU detecting connection/disconnection when
in low power (D3). E.g. In the case of Steam Deck, the PCI PME prevents
the system staying in suspend.
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Closes: https://lore.kernel.org/linux-usb/70a7692d-647c-9be7-00a6-06fc60f77294@igal…
Fixes: d07e8819a03d ("usb: dwc3: add xHCI Host support")
Signed-off-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Tested-by: Sanath S <Sanath.S(a)amd.com>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/667cfda7009b502e08462c8fb3f65841d103cc0a.17098654…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c0001..31684cdaaae3 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f7..7e80dd3d466b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3..4df2661f6675 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946..0204787df81d 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +189,9 @@ int dwc3_host_init(struct dwc3 *dwc)
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f9aa41130ac69d13a53ce2a153ca79c70d43f39c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040159-entrench-bogged-287d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
f9aa41130ac6 ("usb: dwc3: Properly set system wakeup")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
63c4c320ccf7 ("usb: dwc3: gadget: Check for L1/L2/U3 for Start Transfer")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f9aa41130ac69d13a53ce2a153ca79c70d43f39c Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Date: Fri, 8 Mar 2024 02:40:25 +0000
Subject: [PATCH] usb: dwc3: Properly set system wakeup
If the device is configured for system wakeup, then make sure that the
xHCI driver knows about it and make sure to permit wakeup only at the
appropriate time.
For host mode, if the controller goes through the dwc3 code path, then a
child xHCI platform device is created. Make sure the platform device
also inherits the wakeup setting for xHCI to enable remote wakeup.
For device mode, make sure to disable system wakeup if no gadget driver
is bound. We may experience unwanted system wakeup due to the wakeup
signal from the controller PMU detecting connection/disconnection when
in low power (D3). E.g. In the case of Steam Deck, the PCI PME prevents
the system staying in suspend.
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Closes: https://lore.kernel.org/linux-usb/70a7692d-647c-9be7-00a6-06fc60f77294@igal…
Fixes: d07e8819a03d ("usb: dwc3: add xHCI Host support")
Signed-off-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Tested-by: Sanath S <Sanath.S(a)amd.com>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/667cfda7009b502e08462c8fb3f65841d103cc0a.17098654…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c0001..31684cdaaae3 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f7..7e80dd3d466b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3..4df2661f6675 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946..0204787df81d 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +189,9 @@ int dwc3_host_init(struct dwc3 *dwc)
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f9aa41130ac69d13a53ce2a153ca79c70d43f39c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024040158-headrest-purge-7899@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
f9aa41130ac6 ("usb: dwc3: Properly set system wakeup")
047161686b81 ("usb: dwc3: Add remote wakeup handling")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f9aa41130ac69d13a53ce2a153ca79c70d43f39c Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Date: Fri, 8 Mar 2024 02:40:25 +0000
Subject: [PATCH] usb: dwc3: Properly set system wakeup
If the device is configured for system wakeup, then make sure that the
xHCI driver knows about it and make sure to permit wakeup only at the
appropriate time.
For host mode, if the controller goes through the dwc3 code path, then a
child xHCI platform device is created. Make sure the platform device
also inherits the wakeup setting for xHCI to enable remote wakeup.
For device mode, make sure to disable system wakeup if no gadget driver
is bound. We may experience unwanted system wakeup due to the wakeup
signal from the controller PMU detecting connection/disconnection when
in low power (D3). E.g. In the case of Steam Deck, the PCI PME prevents
the system staying in suspend.
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Closes: https://lore.kernel.org/linux-usb/70a7692d-647c-9be7-00a6-06fc60f77294@igal…
Fixes: d07e8819a03d ("usb: dwc3: add xHCI Host support")
Signed-off-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Tested-by: Sanath S <Sanath.S(a)amd.com>
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/667cfda7009b502e08462c8fb3f65841d103cc0a.17098654…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c0001..31684cdaaae3 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f7..7e80dd3d466b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3..4df2661f6675 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946..0204787df81d 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +189,9 @@ int dwc3_host_init(struct dwc3 *dwc)
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0f4a1e80989aca185d955fcd791d7750082044a2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024040110-nimble-unfair-399c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests")
428080c9b19b ("x86/sev: Move early startup code into .head.text section")
b82a8dbd3d2f ("x86/coco: Disable 32-bit emulation by default on TDX and SEV")
ed766c26119c ("Merge tag 'x86-entry-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f4a1e80989aca185d955fcd791d7750082044a2 Mon Sep 17 00:00:00 2001
From: Kevin Loughlin <kevinloughlin(a)google.com>
Date: Wed, 13 Mar 2024 12:15:46 +0000
Subject: [PATCH] x86/sev: Skip ROM range scans and validation for SEV-SNP
guests
SEV-SNP requires encrypted memory to be validated before access.
Because the ROM memory range is not part of the e820 table, it is not
pre-validated by the BIOS. Therefore, if a SEV-SNP guest kernel wishes
to access this range, the guest must first validate the range.
The current SEV-SNP code does indeed scan the ROM range during early
boot and thus attempts to validate the ROM range in probe_roms().
However, this behavior is neither sufficient nor necessary for the
following reasons:
* With regards to sufficiency, if EFI_CONFIG_TABLES are not enabled and
CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK is set, the kernel will
attempt to access the memory at SMBIOS_ENTRY_POINT_SCAN_START (which
falls in the ROM range) prior to validation.
For example, Project Oak Stage 0 provides a minimal guest firmware
that currently meets these configuration conditions, meaning guests
booting atop Oak Stage 0 firmware encounter a problematic call chain
during dmi_setup() -> dmi_scan_machine() that results in a crash
during boot if SEV-SNP is enabled.
* With regards to necessity, SEV-SNP guests generally read garbage
(which changes across boots) from the ROM range, meaning these scans
are unnecessary. The guest reads garbage because the legacy ROM range
is unencrypted data but is accessed via an encrypted PMD during early
boot (where the PMD is marked as encrypted due to potentially mapping
actually-encrypted data in other PMD-contained ranges).
In one exceptional case, EISA probing treats the ROM range as
unencrypted data, which is inconsistent with other probing.
Continuing to allow SEV-SNP guests to use garbage and to inconsistently
classify ROM range encryption status can trigger undesirable behavior.
For instance, if garbage bytes appear to be a valid signature, memory
may be unnecessarily reserved for the ROM range. Future code or other
use cases may result in more problematic (arbitrary) behavior that
should be avoided.
While one solution would be to overhaul the early PMD mapping to always
treat the ROM region of the PMD as unencrypted, SEV-SNP guests do not
currently rely on data from the ROM region during early boot (and even
if they did, they would be mostly relying on garbage data anyways).
As a simpler solution, skip the ROM range scans (and the otherwise-
necessary range validation) during SEV-SNP guest early boot. The
potential SEV-SNP guest crash due to lack of ROM range validation is
thus avoided by simply not accessing the ROM range.
In most cases, skip the scans by overriding problematic x86_init
functions during sme_early_init() to SNP-safe variants, which can be
likened to x86_init overrides done for other platforms (ex: Xen); such
overrides also avoid the spread of cc_platform_has() checks throughout
the tree.
In the exceptional EISA case, still use cc_platform_has() for the
simplest change, given (1) checks for guest type (ex: Xen domain status)
are already performed here, and (2) these checks occur in a subsys
initcall instead of an x86_init function.
[ bp: Massage commit message, remove "we"s. ]
Fixes: 9704c07bf9f7 ("x86/kernel: Validate ROM memory before accessing when SEV-SNP is active")
Signed-off-by: Kevin Loughlin <kevinloughlin(a)google.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20240313121546.2964854-1-kevinloughlin@google.com
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 9477b4053bce..07e125f32528 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -218,12 +218,12 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
@@ -244,12 +244,12 @@ static inline void __init
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
+static inline void snp_dmi_setup(void) { }
static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b89b40f250e6..6149eabe200f 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -30,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e963344b0449..53935b4d62e3 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -2,6 +2,7 @@
/*
* EISA specific code
*/
+#include <linux/cc_platform.h>
#include <linux/ioport.h>
#include <linux/eisa.h>
#include <linux/io.h>
@@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void)
{
void __iomem *p;
- if (xen_pv_domain() && !xen_initial_domain())
+ if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return 0;
p = ioremap(0x0FFFD9, 4);
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 319fef37d9dc..cc2c34ba7228 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -203,16 +203,6 @@ void __init probe_roms(void)
unsigned char c;
int i;
- /*
- * The ROM memory range is not part of the e820 table and is therefore not
- * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
- * memory, and SNP requires encrypted memory to be validated before access.
- * Do that here.
- */
- snp_prep_memory(video_rom_resource.start,
- ((system_rom_resource.end + 1) - video_rom_resource.start),
- SNP_PAGE_STATE_PRIVATE);
-
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ef206500ed6f..0109e6c510e0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -9,7 +9,6 @@
#include <linux/console.h>
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
@@ -902,7 +901,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
reserve_ibft_region();
- dmi_setup();
+ x86_init.resources.dmi_setup();
/*
* VMware detection requires dmi to be available, so this
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b59b09c2f284..7e1e63cc48e6 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
@@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
-{
- unsigned long vaddr, npages;
-
- vaddr = (unsigned long)__va(paddr);
- npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
-
- if (op == SNP_PAGE_STATE_PRIVATE)
- early_snp_set_memory_private(vaddr, paddr, npages);
- else if (op == SNP_PAGE_STATE_SHARED)
- early_snp_set_memory_shared(vaddr, paddr, npages);
- else
- WARN(1, "invalid memory op %d\n", op);
-}
-
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
@@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
+/*
+ * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
+ * enabled, as the alternative (fallback) logic for DMI probing in the legacy
+ * ROM region can cause a crash since this region is not pre-validated.
+ */
+void __init snp_dmi_setup(void)
+{
+ if (efi_enabled(EFI_CONFIG_TABLES))
+ dmi_setup();
+}
+
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a42830dc151b..d5dc5a92635a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -3,6 +3,7 @@
*
* For licencing details see kernel-base/COPYING
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
@@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = {
.probe_roms = probe_roms,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = e820__memory_setup_default,
+ .dmi_setup = dmi_setup,
},
.mpparse = {
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 70b91de2e053..422602f6039b 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -492,6 +492,24 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 0f4a1e80989aca185d955fcd791d7750082044a2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024040108-dreadful-pumice-e64d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests")
428080c9b19b ("x86/sev: Move early startup code into .head.text section")
b82a8dbd3d2f ("x86/coco: Disable 32-bit emulation by default on TDX and SEV")
ed766c26119c ("Merge tag 'x86-entry-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f4a1e80989aca185d955fcd791d7750082044a2 Mon Sep 17 00:00:00 2001
From: Kevin Loughlin <kevinloughlin(a)google.com>
Date: Wed, 13 Mar 2024 12:15:46 +0000
Subject: [PATCH] x86/sev: Skip ROM range scans and validation for SEV-SNP
guests
SEV-SNP requires encrypted memory to be validated before access.
Because the ROM memory range is not part of the e820 table, it is not
pre-validated by the BIOS. Therefore, if a SEV-SNP guest kernel wishes
to access this range, the guest must first validate the range.
The current SEV-SNP code does indeed scan the ROM range during early
boot and thus attempts to validate the ROM range in probe_roms().
However, this behavior is neither sufficient nor necessary for the
following reasons:
* With regards to sufficiency, if EFI_CONFIG_TABLES are not enabled and
CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK is set, the kernel will
attempt to access the memory at SMBIOS_ENTRY_POINT_SCAN_START (which
falls in the ROM range) prior to validation.
For example, Project Oak Stage 0 provides a minimal guest firmware
that currently meets these configuration conditions, meaning guests
booting atop Oak Stage 0 firmware encounter a problematic call chain
during dmi_setup() -> dmi_scan_machine() that results in a crash
during boot if SEV-SNP is enabled.
* With regards to necessity, SEV-SNP guests generally read garbage
(which changes across boots) from the ROM range, meaning these scans
are unnecessary. The guest reads garbage because the legacy ROM range
is unencrypted data but is accessed via an encrypted PMD during early
boot (where the PMD is marked as encrypted due to potentially mapping
actually-encrypted data in other PMD-contained ranges).
In one exceptional case, EISA probing treats the ROM range as
unencrypted data, which is inconsistent with other probing.
Continuing to allow SEV-SNP guests to use garbage and to inconsistently
classify ROM range encryption status can trigger undesirable behavior.
For instance, if garbage bytes appear to be a valid signature, memory
may be unnecessarily reserved for the ROM range. Future code or other
use cases may result in more problematic (arbitrary) behavior that
should be avoided.
While one solution would be to overhaul the early PMD mapping to always
treat the ROM region of the PMD as unencrypted, SEV-SNP guests do not
currently rely on data from the ROM region during early boot (and even
if they did, they would be mostly relying on garbage data anyways).
As a simpler solution, skip the ROM range scans (and the otherwise-
necessary range validation) during SEV-SNP guest early boot. The
potential SEV-SNP guest crash due to lack of ROM range validation is
thus avoided by simply not accessing the ROM range.
In most cases, skip the scans by overriding problematic x86_init
functions during sme_early_init() to SNP-safe variants, which can be
likened to x86_init overrides done for other platforms (ex: Xen); such
overrides also avoid the spread of cc_platform_has() checks throughout
the tree.
In the exceptional EISA case, still use cc_platform_has() for the
simplest change, given (1) checks for guest type (ex: Xen domain status)
are already performed here, and (2) these checks occur in a subsys
initcall instead of an x86_init function.
[ bp: Massage commit message, remove "we"s. ]
Fixes: 9704c07bf9f7 ("x86/kernel: Validate ROM memory before accessing when SEV-SNP is active")
Signed-off-by: Kevin Loughlin <kevinloughlin(a)google.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20240313121546.2964854-1-kevinloughlin@google.com
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 9477b4053bce..07e125f32528 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -218,12 +218,12 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
@@ -244,12 +244,12 @@ static inline void __init
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
+static inline void snp_dmi_setup(void) { }
static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b89b40f250e6..6149eabe200f 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -30,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e963344b0449..53935b4d62e3 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -2,6 +2,7 @@
/*
* EISA specific code
*/
+#include <linux/cc_platform.h>
#include <linux/ioport.h>
#include <linux/eisa.h>
#include <linux/io.h>
@@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void)
{
void __iomem *p;
- if (xen_pv_domain() && !xen_initial_domain())
+ if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return 0;
p = ioremap(0x0FFFD9, 4);
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 319fef37d9dc..cc2c34ba7228 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -203,16 +203,6 @@ void __init probe_roms(void)
unsigned char c;
int i;
- /*
- * The ROM memory range is not part of the e820 table and is therefore not
- * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
- * memory, and SNP requires encrypted memory to be validated before access.
- * Do that here.
- */
- snp_prep_memory(video_rom_resource.start,
- ((system_rom_resource.end + 1) - video_rom_resource.start),
- SNP_PAGE_STATE_PRIVATE);
-
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ef206500ed6f..0109e6c510e0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -9,7 +9,6 @@
#include <linux/console.h>
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
@@ -902,7 +901,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
reserve_ibft_region();
- dmi_setup();
+ x86_init.resources.dmi_setup();
/*
* VMware detection requires dmi to be available, so this
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b59b09c2f284..7e1e63cc48e6 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
@@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
-{
- unsigned long vaddr, npages;
-
- vaddr = (unsigned long)__va(paddr);
- npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
-
- if (op == SNP_PAGE_STATE_PRIVATE)
- early_snp_set_memory_private(vaddr, paddr, npages);
- else if (op == SNP_PAGE_STATE_SHARED)
- early_snp_set_memory_shared(vaddr, paddr, npages);
- else
- WARN(1, "invalid memory op %d\n", op);
-}
-
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
@@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
+/*
+ * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
+ * enabled, as the alternative (fallback) logic for DMI probing in the legacy
+ * ROM region can cause a crash since this region is not pre-validated.
+ */
+void __init snp_dmi_setup(void)
+{
+ if (efi_enabled(EFI_CONFIG_TABLES))
+ dmi_setup();
+}
+
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a42830dc151b..d5dc5a92635a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -3,6 +3,7 @@
*
* For licencing details see kernel-base/COPYING
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
@@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = {
.probe_roms = probe_roms,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = e820__memory_setup_default,
+ .dmi_setup = dmi_setup,
},
.mpparse = {
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 70b91de2e053..422602f6039b 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -492,6 +492,24 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 0f4a1e80989aca185d955fcd791d7750082044a2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040105-swarm-prenatal-5d72@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests")
428080c9b19b ("x86/sev: Move early startup code into .head.text section")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0f4a1e80989aca185d955fcd791d7750082044a2 Mon Sep 17 00:00:00 2001
From: Kevin Loughlin <kevinloughlin(a)google.com>
Date: Wed, 13 Mar 2024 12:15:46 +0000
Subject: [PATCH] x86/sev: Skip ROM range scans and validation for SEV-SNP
guests
SEV-SNP requires encrypted memory to be validated before access.
Because the ROM memory range is not part of the e820 table, it is not
pre-validated by the BIOS. Therefore, if a SEV-SNP guest kernel wishes
to access this range, the guest must first validate the range.
The current SEV-SNP code does indeed scan the ROM range during early
boot and thus attempts to validate the ROM range in probe_roms().
However, this behavior is neither sufficient nor necessary for the
following reasons:
* With regards to sufficiency, if EFI_CONFIG_TABLES are not enabled and
CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK is set, the kernel will
attempt to access the memory at SMBIOS_ENTRY_POINT_SCAN_START (which
falls in the ROM range) prior to validation.
For example, Project Oak Stage 0 provides a minimal guest firmware
that currently meets these configuration conditions, meaning guests
booting atop Oak Stage 0 firmware encounter a problematic call chain
during dmi_setup() -> dmi_scan_machine() that results in a crash
during boot if SEV-SNP is enabled.
* With regards to necessity, SEV-SNP guests generally read garbage
(which changes across boots) from the ROM range, meaning these scans
are unnecessary. The guest reads garbage because the legacy ROM range
is unencrypted data but is accessed via an encrypted PMD during early
boot (where the PMD is marked as encrypted due to potentially mapping
actually-encrypted data in other PMD-contained ranges).
In one exceptional case, EISA probing treats the ROM range as
unencrypted data, which is inconsistent with other probing.
Continuing to allow SEV-SNP guests to use garbage and to inconsistently
classify ROM range encryption status can trigger undesirable behavior.
For instance, if garbage bytes appear to be a valid signature, memory
may be unnecessarily reserved for the ROM range. Future code or other
use cases may result in more problematic (arbitrary) behavior that
should be avoided.
While one solution would be to overhaul the early PMD mapping to always
treat the ROM region of the PMD as unencrypted, SEV-SNP guests do not
currently rely on data from the ROM region during early boot (and even
if they did, they would be mostly relying on garbage data anyways).
As a simpler solution, skip the ROM range scans (and the otherwise-
necessary range validation) during SEV-SNP guest early boot. The
potential SEV-SNP guest crash due to lack of ROM range validation is
thus avoided by simply not accessing the ROM range.
In most cases, skip the scans by overriding problematic x86_init
functions during sme_early_init() to SNP-safe variants, which can be
likened to x86_init overrides done for other platforms (ex: Xen); such
overrides also avoid the spread of cc_platform_has() checks throughout
the tree.
In the exceptional EISA case, still use cc_platform_has() for the
simplest change, given (1) checks for guest type (ex: Xen domain status)
are already performed here, and (2) these checks occur in a subsys
initcall instead of an x86_init function.
[ bp: Massage commit message, remove "we"s. ]
Fixes: 9704c07bf9f7 ("x86/kernel: Validate ROM memory before accessing when SEV-SNP is active")
Signed-off-by: Kevin Loughlin <kevinloughlin(a)google.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20240313121546.2964854-1-kevinloughlin@google.com
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 9477b4053bce..07e125f32528 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -218,12 +218,12 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
@@ -244,12 +244,12 @@ static inline void __init
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
+static inline void snp_dmi_setup(void) { }
static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b89b40f250e6..6149eabe200f 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -30,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e963344b0449..53935b4d62e3 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -2,6 +2,7 @@
/*
* EISA specific code
*/
+#include <linux/cc_platform.h>
#include <linux/ioport.h>
#include <linux/eisa.h>
#include <linux/io.h>
@@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void)
{
void __iomem *p;
- if (xen_pv_domain() && !xen_initial_domain())
+ if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return 0;
p = ioremap(0x0FFFD9, 4);
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 319fef37d9dc..cc2c34ba7228 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -203,16 +203,6 @@ void __init probe_roms(void)
unsigned char c;
int i;
- /*
- * The ROM memory range is not part of the e820 table and is therefore not
- * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
- * memory, and SNP requires encrypted memory to be validated before access.
- * Do that here.
- */
- snp_prep_memory(video_rom_resource.start,
- ((system_rom_resource.end + 1) - video_rom_resource.start),
- SNP_PAGE_STATE_PRIVATE);
-
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ef206500ed6f..0109e6c510e0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -9,7 +9,6 @@
#include <linux/console.h>
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
@@ -902,7 +901,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
reserve_ibft_region();
- dmi_setup();
+ x86_init.resources.dmi_setup();
/*
* VMware detection requires dmi to be available, so this
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b59b09c2f284..7e1e63cc48e6 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
@@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
-{
- unsigned long vaddr, npages;
-
- vaddr = (unsigned long)__va(paddr);
- npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
-
- if (op == SNP_PAGE_STATE_PRIVATE)
- early_snp_set_memory_private(vaddr, paddr, npages);
- else if (op == SNP_PAGE_STATE_SHARED)
- early_snp_set_memory_shared(vaddr, paddr, npages);
- else
- WARN(1, "invalid memory op %d\n", op);
-}
-
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
@@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
+/*
+ * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
+ * enabled, as the alternative (fallback) logic for DMI probing in the legacy
+ * ROM region can cause a crash since this region is not pre-validated.
+ */
+void __init snp_dmi_setup(void)
+{
+ if (efi_enabled(EFI_CONFIG_TABLES))
+ dmi_setup();
+}
+
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a42830dc151b..d5dc5a92635a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -3,6 +3,7 @@
*
* For licencing details see kernel-base/COPYING
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
@@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = {
.probe_roms = probe_roms,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = e820__memory_setup_default,
+ .dmi_setup = dmi_setup,
},
.mpparse = {
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 70b91de2e053..422602f6039b 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -492,6 +492,24 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
From: Yangxi Xiang <xyangxi5(a)gmail.com>
[ upstream commit 39cdb68c64d8 ]
A memory overlapping copy occurs when deleting a long line. This memory
overlapping copy can cause data corruption when scr_memcpyw is optimized
to memcpy because memcpy does not ensure its behavior if the destination
buffer overlaps with the source buffer. The line buffer is not always
broken, because the memcpy utilizes the hardware acceleration, whose
result is not deterministic.
Fix this problem by replacing scr_memcpyw with scr_memmovew.
Fixes: 81732c3b2fed ("tty vt: Fix line garbage in virtual console on command line edition")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Yangxi Xiang <xyangxi5(a)gmail.com>
Link: https://lore.kernel.org/r/20220628093322.5688-1-xyangxi5@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[ KN: vc_state is not a separate structure in LTS v4.19, v5.4. Adjusted the patch
accordingly by using vc_x instead of state.x for backport. ]
Signed-off-by: Kuntal Nayak <kuntal.nayak(a)broadcom.com>
---
drivers/tty/vt/vt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index c9083d853..a351e264d 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr)
unsigned short *p = (unsigned short *) vc->vc_pos;
vc_uniscr_delete(vc, nr);
- scr_memcpyw(p, p + nr, (vc->vc_cols - vc->vc_x - nr) * 2);
+ scr_memmovew(p, p + nr, (vc->vc_cols - vc->vc_x - nr) * 2);
scr_memsetw(p + vc->vc_cols - vc->vc_x - nr, vc->vc_video_erase_char,
nr * 2);
vc->vc_need_wrap = 0;
--
2.39.0
The length of Physical Address in General Media Event Record/DRAM Event
Record is 64-bit, so the field mask should be defined as such length.
Otherwise, this causes cxl_general_media and cxl_dram tracepoints to
mask off the upper-32-bits of DPA addresses. The cxl_poison event is
unaffected.
If userspace was doing its own DPA-to-HPA translation this could lead to
incorrect page retirement decisions, but there is no known consumer
(like rasdaemon) of this event today.
Fixes: d54a531a430b ("cxl/mem: Trace General Media Event Record")
Cc: <stable(a)vger.kernel.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Davidlohr Bueso <dave(a)stgolabs.net>
Cc: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Signed-off-by: Shiyang Ruan <ruansy.fnst(a)fujitsu.com>
---
drivers/cxl/core/trace.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index e5f13260fc52..e2d1f296df97 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -253,11 +253,11 @@ TRACE_EVENT(cxl_generic_event,
* DRAM Event Record
* CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
*/
-#define CXL_DPA_FLAGS_MASK 0x3F
+#define CXL_DPA_FLAGS_MASK 0x3FULL
#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK)
-#define CXL_DPA_VOLATILE BIT(0)
-#define CXL_DPA_NOT_REPAIRABLE BIT(1)
+#define CXL_DPA_VOLATILE BIT_ULL(0)
+#define CXL_DPA_NOT_REPAIRABLE BIT_ULL(1)
#define show_dpa_flags(flags) __print_flags(flags, "|", \
{ CXL_DPA_VOLATILE, "VOLATILE" }, \
{ CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \
--
2.34.1
After the commit d2689b6a86b9 ("net: usb: ax88179_178a: avoid two
consecutive device resets"), reset is not executed from bind operation and
mac address is not read from the device registers or the devicetree at that
moment. Since the check to configure if the assigned mac address is random
or not for the interface, happens after the bind operation from
usbnet_probe, the interface keeps configured as random address, although the
address is correctly read and set during open operation (the only reset
now).
In order to keep only one reset for the device and to avoid the interface
always configured as random address, after reset, configure correctly the
suitable field from the driver, if the mac address is read successfully from
the device registers or the devicetree.
In addition, if mac address can not be read from the driver, a random
address is configured again, so it is not necessary to call
eth_hw_addr_random from here. Indeed, in this situation, when reset was
also executed from bind, this was invalidating the check to configure if the
assigned mac address for the interface was random or not.
cc: stable(a)vger.kernel.org # 6.6+
Fixes: d2689b6a86b9 ("net: usb: ax88179_178a: avoid two consecutive device resets")
Reported-by: Dave Stevenson <dave.stevenson(a)raspberrypi.com>
Signed-off-by: Jose Ignacio Tornos Martinez <jtornosm(a)redhat.com>
---
drivers/net/usb/ax88179_178a.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 88e084534853..d2324cc02461 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1273,10 +1273,9 @@ static void ax88179_get_mac_addr(struct usbnet *dev)
if (is_valid_ether_addr(mac)) {
eth_hw_addr_set(dev->net, mac);
- } else {
+ dev->net->addr_assign_type = NET_ADDR_PERM;
+ } else
netdev_info(dev->net, "invalid MAC address, using random\n");
- eth_hw_addr_random(dev->net);
- }
ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
dev->net->dev_addr);
--
2.44.0
Hello,
I noticed a regression with the mainline kernel pre-compiled by EPEL.
I have just tried linux-6.9-rc1.tar.gz from kernel.org, and it still
misbehaves.
The default setup: a laptop is connected to a dock, Dell WD22TB4, via
a USB-C cable. The dock is connected to an external monitor via a
Display Port cable. With a "good" kernel everything works. With a
"broken" kernel, the external monitor is still correctly identified by
the system, and is shown as enabled in plasma systemsettings. The
system also behaves like the monitor is working, for example, one can
move the mouse pointer off the laptop screen. However the external
monitor screen stays black, and it eventually goes to sleep.
Everything worked with EPEL mainline kernels up to and including
kernel-ml-6.7.9-1.el9.elrepo.x86_64
The breakage is observed in
kernel-ml-6.8.1-1.el9.elrepo.x86_64
kernel-ml-6.8.2-1.el9.elrepo.x86_64
linux-6.9-rc1.tar.gz from kernel.org (with olddefconfig)
Other tests: using an HDMI cable instead of the Display Port cable
between the monitor and the dock does not change things, black screen
with the newer kernels.
Using a small HDMI-to-USB-C adapter instead of the dock results in a
working system, even with the newer kernels. So the breakage appears
to be specific to the Dell WD22TB4 dock.
Operating System: AlmaLinux 9.3 (Shamrock Pampas Cat)
uname -mi: x86_64 x86_64
Laptop: Dell Precision 5470/02RK6V
lsusb |grep dock
Bus 003 Device 007: ID 413c:b06e Dell Computer Corp. Dell dock
Bus 003 Device 008: ID 413c:b06f Dell Computer Corp. Dell dock
Bus 003 Device 006: ID 0bda:5413 Realtek Semiconductor Corp. Dell dock
Bus 003 Device 005: ID 0bda:5487 Realtek Semiconductor Corp. Dell dock
Bus 002 Device 004: ID 0bda:0413 Realtek Semiconductor Corp. Dell dock
Bus 002 Device 003: ID 0bda:0487 Realtek Semiconductor Corp. Dell dock
dmesg and kernel config are attached to
https://bugzilla.kernel.org/show_bug.cgi?id=218663
#regzbot introduced: v6.7.9..v6.8.1
Andrei
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x cd5432c712351a3d5f82512908f5febfca946ca6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033144-armless-overact-1e1d@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd5432c712351a3d5f82512908f5febfca946ca6 Mon Sep 17 00:00:00 2001
From: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Date: Thu, 7 Mar 2024 02:08:14 +0800
Subject: [PATCH] USB: UAS: return ENODEV when submit urbs fail with device not
attached
In the scenario of entering hibernation with udisk in the system, if the
udisk was gone or resume fail in the thaw phase of hibernation. Its state
will be set to NOTATTACHED. At this point, usb_hub_wq was already frozen
and cannot handle the disconnect event. Next, in the poweroff phase of
hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when poweroff this scsi device, which will cause uas_submit_urbs to be
called to submit URB for sense/data/cmd pipe. However, these URBs will
submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs
will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead
the SCSI layer go into an ugly loop and system fail to go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device
poweroff fail and system shutdown instead of entering hibernation.
To fix this issue, let uas_submit_urbs to return original generic error
when submitting URB failed. At the same time, we need to translate -ENODEV
to DID_NO_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Link: https://lore.kernel.org/r/20240306180814.4897-1-WeitaoWang-oc@zhaoxin.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 71ace274761f..08953f0d4532 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x cd5432c712351a3d5f82512908f5febfca946ca6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033142-wisplike-kindle-7914@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd5432c712351a3d5f82512908f5febfca946ca6 Mon Sep 17 00:00:00 2001
From: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Date: Thu, 7 Mar 2024 02:08:14 +0800
Subject: [PATCH] USB: UAS: return ENODEV when submit urbs fail with device not
attached
In the scenario of entering hibernation with udisk in the system, if the
udisk was gone or resume fail in the thaw phase of hibernation. Its state
will be set to NOTATTACHED. At this point, usb_hub_wq was already frozen
and cannot handle the disconnect event. Next, in the poweroff phase of
hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when poweroff this scsi device, which will cause uas_submit_urbs to be
called to submit URB for sense/data/cmd pipe. However, these URBs will
submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs
will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead
the SCSI layer go into an ugly loop and system fail to go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device
poweroff fail and system shutdown instead of entering hibernation.
To fix this issue, let uas_submit_urbs to return original generic error
when submitting URB failed. At the same time, we need to translate -ENODEV
to DID_NO_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Link: https://lore.kernel.org/r/20240306180814.4897-1-WeitaoWang-oc@zhaoxin.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 71ace274761f..08953f0d4532 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x cd5432c712351a3d5f82512908f5febfca946ca6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033141-prototype-camera-04df@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd5432c712351a3d5f82512908f5febfca946ca6 Mon Sep 17 00:00:00 2001
From: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Date: Thu, 7 Mar 2024 02:08:14 +0800
Subject: [PATCH] USB: UAS: return ENODEV when submit urbs fail with device not
attached
In the scenario of entering hibernation with udisk in the system, if the
udisk was gone or resume fail in the thaw phase of hibernation. Its state
will be set to NOTATTACHED. At this point, usb_hub_wq was already frozen
and cannot handle the disconnect event. Next, in the poweroff phase of
hibernation, SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when poweroff this scsi device, which will cause uas_submit_urbs to be
called to submit URB for sense/data/cmd pipe. However, these URBs will
submit fail as device was set to NOTATTACHED state. Then, uas_submit_urbs
will return a value SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will lead
the SCSI layer go into an ugly loop and system fail to go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to SCSI layer will cause device
poweroff fail and system shutdown instead of entering hibernation.
To fix this issue, let uas_submit_urbs to return original generic error
when submitting URB failed. At the same time, we need to translate -ENODEV
to DID_NO_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Link: https://lore.kernel.org/r/20240306180814.4897-1-WeitaoWang-oc@zhaoxin.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 71ace274761f..08953f0d4532 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x cd5432c712351a3d5f82512908f5febfca946ca6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033140-goon-residence-7f8c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cd5432c712351a3d5f82512908f5febfca946ca6 Mon Sep 17 00:00:00 2001
From: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Date: Thu, 7 Mar 2024 02:08:14 +0800
Subject: [PATCH] USB: UAS: return ENODEV when submit urbs fail with device not
attached
In the scenario of entering hibernation with a udisk in the system, if the
udisk is gone or fails to resume in the thaw phase of hibernation, its state
will be set to NOTATTACHED. At this point, usb_hub_wq is already frozen
and cannot handle the disconnect event. Next, in the poweroff phase of
hibernation, a SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when powering off this SCSI device, which will cause uas_submit_urbs to be
called to submit URBs for the sense/data/cmd pipes. However, submitting these
URBs will fail because the device is in the NOTATTACHED state. Then, uas_submit_urbs
will return SCSI_MLQUEUE_DEVICE_BUSY to the caller. That will make
the SCSI layer go into an ugly loop and the system fail to enter hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to the SCSI layer will cause the
device poweroff to fail and the system to shut down instead of entering hibernation.
To fix this issue, let uas_submit_urbs return the original generic error
when submitting a URB fails. At the same time, we need to translate -ENODEV
to DID_NO_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
Link: https://lore.kernel.org/r/20240306180814.4897-1-WeitaoWang-oc@zhaoxin.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 71ace274761f..08953f0d4532 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
From: Laine Taffin Altman <alexanderaltman(a)me.com>
It is not enough for a type to be a ZST to guarantee that zeroed memory
is a valid value for it; it must also be inhabited. Creating a value of
an uninhabited type, ZST or no, is immediate UB.
Thus remove the implementation of `Zeroable` for `Infallible`, since
that type is not inhabited.
Cc: stable(a)vger.kernel.org
Fixes: 38cde0bd7b67 ("rust: init: add `Zeroable` trait and `init::zeroed` function")
Closes: https://github.com/Rust-for-Linux/pinned-init/pull/13
Signed-off-by: Laine Taffin Altman <alexanderaltman(a)me.com>
Signed-off-by: Benno Lossin <benno.lossin(a)proton.me>
---
rust/kernel/init.rs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 424257284d16..538e03cfc84a 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -1292,8 +1292,8 @@ macro_rules! impl_zeroable {
i8, i16, i32, i64, i128, isize,
f32, f64,
- // SAFETY: These are ZSTs, there is nothing to zero.
- {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, Infallible, (),
+ // SAFETY: These are inhabited ZSTs, there is nothing to zero and a valid value exists.
+ {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, (),
// SAFETY: Type is allowed to take any value, including all zeros.
{<T>} MaybeUninit<T>,
base-commit: 768409cff6cc89fe1194da880537a09857b6e4db
--
2.42.0
Hi,
Please backport the following commit back to the Linux stable kernels
6.6, 6.7 and 6.8:
commit 34a956739d295de6010cdaafeed698ccbba87ea4
Author: Ezra Buehler <ezra.buehler(a)husqvarnagroup.com>
Date: Thu Jan 25 22:01:07 2024 +0200
mtd: spinand: Add support for 5-byte IDs
E.g. ESMT chips will return an identification code with a length of 5
bytes. In order to prevent ambiguity, flash chips would actually need to
return IDs that are up to 17 or more bytes long due to JEDEC's
continuation scheme. I understand that if a manufacturer ID is located
in bank N of JEDEC's database (there are currently 16 banks), N - 1
continuation codes (7Fh) need to be added to the identification code
(comprising the manufacturer ID and device ID). However, most flash chip
manufacturers don't seem to implement this (correctly).
Signed-off-by: Ezra Buehler <ezra.buehler(a)husqvarnagroup.com>
Reviewed-by: Martin Kurbanov <mmkurbanov(a)salutedevices.com>
Tested-by: Martin Kurbanov <mmkurbanov(a)salutedevices.com>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Link:
https://lore.kernel.org/linux-mtd/20240125200108.24374-2-ezra@easyb.ch
This will fix a regression introduced between Linux kernel 6.6.22 and
6.6.23 in OpenWrt. The esmt NAND flash is not detected any more:
<3>[ 0.885607] spi-nand spi0.0: unknown raw ID c8017f7f
<4>[ 0.890852] spi-nand: probe of spi0.0 failed with error -524
See: https://github.com/openwrt/openwrt/pull/14992
The following commit was backported to 6.6.22, but the commit it depends
on was not backported.
commit 4bd14b2fd8a83a2f5220ba4ef323f741e11bfdfd
Author: Ezra Buehler <ezra.buehler(a)husqvarnagroup.com>
Date: Thu Jan 25 22:01:08 2024 +0200
mtd: spinand: esmt: Extend IDs to 5 bytes
Hauke
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033018-corsage-bacteria-dd67@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 25 Mar 2024 19:57:38 +0200
Subject: [PATCH] drm/i915: Pre-populate the cursor physical dma address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Calling i915_gem_object_get_dma_address() from the vblank
evade critical section triggers might_sleep().
While we know that we've already pinned the framebuffer
and thus i915_gem_object_get_dma_address() will in fact
not sleep in this case, it seems reasonable to keep the
unconditional might_sleep() for maximum coverage.
So let's instead pre-populate the dma address during
fb pinning, which all happens before we enter the
vblank evade critical section.
We can use u32 for the dma address as this class of
hardware doesn't support >32bit addresses.
Cc: stable(a)vger.kernel.org
Fixes: 0225a90981c8 ("drm/i915: Make cursor plane registers unlocked")
Reported-by: Borislav Petkov <bp(a)alien8.de>
Closes: https://lore.kernel.org/intel-gfx/20240227100342.GAZd2zfmYcPS_SndtO@fat_cra…
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240325175738.3440-1-ville.s…
Tested-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah(a)intel.com>
(cherry picked from commit c1289a5c3594cf04caa94ebf0edeb50c62009f1f)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index f8b33999d43f..0d3da55e1c24 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;
if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = i915_gem_object_get_dma_address(obj, 0);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index e67cd5b02e84..9104f18753b4 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -727,6 +727,7 @@ struct intel_plane_state {
#define PLANE_HAS_FENCE BIT(0)
struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */
/* Plane pxp decryption state */
bool decrypt;
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 7b42aef37d2f..b6df9baf481b 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
return PTR_ERR(vma);
plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033016-jot-effects-834f@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 25 Mar 2024 19:57:38 +0200
Subject: [PATCH] drm/i915: Pre-populate the cursor physical dma address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Calling i915_gem_object_get_dma_address() from the vblank
evade critical section triggers might_sleep().
While we know that we've already pinned the framebuffer
and thus i915_gem_object_get_dma_address() will in fact
not sleep in this case, it seems reasonable to keep the
unconditional might_sleep() for maximum coverage.
So let's instead pre-populate the dma address during
fb pinning, which all happens before we enter the
vblank evade critical section.
We can use u32 for the dma address as this class of
hardware doesn't support >32bit addresses.
Cc: stable(a)vger.kernel.org
Fixes: 0225a90981c8 ("drm/i915: Make cursor plane registers unlocked")
Reported-by: Borislav Petkov <bp(a)alien8.de>
Closes: https://lore.kernel.org/intel-gfx/20240227100342.GAZd2zfmYcPS_SndtO@fat_cra…
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240325175738.3440-1-ville.s…
Tested-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah(a)intel.com>
(cherry picked from commit c1289a5c3594cf04caa94ebf0edeb50c62009f1f)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index f8b33999d43f..0d3da55e1c24 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;
if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = i915_gem_object_get_dma_address(obj, 0);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index e67cd5b02e84..9104f18753b4 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -727,6 +727,7 @@ struct intel_plane_state {
#define PLANE_HAS_FENCE BIT(0)
struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */
/* Plane pxp decryption state */
bool decrypt;
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 7b42aef37d2f..b6df9baf481b 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
return PTR_ERR(vma);
plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033015-handcart-chaos-edcc@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 25 Mar 2024 19:57:38 +0200
Subject: [PATCH] drm/i915: Pre-populate the cursor physical dma address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Calling i915_gem_object_get_dma_address() from the vblank
evade critical section triggers might_sleep().
While we know that we've already pinned the framebuffer
and thus i915_gem_object_get_dma_address() will in fact
not sleep in this case, it seems reasonable to keep the
unconditional might_sleep() for maximum coverage.
So let's instead pre-populate the dma address during
fb pinning, which all happens before we enter the
vblank evade critical section.
We can use u32 for the dma address as this class of
hardware doesn't support >32bit addresses.
Cc: stable(a)vger.kernel.org
Fixes: 0225a90981c8 ("drm/i915: Make cursor plane registers unlocked")
Reported-by: Borislav Petkov <bp(a)alien8.de>
Closes: https://lore.kernel.org/intel-gfx/20240227100342.GAZd2zfmYcPS_SndtO@fat_cra…
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240325175738.3440-1-ville.s…
Tested-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah(a)intel.com>
(cherry picked from commit c1289a5c3594cf04caa94ebf0edeb50c62009f1f)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index f8b33999d43f..0d3da55e1c24 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;
if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = i915_gem_object_get_dma_address(obj, 0);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index e67cd5b02e84..9104f18753b4 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -727,6 +727,7 @@ struct intel_plane_state {
#define PLANE_HAS_FENCE BIT(0)
struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */
/* Plane pxp decryption state */
bool decrypt;
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 7b42aef37d2f..b6df9baf481b 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
return PTR_ERR(vma);
plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b34490879baa847d16fc529c8ea6e6d34f004b38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033017-hate-turkey-7077@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b34490879baa847d16fc529c8ea6e6d34f004b38 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Date: Mon, 25 Mar 2024 10:02:42 +0100
Subject: [PATCH] gpio: cdev: sanitize the label before requesting the
interrupt
When an interrupt is requested, a procfs directory is created under
"/proc/irq/<irqnum>/<label>" where <label> is the string passed to one of
the request_irq() variants.
What follows is that the string must not contain the "/" character or
the procfs mkdir operation will fail. We don't have such constraints for
GPIO consumer labels which are used verbatim as interrupt labels for
GPIO irqs. We must therefore sanitize the consumer string before
requesting the interrupt.
Let's replace all "/" with ":".
Cc: stable(a)vger.kernel.org
Reported-by: Stefan Wahren <wahrenst(a)gmx.net>
Closes: https://lore.kernel.org/linux-gpio/39fe95cb-aa83-4b8b-8cab-63947a726754@gmx…
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Reviewed-by: Kent Gibson <warthog618(a)gmail.com>
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index f384fa278764..fa9635610251 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1083,10 +1083,20 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
return 0;
}
+static inline char *make_irq_label(const char *orig)
+{
+ return kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}
@@ -1110,6 +1120,7 @@ static int edge_detector_setup(struct line *line,
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;
eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
@@ -1146,11 +1157,17 @@ static int edge_detector_setup(struct line *line,
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;
+ label = make_irq_label(line->req->label);
+ if (!label)
+ return -ENOMEM;
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
return 0;
@@ -1973,7 +1990,7 @@ static void lineevent_free(struct lineevent_state *le)
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
@@ -2114,6 +2131,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;
if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
@@ -2198,15 +2216,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_le;
+ label = make_irq_label(le->label);
+ if (!label) {
+ ret = -ENOMEM;
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }
le->irq = irq;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b34490879baa847d16fc529c8ea6e6d34f004b38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033016-ipod-snout-0b26@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b34490879baa847d16fc529c8ea6e6d34f004b38 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Date: Mon, 25 Mar 2024 10:02:42 +0100
Subject: [PATCH] gpio: cdev: sanitize the label before requesting the
interrupt
When an interrupt is requested, a procfs directory is created under
"/proc/irq/<irqnum>/<label>" where <label> is the string passed to one of
the request_irq() variants.
What follows is that the string must not contain the "/" character or
the procfs mkdir operation will fail. We don't have such constraints for
GPIO consumer labels which are used verbatim as interrupt labels for
GPIO irqs. We must therefore sanitize the consumer string before
requesting the interrupt.
Let's replace all "/" with ":".
Cc: stable(a)vger.kernel.org
Reported-by: Stefan Wahren <wahrenst(a)gmx.net>
Closes: https://lore.kernel.org/linux-gpio/39fe95cb-aa83-4b8b-8cab-63947a726754@gmx…
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Reviewed-by: Kent Gibson <warthog618(a)gmail.com>
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index f384fa278764..fa9635610251 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1083,10 +1083,20 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
return 0;
}
+static inline char *make_irq_label(const char *orig)
+{
+ return kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}
@@ -1110,6 +1120,7 @@ static int edge_detector_setup(struct line *line,
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;
eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
@@ -1146,11 +1157,17 @@ static int edge_detector_setup(struct line *line,
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;
+ label = make_irq_label(line->req->label);
+ if (!label)
+ return -ENOMEM;
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
return 0;
@@ -1973,7 +1990,7 @@ static void lineevent_free(struct lineevent_state *le)
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
@@ -2114,6 +2131,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;
if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
@@ -2198,15 +2216,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_le;
+ label = make_irq_label(le->label);
+ if (!label) {
+ ret = -ENOMEM;
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }
le->irq = irq;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b34490879baa847d16fc529c8ea6e6d34f004b38
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033015-showy-immodest-66bb@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b34490879baa847d16fc529c8ea6e6d34f004b38 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Date: Mon, 25 Mar 2024 10:02:42 +0100
Subject: [PATCH] gpio: cdev: sanitize the label before requesting the
interrupt
When an interrupt is requested, a procfs directory is created under
"/proc/irq/<irqnum>/<label>" where <label> is the string passed to one of
the request_irq() variants.
What follows is that the string must not contain the "/" character or
the procfs mkdir operation will fail. We don't have such constraints for
GPIO consumer labels which are used verbatim as interrupt labels for
GPIO irqs. We must therefore sanitize the consumer string before
requesting the interrupt.
Let's replace all "/" with ":".
Cc: stable(a)vger.kernel.org
Reported-by: Stefan Wahren <wahrenst(a)gmx.net>
Closes: https://lore.kernel.org/linux-gpio/39fe95cb-aa83-4b8b-8cab-63947a726754@gmx…
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Reviewed-by: Kent Gibson <warthog618(a)gmail.com>
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index f384fa278764..fa9635610251 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1083,10 +1083,20 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
return 0;
}
+static inline char *make_irq_label(const char *orig)
+{
+ return kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}
@@ -1110,6 +1120,7 @@ static int edge_detector_setup(struct line *line,
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;
eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
@@ -1146,11 +1157,17 @@ static int edge_detector_setup(struct line *line,
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;
+ label = make_irq_label(line->req->label);
+ if (!label)
+ return -ENOMEM;
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
return 0;
@@ -1973,7 +1990,7 @@ static void lineevent_free(struct lineevent_state *le)
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
@@ -2114,6 +2131,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;
if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
@@ -2198,15 +2216,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_le;
+ label = make_irq_label(le->label);
+ if (!label) {
+ ret = -ENOMEM;
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }
le->irq = irq;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f67cf45deedb118af302534643627ce59074e8eb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033013-flaring-scorn-694e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f67cf45deedb118af302534643627ce59074e8eb Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Date: Mon, 25 Mar 2024 23:24:24 +0100
Subject: [PATCH] Revert "thermal: core: Don't update trip points inside the
hysteresis range"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It has been reported that commit cf3986f8c01d3 introduced a regression
when the temperature is wavering in the hysteresis region. The
mitigation stops, leading to an uncontrolled temperature increase until
the critical trip point is reached.
Here is what happens:
* 'throttle' is when the current temperature is greater than the trip
point temperature
* 'target' is the mitigation level
* 'passive' is positive when there is a mitigation, zero otherwise
* these values are computed in the step_wise governor
Configuration:
trip point 1: temp=95°C, hyst=5°C (passive)
trip point 2: temp=115°C, hyst=0°C (critical)
governor: step_wise
1. The temperature crosses trip point 1 on the way up at 95°C
- trend=raising
- throttle=1, target=1
- passive=1
- set_trips: low=90°C, high=115°C
2. The temperature decreases but stays in the hysteresis region at
93°C
- trend=dropping
- throttle=0, target=0
- passive=1
Before cf3986f8c01d3
- set_trips: low=90°C, high=95°C
After cf3986f8c01d3
- set_trips: low=90°C, high=115°C
3. The temperature increases a bit but stays in the hysteresis region
at 94°C (so below the trip point 1 temp 95°C)
- trend=raising
- throttle=0, target=0
- passive=1
Before cf3986f8c01d3
- set_trips: low=90°C, high=95°C
After cf3986f8c01d3
- set_trips: low=90°C, high=115°C
4. The temperature decreases but stays in the hysteresis region at
93°C
- trend=dropping
- throttle=0, target=THERMAL_NO_TARGET
- passive=0
Before cf3986f8c01d3
- set_trips: low=90°C, high=95°C
After cf3986f8c01d3
- set_trips: low=90°C, high=115°C
At this point, the 'passive' value is zero, there is no mitigation,
the temperature is in the hysteresis region, the next trip point is
115°C. As 'passive' is zero, the timer to monitor the thermal zone is
disabled. Consequently if the temperature continues to increase, no
mitigation will happen and it will reach the 115°C trip point and
reboot.
Before the optimization, the high boundary would have been 95°C, thus
triggering the mitigation again and rearming the polling timer.
The optimization makes sense, but given the current implementation of
the step_wise governor collaborating via this 'passive' flag with the
core framework, it cannot work.
From a higher perspective it seems like there is a problem between the
governor which sets a variable to be used by the core framework. That
sounds awkward, and it would make much more sense if the core framework
controlled the governor and not the opposite. But as the devil hides in
the details, there are some subtleties to be addressed first.
Elaborating on those would be out of the scope of this changelog. So let's
stay simple and revert the change first to fixup all broken mobile
platforms.
This reverts commit cf3986f8c01d3 ("thermal: core: Don't update trip
points inside the hysteresis range") and takes a conflict with commit
0c0c4740c9d26 ("thermal: trip: Use for_each_trip() in
__thermal_zone_set_trips()") in drivers/thermal/thermal_trip.c into
account.
Fixes: cf3986f8c01d3 ("thermal: core: Don't update trip points inside the hysteresis range")
Reported-by: Manaf Meethalavalappu Pallikunhi <quic_manafm(a)quicinc.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Acked-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Cc: 6.7+ <stable(a)vger.kernel.org> # 6.7+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 09f6050dd041..497abf0d47ca 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -65,7 +65,6 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
{
const struct thermal_trip *trip;
int low = -INT_MAX, high = INT_MAX;
- bool same_trip = false;
int ret;
lockdep_assert_held(&tz->lock);
@@ -74,36 +73,22 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
return;
for_each_trip(tz, trip) {
- bool low_set = false;
int trip_low;
trip_low = trip->temperature - trip->hysteresis;
- if (trip_low < tz->temperature && trip_low > low) {
+ if (trip_low < tz->temperature && trip_low > low)
low = trip_low;
- low_set = true;
- same_trip = false;
- }
if (trip->temperature > tz->temperature &&
- trip->temperature < high) {
+ trip->temperature < high)
high = trip->temperature;
- same_trip = low_set;
- }
}
/* No need to change trip points */
if (tz->prev_low_trip == low && tz->prev_high_trip == high)
return;
- /*
- * If "high" and "low" are the same, skip the change unless this is the
- * first time.
- */
- if (same_trip && (tz->prev_low_trip != -INT_MAX ||
- tz->prev_high_trip != INT_MAX))
- return;
-
tz->prev_low_trip = low;
tz->prev_high_trip = high;
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x f67cf45deedb118af302534643627ce59074e8eb
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033009-harmonica-veteran-127c@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f67cf45deedb118af302534643627ce59074e8eb Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Date: Mon, 25 Mar 2024 23:24:24 +0100
Subject: [PATCH] Revert "thermal: core: Don't update trip points inside the
hysteresis range"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It has been reported that commit cf3986f8c01d3 introduced a regression
when the temperature is wavering in the hysteresis region. The
mitigation stops, leading to an uncontrolled temperature increase until
the critical trip point is reached.
Here is what happens:
* 'throttle' is when the current temperature is greater than the trip
point temperature
* 'target' is the mitigation level
* 'passive' is positive when there is a mitigation, zero otherwise
* these values are computed in the step_wise governor
Configuration:
trip point 1: temp=95°C, hyst=5°C (passive)
trip point 2: temp=115°C, hyst=0°C (critical)
governor: step_wise
1. The temperature crosses trip point 1 on the way up at 95°C
- trend=raising
- throttle=1, target=1
- passive=1
- set_trips: low=90°C, high=115°C
2. The temperature decreases but stays in the hysteresis region at
93°C
- trend=dropping
- throttle=0, target=0
- passive=1
Before cf3986f8c01d3
- set_trips: low=90°C, high=95°C
After cf3986f8c01d3
- set_trips: low=90°C, high=115°C
3. The temperature increases a bit but stays in the hysteresis region
at 94°C (so below the trip point 1 temp 95°C)
- trend=raising
- throttle=0, target=0
- passive=1
Before cf3986f8c01d3
- set_trips: low=90°C, high=95°C
After cf3986f8c01d3
- set_trips: low=90°C, high=115°C
4. The temperature decreases but stays in the hysteresis region at
93°C
- trend=dropping
- throttle=0, target=THERMAL_NO_TARGET
- passive=0
Before cf3986f8c01d3
- set_trips: low=90°C, high=95°C
After cf3986f8c01d3
- set_trips: low=90°C, high=115°C
At this point, the 'passive' value is zero, there is no mitigation,
the temperature is in the hysteresis region, the next trip point is
115°C. As 'passive' is zero, the timer to monitor the thermal zone is
disabled. Consequently if the temperature continues to increase, no
mitigation will happen and it will reach the 115°C trip point and
reboot.
Before the optimization, the high boundary would have been 95°C, thus
triggering the mitigation again and rearming the polling timer.
The optimization makes sense, but given the current implementation of
the step_wise governor collaborating via this 'passive' flag with the
core framework, it cannot work.
From a higher perspective it seems like there is a problem between the
governor which sets a variable to be used by the core framework. That
sounds awkward, and it would make much more sense if the core framework
controlled the governor and not the opposite. But as the devil hides in
the details, there are some subtleties to be addressed first.
Elaborating on those would be out of the scope of this changelog. So let's
stay simple and revert the change first to fixup all broken mobile
platforms.
This reverts commit cf3986f8c01d3 ("thermal: core: Don't update trip
points inside the hysteresis range") and takes a conflict with commit
0c0c4740c9d26 ("thermal: trip: Use for_each_trip() in
__thermal_zone_set_trips()") in drivers/thermal/thermal_trip.c into
account.
Fixes: cf3986f8c01d3 ("thermal: core: Don't update trip points inside the hysteresis range")
Reported-by: Manaf Meethalavalappu Pallikunhi <quic_manafm(a)quicinc.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
Acked-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Cc: 6.7+ <stable(a)vger.kernel.org> # 6.7+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 09f6050dd041..497abf0d47ca 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -65,7 +65,6 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
{
const struct thermal_trip *trip;
int low = -INT_MAX, high = INT_MAX;
- bool same_trip = false;
int ret;
lockdep_assert_held(&tz->lock);
@@ -74,36 +73,22 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
return;
for_each_trip(tz, trip) {
- bool low_set = false;
int trip_low;
trip_low = trip->temperature - trip->hysteresis;
- if (trip_low < tz->temperature && trip_low > low) {
+ if (trip_low < tz->temperature && trip_low > low)
low = trip_low;
- low_set = true;
- same_trip = false;
- }
if (trip->temperature > tz->temperature &&
- trip->temperature < high) {
+ trip->temperature < high)
high = trip->temperature;
- same_trip = low_set;
- }
}
/* No need to change trip points */
if (tz->prev_low_trip == low && tz->prev_high_trip == high)
return;
- /*
- * If "high" and "low" are the same, skip the change unless this is the
- * first time.
- */
- if (same_trip && (tz->prev_low_trip != -INT_MAX ||
- tz->prev_high_trip != INT_MAX))
- return;
-
tz->prev_low_trip = low;
tz->prev_high_trip = high;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 25cd241408a2adc1ed0ebc90ae0793576c111880
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033007-filth-paver-678f@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
25cd241408a2 ("mm: zswap: fix data loss on SWP_SYNCHRONOUS_IO devices")
a230c20e63ef ("mm/zswap: zswap entry doesn't need refcount anymore")
c2e2ba770200 ("mm/zswap: only support zswap_exclusive_loads_enabled")
9986d35d4ceb ("mm: zswap: function ordering: writeback")
f91e81d31c1e ("mm: zswap: function ordering: compress & decompress functions")
36034bf6fcdb ("mm: zswap: function ordering: move entry section out of tree section")
5182661a11ba ("mm: zswap: function ordering: move entry sections out of LRU section")
506a86c5e221 ("mm: zswap: function ordering: public lru api")
abca07c04aa5 ("mm: zswap: function ordering: pool params")
c1a0ecb82bdc ("mm: zswap: function ordering: zswap_pools")
39f3ec8eaa60 ("mm: zswap: function ordering: pool refcounting")
a984649b5c1f ("mm: zswap: function ordering: pool alloc & free")
fa9ad6e21003 ("mm: zswap: break out zwap_compress()")
ff2972aa1b5d ("mm: zswap: rename __zswap_load() to zswap_decompress()")
dab7711fac6d ("mm: zswap: clean up zswap_entry_put()")
e477559ca602 ("mm: zswap: warn when referencing a dead entry")
7dd1f7f0fc1c ("mm: zswap: move zswap_invalidate_entry() to related functions")
5b297f70bb26 ("mm: zswap: inline and remove zswap_entry_find_get()")
42398be2adb1 ("mm: zswap: rename zswap_free_entry to zswap_entry_free")
5878303c5353 ("mm/zswap: fix race between lru writeback and swapoff")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 25cd241408a2adc1ed0ebc90ae0793576c111880 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes(a)cmpxchg.org>
Date: Sun, 24 Mar 2024 17:04:47 -0400
Subject: [PATCH] mm: zswap: fix data loss on SWP_SYNCHRONOUS_IO devices
Zhongkun He reports data corruption when combining zswap with zram.
The issue is the exclusive loads we're doing in zswap. They assume
that all reads are going into the swapcache, which can assume
authoritative ownership of the data and so the zswap copy can go.
However, zram files are marked SWP_SYNCHRONOUS_IO, and faults will try to
bypass the swapcache. This results in an optimistic read of the swap data
into a page that will be dismissed if the fault fails due to races. In
this case, zswap mustn't drop its authoritative copy.
Link: https://lore.kernel.org/all/CACSyD1N+dUvsu8=zV9P691B9bVq33erwOXNTmEaUbi9DrD…
Fixes: b9c91c43412f ("mm: zswap: support exclusive loads")
Link: https://lkml.kernel.org/r/20240324210447.956973-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reported-by: Zhongkun He <hezhongkun.hzk(a)bytedance.com>
Tested-by: Zhongkun He <hezhongkun.hzk(a)bytedance.com>
Acked-by: Yosry Ahmed <yosryahmed(a)google.com>
Acked-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
Acked-by: Chris Li <chrisl(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.5+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 36612f34b5d7..caed028945b0 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1636,6 +1636,7 @@ bool zswap_load(struct folio *folio)
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
struct page *page = &folio->page;
+ bool swapcache = folio_test_swapcache(folio);
struct zswap_tree *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
u8 *dst;
@@ -1648,7 +1649,20 @@ bool zswap_load(struct folio *folio)
spin_unlock(&tree->lock);
return false;
}
- zswap_rb_erase(&tree->rbroot, entry);
+ /*
+ * When reading into the swapcache, invalidate our entry. The
+ * swapcache can be the authoritative owner of the page and
+ * its mappings, and the pressure that results from having two
+ * in-memory copies outweighs any benefits of caching the
+ * compression work.
+ *
+ * (Most swapins go through the swapcache. The notable
+ * exception is the singleton fault on SWP_SYNCHRONOUS_IO
+ * files, which reads into a private page and may free it if
+ * the fault fails. We remain the primary owner of the entry.)
+ */
+ if (swapcache)
+ zswap_rb_erase(&tree->rbroot, entry);
spin_unlock(&tree->lock);
if (entry->length)
@@ -1663,9 +1677,10 @@ bool zswap_load(struct folio *folio)
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
- zswap_entry_free(entry);
-
- folio_mark_dirty(folio);
+ if (swapcache) {
+ zswap_entry_free(entry);
+ folio_mark_dirty(folio);
+ }
return true;
}
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 25cd241408a2adc1ed0ebc90ae0793576c111880
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033006-evict-backtrack-360e@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
25cd241408a2 ("mm: zswap: fix data loss on SWP_SYNCHRONOUS_IO devices")
a230c20e63ef ("mm/zswap: zswap entry doesn't need refcount anymore")
c2e2ba770200 ("mm/zswap: only support zswap_exclusive_loads_enabled")
9986d35d4ceb ("mm: zswap: function ordering: writeback")
f91e81d31c1e ("mm: zswap: function ordering: compress & decompress functions")
36034bf6fcdb ("mm: zswap: function ordering: move entry section out of tree section")
5182661a11ba ("mm: zswap: function ordering: move entry sections out of LRU section")
506a86c5e221 ("mm: zswap: function ordering: public lru api")
abca07c04aa5 ("mm: zswap: function ordering: pool params")
c1a0ecb82bdc ("mm: zswap: function ordering: zswap_pools")
39f3ec8eaa60 ("mm: zswap: function ordering: pool refcounting")
a984649b5c1f ("mm: zswap: function ordering: pool alloc & free")
fa9ad6e21003 ("mm: zswap: break out zwap_compress()")
ff2972aa1b5d ("mm: zswap: rename __zswap_load() to zswap_decompress()")
dab7711fac6d ("mm: zswap: clean up zswap_entry_put()")
e477559ca602 ("mm: zswap: warn when referencing a dead entry")
7dd1f7f0fc1c ("mm: zswap: move zswap_invalidate_entry() to related functions")
5b297f70bb26 ("mm: zswap: inline and remove zswap_entry_find_get()")
42398be2adb1 ("mm: zswap: rename zswap_free_entry to zswap_entry_free")
5878303c5353 ("mm/zswap: fix race between lru writeback and swapoff")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 25cd241408a2adc1ed0ebc90ae0793576c111880 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes(a)cmpxchg.org>
Date: Sun, 24 Mar 2024 17:04:47 -0400
Subject: [PATCH] mm: zswap: fix data loss on SWP_SYNCHRONOUS_IO devices
Zhongkun He reports data corruption when combining zswap with zram.
The issue is the exclusive loads we're doing in zswap. They assume
that all reads are going into the swapcache, which can assume
authoritative ownership of the data and so the zswap copy can go.
However, zram files are marked SWP_SYNCHRONOUS_IO, and faults will try to
bypass the swapcache. This results in an optimistic read of the swap data
into a page that will be dismissed if the fault fails due to races. In
this case, zswap mustn't drop its authoritative copy.
Link: https://lore.kernel.org/all/CACSyD1N+dUvsu8=zV9P691B9bVq33erwOXNTmEaUbi9DrD…
Fixes: b9c91c43412f ("mm: zswap: support exclusive loads")
Link: https://lkml.kernel.org/r/20240324210447.956973-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reported-by: Zhongkun He <hezhongkun.hzk(a)bytedance.com>
Tested-by: Zhongkun He <hezhongkun.hzk(a)bytedance.com>
Acked-by: Yosry Ahmed <yosryahmed(a)google.com>
Acked-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
Acked-by: Chris Li <chrisl(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.5+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 36612f34b5d7..caed028945b0 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1636,6 +1636,7 @@ bool zswap_load(struct folio *folio)
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
struct page *page = &folio->page;
+ bool swapcache = folio_test_swapcache(folio);
struct zswap_tree *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
u8 *dst;
@@ -1648,7 +1649,20 @@ bool zswap_load(struct folio *folio)
spin_unlock(&tree->lock);
return false;
}
- zswap_rb_erase(&tree->rbroot, entry);
+ /*
+ * When reading into the swapcache, invalidate our entry. The
+ * swapcache can be the authoritative owner of the page and
+ * its mappings, and the pressure that results from having two
+ * in-memory copies outweighs any benefits of caching the
+ * compression work.
+ *
+ * (Most swapins go through the swapcache. The notable
+ * exception is the singleton fault on SWP_SYNCHRONOUS_IO
+ * files, which reads into a private page and may free it if
+ * the fault fails. We remain the primary owner of the entry.)
+ */
+ if (swapcache)
+ zswap_rb_erase(&tree->rbroot, entry);
spin_unlock(&tree->lock);
if (entry->length)
@@ -1663,9 +1677,10 @@ bool zswap_load(struct folio *folio)
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
- zswap_entry_free(entry);
-
- folio_mark_dirty(folio);
+ if (swapcache) {
+ zswap_entry_free(entry);
+ folio_mark_dirty(folio);
+ }
return true;
}
The patch below does not apply to the 6.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.8.y
git checkout FETCH_HEAD
git cherry-pick -x 25cd241408a2adc1ed0ebc90ae0793576c111880
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033005-politely-marauding-110a@gregkh' --subject-prefix 'PATCH 6.8.y' HEAD^..
Possible dependencies:
25cd241408a2 ("mm: zswap: fix data loss on SWP_SYNCHRONOUS_IO devices")
a230c20e63ef ("mm/zswap: zswap entry doesn't need refcount anymore")
c2e2ba770200 ("mm/zswap: only support zswap_exclusive_loads_enabled")
9986d35d4ceb ("mm: zswap: function ordering: writeback")
f91e81d31c1e ("mm: zswap: function ordering: compress & decompress functions")
36034bf6fcdb ("mm: zswap: function ordering: move entry section out of tree section")
5182661a11ba ("mm: zswap: function ordering: move entry sections out of LRU section")
506a86c5e221 ("mm: zswap: function ordering: public lru api")
abca07c04aa5 ("mm: zswap: function ordering: pool params")
c1a0ecb82bdc ("mm: zswap: function ordering: zswap_pools")
39f3ec8eaa60 ("mm: zswap: function ordering: pool refcounting")
a984649b5c1f ("mm: zswap: function ordering: pool alloc & free")
fa9ad6e21003 ("mm: zswap: break out zwap_compress()")
ff2972aa1b5d ("mm: zswap: rename __zswap_load() to zswap_decompress()")
dab7711fac6d ("mm: zswap: clean up zswap_entry_put()")
e477559ca602 ("mm: zswap: warn when referencing a dead entry")
7dd1f7f0fc1c ("mm: zswap: move zswap_invalidate_entry() to related functions")
5b297f70bb26 ("mm: zswap: inline and remove zswap_entry_find_get()")
42398be2adb1 ("mm: zswap: rename zswap_free_entry to zswap_entry_free")
5878303c5353 ("mm/zswap: fix race between lru writeback and swapoff")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 25cd241408a2adc1ed0ebc90ae0793576c111880 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes(a)cmpxchg.org>
Date: Sun, 24 Mar 2024 17:04:47 -0400
Subject: [PATCH] mm: zswap: fix data loss on SWP_SYNCHRONOUS_IO devices
Zhongkun He reports data corruption when combining zswap with zram.
The issue is the exclusive loads we're doing in zswap. They assume
that all reads are going into the swapcache, which can assume
authoritative ownership of the data and so the zswap copy can go.
However, zram files are marked SWP_SYNCHRONOUS_IO, and faults will try to
bypass the swapcache. This results in an optimistic read of the swap data
into a page that will be dismissed if the fault fails due to races. In
this case, zswap mustn't drop its authoritative copy.
Link: https://lore.kernel.org/all/CACSyD1N+dUvsu8=zV9P691B9bVq33erwOXNTmEaUbi9DrD…
Fixes: b9c91c43412f ("mm: zswap: support exclusive loads")
Link: https://lkml.kernel.org/r/20240324210447.956973-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reported-by: Zhongkun He <hezhongkun.hzk(a)bytedance.com>
Tested-by: Zhongkun He <hezhongkun.hzk(a)bytedance.com>
Acked-by: Yosry Ahmed <yosryahmed(a)google.com>
Acked-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
Acked-by: Chris Li <chrisl(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.5+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index 36612f34b5d7..caed028945b0 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1636,6 +1636,7 @@ bool zswap_load(struct folio *folio)
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
struct page *page = &folio->page;
+ bool swapcache = folio_test_swapcache(folio);
struct zswap_tree *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
u8 *dst;
@@ -1648,7 +1649,20 @@ bool zswap_load(struct folio *folio)
spin_unlock(&tree->lock);
return false;
}
- zswap_rb_erase(&tree->rbroot, entry);
+ /*
+ * When reading into the swapcache, invalidate our entry. The
+ * swapcache can be the authoritative owner of the page and
+ * its mappings, and the pressure that results from having two
+ * in-memory copies outweighs any benefits of caching the
+ * compression work.
+ *
+ * (Most swapins go through the swapcache. The notable
+ * exception is the singleton fault on SWP_SYNCHRONOUS_IO
+ * files, which reads into a private page and may free it if
+ * the fault fails. We remain the primary owner of the entry.)
+ */
+ if (swapcache)
+ zswap_rb_erase(&tree->rbroot, entry);
spin_unlock(&tree->lock);
if (entry->length)
@@ -1663,9 +1677,10 @@ bool zswap_load(struct folio *folio)
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
- zswap_entry_free(entry);
-
- folio_mark_dirty(folio);
+ if (swapcache) {
+ zswap_entry_free(entry);
+ folio_mark_dirty(folio);
+ }
return true;
}
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 32fbe5246582af4f611ccccee33fd6e559087252
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033006-laptop-tassel-b290@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
32fbe5246582 ("crash: use macro to add crashk_res into iomem early for specific arch")
85fcde402db1 ("kexec: split crashkernel reservation code out from crash_core.c")
4a693ce65b18 ("kdump: defer the insertion of crashkernel resources")
29166371ef67 ("kdump: remove redundant DEFAULT_CRASH_KERNEL_LOW_SIZE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32fbe5246582af4f611ccccee33fd6e559087252 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe(a)redhat.com>
Date: Mon, 25 Mar 2024 09:50:50 +0800
Subject: [PATCH] crash: use macro to add crashk_res into iomem early for
specific arch
There are regression reports[1][2] that the crashkernel region on x86_64
sometimes can't be added into the iomem tree. This causes kdump loading
to fail later.
This happened after commit 4a693ce65b18 ("kdump: defer the insertion of
crashkernel resources") was merged.
Even though these reported issues have been proven to be related to other
components and were merely exposed after the above commit was applied, I
would still like to keep crashk_res and crashk_low_res being added into
iomem early as before, because the early adding has always been there on
x86_64 and has worked very well. For the safety of kdump, let's change it back.
Here, add a macro HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY to limit that
only ARCH defining the macro can have the early adding
crashk_res/_low_res into iomem. Then define
HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY on x86 to enable it.
Note: In reserve_crashkernel_low(), there's a remnant of crashk_low_res
handling which was mistakenly added back in commit 85fcde402db1 ("kexec:
split crashkernel reservation code out from crash_core.c").
[1]
[PATCH V2] x86/kexec: do not update E820 kexec table for setup_data
https://lore.kernel.org/all/Zfv8iCL6CT2JqLIC@darkstar.users.ipa.redhat.com/…
[2]
Question about Address Range Validation in Crash Kernel Allocation
https://lore.kernel.org/all/4eeac1f733584855965a2ea62fa4da58@huawei.com/T/#u
Link: https://lkml.kernel.org/r/ZgDYemRQ2jxjLkq+@MiWiFi-R3L-srv
Fixes: 4a693ce65b18 ("kdump: defer the insertion of crashkernel resources")
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Huacai Chen <chenhuacai(a)loongson.cn>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Jiri Bohac <jbohac(a)suse.cz>
Cc: Li Huafei <lihuafei1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h
index 152239f95541..7835b2cdff04 100644
--- a/arch/x86/include/asm/crash_reserve.h
+++ b/arch/x86/include/asm/crash_reserve.h
@@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void)
#endif
}
+#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+
#endif /* _X86_CRASH_RESERVE_H */
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index bbb6c3cb00e4..066668799f75 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -366,7 +366,9 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
insert_resource(&iomem_resource, &crashk_low_res);
+#endif
#endif
return 0;
}
@@ -448,8 +450,12 @@ void __init reserve_crashkernel_generic(char *cmdline,
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+ insert_resource(&iomem_resource, &crashk_res);
+#endif
}
+#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
if (crashk_res.start < crashk_res.end)
@@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void)
}
early_initcall(insert_crashkernel_resources);
#endif
+#endif
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer require the maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 171573613c39..893ea04498f7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -514,8 +514,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
MTD OTP logic is very fragile and can be problematic with some specific
kind of devices.
Over the years, NVMEM has gone through various iterations of how Cells can
be declared in DT; MTD OTP was probably left behind, and
add_legacy_fixed_of_cells was enabled without thinking of the consequences.
That option makes NVMEM scan the provided of_node and treat each child as
an NVMEM Cell; this was done to support legacy NVMEM implementations and
to avoid regressions.
This is problematic if we have devices like Nand where the OTP is
triggered by setting a special mode in the flash. In this context, real
partitions declared in the Nand node are registered as OTP Cells, and
this causes the probe to fail with an -EINVAL error.
This was never noticed because, until now, no Nand supported
the OTP feature. With commit e87161321a40 ("mtd: rawnand: macronix: OTP
access for MX30LFxG18AC") this changed, and coincidentally this Nand is
used on a FritzBox 7530 supported by OpenWrt.
An alternative and more robust way to declare OTP Cells is already
possible by using the fixed-layout node or by declaring a child node
with the compatible set to "otp-user" or "otp-factory".
To fix this and limit any regression with other MTD that makes use of
declaring OTP as direct child of the dev node, disable
add_legacy_fixed_of_cells if we detect the MTD type is Nand.
With the following logic, the OTP NVMEM entry is correctly created with
no Cells and the MTD Nand is correctly probed and partitions are
correctly exposed.
Fixes: 2cc3b37f5b6d ("nvmem: add explicit config option to read old syntax fixed OF cells")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/mtd/mtdcore.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 5887feb347a4..0de87bc63840 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -900,7 +900,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
config.name = compatible;
config.id = NVMEM_DEVID_AUTO;
config.owner = THIS_MODULE;
- config.add_legacy_fixed_of_cells = true;
+ config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd);
config.type = NVMEM_TYPE_OTP;
config.root_only = true;
config.ignore_wp = true;
--
2.43.0
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
commit dbf4ab821804df071c8b566d9813083125e6d97b upstream.
The SC16IS7XX IC supports a burst mode to access the FIFOs where the
initial register address is sent ($00), followed by all the FIFO data
without having to resend the register address each time. In this mode, the
IC doesn't increment the register address for each R/W byte.
The regmap_raw_read() and regmap_raw_write() are functions which can
perform IO over multiple registers. They are currently used to read/write
from/to the FIFO, and although they operate correctly in this burst mode on
the SPI bus, they would corrupt the regmap cache if it was not disabled
manually. The reason is that when the R/W size is more than 1 byte, these
functions assume that the register address is incremented and handle the
cache accordingly.
Convert FIFO R/W functions to use the regmap _noinc_ versions in order to
remove the manual cache control which was a workaround when using the
_raw_ versions. FIFO registers are properly declared as volatile so
cache will not be used/updated for FIFO accesses.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: <stable(a)vger.kernel.org> # 5.10 5.15
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Signed-off-by: GONG, Ruiqi <gongruiqi1(a)huawei.com>
---
v2: Backport to both 5.15 and 5.10
The mainline commit dbf4ab821804 ("serial: sc16is7xx: convert from _raw_
to _noinc_ regmap functions for FIFO") by Hugo has been assigned to be
CVE-2023-52488, but for stable branches lower than 6.1 there's no
official backport.
I made up this backport patch for 5.10, and its correctness has been
confirmed in previous communication with Hugo. Let's publicize it and
merge it into upstream.
drivers/tty/serial/sc16is7xx.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 8c09c97f9814..0066a0e23516 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -376,9 +376,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen)
const u8 line = sc16is7xx_line(port);
u8 addr = (SC16IS7XX_RHR_REG << SC16IS7XX_REG_SHIFT) | line;
- regcache_cache_bypass(s->regmap, true);
- regmap_raw_read(s->regmap, addr, s->buf, rxlen);
- regcache_cache_bypass(s->regmap, false);
+ regmap_noinc_read(s->regmap, addr, s->buf, rxlen);
}
static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
@@ -394,9 +392,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
if (unlikely(!to_send))
return;
- regcache_cache_bypass(s->regmap, true);
- regmap_raw_write(s->regmap, addr, s->buf, to_send);
- regcache_cache_bypass(s->regmap, false);
+ regmap_noinc_write(s->regmap, addr, s->buf, to_send);
}
static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
@@ -489,6 +485,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg)
return false;
}
+static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
+{
+ return reg == SC16IS7XX_RHR_REG;
+}
+
static int sc16is7xx_set_baud(struct uart_port *port, int baud)
{
struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
@@ -1439,6 +1440,8 @@ static struct regmap_config regcfg = {
.cache_type = REGCACHE_RBTREE,
.volatile_reg = sc16is7xx_regmap_volatile,
.precious_reg = sc16is7xx_regmap_precious,
+ .writeable_noinc_reg = sc16is7xx_regmap_noinc,
+ .readable_noinc_reg = sc16is7xx_regmap_noinc,
};
#ifdef CONFIG_SERIAL_SC16IS7XX_SPI
--
2.25.1
This corrects the backport of commit fe9a7082684e ("vfio/pci: Disable
auto-enable of exclusive INTx IRQ"), choosing to adapt the fix to the
current tree which uses an array of eventfd contexts rather than
include a base patch for the conversion to xarray, which is found to
be faulty in isolation.
I include the reverts here for completeness, but if the associated
commits are otherwise already dropped due to previous report[1], the
remainder of this series is still valid.
Largely this just adapts the mainline commits to the eventfd context
array from the current internal API where they're stored in an xarray.
Thanks,
Alex
[1]https://lore.kernel.org/all/20240329110433.156ff56c.alex.williamson@redha…
Alex Williamson (7):
Revert "vfio/pci: Disable auto-enable of exclusive INTx IRQ"
Revert "vfio/pci: Prepare for dynamic interrupt context storage"
vfio/pci: Disable auto-enable of exclusive INTx IRQ
vfio: Introduce interface to flush virqfd inject workqueue
vfio/pci: Create persistent INTx handler
vfio/platform: Create persistent IRQ handlers
vfio/fsl-mc: Block calling interrupt handler without trigger
drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 7 +-
drivers/vfio/pci/vfio_pci_intrs.c | 318 +++++++++-------------
drivers/vfio/platform/vfio_platform_irq.c | 101 ++++---
drivers/vfio/virqfd.c | 21 ++
include/linux/vfio.h | 2 +
5 files changed, 220 insertions(+), 229 deletions(-)
--
2.44.0
On Wed, 27 Mar 2024 07:41:33 -0400
Sasha Levin <sashal(a)kernel.org> wrote:
> This is a note to let you know that I've just added the patch titled
>
> vfio/pci: Prepare for dynamic interrupt context storage
>
> to the 6.1-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch
> and it can be found in the queue-6.1 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit bca808da62c6a87ef168554caa318c2801d19b70
> Author: Reinette Chatre <reinette.chatre(a)intel.com>
> Date: Thu May 11 08:44:30 2023 -0700
>
> vfio/pci: Prepare for dynamic interrupt context storage
>
> [ Upstream commit d977e0f7663961368f6442589e52d27484c2f5c2 ]
>
> Interrupt context storage is statically allocated at the time
> interrupts are allocated. Following allocation, the interrupt
> context is managed by directly accessing the elements of the
> array using the vector as index.
>
> It is possible to allocate additional MSI-X vectors after
> MSI-X has been enabled. Dynamic storage of interrupt context
> is needed to support adding new MSI-X vectors after initial
> allocation.
>
> Replace direct access of array elements with pointers to the
> array elements. Doing so reduces impact of moving to a new data
> structure. Move interactions with the array to helpers to
> mostly contain changes needed to transition to a dynamic
> data structure.
>
> No functional change intended.
>
> Signed-off-by: Reinette Chatre <reinette.chatre(a)intel.com>
> Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
> Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
> Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
> Link: https://lore.kernel.org/r/eab289693c8325ede9aba99380f8b8d5143980a4.16837406…
> Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
> Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ")
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
...
> @@ -171,15 +225,24 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
>
> static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
> {
> + struct vfio_pci_irq_ctx *ctx;
> + int ret;
> +
> if (!is_irq_none(vdev))
> return -EINVAL;
>
> if (!vdev->pdev->irq)
> return -ENODEV;
>
> - vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
> - if (!vdev->ctx)
> - return -ENOMEM;
> + ret = vfio_irq_ctx_alloc_num(vdev, 1);
> + if (ret)
> + return ret;
> +
> + ctx = vfio_irq_ctx_get(vdev, 0);
> + if (!ctx) {
> + vfio_irq_ctx_free_all(vdev);
> + return -EINVAL;
> + }
>
> vdev->num_ctx = 1;
This is broken on its own: vfio_irq_ctx_get() depends on a valid
num_ctx, therefore this function always returns -EINVAL. This was
resolved upstream by b156e48fffa9 ("vfio/pci: Use xarray for interrupt
context storage") which was from the same series, so this issue was
never apparent upstream. Suggest dropping this and fe9a7082684e
("vfio/pci: Disable auto-enable of exclusive INTx IRQ") for now and
we'll try to rework the latter to remove the dependency. Thanks,
Alex
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 45bcc0346561daa3f59e19a753cc7f3e08e8dff1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032713-identity-slightly-586d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
45bcc0346561 ("selftests: mptcp: diag: return KSFT_FAIL not test_cnt")
ce9902573652 ("selftests: mptcp: diag: format subtests results in TAP")
dc97251bf0b7 ("selftests: mptcp: diag: skip listen tests if not supported")
e04a30f78809 ("selftest: mptcp: add test for mptcp socket in use")
42fb6cddec3b ("selftests: mptcp: more stable diag tests")
f2ae0fa68e28 ("selftests/mptcp: add diag listen tests")
0cd33c5ffec1 ("selftests: mptcp: fix diag instability")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 Mon Sep 17 00:00:00 2001
From: Geliang Tang <tanggeliang(a)kylinos.cn>
Date: Fri, 1 Mar 2024 18:11:22 +0100
Subject: [PATCH] selftests: mptcp: diag: return KSFT_FAIL not test_cnt
The test counter 'test_cnt' should not be returned in diag.sh, e.g. what
if only the 4th test fails? It will do 'exit 4', which is 'exit ${KSFT_SKIP}',
so the whole test will be marked as skipped instead of 'failed'!
So we should do ret=${KSFT_FAIL} instead.
Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests")
Cc: stable(a)vger.kernel.org
Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests")
Signed-off-by: Geliang Tang <tanggeliang(a)kylinos.cn>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index f300f4e1eb59..18d37d4695c1 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -69,7 +69,7 @@ __chk_nr()
else
echo "[ fail ] expected $expected found $nr"
mptcp_lib_result_fail "${msg}"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
fi
else
echo "[ ok ]"
@@ -124,11 +124,11 @@ wait_msk_nr()
if [ $i -ge $timeout ]; then
echo "[ fail ] timeout while expecting $expected max $max last $nr"
mptcp_lib_result_fail "${msg} # timeout"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
elif [ $nr != $expected ]; then
echo "[ fail ] expected $expected found $nr"
mptcp_lib_result_fail "${msg} # unexpected result"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
else
echo "[ ok ]"
mptcp_lib_result_pass "${msg}"
From: Li Lingfeng <lilingfeng3(a)huawei.com>
A null-ptr-deref problem may occur since commit 706960d328f5 ("nvme: use
command_id instead of req->tag in trace_nvme_complete_rq()") tries to get
command_id by nvme_req(req)->cmd while nvme_req(req)->cmd is NULL.
The problem has been solved since the patch has been reverted by commit
929ba86476b3. However, cmd->common.command_id is set to req->tag again
which should be ((genctr & 0xf) << 12 | req->tag).
Generate command_id by nvme_cid() in the trace event instead of using
nvme_req(req)->cmd->common.command_id, to set it to
((genctr & 0xf) << 12 | req->tag) without triggering the null-ptr-deref
problem.
Fixes: 706960d328f5 ("nvme: use command_id instead of req->tag in trace_nvme_complete_rq()")
Reported-by: John Sperbeck <jsperbeck(a)google.com>
Link: https://lore.kernel.org/r/20240109181722.228783-1-jsperbeck@google.com
Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com>
---
drivers/nvme/host/trace.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index 700fdce2ecf1..0de057a298dd 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq,
TP_fast_assign(
__entry->ctrl_id = nvme_req(req)->ctrl->instance;
__entry->qid = nvme_req_qid(req);
- __entry->cid = req->tag;
+ __entry->cid = nvme_cid(req);
__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
__entry->retries = nvme_req(req)->retries;
__entry->flags = nvme_req(req)->flags;
--
2.31.1
The patch titled
Subject: selftests/mm: include strings.h for ffsl
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
selftests-mm-include-stringsh-for-ffsl.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Edward Liaw <edliaw(a)google.com>
Subject: selftests/mm: include strings.h for ffsl
Date: Fri, 29 Mar 2024 18:58:10 +0000
Got a compilation error on Android for ffsl after 91b80cc5b39f
("selftests: mm: fix map_hugetlb failure on 64K page size systems")
included vm_util.h.
Link: https://lkml.kernel.org/r/20240329185814.16304-1-edliaw@google.com
Fixes: af605d26a8f2 ("selftests/mm: merge util.h into vm_util.h")
Signed-off-by: Edward Liaw <edliaw(a)google.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Mike Rapoport (IBM)" <rppt(a)kernel.org>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/vm_util.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/mm/vm_util.h~selftests-mm-include-stringsh-for-ffsl
+++ a/tools/testing/selftests/mm/vm_util.h
@@ -3,7 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
-#include <string.h> /* ffsl() */
+#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#define BIT_ULL(nr) (1ULL << (nr))
_
Patches currently in -mm which might be from edliaw(a)google.com are
selftests-mm-include-stringsh-for-ffsl.patch
Largely the same as the 6.1.y backports, minor context conflict still
using externs in header file and still using GFP_KERNEL rather than
GFP_KERNEL_ACCOUNT. Also picking up 810cd4bb5345 ("vfio/pci: Lock
external INTx masking ops") which was previously included in Sasha's
6.1.y backports but here the prototype of vfio_pci_intx_mask() is
different. Thanks,
Alex
Alex Williamson (6):
vfio/pci: Disable auto-enable of exclusive INTx IRQ
vfio/pci: Lock external INTx masking ops
vfio: Introduce interface to flush virqfd inject workqueue
vfio/pci: Create persistent INTx handler
vfio/platform: Create persistent IRQ handlers
vfio/fsl-mc: Block calling interrupt handler without trigger
drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 7 +-
drivers/vfio/pci/vfio_pci_intrs.c | 176 +++++++++++++---------
drivers/vfio/platform/vfio_platform_irq.c | 101 +++++++++----
drivers/vfio/virqfd.c | 21 +++
include/linux/vfio.h | 2 +
5 files changed, 201 insertions(+), 106 deletions(-)
--
2.44.0
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Commit 08abce60d63f ("security: Introduce path_post_mknod hook")
introduced security_path_post_mknod(), to replace the IMA-specific call to
ima_post_path_mknod().
For symmetry with security_path_mknod(), security_path_post_mknod() is
called after a successful mknod operation, for any file type, rather than
only for regular files at the time there was the IMA call.
However, as reported by VFS maintainers, successful mknod operation does
not mean that the dentry always has an inode attached to it (for example,
not for FIFOs on a SAMBA mount).
If that condition happens, the kernel crashes when
security_path_post_mknod() attempts to verify if the inode associated to
the dentry is private.
Add an extra check to first verify if there is an inode attached to the
dentry, before checking if the inode is private. Also add the same check to
the current users of the path_post_mknod hook, ima_post_path_mknod() and
evm_post_path_mknod().
Finally, use the proper helper, d_backing_inode(), to retrieve the inode
from the dentry in ima_post_path_mknod().
Cc: stable(a)vger.kernel.org # 6.8.x
Reported-by: Steve French <smfrench(a)gmail.com>
Closes: https://lore.kernel.org/linux-kernel/CAH2r5msAVzxCUHHG8VKrMPUKQHmBpE6K9_vjh…
Fixes: 08abce60d63f ("security: Introduce path_post_mknod hook")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/integrity/evm/evm_main.c | 6 ++++--
security/integrity/ima/ima_main.c | 5 +++--
security/security.c | 4 +++-
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 81dbade5b9b3..ec1659273fcf 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -1037,11 +1037,13 @@ static void evm_file_release(struct file *file)
static void evm_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
struct inode *inode = d_backing_inode(dentry);
- struct evm_iint_cache *iint = evm_iint_inode(inode);
+ struct evm_iint_cache *iint;
- if (!S_ISREG(inode->i_mode))
+ /* path_post_mknod hook might pass dentries without attached inode. */
+ if (!inode || !S_ISREG(inode->i_mode))
return;
+ iint = evm_iint_inode(inode);
if (iint)
iint->flags |= EVM_NEW_FILE;
}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index c84e8c55333d..afc883e60cf3 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -719,10 +719,11 @@ static void ima_post_create_tmpfile(struct mnt_idmap *idmap,
static void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
struct ima_iint_cache *iint;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_backing_inode(dentry);
int must_appraise;
- if (!ima_policy_flag || !S_ISREG(inode->i_mode))
+ /* path_post_mknod hook might pass dentries without attached inode. */
+ if (!ima_policy_flag || !inode || !S_ISREG(inode->i_mode))
return;
must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS,
diff --git a/security/security.c b/security/security.c
index 7e118858b545..455f0749e1b0 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1801,7 +1801,9 @@ EXPORT_SYMBOL(security_path_mknod);
*/
void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
- if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
+ /* Not all dentries have an inode attached after mknod. */
+ if (d_backing_inode(dentry) &&
+ unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return;
call_void_hook(path_post_mknod, idmap, dentry);
}
--
2.34.1
Hi stable team,
Please backport the following commits to 6.7/6.8 to fix
some i915 type-c/thunderbolt PLL issues:
commit 92b47c3b8b24 ("drm/i915: Replace a memset() with zero initialization")
commit ba407525f824 ("drm/i915: Try to preserve the current shared_dpll for fastset on type-c ports")
commit d283ee5662c6 ("drm/i915: Include the PLL name in the debug messages")
commit 33c7760226c7 ("drm/i915: Suppress old PLL pipe_mask checks for MG/TC/TBT PLLs")
6.7 will need two additional dependencies:
commit f215038f4133 ("drm/i915: Use named initializers for DPLL info")
commit 58046e6cf811 ("drm/i915: Stop printing pipe name as hex")
Thanks.
--
Ville Syrjälä
Intel
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 45bcc0346561daa3f59e19a753cc7f3e08e8dff1
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032708-vagrancy-backlash-61dd@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
45bcc0346561 ("selftests: mptcp: diag: return KSFT_FAIL not test_cnt")
ce9902573652 ("selftests: mptcp: diag: format subtests results in TAP")
dc97251bf0b7 ("selftests: mptcp: diag: skip listen tests if not supported")
e04a30f78809 ("selftest: mptcp: add test for mptcp socket in use")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 Mon Sep 17 00:00:00 2001
From: Geliang Tang <tanggeliang(a)kylinos.cn>
Date: Fri, 1 Mar 2024 18:11:22 +0100
Subject: [PATCH] selftests: mptcp: diag: return KSFT_FAIL not test_cnt
The test counter 'test_cnt' should not be returned in diag.sh, e.g. what
if only the 4th test fails? It will do 'exit 4', which is 'exit ${KSFT_SKIP}',
so the whole test will be marked as skipped instead of 'failed'!
So we should do ret=${KSFT_FAIL} instead.
Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests")
Cc: stable(a)vger.kernel.org
Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests")
Signed-off-by: Geliang Tang <tanggeliang(a)kylinos.cn>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index f300f4e1eb59..18d37d4695c1 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -69,7 +69,7 @@ __chk_nr()
else
echo "[ fail ] expected $expected found $nr"
mptcp_lib_result_fail "${msg}"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
fi
else
echo "[ ok ]"
@@ -124,11 +124,11 @@ wait_msk_nr()
if [ $i -ge $timeout ]; then
echo "[ fail ] timeout while expecting $expected max $max last $nr"
mptcp_lib_result_fail "${msg} # timeout"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
elif [ $nr != $expected ]; then
echo "[ fail ] expected $expected found $nr"
mptcp_lib_result_fail "${msg} # unexpected result"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
else
echo "[ ok ]"
mptcp_lib_result_pass "${msg}"
From: Kan Liang <kan.liang(a)linux.intel.com>
[This patch set fixes the perf top failure on all Intel hybrid
machines. Without the patches, the default perf top command is broken.
I have verified the patches on both stable 6.6 and 6.7. They can also
be applied to the stable 6.6 and 6.7 trees without any modification.
Please consider applying them to stable 6.6 and 6.7. Thanks]
------------------
From: Kan Liang <kan.liang(a)linux.intel.com>
[ Upstream commit 5fa695e7da4975e8d21ce49f3718d6cf00ecb75e ]
perf top errors out on a hybrid machine
$perf top
Error:
The cycles:P event is not supported.
The perf top expects that the "cycles" is collected on all CPUs in the
system. But for hybrid there is no single "cycles" event which can cover
all CPUs. Perf has to split it into two cycles events, e.g.,
cpu_core/cycles/ and cpu_atom/cycles/. Each event has its own CPU mask.
If an event is opened on an unsupported CPU, the open fails. That's the
reason for the above error.
Perf should only open the cycles event on the corresponding CPU. The
commit ef91871c960e ("perf evlist: Propagate user CPU maps intersecting
core PMU maps") intersects the requested CPU map with the CPU map of the
PMU. Use the evsel's cpus to replace user_requested_cpus.
The evlist's threads are also propagated to the evsel's threads in
__perf_evlist__propagate_maps(). For a system-wide event, perf appends
a dummy event and assigns it to the evsel's threads. For a per-thread
event, the evlist's thread_map is assigned to the evsel's threads. As
in the other tools, e.g., perf record, use the evsel's threads
when opening an event.
Reported-by: Arnaldo Carvalho de Melo <acme(a)kernel.org>
Reviewed-by: Ian Rogers <irogers(a)google.com>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Cc: Hector Martin <marcan(a)marcan.st>
Cc: Marc Zyngier <maz(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Namhyung Kim <namhyung(a)kernel.org>
Closes: https://lore.kernel.org/linux-perf-users/ZXNnDrGKXbEELMXV@kernel.org/
Link: https://lore.kernel.org/r/20231214144612.1092028-1-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme(a)redhat.com>
---
tools/perf/builtin-top.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ea8c7eca5eee..cce9350177e2 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1027,8 +1027,8 @@ static int perf_top__start_counters(struct perf_top *top)
evlist__for_each_entry(evlist, counter) {
try_again:
- if (evsel__open(counter, top->evlist->core.user_requested_cpus,
- top->evlist->core.threads) < 0) {
+ if (evsel__open(counter, counter->core.cpus,
+ counter->core.threads) < 0) {
/*
* Specially handle overwrite fall back.
--
2.34.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 35f20786c481d5ced9283ff42de5c69b65e5ed13 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Sat, 27 Jan 2024 11:07:43 -0700
Subject: [PATCH] powerpc: xor_vmx: Add '-mhard-float' to CFLAGS
arch/powerpc/lib/xor_vmx.o is built with '-msoft-float' (from the main
powerpc Makefile) and '-maltivec' (from its CFLAGS), which causes an
error when building with clang after a recent change in main:
error: option '-msoft-float' cannot be specified with '-maltivec'
make[6]: *** [scripts/Makefile.build:243: arch/powerpc/lib/xor_vmx.o] Error 1
Explicitly add '-mhard-float' before '-maltivec' in xor_vmx.o's CFLAGS
to override the previous inclusion of '-msoft-float' (as the last option
wins), which matches how other areas of the kernel use '-maltivec', such
as AMDGPU.
Cc: stable(a)vger.kernel.org
Closes: https://github.com/ClangBuiltLinux/linux/issues/1986
Link: https://github.com/llvm/llvm-project/commit/4792f912b232141ecba4cbae538873b…
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://msgid.link/20240127-ppc-xor_vmx-drop-msoft-float-v1-1-f24140e81376@…
---
arch/powerpc/lib/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 6eac63e79a899..0ab65eeb93ee3 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -76,7 +76,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
-CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
+CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
# Enable <altivec.h>
CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
--
2.43.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 38b43539d64b2fa020b3b9a752a986769f87f7a6 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb(a)cybernetics.com>
Date: Thu, 29 Feb 2024 13:08:09 -0500
Subject: [PATCH] block: Fix page refcounts for unaligned buffers in
__bio_release_pages()
Fix an incorrect number of pages being released for buffers that do not
start at the beginning of a page.
Fixes: 1b151e2435fc ("block: Remove special-casing of compound pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tony Battersby <tonyb(a)cybernetics.com>
Tested-by: Greg Edwards <gedwards(a)ddn.com>
Link: https://lore.kernel.org/r/86e592a9-98d4-4cff-a646-0c0084328356@cybernetics.…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
block/bio.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 496867b51609f..a8b6919400270 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1153,7 +1153,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
bio_for_each_folio_all(fi, bio) {
struct page *page;
- size_t done = 0;
+ size_t nr_pages;
if (mark_dirty) {
folio_lock(fi.folio);
@@ -1161,10 +1161,11 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
folio_unlock(fi.folio);
}
page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
+ nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE -
+ fi.offset / PAGE_SIZE + 1;
do {
bio_release_page(bio, page++);
- done += PAGE_SIZE;
- } while (done < fi.length);
+ } while (--nr_pages != 0);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
--
2.43.0
From: Chengming Zhou <zhouchengming(a)bytedance.com>
commit e5c0ca13659e9d18f53368d651ed7e6e433ec1cf upstream.
Chuck reported [1] an IO hang problem on NFS exports that reside on SATA
devices and bisected to commit 615939a2ae73 ("blk-mq: defer to the normal
submission path for post-flush requests").
We analysed the IO hang problem and found that there are two postflush
requests waiting for each other.
The first postflush request completed the REQ_FSEQ_DATA sequence, so it went
to the REQ_FSEQ_POSTFLUSH sequence and was added to the flush pending list,
but blk_kick_flush() failed because of the second postflush request, which
is inflight and waiting in the scheduler queue.
The second postflush request, waiting in the scheduler queue, can't be
dispatched because the first postflush request hasn't released its scheduler
resource even though it has completed by itself.
Fix it by releasing the scheduler resource when the first postflush request
completes, so the second postflush request can be dispatched and completed,
which then lets blk_kick_flush() succeed.
While at it, remove the check for e->ops.finish_request, as all
schedulers set that. Reaffirm this requirement by adding a WARN_ON_ONCE()
at scheduler registration time, just like we do for insert_requests and
dispatch_request.
[1] https://lore.kernel.org/all/7A57C7AE-A51A-4254-888B-FE15CA21F9E9@oracle.com/
Link: https://lore.kernel.org/linux-block/20230819031206.2744005-1-chengming.zhou…
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202308172100.8ce4b853-oliver.sang@intel.com
Fixes: 615939a2ae73 ("blk-mq: defer to the normal submission path for post-flush requests")
Reported-by: Chuck Lever <chuck.lever(a)oracle.com>
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
Tested-by: Chuck Lever <chuck.lever(a)oracle.com>
Link: https://lore.kernel.org/r/20230813152325.3017343-1-chengming.zhou@linux.dev
[axboe: folded in incremental fix and added tags]
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
[bvanassche: changed RQF_USE_SCHED into RQF_ELVPRIV; restored the
finish_request pointer check before calling finish_request and removed
the new warning from the elevator code. This patch fixes an I/O hang
when submitting a REQ_FUA request to a request queue for a zoned block
device for which FUA has been disabled (QUEUE_FLAG_FUA is not set).]
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
block/blk-mq.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7ed6b9469f97..07610505c177 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -675,6 +675,22 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+static void blk_mq_finish_request(struct request *rq)
+{
+ struct request_queue *q = rq->q;
+
+ if ((rq->rq_flags & RQF_ELVPRIV) &&
+ q->elevator->type->ops.finish_request) {
+ q->elevator->type->ops.finish_request(rq);
+ /*
+ * For postflush request that may need to be
+ * completed twice, we should clear this flag
+ * to avoid double finish_request() on the rq.
+ */
+ rq->rq_flags &= ~RQF_ELVPRIV;
+ }
+}
+
static void __blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -701,9 +717,7 @@ void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
- if ((rq->rq_flags & RQF_ELVPRIV) &&
- q->elevator->type->ops.finish_request)
- q->elevator->type->ops.finish_request(rq);
+ blk_mq_finish_request(rq);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->disk->bdi);
@@ -1025,6 +1039,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
if (blk_mq_need_time_stamp(rq))
__blk_mq_end_request_acct(rq, ktime_get_ns());
+ blk_mq_finish_request(rq);
+
if (rq->end_io) {
rq_qos_done(rq->q, rq);
if (rq->end_io(rq, error) == RQ_END_IO_FREE)
@@ -1079,6 +1095,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
if (iob->need_ts)
__blk_mq_end_request_acct(rq, now);
+ blk_mq_finish_request(rq);
+
rq_qos_done(rq->q, rq);
/*
commit f45812cc23fb74bef62d4eb8a69fe7218f4b9f2a upstream.
Work around a quirk in a few old (2011-ish) UEFI implementations, where
a call to `GetNextVariableName` with a buffer size larger than 512 bytes
will always return EFI_INVALID_PARAMETER.
There is some lore around EFI variable names being up to 1024 bytes in
size, but this has no basis in the UEFI specification, and the upper
bounds are typically platform specific, and apply to the entire variable
(name plus payload).
Given that Linux does not permit creating files with names longer than
NAME_MAX (255) bytes, 512 bytes (== 256 UTF-16 characters) is a
reasonable limit.
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Tim Schumacher <timschumi(a)gmx.de>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
[timschumi(a)gmx.de: adjusted diff for changed context and code move]
Signed-off-by: Tim Schumacher <timschumi(a)gmx.de>
---
Please apply this patch to stable kernel 5.15, 5.10, 5.4, and 4.19
respectively. Kernel 6.1 and upwards were already handled via CC,
5.15 and below required a separate patch due to a slight refactor of
surrounding code in bbc6d2c6ef22 ("efi: vars: Switch to new wrapper
layer") and a subsequent code move in 2d82e6227ea1 ("efi: vars: Move
efivar caching layer into efivarfs").
Please note that the upper Signed-off-by tags are remnants from the
original patch, I documented my modifications below them and added
another sign-off. As far as I was able to gather, this is the expected
format for diverged stable patches.
I'm not sure on the specifics of manual stable backports, so let me
know in case anything doesn't follow the process. The linux-efi team
and list are on CC both for documentation/review purposes and in case
a new sign-off/ack of theirs is required.
---
drivers/firmware/efi/vars.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index cae590bd08f2..eaed1ddcc803 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -415,7 +415,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
void *data, bool duplicates, struct list_head *head)
{
const struct efivar_operations *ops;
- unsigned long variable_name_size = 1024;
+ unsigned long variable_name_size = 512;
efi_char16_t *variable_name;
efi_status_t status;
efi_guid_t vendor_guid;
@@ -438,12 +438,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
}
/*
- * Per EFI spec, the maximum storage allocated for both
- * the variable name and variable data is 1024 bytes.
+ * A small set of old UEFI implementations reject sizes
+ * above a certain threshold, the lowest seen in the wild
+ * is 512.
*/
do {
- variable_name_size = 1024;
+ variable_name_size = 512;
status = ops->get_next_variable(&variable_name_size,
variable_name,
@@ -491,9 +492,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
break;
case EFI_NOT_FOUND:
break;
+ case EFI_BUFFER_TOO_SMALL:
+ pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n",
+ variable_name_size);
+ status = EFI_NOT_FOUND;
+ break;
default:
- printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n",
- status);
+ pr_warn("efivars: get_next_variable: status=%lx\n", status);
status = EFI_NOT_FOUND;
break;
}
--
2.44.0
From: Yang Jihong <yangjihong1(a)huawei.com>
commit 6b959ba22d34ca793ffdb15b5715457c78e38b1a upstream.
perf_output_read_group may respond to an IPI request from another core and invoke
the __perf_install_in_context function. As a result, the hwc configuration is modified,
causing inconsistency and unexpected consequences.
Interrupts are not disabled when perf_output_read_group reads PMU counter.
In this case, IPI request may be received from other cores.
As a result, PMU configuration is modified and an error occurs when
reading PMU counter:
CPU0 CPU1
__se_sys_perf_event_open
perf_install_in_context
perf_output_read_group smp_call_function_single
for_each_sibling_event(sub, leader) { generic_exec_single
if ((sub != event) && remote_function
(sub->state == PERF_EVENT_STATE_ACTIVE)) |
<enter IPI handler: __perf_install_in_context> <----RAISE IPI-----+
__perf_install_in_context
ctx_resched
event_sched_out
armpmu_del
...
hwc->idx = -1; // event->hwc.idx is set to -1
...
<exit IPI>
sub->pmu->read(sub);
armpmu_read
armv8pmu_read_counter
armv8pmu_read_hw_counter
int idx = event->hw.idx; // idx = -1
u64 val = armv8pmu_read_evcntr(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx); // invalid counter = 30
read_pmevcntrn(counter) // undefined instruction
Signed-off-by: Yang Jihong <yangjihong1(a)huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Link: https://lkml.kernel.org/r/20220902082918.179248-1-yangjihong1@huawei.com
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com>
---
This race may also lead to observed behavior like RCU stalls, hung tasks,
and OOM, likely due to list corruption or a similar root cause.
---
kernel/events/core.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4e5a73c7db12..e79cd0fd1d2b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7119,9 +7119,16 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
+ unsigned long flags;
u64 values[6];
int n = 0;
+ /*
+ * Disabling interrupts avoids all counter scheduling
+ * (context switches, timer based rotation and IPIs).
+ */
+ local_irq_save(flags);
+
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -7157,6 +7164,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
__output_copy(handle, values, n * sizeof(u64));
}
+
+ local_irq_restore(flags);
}
#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
--
2.34.1
commit 38b43539d64b2fa020b3b9a752a986769f87f7a6 upstream.
Fix an incorrect number of pages being released for buffers that do not
start at the beginning of a page.
[ Tony: backport to v6.1 by replacing bio_release_page() loop with
folio_put_refs() as commits fd363244e883 and e4cc64657bec are not
present. ]
Fixes: 1b151e2435fc ("block: Remove special-casing of compound pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tony Battersby <tonyb(a)cybernetics.com>
Tested-by: Greg Edwards <gedwards(a)ddn.com>
Link: https://lore.kernel.org/r/86e592a9-98d4-4cff-a646-0c0084328356@cybernetics.…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
This is the backport for 6.1.
The upstream patch should apply cleanly to 6.6, 6.7, and 6.8.
This patch does not need to be backported to 5.15, 5.10, 5.4, or 4.19,
since the backport of 1b151e2435fc to those kernels did not include
the bug fixed by this patch.
block/bio.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 74c2818c7ec9..3318e0022fdf 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1112,19 +1112,16 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
struct folio_iter fi;
bio_for_each_folio_all(fi, bio) {
- struct page *page;
- size_t done = 0;
+ size_t nr_pages;
if (mark_dirty) {
folio_lock(fi.folio);
folio_mark_dirty(fi.folio);
folio_unlock(fi.folio);
}
- page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
- do {
- folio_put(fi.folio);
- done += PAGE_SIZE;
- } while (done < fi.length);
+ nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE -
+ fi.offset / PAGE_SIZE + 1;
+ folio_put_refs(fi.folio, nr_pages);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
base-commit: 61adba85cc40287232a539e607164f273260e0fe
--
2.25.1
This is a backport of a recently upstreamed fix for the XPS 9530 sound issue.
Both patches apply cleanly to 6.8.y, and could also be cherry-picked from upstream.
Ideally they should be applied to all branches where upstream commit
d110858a6925827609d11db8513d76750483ec06 exists (6.8.y) or was backported
(6.7.y), as that commit adds initial yet incomplete support for this laptop. Patches
for 6.7.y require modification and will be submitted separately.
Signed-off-by: Aleksandrs Vinarskis <alex.vinarskis(a)gmail.com>
Aleksandrs Vinarskis (2):
mfd: intel-lpss: Switch to generalized quirk table
mfd: intel-lpss: Introduce QUIRK_CLOCK_DIVIDER_UNITY for XPS 9530
drivers/mfd/intel-lpss-pci.c | 28 ++++++++++++++++++++--------
drivers/mfd/intel-lpss.c | 9 ++++++++-
drivers/mfd/intel-lpss.h | 14 +++++++++++++-
3 files changed, 41 insertions(+), 10 deletions(-)
--
2.40.1
v2:
- This includes the backport of recently upstreamed mitigation of a CPU
vulnerability Register File Data Sampling (RFDS) (CVE-2023-28746).
This is because RFDS has a dependency on "Delay VERW" series, and it
is convenient to merge them together.
- rebased to v5.10.212
v1: https://lore.kernel.org/r/20240305-delay-verw-backport-5-10-y-v1-0-50bf452e…
This is the backport of a recently upstreamed series that moves VERW
execution to a later point in the exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW execution
to end up in MDS-affected CPU buffers. Moving VERW closer to the ring
transition reduces the attack surface.
- The series includes a dependency commit f87bc8dc7a7c ("x86/asm: Add
_ASM_RIP() macro for x86-64 (%rip) suffix").
- Patch 2 includes a change that adds runtime patching for jmp (instead
of verw in original series) due to lack of rip-relative relocation
support in kernels <v6.5.
- Fixed warning:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction.
- Resolved merge conflicts in:
syscall_return_via_sysret in entry_64.S
swapgs_restore_regs_and_return_to_usermode in entry_64.S.
__vmx_vcpu_run in vmenter.S.
vmx_update_fb_clear_dis in vmx.c.
- Boot tested with KASLR and KPTI enabled.
- Verified VERW being executed with mitigation ON.
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
H. Peter Anvin (Intel) (1):
x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix
Pawan Gupta (9):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
x86/mmio: Disable KVM mitigation when X86_FEATURE_CLEAR_CPU_BUF is set
Documentation/hw-vuln: Add documentation for RFDS
x86/rfds: Mitigate Register File Data Sampling (RFDS)
KVM/x86: Export RFDS_NO and RFDS_CLEAR to guests
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/ABI/testing/sysfs-devices-system-cpu | 1 +
Documentation/admin-guide/hw-vuln/index.rst | 1 +
.../admin-guide/hw-vuln/reg-file-data-sampling.rst | 104 ++++++++++++++++++++
Documentation/admin-guide/kernel-parameters.txt | 21 ++++
Documentation/x86/mds.rst | 38 +++++---
arch/x86/Kconfig | 11 +++
arch/x86/entry/entry.S | 23 +++++
arch/x86/entry/entry_32.S | 3 +
arch/x86/entry/entry_64.S | 10 ++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/asm.h | 5 +
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/irqflags.h | 1 +
arch/x86/include/asm/msr-index.h | 8 ++
arch/x86/include/asm/nospec-branch.h | 27 +++---
arch/x86/kernel/cpu/bugs.c | 107 ++++++++++++++++++---
arch/x86/kernel/cpu/common.c | 38 +++++++-
arch/x86/kernel/nmi.c | 3 -
arch/x86/kvm/vmx/run_flags.h | 7 +-
arch/x86/kvm/vmx/vmenter.S | 9 +-
arch/x86/kvm/vmx/vmx.c | 12 ++-
arch/x86/kvm/x86.c | 5 +-
drivers/base/cpu.c | 8 ++
include/linux/cpu.h | 2 +
25 files changed, 394 insertions(+), 54 deletions(-)
---
base-commit: 7cfcd0ed929b28ff6942c2bee15816d08d6f7266
change-id: 20240304-delay-verw-backport-5-10-y-00aad69432f4
Best regards,
--
Thanks,
Pawan
Here are the recently merged MDS improvement patches adapted to the latest stable tree.
I've only compile-tested them, but since I have also done similar backports for
older kernels I'm sure they should work.
The main difference is in the definition of the CLEAR_CPU_BUFFERS macro: since
5.4 doesn't contain the alternative relocation handling logic, the verw
instruction is moved out of the alternative definition and instead we have a jump which
skips the verw instruction there. That way the relocation will be handled by the
toolchain rather than the kernel.
Since I don't know if I will have time to work on the other branches, this patchset
can be used as a basis for the rest of the stable kernels. The main difference would be
which bit is used for CLEAR_CPU_BUFFERS. For kernel 6.6 the 2nd patch can be used verbatim
from upstream (unlike this modified version) since the alternative relocation
did land in v6.5. However, even if used as-is from this patchset it's not a problem.
V2:
Added upstream commit id to individual patches.
H. Peter Anvin (Intel) (1):
x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix
Pawan Gupta (5):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/x86/mds.rst | 38 ++++++++++++++++++++--------
arch/x86/entry/Makefile | 2 +-
arch/x86/entry/common.c | 2 --
arch/x86/entry/entry.S | 23 +++++++++++++++++
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 10 ++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/asm.h | 6 ++++-
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/irqflags.h | 1 +
arch/x86/include/asm/nospec-branch.h | 26 ++++++++++---------
arch/x86/kernel/cpu/bugs.c | 15 +++++------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++---
arch/x86/kvm/vmx/vmx.c | 12 ++++++---
16 files changed, 111 insertions(+), 49 deletions(-)
create mode 100644 arch/x86/entry/entry.S
--
2.34.1
v2:
- This includes the backport of recently upstreamed mitigation of a CPU
vulnerability Register File Data Sampling (RFDS) (CVE-2023-28746).
This is because RFDS has a dependency on "Delay VERW" series, and it
is convenient to merge them together.
- rebased to v5.15.151
v1: https://lore.kernel.org/r/20240304-delay-verw-backport-5-15-y-v1-0-fd02afc0…
This is the backport of a recently upstreamed series that moves VERW
execution to a later point in the exit-to-user path. This is needed because
in some cases it may be possible for data accessed after VERW execution
to end up in MDS-affected CPU buffers. Moving VERW closer to the ring
transition reduces the attack surface.
- The series includes a dependency commit f87bc8dc7a7c ("x86/asm: Add
_ASM_RIP() macro for x86-64 (%rip) suffix").
- Patch 2 includes a change that adds runtime patching for jmp (instead
of verw in original series) due to lack of rip-relative relocation
support in kernels <v6.5.
- Fixed warning:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction.
- Resolved merge conflicts in:
swapgs_restore_regs_and_return_to_usermode in entry_64.S.
__vmx_vcpu_run in vmenter.S.
vmx_update_fb_clear_dis in vmx.c.
- Boot tested with KASLR and KPTI enabled.
- Verified VERW being executed with mitigation ON, and not being
executed with mitigation turned OFF.
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
---
H. Peter Anvin (Intel) (1):
x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix
Pawan Gupta (9):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
x86/mmio: Disable KVM mitigation when X86_FEATURE_CLEAR_CPU_BUF is set
Documentation/hw-vuln: Add documentation for RFDS
x86/rfds: Mitigate Register File Data Sampling (RFDS)
KVM/x86: Export RFDS_NO and RFDS_CLEAR to guests
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/ABI/testing/sysfs-devices-system-cpu | 1 +
Documentation/admin-guide/hw-vuln/index.rst | 1 +
.../admin-guide/hw-vuln/reg-file-data-sampling.rst | 104 ++++++++++++++++++++
Documentation/admin-guide/kernel-parameters.txt | 21 ++++
Documentation/x86/mds.rst | 38 +++++---
arch/x86/Kconfig | 11 +++
arch/x86/entry/entry.S | 23 +++++
arch/x86/entry/entry_32.S | 3 +
arch/x86/entry/entry_64.S | 11 +++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/asm.h | 5 +
arch/x86/include/asm/cpufeatures.h | 3 +-
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/msr-index.h | 8 ++
arch/x86/include/asm/nospec-branch.h | 27 +++---
arch/x86/kernel/cpu/bugs.c | 107 ++++++++++++++++++---
arch/x86/kernel/cpu/common.c | 38 +++++++-
arch/x86/kernel/nmi.c | 3 -
arch/x86/kvm/vmx/run_flags.h | 7 +-
arch/x86/kvm/vmx/vmenter.S | 9 +-
arch/x86/kvm/vmx/vmx.c | 12 ++-
arch/x86/kvm/x86.c | 5 +-
drivers/base/cpu.c | 8 ++
include/linux/cpu.h | 2 +
24 files changed, 394 insertions(+), 55 deletions(-)
---
base-commit: 57436264850706f50887bbb2148ee2cc797c9485
change-id: 20240304-delay-verw-backport-5-15-y-e16f07fbb71e
Best regards,
--
Thanks,
Pawan
From: Josh Poimboeuf <jpoimboe(a)kernel.org>
[ Upstream commit b388e57d4628eb22782bdad4cd5b83ca87a1b7c9 ]
For CONFIG_RETHUNK kernels, objtool annotates all the function return
sites so they can be patched during boot. By design, after
apply_returns() is called, all tail-calls to the compiler-generated
default return thunk (__x86_return_thunk) should be patched out and
replaced with whatever's needed for any mitigations (or lack thereof).
The commit
4461438a8405 ("x86/retpoline: Ensure default return thunk isn't used at runtime")
adds a runtime check and a WARN_ONCE() if the default return thunk ever
gets executed after alternatives have been applied. This warning is
a sanity check to make sure objtool and apply_returns() are doing their
job.
As Nathan reported, that check found something:
Unpatched return thunk in use. This should not happen!
WARNING: CPU: 0 PID: 1 at arch/x86/kernel/cpu/bugs.c:2856 __warn_thunk+0x27/0x40
RIP: 0010:__warn_thunk+0x27/0x40
Call Trace:
<TASK>
? show_regs
? __warn
? __warn_thunk
? report_bug
? console_unlock
? handle_bug
? exc_invalid_op
? asm_exc_invalid_op
? ia32_binfmt_init
? __warn_thunk
warn_thunk_thunk
do_one_initcall
kernel_init_freeable
? __pfx_kernel_init
kernel_init
ret_from_fork
? __pfx_kernel_init
ret_from_fork_asm
</TASK>
Boris debugged to find that the unpatched return site was in
init_vdso_image_64(), and its translation unit wasn't being analyzed by
objtool, so it never got annotated. So it got ignored by
apply_returns().
This is only a minor issue, as this function is only called during boot.
Still, objtool needs full visibility to the kernel. Fix it by enabling
objtool on vdso-image-{32,64}.o.
Note this problem can only be seen with !CONFIG_X86_KERNEL_IBT, as that
requires objtool to run individually on all translation units rather than on
vmlinux.o.
[ bp: Massage commit message. ]
Reported-by: Nathan Chancellor <nathan(a)kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe(a)kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Link: https://lore.kernel.org/r/20240215032049.GA3944823@dev-arch.thelio-3990X
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/x86/entry/vdso/Makefile | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index b1b8dd1608f7e..4ee59121b9053 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -34,8 +34,12 @@ obj-y += vma.o extable.o
KASAN_SANITIZE_vma.o := y
UBSAN_SANITIZE_vma.o := y
KCSAN_SANITIZE_vma.o := y
-OBJECT_FILES_NON_STANDARD_vma.o := n
-OBJECT_FILES_NON_STANDARD_extable.o := n
+
+OBJECT_FILES_NON_STANDARD_extable.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n
+OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
+OBJECT_FILES_NON_STANDARD_vma.o := n
# vDSO images to build
vdso_img-$(VDSO64-y) += 64
@@ -43,7 +47,6 @@ vdso_img-$(VDSOX32-y) += x32
vdso_img-$(VDSO32-y) += 32
obj-$(VDSO32-y) += vdso32-setup.o
-OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
--
2.43.0
Hi Stable Team,
In 5.15, unmapping large kvm vms on arm64 can generate softlockups. My team has
been hitting this when tearing down VMs > 100Gb in size.
Oliver fixed this with the attached patches. They've been in mainline since
6.1.
I tested on 5.15.150 with these patches applied. When they're present,
both the dirty_log_perf_test detailed in the second patch, and
kvm_page_table_test no longer generate softlockups when unmapping VMs
with large memory configurations.
Would you please consider these patches for inclusion in an upcoming 5.15
release?
Thanks,
-K
Oliver Upton (2):
KVM: arm64: Work out supported block level at compile time
KVM: arm64: Limit stage2_apply_range() batch size to largest block
arch/arm64/include/asm/kvm_pgtable.h | 18 +++++++++++++-----
arch/arm64/include/asm/stage2_pgtable.h | 20 --------------------
arch/arm64/kvm/mmu.c | 9 ++++++++-
3 files changed, 21 insertions(+), 26 deletions(-)
--
2.25.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 672448ccf9b6a676f96f9352cbf91f4d35f4084a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032746-stilt-vaporizer-fb22@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
672448ccf9b6 ("tty: serial: imx: Fix broken RS485")
ca530cfa968c ("serial: imx: Add support for RS485 RX_DURING_TX output GPIO")
79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 672448ccf9b6a676f96f9352cbf91f4d35f4084a Mon Sep 17 00:00:00 2001
From: Rickard x Andersson <rickaran(a)axis.com>
Date: Wed, 21 Feb 2024 12:53:04 +0100
Subject: [PATCH] tty: serial: imx: Fix broken RS485
When about to transmit the function imx_uart_start_tx is called and in
some RS485 configurations this function will call imx_uart_stop_rx. The
problem is that imx_uart_stop_rx will enable loopback in order to
release the RS485 bus, but when loopback is enabled transmitted data
will just be looped to RX.
This patch fixes the above problem by not enabling loopback when about
to transmit.
This driver now works well when used for RS485 half duplex master
configurations.
Fixes: 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Rickard x Andersson <rickaran(a)axis.com>
Tested-by: Christoph Niedermaier <cniedermaier(a)dh-electronics.com>
Link: https://lore.kernel.org/r/20240221115304.509811-1-rickaran@axis.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 4aa72d5aeafb..e14813250616 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -462,8 +462,7 @@ static void imx_uart_stop_tx(struct uart_port *port)
}
}
-/* called with port.lock taken and irqs off */
-static void imx_uart_stop_rx(struct uart_port *port)
+static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback)
{
struct imx_port *sport = (struct imx_port *)port;
u32 ucr1, ucr2, ucr4, uts;
@@ -485,7 +484,7 @@ static void imx_uart_stop_rx(struct uart_port *port)
/* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
if (port->rs485.flags & SER_RS485_ENABLED &&
port->rs485.flags & SER_RS485_RTS_ON_SEND &&
- sport->have_rtscts && !sport->have_rtsgpio) {
+ sport->have_rtscts && !sport->have_rtsgpio && loopback) {
uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
uts |= UTS_LOOP;
imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
@@ -497,6 +496,16 @@ static void imx_uart_stop_rx(struct uart_port *port)
imx_uart_writel(sport, ucr2, UCR2);
}
+/* called with port.lock taken and irqs off */
+static void imx_uart_stop_rx(struct uart_port *port)
+{
+ /*
+ * Stop RX and enable loopback in order to make sure RS485 bus
+ * is not blocked. Se comment in imx_uart_probe().
+ */
+ imx_uart_stop_rx_with_loopback_ctrl(port, true);
+}
+
/* called with port.lock taken and irqs off */
static void imx_uart_enable_ms(struct uart_port *port)
{
@@ -682,9 +691,14 @@ static void imx_uart_start_tx(struct uart_port *port)
imx_uart_rts_inactive(sport, &ucr2);
imx_uart_writel(sport, ucr2, UCR2);
+ /*
+ * Since we are about to transmit we can not stop RX
+ * with loopback enabled because that will make our
+ * transmitted data being just looped to RX.
+ */
if (!(port->rs485.flags & SER_RS485_RX_DURING_TX) &&
!port->rs485_rx_during_tx_gpio)
- imx_uart_stop_rx(port);
+ imx_uart_stop_rx_with_loopback_ctrl(port, false);
sport->tx_state = WAIT_AFTER_RTS;
From: Goldwyn Rodrigues <rgoldwyn(a)suse.com>
commit c853a5783ebe123847886d432354931874367292 upstream.
Instead of using kmalloc() to allocate btrfs_ioctl_defrag_range_args,
allocate btrfs_ioctl_defrag_range_args on stack, the size is reasonably
small and ioctls are called in process context.
sizeof(btrfs_ioctl_defrag_range_args) = 48
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
CC: stable(a)vger.kernel.org # 4.14+
[ This patch is needed to fix a memory leak of "range" that was
introduced when commit 173431b274a9 ("btrfs: defrag: reject unknown
flags of btrfs_ioctl_defrag_range_args") was backported to kernels
lacking this patch. Now with these two patches applied in reverse order,
range->flags needed to change back to range.flags.
This bug was discovered and resolved using Coverity Static Analysis
Security Testing (SAST) by Synopsys, Inc.]
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
---
fs/btrfs/ioctl.c | 25 ++++++++-----------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 049b837934e5..ab8ed187746e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3148,7 +3148,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_ioctl_defrag_range_args *range;
+ struct btrfs_ioctl_defrag_range_args range = {0};
int ret;
ret = mnt_want_write_file(file);
@@ -3180,37 +3180,28 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (!range) {
- ret = -ENOMEM;
- goto out;
- }
-
if (argp) {
- if (copy_from_user(range, argp,
- sizeof(*range))) {
+ if (copy_from_user(&range, argp, sizeof(range))) {
ret = -EFAULT;
- kfree(range);
goto out;
}
- if (range->flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
+ if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
ret = -EOPNOTSUPP;
goto out;
}
/* compression requires us to start the IO */
- if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
- range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
- range->extent_thresh = (u32)-1;
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+ range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+ range.extent_thresh = (u32)-1;
}
} else {
/* the rest are all set to zero by kzalloc */
- range->len = (u64)-1;
+ range.len = (u64)-1;
}
ret = btrfs_defrag_file(file_inode(file), file,
- range, BTRFS_OLDEST_GENERATION, 0);
+ &range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
- kfree(range);
break;
default:
ret = -EINVAL;
--
2.40.1
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
The patch below does not apply to the 6.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From d565fffa68560ac540bf3d62cc79719da50d5e7a Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain(a)oracle.com>
Date: Tue, 13 Feb 2024 09:13:56 +0800
Subject: [PATCH] btrfs: do not skip re-registration for the mounted device
There are reports that since version 6.7 update-grub fails to find the
device of the root on systems without initrd and on a single device.
This looks like the device name changed in the output of
/proc/self/mountinfo:
6.5-rc5 working
18 1 0:16 / / rw,noatime - btrfs /dev/sda8 ...
6.7 not working:
17 1 0:15 / / rw,noatime - btrfs /dev/root ...
and "update-grub" shows this error:
/usr/sbin/grub-probe: error: cannot find a device for / (is /dev mounted?)
This looks like it's related to the device name, but grub-probe
recognizes the "/dev/root" path and tries to find the underlying device.
However there's a special case for some filesystems, for btrfs in
particular.
The generic root device detection heuristic is not done and it all
relies on reading the device infos by a btrfs specific ioctl. This ioctl
returns the device name as it was saved at the time of device scan (in
this case it's /dev/root).
The change in 6.7 for temp_fsid to allow several single device
filesystems to exist with the same fsid (and transparently generate a new
UUID at mount time) was to skip caching/registering such devices.
This also skipped mounted devices. One step of scanning is to check if
the device name hasn't changed, and if yes then update the cached value.
This broke the grub-probe as it always read the device /dev/root and
couldn't find it in the system. A temporary workaround is to create a
symlink but this does not survive reboot.
The right fix is to allow updating the device path of a mounted
filesystem even if this is a single device one.
In the fix, check if the device's major:minor number matches with the
cached device. If they do, then we can allow the scan to happen so that
device_list_add() can take care of updating the device path. The file
descriptor remains unchanged.
This does not affect the temp_fsid feature, the UUID of the mounted
filesystem remains the same and the matching is based on device major:minor
which is unique per mounted filesystem.
This covers the path when the device (that exists for all mounted
devices) name changes, updating /dev/root to /dev/sdx. Any other single
device with filesystem and is not mounted is still skipped.
Note that if a system is booted and initial mount is done on the
/dev/root device, this will be the cached name of the device. Only after
the command "btrfs device scan" it will change as it triggers the
rename.
The fix was verified by users whose systems were affected.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=218353
Link: https://lore.kernel.org/lkml/CAKLYgeJ1tUuqLcsquwuFqjDXPSJpEiokrWK2gisPKDZLs…
Fixes: bc27d6f0aa0e ("btrfs: scan but don't register device on single device filesystem")
CC: stable(a)vger.kernel.org # 6.7+
Tested-by: Alex Romosan <aromosan(a)gmail.com>
Tested-by: CHECK_1234543212345(a)protonmail.com
Signed-off-by: Anand Jain <anand.jain(a)oracle.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/volumes.c | 58 +++++++++++++++++++++++++++++++++++++---------
1 file changed, 47 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a2d07fa3cfdff..1dc1f1946ae0e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1303,6 +1303,47 @@ int btrfs_forget_devices(dev_t devt)
return ret;
}
+static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
+ const char *path, dev_t devt,
+ bool mount_arg_dev)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * Do not skip device registration for mounted devices with matching
+ * maj:min but different paths. Booting without initrd relies on
+ * /dev/root initially, later replaced with the actual root device.
+ * A successful scan ensures grub2-probe selects the correct device.
+ */
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+ struct btrfs_device *device;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+
+ if (!fs_devices->opened) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+ continue;
+ }
+
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (device->bdev && (device->bdev->bd_dev == devt) &&
+ strcmp(device->name->str, path) != 0) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ /* Do not skip registration. */
+ return false;
+ }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
+
+ if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
+ !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING))
+ return true;
+
+ return false;
+}
+
/*
* Look for a btrfs signature on a device. This may be called out of the mount path
* and we are not allowed to call set_blocksize during the scan. The superblock
@@ -1320,6 +1361,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
struct btrfs_device *device = NULL;
struct file *bdev_file;
u64 bytenr, bytenr_orig;
+ dev_t devt;
int ret;
lockdep_assert_held(&uuid_mutex);
@@ -1359,19 +1401,13 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
goto error_bdev_put;
}
- if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
- !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) {
- dev_t devt;
+ devt = file_bdev(bdev_file)->bd_dev;
+ if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) {
+ pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
+ path, MAJOR(devt), MINOR(devt));
- ret = lookup_bdev(path, &devt);
- if (ret)
- btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
- path, ret);
- else
- btrfs_free_stale_devices(devt, NULL);
+ btrfs_free_stale_devices(devt, NULL);
- pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
- path, MAJOR(devt), MINOR(devt));
device = NULL;
goto free_disk_super;
}
--
2.43.0
With the addition of new MAC blocks like CN10K RPM and CN10KB
RPM_USX, LMACs are noncontiguous. Although LMAC validation checks exist
in most functions, they are missing in a few.
The problem has been fixed by the following patch which can be
cleanly applied to the 6.1.y branch.
From: Fabio Estevam <festevam(a)denx.de>
Since commit 63b0cd30b78e ("media: ov2680: Add bus-cfg / endpoint
property verification") even when the correct 'link-frequencies'
property is passed in the devicetree, the driver fails to probe:
ov2680 1-0036: probe with driver ov2680 failed with error -22
The reason is that the variable 'ret' may contain the -EINVAL value
from a previous assignment:
ret = fwnode_property_read_u32(dev_fwnode(dev), "clock-frequency",
&rate);
Fix the problem by clearing 'ret' on the successful path.
Tested on imx7s-warp board with the following devicetree:
port {
ov2680_to_mipi: endpoint {
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
link-frequencies = /bits/ 64 <330000000>;
};
};
Cc: stable(a)vger.kernel.org
Fixes: 63b0cd30b78e ("media: ov2680: Add bus-cfg / endpoint property verification")
Suggested-by: Hans de Goede <hdegoede(a)redhat.com>
Signed-off-by: Fabio Estevam <festevam(a)denx.de>
Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes since v2:
- Collected Hans' Reviewed-by tag.
drivers/media/i2c/ov2680.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/media/i2c/ov2680.c b/drivers/media/i2c/ov2680.c
index 39d321e2b7f9..3e3b7c2b492c 100644
--- a/drivers/media/i2c/ov2680.c
+++ b/drivers/media/i2c/ov2680.c
@@ -1135,6 +1135,7 @@ static int ov2680_parse_dt(struct ov2680_dev *sensor)
goto out_free_bus_cfg;
}
+ ret = 0;
out_free_bus_cfg:
v4l2_fwnode_endpoint_free(&bus_cfg);
return ret;
--
2.34.1
From: Charan Teja Kalla <quic_charante(a)quicinc.com>
This fix is applicable for LTS kernel, 6.1.y. In latest kernels, this race
issue is fixed by the patch series [1] and [2]. The right thing to do here
would have been propagating these changes from latest kernel to the stable
branch, 6.1.y. However, these changes seem too intrusive to be picked for
stable branches. Hence, the fix proposed can be taken as an alternative
instead of backporting the patch series.
[1] https://lore.kernel.org/all/0-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidi…
[2] https://lore.kernel.org/all/0-v5-1b99ae392328+44574-iommu_err_unwind_jgg@nv…
Issue:
A race condition is observed when arm_smmu_device_probe and
modprobe of client devices happens in parallel. This results
in the allocation of a new default domain for the iommu group
even though it was previously allocated and the respective iova
domain(iovad) was initialized. However, for this newly allocated
default domain, iovad will not be initialized. As a result, for
devices requesting dma allocations, this uninitialized iovad will
be used, thereby causing NULL pointer dereference issue.
Flow:
- During arm_smmu_device_probe, bus_iommu_probe() will be called
as part of iommu_device_register(). This results in the device probe,
__iommu_probe_device().
- When the modprobe of the client device happens in parallel, it
sets up the DMA configuration for the device using of_dma_configure_id(),
which in turn calls iommu_probe_device(). Later, default domain is
allocated and attached using iommu_alloc_default_domain() and
__iommu_attach_device() respectively. It then ends up initializing a
mapping domain(IOVA domain) and rcaches for the device via
arch_setup_dma_ops()->iommu_setup_dma_ops().
- Now, in the bus_iommu_probe() path, it again tries to allocate
a default domain via probe_alloc_default_domain(). This results in
allocating a new default domain(along with IOVA domain) via
__iommu_domain_alloc(). However, this newly allocated IOVA domain
will not be initialized.
- Now, when the same client device tries dma allocations via
iommu_dma_alloc(), it ends up accessing the rcaches of the newly
allocated IOVA domain, which is not initialized. This results
in a NULL pointer dereference.
Fix this issue by adding a check in probe_alloc_default_domain()
to see if the iommu_group already has a default domain allocated
and initialized.
Signed-off-by: Charan Teja Kalla <quic_charante(a)quicinc.com>
Co-developed-by: Nikhil V <quic_nprakash(a)quicinc.com>
Signed-off-by: Nikhil V <quic_nprakash(a)quicinc.com>
---
drivers/iommu/iommu.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8b3897239477..83736824f17d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1741,6 +1741,9 @@ static void probe_alloc_default_domain(struct bus_type *bus,
{
struct __group_domain_type gtype;
+ if (group->default_domain)
+ return;
+
memset(&gtype, 0, sizeof(gtype));
/* Ask for default domain requirements of all devices in the group */
--
2.17.1
From: Andrew Panyakin <apanyaki(a)amazon.com>
From: Maximilian Heyne <mheyne(a)amazon.de>
Commit fa765c4b4aed2d64266b694520ecb025c862c5a9 upstream
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taken. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here that event channel e refers to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated its
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
[apanyaki: backport to v5.4-stable]
Signed-off-by: Andrew Paniakin <apanyaki(a)amazon.com>
---
Compared to the upstream patch, this one does not have the close_evtchn flag
because there is no need to handle static event channels.
This feature was added only in 58f6259b7a08f ("xen/evtchn: Introduce new
IOCTL to bind static evtchn")
drivers/xen/events/events_base.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 91806dc1236d..f8554d9a9f28 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -825,8 +825,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -869,8 +869,6 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
- xen_evtchn_close(evtchn);
-
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -883,6 +881,7 @@ static void __unbind_from_irq(unsigned int irq)
}
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
xen_free_irq(irq);
--
2.40.1
Since kernel version 5.4.217 LTS, there has been an issue with the kernel live patching feature becoming unavailable.
When compiling the sample code for kernel live patching, the following message is displayed when enabled:
livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
Reproduction steps:
1. git checkout v5.4.269 -b v5.4.269
2.make defconfig
3. Set CONFIG_LIVEPATCH=y, CONFIG_SAMPLE_LIVEPATCH=m
4. make -j bzImage
5. make samples/livepatch/livepatch-sample.ko
6. qemu-system-x86_64 -kernel arch/x86_64/boot/bzImage -nographic -append "console=ttyS0" -initrd initrd.img -m 1024M
7. insmod livepatch-sample.ko
Kernel live patch cannot complete successfully.
After some debugging, the immediate cause of the patch failure is an error in stack checking. The logs are as follows:
[ 340.974853] livepatch: klp_check_stack: kworker/u256:0:23486 has an unreliable stack
[ 340.974858] livepatch: klp_check_stack: kworker/u256:1:23487 has an unreliable stack
[ 340.974863] livepatch: klp_check_stack: kworker/u256:2:23488 has an unreliable stack
[ 340.974868] livepatch: klp_check_stack: kworker/u256:5:23489 has an unreliable stack
[ 340.974872] livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
......
BTW, if you use the v5.4.217 tag for testing, make sure to set CONFIG_RETPOLINE = y and CONFIG_LIVEPATCH = y; the other steps are the same as for v5.4.269.
After investigation, the problem is strongly related to the commit 8afd1c7da2b0 ("x86/speculation: Change FILL_RETURN_BUFFER to work with objtool"),
which causes incorrect ORC entries to be generated; reverting this commit on v5.4.217 makes kernel livepatch work normally.
It is a back-ported upstream patch with some code adjustments; in the git log, the author also mentioned the lack of intra-function call validation support.
Based on commit 24489321d0cd5339f9c2da01eb8bf2bccbac7956 (Linux 5.4.273), this patchset adds stack validation support for intra-function calls,
allowing the kernel live patching feature to work correctly.
v3 - v2
- fix the compile error in arch/x86/kvm/svm.c; the error message is: ../arch/x86/include/asm/nospec-branch.h: 313: Error: no such instruction: 'unwind_hint_empty'
v2 - v1
- add the tag "Cc: stable(a)vger.kernel.org" in the sign-off area for patch x86/speculation: Support intra-function call
- add my own Signed-off to all patches
s
Alexandre Chartre (2):
objtool: is_fentry_call() crashes if call has no destination
objtool: Add support for intra-function calls
Rui Qi (1):
x86/speculation: Support intra-function call validation
arch/x86/include/asm/nospec-branch.h | 7 ++
arch/x86/include/asm/unwind_hints.h | 2 +-
include/linux/frame.h | 11 ++++
.../Documentation/stack-validation.txt | 8 +++
tools/objtool/arch/x86/decode.c | 6 ++
tools/objtool/check.c | 64 +++++++++++++++++--
6 files changed, 92 insertions(+), 6 deletions(-)
--
2.20.1
Oops. + Cc stable(a)vger.kernel.org
On 2024/03/18 10:52, GONG, Ruiqi wrote:
> From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
>
> commit dbf4ab821804df071c8b566d9813083125e6d97b upstream.
>
> The SC16IS7XX IC supports a burst mode to access the FIFOs where the
> initial register address is sent ($00), followed by all the FIFO data
> without having to resend the register address each time. In this mode, the
> IC doesn't increment the register address for each R/W byte.
>
> The regmap_raw_read() and regmap_raw_write() are functions which can
> perform IO over multiple registers. They are currently used to read/write
> from/to the FIFO, and although they operate correctly in this burst mode on
> the SPI bus, they would corrupt the regmap cache if it was not disabled
> manually. The reason is that when the R/W size is more than 1 byte, these
> functions assume that the register address is incremented and handle the
> cache accordingly.
>
> Convert FIFO R/W functions to use the regmap _noinc_ versions in order to
> remove the manual cache control which was a workaround when using the
> _raw_ versions. FIFO registers are properly declared as volatile so
> cache will not be used/updated for FIFO accesses.
>
> Fixes: dfeae619d781 ("serial: sc16is7xx")
> Cc: <stable(a)vger.kernel.org>
> Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
> Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> Cc: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
> Signed-off-by: GONG, Ruiqi <gongruiqi1(a)huawei.com>
> ---
>
> The mainline commit dbf4ab821804 ("serial: sc16is7xx: convert from _raw_
> to _noinc_ regmap functions for FIFO") by Hugo has been assigned to be
> CVE-2023-52488, but for stable branches lower than 6.1 there's no
> official backport.
>
> I made up this backport patch for 5.10, and its correctness has been
> confirmed in previous communication with Hugo. Let's publicize it and
> merge it into upstream.
>
> drivers/tty/serial/sc16is7xx.c | 15 +++++++++------
> 1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
> index 31e0c5c3ddea..29f05db0d49b 100644
> --- a/drivers/tty/serial/sc16is7xx.c
> +++ b/drivers/tty/serial/sc16is7xx.c
> @@ -376,9 +376,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen)
> const u8 line = sc16is7xx_line(port);
> u8 addr = (SC16IS7XX_RHR_REG << SC16IS7XX_REG_SHIFT) | line;
>
> - regcache_cache_bypass(s->regmap, true);
> - regmap_raw_read(s->regmap, addr, s->buf, rxlen);
> - regcache_cache_bypass(s->regmap, false);
> + regmap_noinc_read(s->regmap, addr, s->buf, rxlen);
> }
>
> static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
> @@ -394,9 +392,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send)
> if (unlikely(!to_send))
> return;
>
> - regcache_cache_bypass(s->regmap, true);
> - regmap_raw_write(s->regmap, addr, s->buf, to_send);
> - regcache_cache_bypass(s->regmap, false);
> + regmap_noinc_write(s->regmap, addr, s->buf, to_send);
> }
>
> static void sc16is7xx_port_update(struct uart_port *port, u8 reg,
> @@ -489,6 +485,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg)
> return false;
> }
>
> +static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
> +{
> + return reg == SC16IS7XX_RHR_REG;
> +}
> +
> static int sc16is7xx_set_baud(struct uart_port *port, int baud)
> {
> struct sc16is7xx_port *s = dev_get_drvdata(port->dev);
> @@ -1439,6 +1440,8 @@ static struct regmap_config regcfg = {
> .cache_type = REGCACHE_RBTREE,
> .volatile_reg = sc16is7xx_regmap_volatile,
> .precious_reg = sc16is7xx_regmap_precious,
> + .writeable_noinc_reg = sc16is7xx_regmap_noinc,
> + .readable_noinc_reg = sc16is7xx_regmap_noinc,
> };
>
> #ifdef CONFIG_SERIAL_SC16IS7XX_SPI
From: Bitterblue Smith <rtl8821cerfe2(a)gmail.com>
[ Upstream commit 605d7c0b05eecb985273b1647070497142c470d3 ]
Clear bit 8 of REG_SYS_STATUS1 after MAC power on.
Without this, some RTL8821CU and RTL8811CU cannot connect to any
network:
Feb 19 13:33:11 ideapad2 kernel: wlp3s0f3u2: send auth to
90:55:de:__:__:__ (try 1/3)
Feb 19 13:33:13 ideapad2 kernel: wlp3s0f3u2: send auth to
90:55:de:__:__:__ (try 2/3)
Feb 19 13:33:14 ideapad2 kernel: wlp3s0f3u2: send auth to
90:55:de:__:__:__ (try 3/3)
Feb 19 13:33:15 ideapad2 kernel: wlp3s0f3u2: authentication with
90:55:de:__:__:__ timed out
The RTL8822CU and RTL8822BU out-of-tree drivers do this as well, so do
it for all three types of chips.
Tested with RTL8811CU (Tenda U9 V2.0).
Signed-off-by: Bitterblue Smith <rtl8821cerfe2(a)gmail.com>
Acked-by: Ping-Ke Shih <pkshih(a)realtek.com>
Signed-off-by: Kalle Valo <kvalo(a)kernel.org>
Link: https://msgid.link/aeeefad9-27c8-4506-a510-ef9a9a8731a4@gmail.com
---
drivers/net/wireless/realtek/rtw88/mac.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 298663b03580..0c1c1ff31085 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -309,6 +309,13 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
pwr_seq = pwr_on ? chip->pwr_on_seq : chip->pwr_off_seq;
ret = rtw_pwr_seq_parser(rtwdev, pwr_seq);
+ if (pwr_on && rtw_hci_type(rtwdev) == RTW_HCI_TYPE_USB) {
+ if (chip->id == RTW_CHIP_TYPE_8822C ||
+ chip->id == RTW_CHIP_TYPE_8822B ||
+ chip->id == RTW_CHIP_TYPE_8821C)
+ rtw_write8_clr(rtwdev, REG_SYS_STATUS1 + 1, BIT(0));
+ }
+
if (rtw_hci_type(rtwdev) == RTW_HCI_TYPE_SDIO)
rtw_write32(rtwdev, REG_SDIO_HIMR, imr);
--
2.43.2
Hey stable folks,
Can the following patches found in mainline
[PATCH] ASoC: amd: yc: Revert "Fix non-functional mic on Lenovo 21J2"
(861b341)
[PATCH] ASoC: amd: yc: Revert "add new YC platform variant (0x63)
support" (37bee18)
be backported to linux-6.8.y?
They're improperly assuming the 0x63 variant is part of the Yellow Carp
family. This causes the microphone input device to not be properly
probed on the device.
Known broken devices: ThinkPad P16s Gen 2 (21K9CTO1WW)
Thanks, Luca.
Hello everyone,
Can those changes be pulled in stable? They're currently breaking mic
input on my 21K9CTO1WW, ThinkPad P16s Gen 2, and probably more devices
in the wild.
Thanks, Luca.
On 12/03/24 03:33, Jiawei Wang wrote:
> Please revert my previous two commits:
>
> ASoC: amd: yc: add new YC platform variant (0x63) support
> [ Upstream commit 316a784839b21b122e1761cdca54677bb19a47fa ]
>
> ASoC: amd: yc: Fix non-functional mic on Lenovo 21J2
> [ Upstream commit ed00a6945dc32462c2d3744a3518d2316da66fcc ]
>
> PCI revision id 0x63 is the Pink Sardine (PS) platform, not Yellow
> Carp (YC). Thanks to Mukunda Vijendar [1] for pointing out that.
>
> The mic on Lenovo 21J2 works after enabling the CONFIG_SND_SOC_AMD_PS
> flag, which I had not enabled when I was writing these patches. 21J2
> does not need to be in this quirk table.
>
> I apologize for the inconvenience caused.
>
> Link: https://lore.kernel.org/linux-sound/023092e1-689c-4b00-b93f-4092c3724fb6@am… [1]
>
> Signed-off-by: Jiawei Wang <me(a)jwang.link>
>
> Jiawei Wang (2):
> Revert "ASoC: amd: yc: Fix non-functional mic on Lenovo 21J2"
> Revert "ASoC: amd: yc: add new YC platform variant (0x63) support"
>
> sound/soc/amd/yc/acp6x-mach.c | 7 -------
> sound/soc/amd/yc/pci-acp6x.c | 1 -
> 2 files changed, 8 deletions(-)
>
The patch below does not apply to the 6.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From cefcd4fe2e3aaf792c14c9e56dab89e3d7a65d02 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb(a)kernel.org>
Date: Fri, 22 Mar 2024 17:03:58 +0200
Subject: [PATCH] x86/efistub: Call mixed mode boot services on the firmware's
stack
Normally, the EFI stub calls into the EFI boot services using the stack
that was live when the stub was entered. According to the UEFI spec,
this stack needs to be at least 128k in size - this might seem large but
all asynchronous processing and event handling in EFI runs from the same
stack and so quite a lot of space may be used in practice.
In mixed mode, the situation is a bit different: the bootloader calls
the 32-bit EFI stub entry point, which calls the decompressor's 32-bit
entry point, where the boot stack is set up, using a fixed allocation
of 16k. This stack is still in use when the EFI stub is started in
64-bit mode, and so all calls back into the EFI firmware will be using
the decompressor's limited boot stack.
Due to the placement of the boot stack right after the boot heap, any
stack overruns have gone unnoticed. However, commit
5c4feadb0011983b ("x86/decompressor: Move global symbol references to C code")
moved the definition of the boot heap into C code, and now the boot
stack is placed right at the base of BSS, where any overruns will
corrupt the end of the .data section.
While it would be possible to work around this by increasing the size of
the boot stack, doing so would affect all x86 systems, and mixed mode
systems are a tiny (and shrinking) fraction of the x86 installed base.
So instead, record the firmware stack pointer value when entering from
the 32-bit firmware, and switch to this stack every time an EFI boot
service call is made.
Cc: <stable(a)kernel.org> # v6.1+
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
arch/x86/boot/compressed/efi_mixed.S | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index f4e22ef774ab6..719e939050cbf 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -49,6 +49,11 @@ SYM_FUNC_START(startup_64_mixed_mode)
lea efi32_boot_args(%rip), %rdx
mov 0(%rdx), %edi
mov 4(%rdx), %esi
+
+ /* Switch to the firmware's stack */
+ movl efi32_boot_sp(%rip), %esp
+ andl $~7, %esp
+
#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
mov 8(%rdx), %edx // saved bootparams pointer
test %edx, %edx
@@ -254,6 +259,9 @@ SYM_FUNC_START_LOCAL(efi32_entry)
/* Store firmware IDT descriptor */
sidtl (efi32_boot_idt - 1b)(%ebx)
+ /* Store firmware stack pointer */
+ movl %esp, (efi32_boot_sp - 1b)(%ebx)
+
/* Store boot arguments */
leal (efi32_boot_args - 1b)(%ebx), %ebx
movl %ecx, 0(%ebx)
@@ -318,5 +326,6 @@ SYM_DATA_END(efi32_boot_idt)
SYM_DATA_LOCAL(efi32_boot_cs, .word 0)
SYM_DATA_LOCAL(efi32_boot_ds, .word 0)
+SYM_DATA_LOCAL(efi32_boot_sp, .long 0)
SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
SYM_DATA(efi_is64, .byte 1)
--
2.43.0
Commit e79c6a4fc923 ("net: make net namespace sysctls belong to container's owner")
added default values for i_uid/i_gid.
These however are only used when ctl_table_root->set_ownership is not
implemented.
But the callbacks themselves could fail to compute i_uid/i_gid and they
all need to have the same fallback logic for this case.
This is unnecessary code duplication and prone to errors.
For example net_ctl_set_ownership() missed the fallback.
Instead always initialize i_uid/i_gid inside the sysctl core so
set_ownership() can safely skip setting them.
Fixes: e79c6a4fc923 ("net: make net namespace sysctls belong to container's owner")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
Changes in v2:
- Move the fallback logic to the sysctl core
- Link to v1: https://lore.kernel.org/r/20240315-sysctl-net-ownership-v1-1-2b465555a292@w…
---
fs/proc/proc_sysctl.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 37cde0efee57..9e34ab9c21e4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -479,12 +479,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
make_empty_dir_inode(inode);
}
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
if (root->set_ownership)
root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
- else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
return inode;
}
---
base-commit: ff9c18e435b042596c9d48badac7488e3fa76a55
change-id: 20240315-sysctl-net-ownership-bc4e17eaeea6
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
If drm_gem_handle_create() fails in vkms_gem_create(), then the
vkms_gem_object is not freed.
Fix it by adding a call to vkms_gem_free_object().
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 0ea2ea42b31a ("drm/vkms: Hold gem object while still in-use")
Cc: stable(a)vger.kernel.org#v5.10.212
#Co-developed-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Signed-off-by: Salomatkina Elena <elena.salomatkina.cmc(a)gmail.com>
---
drivers/gpu/drm/vkms/vkms_gem.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c
index a017fc59905e..cc6584767a1b 100644
--- a/drivers/gpu/drm/vkms/vkms_gem.c
+++ b/drivers/gpu/drm/vkms/vkms_gem.c
@@ -113,9 +113,10 @@ static struct drm_gem_object *vkms_gem_create(struct drm_device *dev,
return ERR_CAST(obj);
ret = drm_gem_handle_create(file, &obj->gem, handle);
- if (ret)
+ if (ret) {
+ vkms_gem_free_object(&obj->gem);
return ERR_PTR(ret);
-
+ }
return &obj->gem;
}
--
2.34.1
The ISRs of the tps25750 and tps6598x do not handle generated events
properly under all circumstances.
The tps6598x ISR does not read all bits of the INT_EVENTX registers,
leaving events signaled with bits above 64 unattended. Moreover, these
events are not cleared, leaving the interrupt enabled.
The tps25750 reads all bits of the INT_EVENT1 register, but the event
checking is not right because the same event is checked in two different
regions of the same register by means of an OR operation.
This series aims to fix both issues by reading all bits of the
INT_EVENTX registers, and limiting the event checking to the region
where the supported events are defined (currently they are limited to
the first 64 bits of the registers, as they are defined as BIT_ULL()).
If the need for events above the first 64 bits of the INT_EVENTX
registers arises, a different mechanism might be required. But for the
current needs, all definitions can be left as they are.
Note: resend to add 'stable' mailing list (fixes in the series).
Signed-off-by: Javier Carrasco <javier.carrasco(a)wolfvision.net>
---
Javier Carrasco (2):
usb: typec: tipd: fix event checking for tps25750
usb: typec: tipd: fix event checking for tps6598x
drivers/usb/typec/tipd/core.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)
---
base-commit: 4cece764965020c22cff7665b18a012006359095
change-id: 20240328-tps6598x_fix_event_handling-8fff3a6018d9
Best regards,
--
Javier Carrasco <javier.carrasco(a)wolfvision.net>
From: Fabio Estevam <festevam(a)denx.de>
Since commit 63b0cd30b78e ("media: ov2680: Add bus-cfg / endpoint
property verification") even when the correct 'link-frequencies'
property is passed in the devicetree, the driver fails to probe:
ov2680 1-0036: probe with driver ov2680 failed with error -22
The reason is that the variable 'ret' may contain the -EINVAL value
from a previous assignment:
ret = fwnode_property_read_u32(dev_fwnode(dev), "clock-frequency",
&rate);
Fix the problem by clearing 'ret' on the successful path.
Tested on imx7s-warp board with the following devicetree:
port {
ov2680_to_mipi: endpoint {
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
link-frequencies = /bits/ 64 <330000000>;
};
};
Cc: stable(a)vger.kernel.org
Fixes: 63b0cd30b78e ("media: ov2680: Add bus-cfg / endpoint property verification")
Suggested-by: Hans de Goede <hdegoede(a)redhat.com>
Signed-off-by: Fabio Estevam <festevam(a)denx.de>
---
Changes since v1:
- Use Hans' suggestion to clear 'ret'.
drivers/media/i2c/ov2680.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/media/i2c/ov2680.c b/drivers/media/i2c/ov2680.c
index 39d321e2b7f9..3e3b7c2b492c 100644
--- a/drivers/media/i2c/ov2680.c
+++ b/drivers/media/i2c/ov2680.c
@@ -1135,6 +1135,7 @@ static int ov2680_parse_dt(struct ov2680_dev *sensor)
goto out_free_bus_cfg;
}
+ ret = 0;
out_free_bus_cfg:
v4l2_fwnode_endpoint_free(&bus_cfg);
return ret;
--
2.34.1
Dear Linux folks,
Please apply commit 13e3a512a290 (i2c: smbus: Support up to 8 SPD
EEPROMs) [1] to the stable series to get rid of a warning and to support
more SPDs. That commit is present since v6.8-rc1.
Kind regards,
Paul
[1]:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
misc_cmd_type in exec_op has multiple problems. With commit a82990c8a409
("mtd: rawnand: qcom: Add read/read_start ops in exec_op path") it was
reworked and generalized but actually broke the handling of the
ERASE_BLOCK command.
Additional logic was added to the erase command cycle without a clear
explanation, causing the erase command to break when tested on
an ipq806x nandc.
Fix the erase command by reverting the additional logic and only adding
the NAND_DEV0_CFG0 additional call (required for erase command).
Fixes: a82990c8a409 ("mtd: rawnand: qcom: Add read/read_start ops in exec_op path")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
Changes v2:
- Split this and rework commit description and title
drivers/mtd/nand/raw/qcom_nandc.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index b079605c84d3..19d76e345a49 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2830,9 +2830,8 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub
nandc_set_reg(chip, NAND_EXEC_CMD, 1);
write_reg_dma(nandc, NAND_FLASH_CMD, instrs, NAND_BAM_NEXT_SGL);
- (q_op.cmd_reg == OP_BLOCK_ERASE) ? write_reg_dma(nandc, NAND_DEV0_CFG0,
- 2, NAND_BAM_NEXT_SGL) : read_reg_dma(nandc,
- NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL);
+ if (q_op.cmd_reg == OP_BLOCK_ERASE)
+ write_reg_dma(nandc, NAND_DEV0_CFG0, 2, NAND_BAM_NEXT_SGL);
write_reg_dma(nandc, NAND_EXEC_CMD, 1, NAND_BAM_NEXT_SGL);
read_reg_dma(nandc, NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL);
--
2.43.0
Add module alias with the algorithm cra_name similar to what we have for
RSA-related and other algorithms.
The kernel attempts to modprobe asymmetric algorithms using the names
"crypto-$cra_name" and "crypto-$cra_name-all." However, since these
aliases are currently missing, the modules are not loaded. For instance,
when using the `add_key` function, the hash algorithm is typically
loaded automatically, but the asymmetric algorithm is not.
Steps to test:
1. Create certificate
openssl req -x509 -sha256 -newkey ec \
-pkeyopt "ec_paramgen_curve:secp384r1" -keyout key.pem -days 365 \
-subj '/CN=test' -nodes -outform der -out nist-p384.der
2. Optionally, trace module requests with: trace-cmd stream -e module &
3. Trigger add_key call for the cert:
# keyctl padd asymmetric "" @u < nist-p384.der
641069229
# lsmod | head -2
Module Size Used by
ecdsa_generic 16384 0
Fixes: c12d448ba939 ("crypto: ecdsa - Register NIST P384 and extend test suite")
Cc: stable(a)vger.kernel.org
Signed-off-by: Stefan Berger <stefanb(a)linux.ibm.com>
---
crypto/ecdsa.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c
index fbd76498aba8..3f9ec273a121 100644
--- a/crypto/ecdsa.c
+++ b/crypto/ecdsa.c
@@ -373,4 +373,7 @@ module_exit(ecdsa_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stefan Berger <stefanb(a)linux.ibm.com>");
MODULE_DESCRIPTION("ECDSA generic algorithm");
+MODULE_ALIAS_CRYPTO("ecdsa-nist-p192");
+MODULE_ALIAS_CRYPTO("ecdsa-nist-p256");
+MODULE_ALIAS_CRYPTO("ecdsa-nist-p384");
MODULE_ALIAS_CRYPTO("ecdsa-generic");
--
2.43.0
When the at803x driver was reworked and split, the function split for
at803x PHYs introduced a NULL dereference bug: priv is referenced
before it is actually allocated and is then written to for the
is_1000basex and is_fiber variables in the case of at8031, writing to
the wrong address.
Fix this by correctly setting priv local variable only after
at803x_probe is called and actually allocates priv in the phydev struct.
Reported-by: William Wortel <wwortel(a)dorpstraat.com>
Cc: <stable(a)vger.kernel.org>
Fixes: 25d2ba94005f ("net: phy: at803x: move specific at8031 probe mode check to dedicated probe")
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/net/phy/qcom/at803x.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
index 4717c59d51d0..e79657f76bea 100644
--- a/drivers/net/phy/qcom/at803x.c
+++ b/drivers/net/phy/qcom/at803x.c
@@ -797,7 +797,7 @@ static int at8031_parse_dt(struct phy_device *phydev)
static int at8031_probe(struct phy_device *phydev)
{
- struct at803x_priv *priv = phydev->priv;
+ struct at803x_priv *priv;
int mode_cfg;
int ccr;
int ret;
@@ -806,6 +806,8 @@ static int at8031_probe(struct phy_device *phydev)
if (ret)
return ret;
+ priv = phydev->priv;
+
/* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
* options.
*/
--
2.43.0
Hi,
I think we are at the end of it and hopefully this is the last
version. Thanks Matt for having followed this series until here.
This series does basically two things:
1. Disables automatic load balancing as advised by the hardware
workaround.
2. Assigns all the CCS slices to one single user engine. The user
will then be able to query only one CCS engine
From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
this file will contain the implementation for dynamic CCS mode
setting.
Thanks Tvrtko, Matt, John and Joonas for your reviews!
Andi
Changelog
=========
v7 -> v8
- Just used a different way for removing the first instance of
the CCS from the info->engine_mask, as suggested by Matt.
v6 -> v7
- find a more appropriate place where to remove the CCS engines:
remove them in init_engine_mask() instead of
intel_engines_init_mmio(). (Thanks, Matt)
- Add Michal's ACK, thanks Michal!
v5 -> v6 (thanks Matt for the suggestions in v6)
- Remove the refactoring and the for_each_available_engine()
macro and instead do not create the intel_engine_cs structure
at all.
- In patch 1 just a trivial reordering of the bit definitions.
v4 -> v5
- Use the workaround framework to do all the CCS balancing
settings in order to always apply the modes also when the
engine resets. Put everything in its own specific function to
be executed for the first CCS engine encountered. (Thanks
Matt)
- Calculate the CCS ID for the CCS mode as the first available
CCS among all the engines (Thanks Matt)
- create the intel_gt_ccs_mode.c function to host the CCS
configuration. We will have it ready for the next series.
- Fix a selftest that was failing because it could not set CCS2.
- Add the for_each_available_engine() macro to exclude CCS1+ and
start using it in the hangcheck selftest.
v3 -> v4
- Reword correctly the comment in the workaround
- Fix a buffer overflow (Thanks Joonas)
- Handle properly the fused engines when setting the CCS mode.
v2 -> v3
- Simplified the algorithm for creating the list of the exported
uabi engines. (Patch 1) (Thanks, Tvrtko)
- Consider the fused engines when creating the uabi engine list
(Patch 2) (Thanks, Matt)
- Patch 4 now uses the refactoring from patch 1, in a cleaner
outcome.
v1 -> v2
- In Patch 1 use the correct workaround number (thanks Matt).
- In Patch 2 do not add the extra CCS engines to the exposed
UABI engine list and adapt the engine counting accordingly
(thanks Tvrtko).
- Reword the commit of Patch 2 (thanks John).
Andi Shyti (3):
drm/i915/gt: Disable HW load balancing for CCS
drm/i915/gt: Do not generate the command streamer for all the CCS
drm/i915/gt: Enable only one CCS for compute workload
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 +++++++++
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +++++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++++++
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 ++++
drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++++++++++++++--
6 files changed, 104 insertions(+), 2 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
--
2.43.0
Hi,
this series does basically two things:
1. Disables automatic load balancing as advised by the hardware
workaround.
2. Assigns all the CCS slices to one single user engine. The user
will then be able to query only one CCS engine
From v5 I have created a new file, gt/intel_gt_ccs_mode.c where
I added the intel_gt_apply_ccs_mode(). In the upcoming patches,
this file will contain the implementation for dynamic CCS mode
setting.
Thanks Tvrtko, Matt, John and Joonas for your reviews!
Andi
Changelog
=========
v6 -> v7
- find a more appropriate place where to remove the CCS engines:
remove them in init_engine_mask() instead of
intel_engines_init_mmio(). (Thanks, Matt)
- Add Michal's ACK, thanks Michal!
v5 -> v6 (thanks Matt for the suggestions in v6)
- Remove the refactoring and the for_each_available_engine()
macro and instead do not create the intel_engine_cs structure
at all.
- In patch 1 just a trivial reordering of the bit definitions.
v4 -> v5
- Use the workaround framework to do all the CCS balancing
settings in order to always apply the modes also when the
engine resets. Put everything in its own specific function to
be executed for the first CCS engine encountered. (Thanks
Matt)
- Calculate the CCS ID for the CCS mode as the first available
CCS among all the engines (Thanks Matt)
- create the intel_gt_ccs_mode.c function to host the CCS
configuration. We will have it ready for the next series.
- Fix a selftest that was failing because it could not set CCS2.
- Add the for_each_available_engine() macro to exclude CCS1+ and
start using it in the hangcheck selftest.
v3 -> v4
- Reword correctly the comment in the workaround
- Fix a buffer overflow (Thanks Joonas)
- Handle properly the fused engines when setting the CCS mode.
v2 -> v3
- Simplified the algorithm for creating the list of the exported
uabi engines. (Patch 1) (Thanks, Tvrtko)
- Consider the fused engines when creating the uabi engine list
(Patch 2) (Thanks, Matt)
- Patch 4 now uses the refactoring from patch 1, in a cleaner
outcome.
v1 -> v2
- In Patch 1 use the correct workaround number (thanks Matt).
- In Patch 2 do not add the extra CCS engines to the exposed
UABI engine list and adapt the engine counting accordingly
(thanks Tvrtko).
- Reword the commit of Patch 2 (thanks John).
Andi Shyti (3):
drm/i915/gt: Disable HW load balancing for CCS
drm/i915/gt: Do not generate the command streamer for all the CCS
drm/i915/gt: Enable only one CCS for compute workload
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 15 ++++++++
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +++++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++++++
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 ++++
drivers/gpu/drm/i915/gt/intel_workarounds.c | 30 ++++++++++++++--
6 files changed, 102 insertions(+), 2 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
--
2.43.0
Hi,
On Wed, Mar 27, 2024 at 09:05:46PM +0100, Andi Shyti wrote:
> Commit 9bb66c179f50 ("drm/i915: Reserve some kernel space per
> vm") reduces the available VM space of one page in order to apply
> Wa_16018031267 and Wa_16018063123.
>
> This page was reserved indiscriminately in all platforms even when
> not needed. Limit it to DG2 onwards.
>
> Fixes: 9bb66c179f50 ("drm/i915: Reserve some kernel space per vm")
> Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
> Cc: Andrzej Hajda <andrzej.hajda(a)intel.com>
> Cc: Chris Wilson <chris.p.wilson(a)linux.intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt(a)intel.com>
> Cc: Nirmoy Das <nirmoy.das(a)intel.com>
I forgot to add stable here:
Cc: <stable(a)vger.kernel.org> # v6.8+
Sorry about that!
Andi
vhost_get_vq_desc (correctly) uses smp_rmb to order
avail ring reads after index reads.
However, over time we added two more places that read the
index and do not bother with barriers.
Since vhost_get_vq_desc, when it was written, assumed it was the
only reader, when it sees that a new index value is cached
it does not bother with a barrier either. As a result,
on the nvidia-gracehopper platform (arm64) available ring
entry reads have been observed bypassing ring reads, causing
a ring corruption.
To fix, factor out the correct index access code from vhost_get_vq_desc.
As a side benefit, we also validate the index on all paths now, which
will hopefully help catch future errors earlier.
Note: current code is inconsistent in how it handles errors:
some places treat it as an empty ring, others - non empty.
This patch does not attempt to change the existing behaviour.
Cc: stable(a)vger.kernel.org
Reported-by: Gavin Shan <gshan(a)redhat.com>
Reported-by: Will Deacon <will(a)kernel.org>
Suggested-by: Will Deacon <will(a)kernel.org>
Fixes: 275bf960ac69 ("vhost: better detection of available buffers")
Cc: "Jason Wang" <jasowang(a)redhat.com>
Fixes: d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()")
Cc: "Stefano Garzarella" <sgarzare(a)redhat.com>
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
---
I think it's better to bite the bullet and clean up the code.
Note: this is still only built, not tested.
Gavin could you help test please?
Especially on the arm platform you have?
Will thanks so much for finding this race!
drivers/vhost/vhost.c | 80 +++++++++++++++++++++++--------------------
1 file changed, 42 insertions(+), 38 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 045f666b4f12..26b70b1fd9ff 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1290,10 +1290,38 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
mutex_unlock(&d->vqs[i]->mutex);
}
-static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
- __virtio16 *idx)
+static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq)
{
- return vhost_get_avail(vq, *idx, &vq->avail->idx);
+ __virtio16 idx;
+ u16 avail_idx;
+ int r = vhost_get_avail(vq, idx, &vq->avail->idx);
+
+ if (unlikely(r < 0)) {
+ vq_err(vq, "Failed to access avail idx at %p: %d\n",
+ &vq->avail->idx, r);
+ return -EFAULT;
+ }
+
+ avail_idx = vhost16_to_cpu(vq, idx);
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (unlikely((u16)(avail_idx - vq->last_avail_idx) > vq->num)) {
+ vq_err(vq, "Guest moved used index from %u to %u",
+ vq->last_avail_idx, vq->avail_idx);
+ return -EFAULT;
+ }
+
+ /* Nothing new? We are done. */
+ if (avail_idx == vq->avail_idx)
+ return 0;
+
+ vq->avail_idx = avail_idx;
+
+ /* We updated vq->avail_idx so we need a memory barrier between
+ * the index read above and the caller reading avail ring entries.
+ */
+ smp_rmb();
+ return 1;
}
static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
@@ -2498,38 +2526,21 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
{
struct vring_desc desc;
unsigned int i, head, found = 0;
- u16 last_avail_idx;
- __virtio16 avail_idx;
+ u16 last_avail_idx = vq->last_avail_idx;
__virtio16 ring_head;
int ret, access;
- /* Check it isn't doing very strange things with descriptor numbers. */
- last_avail_idx = vq->last_avail_idx;
if (vq->avail_idx == vq->last_avail_idx) {
- if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
- vq_err(vq, "Failed to access avail idx at %p\n",
- &vq->avail->idx);
- return -EFAULT;
- }
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
-
- if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
- last_avail_idx, vq->avail_idx);
- return -EFAULT;
- }
+ ret = vhost_get_avail_idx(vq);
+ if (unlikely(ret < 0))
+ return ret;
/* If there's nothing new since last we looked, return
* invalid.
*/
- if (vq->avail_idx == last_avail_idx)
+ if (!ret)
return vq->num;
-
- /* Only get avail ring entries after they have been
- * exposed by guest.
- */
- smp_rmb();
}
/* Grab the next descriptor number they're advertising, and increment
@@ -2790,25 +2801,21 @@ EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
/* return true if we're sure that avaiable ring is empty */
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
- __virtio16 avail_idx;
int r;
if (vq->avail_idx != vq->last_avail_idx)
return false;
- r = vhost_get_avail_idx(vq, &avail_idx);
- if (unlikely(r))
- return false;
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ r = vhost_get_avail_idx(vq);
- return vq->avail_idx == vq->last_avail_idx;
+ /* Note: we treat error as non-empty here */
+ return r == 0;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
/* OK, now we need to know about added descriptors. */
bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
- __virtio16 avail_idx;
int r;
if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
@@ -2832,13 +2839,10 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
smp_mb();
- r = vhost_get_avail_idx(vq, &avail_idx);
- if (r) {
- vq_err(vq, "Failed to check avail idx at %p: %d\n",
- &vq->avail->idx, r);
+ r = vhost_get_avail_idx(vq);
+ /* Note: we treat error as empty here */
+ if (r < 0)
return false;
- }
- vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
return vq->avail_idx != vq->last_avail_idx;
}
--
MST
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 672448ccf9b6a676f96f9352cbf91f4d35f4084a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032747-spiritism-worsening-c504@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
672448ccf9b6 ("tty: serial: imx: Fix broken RS485")
ca530cfa968c ("serial: imx: Add support for RS485 RX_DURING_TX output GPIO")
79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high")
915162460152 ("serial: imx: remove redundant assignment in rs485_config")
028e083832b0 ("tty: serial: imx: disable UCR4_OREN in .stop_rx() instead of .shutdown()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 672448ccf9b6a676f96f9352cbf91f4d35f4084a Mon Sep 17 00:00:00 2001
From: Rickard x Andersson <rickaran(a)axis.com>
Date: Wed, 21 Feb 2024 12:53:04 +0100
Subject: [PATCH] tty: serial: imx: Fix broken RS485
When about to transmit the function imx_uart_start_tx is called and in
some RS485 configurations this function will call imx_uart_stop_rx. The
problem is that imx_uart_stop_rx will enable loopback in order to
release the RS485 bus, but when loopback is enabled transmitted data
will just be looped to RX.
This patch fixes the above problem by not enabling loopback when about
to transmit.
This driver now works well when used for RS485 half duplex master
configurations.
Fixes: 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Rickard x Andersson <rickaran(a)axis.com>
Tested-by: Christoph Niedermaier <cniedermaier(a)dh-electronics.com>
Link: https://lore.kernel.org/r/20240221115304.509811-1-rickaran@axis.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 4aa72d5aeafb..e14813250616 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -462,8 +462,7 @@ static void imx_uart_stop_tx(struct uart_port *port)
}
}
-/* called with port.lock taken and irqs off */
-static void imx_uart_stop_rx(struct uart_port *port)
+static void imx_uart_stop_rx_with_loopback_ctrl(struct uart_port *port, bool loopback)
{
struct imx_port *sport = (struct imx_port *)port;
u32 ucr1, ucr2, ucr4, uts;
@@ -485,7 +484,7 @@ static void imx_uart_stop_rx(struct uart_port *port)
/* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
if (port->rs485.flags & SER_RS485_ENABLED &&
port->rs485.flags & SER_RS485_RTS_ON_SEND &&
- sport->have_rtscts && !sport->have_rtsgpio) {
+ sport->have_rtscts && !sport->have_rtsgpio && loopback) {
uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
uts |= UTS_LOOP;
imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
@@ -497,6 +496,16 @@ static void imx_uart_stop_rx(struct uart_port *port)
imx_uart_writel(sport, ucr2, UCR2);
}
+/* called with port.lock taken and irqs off */
+static void imx_uart_stop_rx(struct uart_port *port)
+{
+ /*
+ * Stop RX and enable loopback in order to make sure RS485 bus
+ * is not blocked. Se comment in imx_uart_probe().
+ */
+ imx_uart_stop_rx_with_loopback_ctrl(port, true);
+}
+
/* called with port.lock taken and irqs off */
static void imx_uart_enable_ms(struct uart_port *port)
{
@@ -682,9 +691,14 @@ static void imx_uart_start_tx(struct uart_port *port)
imx_uart_rts_inactive(sport, &ucr2);
imx_uart_writel(sport, ucr2, UCR2);
+ /*
+ * Since we are about to transmit we can not stop RX
+ * with loopback enabled because that will make our
+ * transmitted data being just looped to RX.
+ */
if (!(port->rs485.flags & SER_RS485_RX_DURING_TX) &&
!port->rs485_rx_during_tx_gpio)
- imx_uart_stop_rx(port);
+ imx_uart_stop_rx_with_loopback_ctrl(port, false);
sport->tx_state = WAIT_AFTER_RTS;
The clk_alpha_pll_stromer_set_rate() function writes improper
values into the ALPHA_VAL{,_U} registers which results in wrong
clock rates when the alpha value is used.
The broken behaviour can be seen on IPQ5018 for example, when
dynamic scaling sets the CPU frequency to 800000 KHz. In this
case the CPU cores are running only at 792031 KHz:
# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq
800000
# cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq
792031
This happens because the function ignores the fact that the alpha
value calculated by the alpha_pll_round_rate() function is only
32 bits wide which must be extended to 40 bits if it is used on
a hardware which supports 40 bits wide values.
Extend the clk_alpha_pll_stromer_set_rate() function to convert
the alpha value to 40 bits before writing that into the registers
in order to ensure that the hardware really uses the requested rate.
After the change the CPU frequency is correct:
# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq
800000
# cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq
800000
Cc: stable(a)vger.kernel.org
Fixes: e47a4f55f240 ("clk: qcom: clk-alpha-pll: Add support for Stromer PLLs")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
---
Changes in v2:
- fix subject prefix
- rebase on v6.9-rc1
- Link to v1: https://lore.kernel.org/r/20240324-alpha-pll-fix-stromer-set-rate-v1-1-335b…
Depends on the following patch:
https://lore.kernel.org/r/20240315-apss-ipq-pll-ipq5018-hang-v2-1-6fe30ada2…
---
drivers/clk/qcom/clk-alpha-pll.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index 8a412ef47e163..8e98198d4b4b6 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -2490,6 +2490,10 @@ static int clk_alpha_pll_stromer_set_rate(struct clk_hw *hw, unsigned long rate,
rate = alpha_pll_round_rate(rate, prate, &l, &a, ALPHA_REG_BITWIDTH);
regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l);
+
+ if (ALPHA_REG_BITWIDTH > ALPHA_BITWIDTH)
+ a <<= ALPHA_REG_BITWIDTH - ALPHA_BITWIDTH;
+
regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), a);
regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll),
a >> ALPHA_BITWIDTH);
---
base-commit: 5eab983c5e31e5f0bf2d583731e320e21814d1b7
change-id: 20240324-alpha-pll-fix-stromer-set-rate-472376e624f0
Best regards,
--
Gabor Juhos <j4g8y7(a)gmail.com>
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x ceb013b2d9a2946035de5e1827624edc85ae9484
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032735-angling-payback-8e99@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
ceb013b2d9a2 ("i2c: i801: Avoid potential double call to gpiod_remove_lookup_table")
5581b4167c0f ("i2c: i801: Refactor mux code since platform_device_unregister() is NULL aware")
926e6b2cd1ca ("i2c: i801: Drop duplicate NULL check in i801_del_mux()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ceb013b2d9a2946035de5e1827624edc85ae9484 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1(a)gmail.com>
Date: Mon, 4 Mar 2024 21:31:06 +0100
Subject: [PATCH] i2c: i801: Avoid potential double call to
gpiod_remove_lookup_table
If registering the platform device fails, the lookup table is
removed in the error path. On module removal we would try to
remove the lookup table again. Fix this by setting priv->lookup
only if registering the platform device was successful.
In addition free the memory allocated for the lookup table in
the error path.
Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing")
Cc: stable(a)vger.kernel.org
Reviewed-by: Andi Shyti <andi.shyti(a)kernel.org>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Signed-off-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 9a0a77383ca8..274e987e4cfa 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1416,7 +1416,6 @@ static void i801_add_mux(struct i801_priv *priv)
lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip,
mux_config->gpios[i], "mux", 0);
gpiod_add_lookup_table(lookup);
- priv->lookup = lookup;
/*
* Register the mux device, we use PLATFORM_DEVID_NONE here
@@ -1430,7 +1429,10 @@ static void i801_add_mux(struct i801_priv *priv)
sizeof(struct i2c_mux_gpio_platform_data));
if (IS_ERR(priv->mux_pdev)) {
gpiod_remove_lookup_table(lookup);
+ devm_kfree(dev, lookup);
dev_err(dev, "Failed to register i2c-mux-gpio device\n");
+ } else {
+ priv->lookup = lookup;
}
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ceb013b2d9a2946035de5e1827624edc85ae9484
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032732-luckiness-repackage-f6f8@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
ceb013b2d9a2 ("i2c: i801: Avoid potential double call to gpiod_remove_lookup_table")
5581b4167c0f ("i2c: i801: Refactor mux code since platform_device_unregister() is NULL aware")
926e6b2cd1ca ("i2c: i801: Drop duplicate NULL check in i801_del_mux()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ceb013b2d9a2946035de5e1827624edc85ae9484 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1(a)gmail.com>
Date: Mon, 4 Mar 2024 21:31:06 +0100
Subject: [PATCH] i2c: i801: Avoid potential double call to
gpiod_remove_lookup_table
If registering the platform device fails, the lookup table is
removed in the error path. On module removal we would try to
remove the lookup table again. Fix this by setting priv->lookup
only if registering the platform device was successful.
In addition free the memory allocated for the lookup table in
the error path.
Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing")
Cc: stable(a)vger.kernel.org
Reviewed-by: Andi Shyti <andi.shyti(a)kernel.org>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Signed-off-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org>
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 9a0a77383ca8..274e987e4cfa 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1416,7 +1416,6 @@ static void i801_add_mux(struct i801_priv *priv)
lookup->table[i] = GPIO_LOOKUP(mux_config->gpio_chip,
mux_config->gpios[i], "mux", 0);
gpiod_add_lookup_table(lookup);
- priv->lookup = lookup;
/*
* Register the mux device, we use PLATFORM_DEVID_NONE here
@@ -1430,7 +1429,10 @@ static void i801_add_mux(struct i801_priv *priv)
sizeof(struct i2c_mux_gpio_platform_data));
if (IS_ERR(priv->mux_pdev)) {
gpiod_remove_lookup_table(lookup);
+ devm_kfree(dev, lookup);
dev_err(dev, "Failed to register i2c-mux-gpio device\n");
+ } else {
+ priv->lookup = lookup;
}
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a9dd9ba323114f366eb07f1d9630822f8df6cbb2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032745-jiffy-flyaway-dec6@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9dd9ba323114f366eb07f1d9630822f8df6cbb2 Mon Sep 17 00:00:00 2001
From: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Date: Mon, 19 Feb 2024 20:13:59 +0100
Subject: [PATCH] iio: pressure: Fixes BMP38x and BMP390 SPI support
According to the datasheet of BMP38x and BMP390 devices, for an SPI
read operation the first byte that is returned needs to be dropped,
and the rest of the bytes are the actual data returned from the
sensor.
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Fixes: 8d329309184d ("iio: pressure: bmp280: Add support for BMP380 sensor family")
Signed-off-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Acked-by: Angel Iglesias <ang.iglesiasg(a)gmail.com>
Link: https://lore.kernel.org/r/20240219191359.18367-1-vassilisamir@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index e8a5fed07e88..a444d4b2978b 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -4,6 +4,7 @@
*
* Inspired by the older BMP085 driver drivers/misc/bmp085-spi.c
*/
+#include <linux/bits.h>
#include <linux/module.h>
#include <linux/spi/spi.h>
#include <linux/err.h>
@@ -35,6 +36,34 @@ static int bmp280_regmap_spi_read(void *context, const void *reg,
return spi_write_then_read(spi, reg, reg_size, val, val_size);
}
+static int bmp380_regmap_spi_read(void *context, const void *reg,
+ size_t reg_size, void *val, size_t val_size)
+{
+ struct spi_device *spi = to_spi_device(context);
+ u8 rx_buf[4];
+ ssize_t status;
+
+ /*
+ * Maximum number of consecutive bytes read for a temperature or
+ * pressure measurement is 3.
+ */
+ if (val_size > 3)
+ return -EINVAL;
+
+ /*
+ * According to the BMP3xx datasheets, for a basic SPI read opertion,
+ * the first byte needs to be dropped and the rest are the requested
+ * data.
+ */
+ status = spi_write_then_read(spi, reg, 1, rx_buf, val_size + 1);
+ if (status)
+ return status;
+
+ memcpy(val, rx_buf + 1, val_size);
+
+ return 0;
+}
+
static struct regmap_bus bmp280_regmap_bus = {
.write = bmp280_regmap_spi_write,
.read = bmp280_regmap_spi_read,
@@ -42,10 +71,19 @@ static struct regmap_bus bmp280_regmap_bus = {
.val_format_endian_default = REGMAP_ENDIAN_BIG,
};
+static struct regmap_bus bmp380_regmap_bus = {
+ .write = bmp280_regmap_spi_write,
+ .read = bmp380_regmap_spi_read,
+ .read_flag_mask = BIT(7),
+ .reg_format_endian_default = REGMAP_ENDIAN_BIG,
+ .val_format_endian_default = REGMAP_ENDIAN_BIG,
+};
+
static int bmp280_spi_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
const struct bmp280_chip_info *chip_info;
+ struct regmap_bus *bmp_regmap_bus;
struct regmap *regmap;
int ret;
@@ -58,8 +96,18 @@ static int bmp280_spi_probe(struct spi_device *spi)
chip_info = spi_get_device_match_data(spi);
+ switch (chip_info->chip_id[0]) {
+ case BMP380_CHIP_ID:
+ case BMP390_CHIP_ID:
+ bmp_regmap_bus = &bmp380_regmap_bus;
+ break;
+ default:
+ bmp_regmap_bus = &bmp280_regmap_bus;
+ break;
+ }
+
regmap = devm_regmap_init(&spi->dev,
- &bmp280_regmap_bus,
+ bmp_regmap_bus,
&spi->dev,
chip_info->regmap_config);
if (IS_ERR(regmap)) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032718-mockup-swoosh-2069@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
e5d6bd25f93d ("serial: 8250_dw: Do not reclock if already at correct rate")
74365bc138ab ("serial: 8250_dw: drop bogus uartclk optimisation")
0eb9da9cf201 ("serial: 8250_dw: Fix the trivial typo in the comment")
cc816969d7b5 ("serial: 8250_dw: Fix common clocks usage race condition")
0be160cf86f9 ("serial: 8250_dw: Pass the same rate to the clk round and set rate methods")
442fdef1b931 ("serial: 8250_dw: Simplify the ref clock rate setting procedure")
a8afc193558a ("serial: 8250_dw: Use devm_clk_get_optional() to get the input clock")
4d5675c3b10b ("serial: 8250_dw: switch to use 8250_dwlib library")
62907e90cc7e ("serial: 8250_dw: use pointer to uart local variable")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc(a)google.com>
Date: Thu, 22 Feb 2024 11:26:34 -0800
Subject: [PATCH] serial: 8250_dw: Do not reclock if already at correct rate
When userspace opens the console, we call set_termios() passing a
termios with the console's configured baud rate. Currently this causes
dw8250_set_termios() to disable and then re-enable the UART clock at
the same frequency as it was originally. This can cause corruption
of any concurrent console output. Fix it by skipping the reclocking
if we are already at the correct rate.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: stable(a)vger.kernel.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240222192635.1050502-1-pcc@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032717-vehicular-nutlike-6de9@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
e5d6bd25f93d ("serial: 8250_dw: Do not reclock if already at correct rate")
74365bc138ab ("serial: 8250_dw: drop bogus uartclk optimisation")
0eb9da9cf201 ("serial: 8250_dw: Fix the trivial typo in the comment")
cc816969d7b5 ("serial: 8250_dw: Fix common clocks usage race condition")
0be160cf86f9 ("serial: 8250_dw: Pass the same rate to the clk round and set rate methods")
442fdef1b931 ("serial: 8250_dw: Simplify the ref clock rate setting procedure")
a8afc193558a ("serial: 8250_dw: Use devm_clk_get_optional() to get the input clock")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc(a)google.com>
Date: Thu, 22 Feb 2024 11:26:34 -0800
Subject: [PATCH] serial: 8250_dw: Do not reclock if already at correct rate
When userspace opens the console, we call set_termios() passing a
termios with the console's configured baud rate. Currently this causes
dw8250_set_termios() to disable and then re-enable the UART clock at
the same frequency as it was originally. This can cause corruption
of any concurrent console output. Fix it by skipping the reclocking
if we are already at the correct rate.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: stable(a)vger.kernel.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240222192635.1050502-1-pcc@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032716-jaywalker-strainer-d5dc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e5d6bd25f93d ("serial: 8250_dw: Do not reclock if already at correct rate")
74365bc138ab ("serial: 8250_dw: drop bogus uartclk optimisation")
0eb9da9cf201 ("serial: 8250_dw: Fix the trivial typo in the comment")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc(a)google.com>
Date: Thu, 22 Feb 2024 11:26:34 -0800
Subject: [PATCH] serial: 8250_dw: Do not reclock if already at correct rate
When userspace opens the console, we call set_termios() passing a
termios with the console's configured baud rate. Currently this causes
dw8250_set_termios() to disable and then re-enable the UART clock at
the same frequency as it was originally. This can cause corruption
of any concurrent console output. Fix it by skipping the reclocking
if we are already at the correct rate.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: stable(a)vger.kernel.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240222192635.1050502-1-pcc@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032710-unbent-dingy-2b33@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e5d6bd25f93d ("serial: 8250_dw: Do not reclock if already at correct rate")
74365bc138ab ("serial: 8250_dw: drop bogus uartclk optimisation")
0eb9da9cf201 ("serial: 8250_dw: Fix the trivial typo in the comment")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5d6bd25f93d6ae158bb4cd04956cb497a85b8ef Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc(a)google.com>
Date: Thu, 22 Feb 2024 11:26:34 -0800
Subject: [PATCH] serial: 8250_dw: Do not reclock if already at correct rate
When userspace opens the console, we call set_termios() passing a
termios with the console's configured baud rate. Currently this causes
dw8250_set_termios() to disable and then re-enable the UART clock at
the same frequency as it was originally. This can cause corruption
of any concurrent console output. Fix it by skipping the reclocking
if we are already at the correct rate.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Fixes: 4e26b134bd17 ("serial: 8250_dw: clock rate handling for all ACPI platforms")
Cc: stable(a)vger.kernel.org
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240222192635.1050502-1-pcc@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 2d1f350a4bea..c1d43f040c43 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -357,9 +357,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
- clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0) {
+ if (rate > 0 && p->uartclk != rate) {
+ clk_disable_unprepare(d->clk);
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -367,8 +367,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
+ clk_prepare_enable(d->clk);
}
- clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 4a30dcac38c2b34f5b4f358630774bc2c2c104b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032707-among-rerun-a107@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
4a30dcac38c2 ("usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm devices")
1d103d6af241 ("usb: typec: ucsi: fix UCSI on buggy Qualcomm devices")
c6165ed2f425 ("usb: ucsi: glink: use the connector orientation GPIO to provide switch events")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a30dcac38c2b34f5b4f358630774bc2c2c104b0 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong(a)linaro.org>
Date: Fri, 23 Feb 2024 10:40:40 +0100
Subject: [PATCH] usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm
devices
On SM8550 and SM8650 Qualcomm platforms a call to UCSI_GET_PDOS for
non-PD partners will cause a firmware crash with no
easy way to recover from it.
Add UCSI_NO_PARTNER_PDOS quirk for those platforms until we find
a way to properly handle the crash.
Signed-off-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240223-topic-sm8550-upstream-ucsi-no-pdos-v1-1-…
Cc: stable <stable(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 53a7ede8556d..faccc942b381 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -301,6 +301,7 @@ static const struct of_device_id pmic_glink_ucsi_of_quirks[] = {
{ .compatible = "qcom,sc8180x-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sc8280xp-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sm8350-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
+ { .compatible = "qcom,sm8550-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{}
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 4a30dcac38c2b34f5b4f358630774bc2c2c104b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032708-replay-kilobyte-d938@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
4a30dcac38c2 ("usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm devices")
1d103d6af241 ("usb: typec: ucsi: fix UCSI on buggy Qualcomm devices")
c6165ed2f425 ("usb: ucsi: glink: use the connector orientation GPIO to provide switch events")
62b5412b1f4a ("usb: typec: ucsi: add PMIC Glink UCSI driver")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a30dcac38c2b34f5b4f358630774bc2c2c104b0 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong(a)linaro.org>
Date: Fri, 23 Feb 2024 10:40:40 +0100
Subject: [PATCH] usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm
devices
On SM8550 and SM8650 Qualcomm platforms a call to UCSI_GET_PDOS for
non-PD partners will cause a firmware crash with no
easy way to recover from it.
Add UCSI_NO_PARTNER_PDOS quirk for those platforms until we find
a way to properly handle the crash.
Signed-off-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240223-topic-sm8550-upstream-ucsi-no-pdos-v1-1-…
Cc: stable <stable(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 53a7ede8556d..faccc942b381 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -301,6 +301,7 @@ static const struct of_device_id pmic_glink_ucsi_of_quirks[] = {
{ .compatible = "qcom,sc8180x-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sc8280xp-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sm8350-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
+ { .compatible = "qcom,sm8550-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{}
};
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 4a30dcac38c2b34f5b4f358630774bc2c2c104b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032707-hemstitch-happily-562d@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
4a30dcac38c2 ("usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm devices")
1d103d6af241 ("usb: typec: ucsi: fix UCSI on buggy Qualcomm devices")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4a30dcac38c2b34f5b4f358630774bc2c2c104b0 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong(a)linaro.org>
Date: Fri, 23 Feb 2024 10:40:40 +0100
Subject: [PATCH] usb: typec: ucsi: fix UCSI on SM8550 & SM8650 Qualcomm
devices
On SM8550 and SM8650 Qualcomm platforms a call to UCSI_GET_PDOS for
non-PD partners will cause a firmware crash with no
easy way to recover from it.
Add UCSI_NO_PARTNER_PDOS quirk for those platforms until we find
a way to properly handle the crash.
Signed-off-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240223-topic-sm8550-upstream-ucsi-no-pdos-v1-1-…
Cc: stable <stable(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 53a7ede8556d..faccc942b381 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -301,6 +301,7 @@ static const struct of_device_id pmic_glink_ucsi_of_quirks[] = {
{ .compatible = "qcom,sc8180x-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sc8280xp-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{ .compatible = "qcom,sm8350-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
+ { .compatible = "qcom,sm8550-pmic-glink", .data = (void *)UCSI_NO_PARTNER_PDOS, },
{}
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 165376f6b23e9a779850e750fb2eb06622e5a531
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032758-thesaurus-rabid-563c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
165376f6b23e ("usb: typec: altmodes/displayport: create sysfs nodes as driver's default device attribute group")
001b0c780eac ("usb: typec: altmodes/displayport: Add hpd sysfs attribute")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 165376f6b23e9a779850e750fb2eb06622e5a531 Mon Sep 17 00:00:00 2001
From: RD Babiera <rdbabiera(a)google.com>
Date: Thu, 29 Feb 2024 00:11:02 +0000
Subject: [PATCH] usb: typec: altmodes/displayport: create sysfs nodes as
driver's default device attribute group
The DisplayPort driver's sysfs nodes may be present to the userspace before
typec_altmode_set_drvdata() completes in dp_altmode_probe. This means that
a sysfs read can trigger a NULL pointer error by dereferencing dp->hpd in
hpd_show or dp->lock in pin_assignment_show, as dev_get_drvdata() returns
NULL in those cases.
Remove manual sysfs node creation in favor of adding attribute group as
default for devices bound to the driver. The ATTRIBUTE_GROUPS() macro is
not used here otherwise the path to the sysfs nodes is no longer compliant
with the ABI.
Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode")
Cc: stable(a)vger.kernel.org
Signed-off-by: RD Babiera <rdbabiera(a)google.com>
Link: https://lore.kernel.org/r/20240229001101.3889432-2-rdbabiera@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index f81bec0c7b86..f8ea3054be54 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -559,16 +559,21 @@ static ssize_t hpd_show(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR_RO(hpd);
-static struct attribute *dp_altmode_attrs[] = {
+static struct attribute *displayport_attrs[] = {
&dev_attr_configuration.attr,
&dev_attr_pin_assignment.attr,
&dev_attr_hpd.attr,
NULL
};
-static const struct attribute_group dp_altmode_group = {
+static const struct attribute_group displayport_group = {
.name = "displayport",
- .attrs = dp_altmode_attrs,
+ .attrs = displayport_attrs,
+};
+
+static const struct attribute_group *displayport_groups[] = {
+ &displayport_group,
+ NULL,
};
int dp_altmode_probe(struct typec_altmode *alt)
@@ -576,7 +581,6 @@ int dp_altmode_probe(struct typec_altmode *alt)
const struct typec_altmode *port = typec_altmode_get_partner(alt);
struct fwnode_handle *fwnode;
struct dp_altmode *dp;
- int ret;
/* FIXME: Port can only be DFP_U. */
@@ -587,10 +591,6 @@ int dp_altmode_probe(struct typec_altmode *alt)
DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo)))
return -ENODEV;
- ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
- if (ret)
- return ret;
-
dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
if (!dp)
return -ENOMEM;
@@ -624,7 +624,6 @@ void dp_altmode_remove(struct typec_altmode *alt)
{
struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
- sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group);
cancel_work_sync(&dp->work);
if (dp->connector_fwnode) {
@@ -649,6 +648,7 @@ static struct typec_altmode_driver dp_altmode_driver = {
.driver = {
.name = "typec_displayport",
.owner = THIS_MODULE,
+ .dev_groups = displayport_groups,
},
};
module_typec_altmode_driver(dp_altmode_driver);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 47b412c1ea77112f1148b4edd71700a388c7c80f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032728-showplace-outpour-6ee0@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
47b412c1ea77 ("phy: qcom-qmp-combo: fix type-c switch registration")
d2d7b8e88023 ("phy: qcom-qmp-combo: fix drm bridge registration")
35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 47b412c1ea77112f1148b4edd71700a388c7c80f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:28 +0100
Subject: [PATCH] phy: qcom-qmp-combo: fix type-c switch registration
Due to a long-standing issue in driver core, drivers may not probe defer
after having registered child devices to avoid triggering a probe
deferral loop (see fbc35b45f9f6 ("Add documentation on meaning of
-EPROBE_DEFER")).
Move registration of the typec switch to after looking up clocks and
other resources.
Note that PHY creation can in theory also trigger a probe deferral when
a 'phy' supply is used. This does not seem to affect the QMP PHY driver
but the PHY subsystem should be reworked to address this (i.e. by
separating initialisation and registration of the PHY).
Fixes: 2851117f8f42 ("phy: qcom-qmp-combo: Introduce orientation switching")
Cc: stable(a)vger.kernel.org # 6.5
Cc: Bjorn Andersson <quic_bjorande(a)quicinc.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Acked-by: Vinod Koul <vkoul(a)kernel.org>
Acked-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Link: https://lore.kernel.org/r/20240217150228.5788-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index e19d6a084f10..17c4ad7553a5 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -3562,10 +3562,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = qmp_combo_typec_switch_register(qmp);
- if (ret)
- return ret;
-
/* Check for legacy binding with child nodes. */
usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
if (usb_np) {
@@ -3585,6 +3581,10 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
goto err_node_put;
+ ret = qmp_combo_typec_switch_register(qmp);
+ if (ret)
+ goto err_node_put;
+
ret = drm_aux_bridge_register(dev);
if (ret)
goto err_node_put;
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 47b412c1ea77112f1148b4edd71700a388c7c80f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032727-sureness-hungrily-02e0@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
47b412c1ea77 ("phy: qcom-qmp-combo: fix type-c switch registration")
d2d7b8e88023 ("phy: qcom-qmp-combo: fix drm bridge registration")
35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 47b412c1ea77112f1148b4edd71700a388c7c80f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:28 +0100
Subject: [PATCH] phy: qcom-qmp-combo: fix type-c switch registration
Due to a long-standing issue in driver core, drivers may not probe defer
after having registered child devices to avoid triggering a probe
deferral loop (see fbc35b45f9f6 ("Add documentation on meaning of
-EPROBE_DEFER")).
Move registration of the typec switch to after looking up clocks and
other resources.
Note that PHY creation can in theory also trigger a probe deferral when
a 'phy' supply is used. This does not seem to affect the QMP PHY driver
but the PHY subsystem should be reworked to address this (i.e. by
separating initialisation and registration of the PHY).
Fixes: 2851117f8f42 ("phy: qcom-qmp-combo: Introduce orientation switching")
Cc: stable(a)vger.kernel.org # 6.5
Cc: Bjorn Andersson <quic_bjorande(a)quicinc.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Acked-by: Vinod Koul <vkoul(a)kernel.org>
Acked-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Link: https://lore.kernel.org/r/20240217150228.5788-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index e19d6a084f10..17c4ad7553a5 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -3562,10 +3562,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = qmp_combo_typec_switch_register(qmp);
- if (ret)
- return ret;
-
/* Check for legacy binding with child nodes. */
usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
if (usb_np) {
@@ -3585,6 +3581,10 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
goto err_node_put;
+ ret = qmp_combo_typec_switch_register(qmp);
+ if (ret)
+ goto err_node_put;
+
ret = drm_aux_bridge_register(dev);
if (ret)
goto err_node_put;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x d2d7b8e88023b75320662c2305d61779ff060950
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032712-safehouse-yearning-1b84@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
d2d7b8e88023 ("phy: qcom-qmp-combo: fix drm bridge registration")
35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2d7b8e88023b75320662c2305d61779ff060950 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:27 +0100
Subject: [PATCH] phy: qcom-qmp-combo: fix drm bridge registration
Due to a long-standing issue in driver core, drivers may not probe defer
after having registered child devices to avoid triggering a probe
deferral loop (see fbc35b45f9f6 ("Add documentation on meaning of
-EPROBE_DEFER")).
This could potentially also trigger a bug in the DRM bridge
implementation which does not expect bridges to go away even if device
links may avoid triggering this (when enabled).
Move registration of the DRM aux bridge to after looking up clocks and
other resources.
Note that PHY creation can in theory also trigger a probe deferral when
a 'phy' supply is used. This does not seem to affect the QMP PHY driver
but the PHY subsystem should be reworked to address this (i.e. by
separating initialisation and registration of the PHY).
Fixes: 35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE")
Fixes: 1904c3f578dc ("phy: qcom-qmp-combo: Introduce drm_bridge")
Cc: stable(a)vger.kernel.org # 6.5
Cc: Bjorn Andersson <quic_bjorande(a)quicinc.com>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Acked-by: Vinod Koul <vkoul(a)kernel.org>
Acked-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Link: https://lore.kernel.org/r/20240217150228.5788-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 1ad10110dd25..e19d6a084f10 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -3566,10 +3566,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = drm_aux_bridge_register(dev);
- if (ret)
- return ret;
-
/* Check for legacy binding with child nodes. */
usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
if (usb_np) {
@@ -3589,6 +3585,10 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
goto err_node_put;
+ ret = drm_aux_bridge_register(dev);
+ if (ret)
+ goto err_node_put;
+
pm_runtime_set_active(dev);
ret = devm_pm_runtime_enable(dev);
if (ret)
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x d2d7b8e88023b75320662c2305d61779ff060950
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032711-saxophone-flammable-5cc1@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
d2d7b8e88023 ("phy: qcom-qmp-combo: fix drm bridge registration")
35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2d7b8e88023b75320662c2305d61779ff060950 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Sat, 17 Feb 2024 16:02:27 +0100
Subject: [PATCH] phy: qcom-qmp-combo: fix drm bridge registration
Due to a long-standing issue in driver core, drivers may not probe defer
after having registered child devices to avoid triggering a probe
deferral loop (see fbc35b45f9f6 ("Add documentation on meaning of
-EPROBE_DEFER")).
This could potentially also trigger a bug in the DRM bridge
implementation which does not expect bridges to go away even if device
links may avoid triggering this (when enabled).
Move registration of the DRM aux bridge to after looking up clocks and
other resources.
Note that PHY creation can in theory also trigger a probe deferral when
a 'phy' supply is used. This does not seem to affect the QMP PHY driver
but the PHY subsystem should be reworked to address this (i.e. by
separating initialisation and registration of the PHY).
Fixes: 35921910bbd0 ("phy: qcom: qmp-combo: switch to DRM_AUX_BRIDGE")
Fixes: 1904c3f578dc ("phy: qcom-qmp-combo: Introduce drm_bridge")
Cc: stable(a)vger.kernel.org # 6.5
Cc: Bjorn Andersson <quic_bjorande(a)quicinc.com>
Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Reviewed-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Reviewed-by: Bjorn Andersson <andersson(a)kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
Acked-by: Vinod Koul <vkoul(a)kernel.org>
Acked-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Link: https://lore.kernel.org/r/20240217150228.5788-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 1ad10110dd25..e19d6a084f10 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -3566,10 +3566,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = drm_aux_bridge_register(dev);
- if (ret)
- return ret;
-
/* Check for legacy binding with child nodes. */
usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
if (usb_np) {
@@ -3589,6 +3585,10 @@ static int qmp_combo_probe(struct platform_device *pdev)
if (ret)
goto err_node_put;
+ ret = drm_aux_bridge_register(dev);
+ if (ret)
+ goto err_node_put;
+
pm_runtime_set_active(dev);
ret = devm_pm_runtime_enable(dev);
if (ret)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5ef1d8c1ddbf696e47b226e11888eaf8d9e8e807
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032702-emphasis-favorite-5e62@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5ef1d8c1ddbf ("KVM: SVM: Flush pages under kvm->lock to fix UAF in svm_register_enc_region()")
19a23da53932 ("Fix unsynchronized access to sev members through svm_register_enc_region")
a8d908b5873c ("KVM: x86: report sev_pin_memory errors with PTR_ERR")
dc42c8ae0a77 ("KVM: SVM: convert get_user_pages() --> pin_user_pages()")
78824fabc72e ("KVM: SVM: fix svn_pin_memory()'s use of get_user_pages_fast()")
996ed22c7a52 ("arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory()")
eaf78265a4ab ("KVM: SVM: Move SEV code to separate file")
ef0f64960d01 ("KVM: SVM: Move AVIC code to separate file")
883b0a91f41a ("KVM: SVM: Move Nested SVM Implementation to nested.c")
46a010dd6896 ("kVM SVM: Move SVM related files to own sub-directory")
8c1b724ddb21 ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5ef1d8c1ddbf696e47b226e11888eaf8d9e8e807 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 16 Feb 2024 17:34:30 -0800
Subject: [PATCH] KVM: SVM: Flush pages under kvm->lock to fix UAF in
svm_register_enc_region()
Do the cache flush of converted pages in svm_register_enc_region() before
dropping kvm->lock to fix use-after-free issues where region and/or its
array of pages could be freed by a different task, e.g. if userspace has
__unregister_enc_region_locked() already queued up for the region.
Note, the "obvious" alternative of using local variables doesn't fully
resolve the bug, as region->pages is also dynamically allocated. I.e. the
region structure itself would be fine, but region->pages could be freed.
Flushing multiple pages under kvm->lock is unfortunate, but the entire
flow is a rare slow path, and the manual flush is only needed on CPUs that
lack coherency for encrypted memory.
Fixes: 19a23da53932 ("Fix unsynchronized access to sev members through svm_register_enc_region")
Reported-by: Gabe Kirkpatrick <gkirkpatrick(a)google.com>
Cc: Josh Eads <josheads(a)google.com>
Cc: Peter Gonda <pgonda(a)google.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20240217013430.2079561-1-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f760106c31f8..a132547fcfb5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1975,20 +1975,22 @@ int sev_mem_enc_register_region(struct kvm *kvm,
goto e_free;
}
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+ * or vice versa for this memory range. Lets make sure caches are
+ * flushed to ensure that guest data gets written into memory with
+ * correct C-bit. Note, this must be done before dropping kvm->lock,
+ * as region and its array of pages can be freed by a different task
+ * once kvm->lock is released.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
region->uaddr = range->addr;
region->size = range->size;
list_add_tail(&region->list, &sev->regions_list);
mutex_unlock(&kvm->lock);
- /*
- * The guest may change the memory encryption attribute from C=0 -> C=1
- * or vice versa for this memory range. Lets make sure caches are
- * flushed to ensure that guest data gets written into memory with
- * correct C-bit.
- */
- sev_clflush_pages(region->pages, region->npages);
-
return ret;
e_free:
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 5ef1d8c1ddbf696e47b226e11888eaf8d9e8e807
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032701-robotics-energize-d297@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
5ef1d8c1ddbf ("KVM: SVM: Flush pages under kvm->lock to fix UAF in svm_register_enc_region()")
19a23da53932 ("Fix unsynchronized access to sev members through svm_register_enc_region")
a8d908b5873c ("KVM: x86: report sev_pin_memory errors with PTR_ERR")
dc42c8ae0a77 ("KVM: SVM: convert get_user_pages() --> pin_user_pages()")
78824fabc72e ("KVM: SVM: fix svn_pin_memory()'s use of get_user_pages_fast()")
996ed22c7a52 ("arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory()")
eaf78265a4ab ("KVM: SVM: Move SEV code to separate file")
ef0f64960d01 ("KVM: SVM: Move AVIC code to separate file")
883b0a91f41a ("KVM: SVM: Move Nested SVM Implementation to nested.c")
46a010dd6896 ("kVM SVM: Move SVM related files to own sub-directory")
8c1b724ddb21 ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5ef1d8c1ddbf696e47b226e11888eaf8d9e8e807 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 16 Feb 2024 17:34:30 -0800
Subject: [PATCH] KVM: SVM: Flush pages under kvm->lock to fix UAF in
svm_register_enc_region()
Do the cache flush of converted pages in svm_register_enc_region() before
dropping kvm->lock to fix use-after-free issues where region and/or its
array of pages could be freed by a different task, e.g. if userspace has
__unregister_enc_region_locked() already queued up for the region.
Note, the "obvious" alternative of using local variables doesn't fully
resolve the bug, as region->pages is also dynamically allocated. I.e. the
region structure itself would be fine, but region->pages could be freed.
Flushing multiple pages under kvm->lock is unfortunate, but the entire
flow is a rare slow path, and the manual flush is only needed on CPUs that
lack coherency for encrypted memory.
Fixes: 19a23da53932 ("Fix unsynchronized access to sev members through svm_register_enc_region")
Reported-by: Gabe Kirkpatrick <gkirkpatrick(a)google.com>
Cc: Josh Eads <josheads(a)google.com>
Cc: Peter Gonda <pgonda(a)google.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20240217013430.2079561-1-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f760106c31f8..a132547fcfb5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1975,20 +1975,22 @@ int sev_mem_enc_register_region(struct kvm *kvm,
goto e_free;
}
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+ * or vice versa for this memory range. Lets make sure caches are
+ * flushed to ensure that guest data gets written into memory with
+ * correct C-bit. Note, this must be done before dropping kvm->lock,
+ * as region and its array of pages can be freed by a different task
+ * once kvm->lock is released.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
region->uaddr = range->addr;
region->size = range->size;
list_add_tail(&region->list, &sev->regions_list);
mutex_unlock(&kvm->lock);
- /*
- * The guest may change the memory encryption attribute from C=0 -> C=1
- * or vice versa for this memory range. Lets make sure caches are
- * flushed to ensure that guest data gets written into memory with
- * correct C-bit.
- */
- sev_clflush_pages(region->pages, region->npages);
-
return ret;
e_free:
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 910c57dfa4d113aae6571c2a8b9ae8c430975902
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024032739-sloped-goal-fadb@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
910c57dfa4d1 ("KVM: x86: Mark target gfn of emulated atomic instruction as dirty")
5d6c7de6446e ("KVM: x86: Bail to userspace if emulation of atomic user access faults")
1c2361f667f3 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 910c57dfa4d113aae6571c2a8b9ae8c430975902 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 14 Feb 2024 17:00:03 -0800
Subject: [PATCH] KVM: x86: Mark target gfn of emulated atomic instruction as
dirty
When emulating an atomic access on behalf of the guest, mark the target
gfn dirty if the CMPXCHG by KVM is attempted and doesn't fault. This
fixes a bug where KVM effectively corrupts guest memory during live
migration by writing to guest memory without informing userspace that the
page is dirty.
Marking the page dirty got unintentionally dropped when KVM's emulated
CMPXCHG was converted to do a user access. Before that, KVM explicitly
mapped the guest page into kernel memory, and marked the page dirty during
the unmap phase.
Mark the page dirty even if the CMPXCHG fails, as the old data is written
back on failure, i.e. the page is still written. The value written is
guaranteed to be the same because the operation is atomic, but KVM's ABI
is that all writes are dirty logged regardless of the value written. And
more importantly, that's what KVM did before the buggy commit.
Huge kudos to the folks on the Cc list (and many others), who did all the
actual work of triaging and debugging.
Fixes: 1c2361f667f3 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses")
Cc: stable(a)vger.kernel.org
Cc: David Matlack <dmatlack(a)google.com>
Cc: Pasha Tatashin <tatashin(a)google.com>
Cc: Michael Krebs <mkrebs(a)google.com>
base-commit: 6769ea8da8a93ed4630f1ce64df6aafcaabfce64
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20240215010004.1456078-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48a61d283406..e4270eaa33df 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8007,6 +8007,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (r < 0)
return X86EMUL_UNHANDLEABLE;
+
+ /*
+ * Mark the page dirty _before_ checking whether or not the CMPXCHG was
+ * successful, as the old value is written back on failure. Note, for
+ * live migration, this is unnecessarily conservative as CMPXCHG writes
+ * back the original value and the access is atomic, but KVM's ABI is
+ * that all writes are dirty logged, regardless of the value written.
+ */
+ kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa));
+
if (r)
return X86EMUL_CMPXCHG_FAILED;
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From a304fa1d10fcb974c117d391e5b4d34c2baa9a62 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks(a)gmail.com>
Date: Tue, 5 Mar 2024 13:23:00 +0900
Subject: [PATCH] docs: Makefile: Add dependency to $(YNL_INDEX) for targets
other than htmldocs
Commit f061c9f7d058 ("Documentation: Document each netlink family")
added recipes for YAML -> RST conversion.
Then commit 7da8bdbf8f5d ("docs: Makefile: Fix make cleandocs by
deleting generated .rst files") made sure those converted .rst files
are cleaned by "make cleandocs".
However, they took care of htmldocs build only.
If one of the other targets such as latexdocs or epubdocs is built
without building htmldocs, missing .rst files can cause additional
WARNINGs from sphinx-build as follows:
./Documentation/userspace-api/netlink/specs.rst:18: WARNING: undefined label: 'specs'
./Documentation/userspace-api/netlink/netlink-raw.rst:64: WARNING: unknown document: '../../networking/netlink_spec/rt_link'
./Documentation/userspace-api/netlink/netlink-raw.rst:64: WARNING: unknown document: '../../networking/netlink_spec/tc'
./Documentation/userspace-api/netlink/index.rst:21: WARNING: undefined label: 'specs'
Add dependency to $(YNL_INDEX) for other targets and allow any targets
to be built cleanly right after "make cleandocs".
Signed-off-by: Akira Yokosawa <akiyks(a)gmail.com>
Cc: stable(a)vger.kernel.org # v6.7
Cc: Thorsten Blum <thorsten.blum(a)toblux.com>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: "David S. Miller" <davem(a)davemloft.net>
Reviewed-by: Breno Leitao <leitao(a)debian.org>
Signed-off-by: Jonathan Corbet <corbet(a)lwn.net>
Message-ID: <e876e3c8-109d-4bc8-9916-05a4bc4ee9ac(a)gmail.com>
---
Documentation/Makefile | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 4479910166fc1..b68f8c816897b 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -111,7 +111,9 @@ $(YNL_INDEX): $(YNL_RST_FILES)
$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml $(YNL_TOOL)
$(Q)$(YNL_TOOL) -i $< -o $@
-htmldocs: $(YNL_INDEX)
+htmldocs texinfodocs latexdocs epubdocs xmldocs: $(YNL_INDEX)
+
+htmldocs:
@$(srctree)/scripts/sphinx-pre-install --version-check
@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
--
2.43.0
Hi,
could you, please, include commit b4a11b2033b7 ("net: fix
IPSTATS_MIB_OUTPKGS increment in OutForwDatagrams") from Linus' tree
into the 6.6 stable tree (only)?
Reported-by: Vitezslav Samel <vitezslav(a)samel.cz>
Fixes: e4da8c78973c ("net: ipv4, ipv6: fix IPSTATS_MIB_OUTOCTETS increment duplicated")
Link: https://lore.kernel.org/netdev/ZauRBl7zXWQRVZnl@pc11.op.pod.cz/
Tested-by: Vitezslav Samel <vitezslav(a)samel.cz>
Thanks,
Vita
On Thu, Jan 25, 2024 at 11:20:58 -0700, David Ahern wrote:
> On 1/25/24 8:08 AM, Vitezslav Samel wrote:
> > On Thu, Jan 25, 2024 at 07:47:40 +0100, Vitezslav Samel wrote:
> >> On Wed, Jan 24, 2024 at 17:46:52 -0800, Jakub Kicinski wrote:
> >>> On Thu, 25 Jan 2024 08:37:11 +0800 heng guo wrote:
> >>>>>> Heng Guo, David, any thoughts on this? Revert?
> >>>>> Revert is best; Heng Guo can revisit the math and try again.
> >>>>>
> >>>>> The patch in question basically negated IPSTATS_MIB_OUTOCTETS; I see it
> >>>>> shown in proc but never bumped in the datapath.
> >>>> [HG]: Yes please revert it. I verified the patch on ipv4, seems I should
> >>>> not touch the codes to ipv6. Sorry for it.
> >>>
> >>> Would you mind sending a patch with a revert, explaining the situation,
> >>> the right Fixes tag and a link to Vitezslav's report?
> >>
> >> I took a look at current master and found that there is yet another
> >> commit since 6.6.x which touches this area: commit b4a11b2033b7 by Heng Guo
> >> ("net: fix IPSTATS_MIB_OUTPKGS increment in OutForwDatagrams"). It went
> >> in v6.7-rc1.
> >>
> >> I will test current master this afternoon and report back.
> >
> > Test 1: Linus' current master: IPv6 octets accounting is OK
> > Test 2: 6.6.13 with b4a11b2033b7 ("net: fix IPSTATS_MIB_OUTPKGS
> > increment in OutForwDatagrams") on top is also OK.
> >
> > Seems like my problem was solved in master already, but
> > it still exists in 6.6.y. IMHO commit b4a11b2033b7 should be
> > marked as for-stable-6.6.y and forwarded to GregKH. AFAIK only 6.6.y
> > stable tree is affected.
> >
> > But beware: I only tested my specific problem and I don't know if the
> > commit with fix doesn't break anything else.
>
> Only reported problem, so with b4a11b2033b7 backported to stable we
> should be good. Thanks for the testing of various releases to isolate
> the problem.
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From f2d5dcb48f7ba9e3ff249d58fc1fa963d374e66a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Date: Tue, 10 Oct 2023 15:55:49 +0100
Subject: [PATCH] bounds: support non-power-of-two CONFIG_NR_CPUS
ilog2() rounds down, so for example when PowerPC 85xx sets CONFIG_NR_CPUS
to 24, we will only allocate 4 bits to store the number of CPUs instead of
5. Use bits_per() instead, which rounds up. Found by code inspection.
The effect of this would probably be a misaccounting when doing NUMA
balancing, so to a user, it would only be a performance penalty. The
effects may be more wide-spread; it's hard to tell.
Link: https://lkml.kernel.org/r/20231010145549.1244748-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Fixes: 90572890d202 ("mm: numa: Change page last {nid,pid} into {cpu,pid}")
Reviewed-by: Rik van Riel <riel(a)surriel.com>
Acked-by: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/bounds.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/bounds.c b/kernel/bounds.c
index b529182e8b04f..c5a9fcd2d6228 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -19,7 +19,7 @@ int main(void)
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
#ifdef CONFIG_SMP
- DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
+ DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS));
#endif
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
#ifdef CONFIG_LRU_GEN
--
2.43.0
Hi,
Framework 16 supports variable refresh rates in its panel but only
advertises it in the DisplayID block not the EDID. As Plasma 6 and
GNOME 46 support VRR this problem is exposed to more people.
This is fixed by:
2f14c0c8cae8 ("drm/amd/display: Use freesync when
`DRM_EDID_FEATURE_CONTINUOUS_FREQ` found")
Can you please bring this back to 6.6.y+?
Thanks,
This is the start of the stable review cycle for the 5.10.213 release.
There are 73 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri Mar 15 04:46:39 PM UTC 2024.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
Thanks,
Sasha
-------------
Pseudo-Shortlog of commits:
Andrea Parri (Microsoft) (1):
Drivers: hv: vmbus: Drop error message when 'No request id available'
Andres Beltran (2):
Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus
hardening
hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus
hardening
Andy Shevchenko (4):
serial: max310x: Use devm_clk_get_optional() to get the input clock
serial: max310x: Try to get crystal clock rate from property
serial: max310x: Make use of device properties
serial: max310x: Unprepare and disable clock in error path
Ansuel Smith (1):
regmap: allow to define reg_update_bits for no bus configuration
Baokun Li (1):
ext4: make ext4_es_insert_extent() return void
Christophe Kerello (1):
mmc: mmci: stm32: fix DMA API overlapping mappings warning
Cosmin Tanislav (4):
serial: max310x: use regmap methods for SPI batch operations
serial: max310x: use a separate regmap for each port
serial: max310x: make accessing revision id interface-agnostic
serial: max310x: implement I2C support
Dexuan Cui (1):
hv_netvsc: Make netvsc/VF binding check both MAC and serial number
Edward Adam Davis (1):
net/rds: fix WARNING in rds_conn_connect_if_down
Eric Dumazet (2):
geneve: make sure to pull inner header in geneve_rx()
net/ipv6: avoid possible UAF in ip6_route_mpath_notify()
Florian Westphal (1):
netfilter: nft_ct: fix l3num expectations with inet pseudo family
Hugo Villeneuve (2):
serial: max310x: fail probe if clock crystal is unstable
serial: max310x: prevent infinite while() loop in port startup
Ingo Molnar (1):
exit: Fix typo in comment: s/sub-theads/sub-threads
Jan Kundrát (1):
serial: max310x: fix IO data corruption in batched operations
Jason Xing (12):
netrom: Fix a data-race around sysctl_netrom_default_path_quality
netrom: Fix a data-race around
sysctl_netrom_obsolescence_count_initialiser
netrom: Fix data-races around sysctl_netrom_network_ttl_initialiser
netrom: Fix a data-race around sysctl_netrom_transport_timeout
netrom: Fix a data-race around sysctl_netrom_transport_maximum_tries
netrom: Fix a data-race around
sysctl_netrom_transport_acknowledge_delay
netrom: Fix a data-race around sysctl_netrom_transport_busy_delay
netrom: Fix a data-race around
sysctl_netrom_transport_requested_window_size
netrom: Fix a data-race around
sysctl_netrom_transport_no_activity_timeout
netrom: Fix a data-race around sysctl_netrom_routing_control
netrom: Fix a data-race around sysctl_netrom_link_fails_count
netrom: Fix data-races around sysctl_net_busy_read
Johannes Berg (1):
um: allow not setting extra rpaths in the linux binary
John Efstathiades (4):
lan78xx: Fix white space and style issues
lan78xx: Add missing return code checks
lan78xx: Fix partial packet errors on suspend/resume
lan78xx: Fix race conditions in suspend/resume handling
Juhee Kang (1):
hv_netvsc: use netif_is_bond_master() instead of open code
Lena Wang (1):
netfilter: nf_conntrack_h323: Add protection for bmp length out of
range
Long Li (2):
hv_netvsc: Wait for completion on request SWITCH_DATA_PATH
hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove
Maciej Fijalkowski (2):
ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able
i40e: disable NAPI right after disabling irqs when handling xsk_pool
Marek Vasut (1):
regmap: Add bulk read/write callbacks into regmap_config
Martin KaFai Lau (2):
net: Change sock_getsockopt() to take the sk ptr instead of the sock
ptr
bpf: net: Change sk_getsockopt() to take the sockptr_t argument
Mathias Nyman (3):
xhci: remove extra loop in interrupt context
xhci: prevent double-fetch of transfer and transfer event TRBs
xhci: process isoc TD properly when there was a transaction error mid
TD.
Michal Pecio (1):
xhci: handle isoc Babble and Buffer Overrun events properly
Mike Kravetz (1):
mm/hugetlb: change hugetlb_reserve_pages() to type bool
Muhammad Usama Anjum (1):
selftests/mm: switch to bash from sh
Nico Pache (1):
selftests: mm: fix map_hugetlb failure on 64K page size systems
Oleg Nesterov (5):
getrusage: add the "signal_struct *sig" local variable
getrusage: move thread_group_cputime_adjusted() outside of
lock_task_sighand()
getrusage: use __for_each_thread()
getrusage: use sig->stats_lock rather than lock_task_sighand()
exit: wait_task_zombie: kill the no longer necessary
spin_lock_irq(siglock)
Oleksij Rempel (1):
net: lan78xx: fix runtime PM count underflow on link stop
Ondrej Mosnacek (1):
lsm: fix default return value of the socket_getpeersec_*() hooks
Paul Moore (1):
lsm: make security_socket_getpeersec_stream() sockptr_t safe
Prakash Sangappa (1):
mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE
Rand Deeb (1):
net: ice: Fix potential NULL pointer dereference in
ice_bridge_setlink()
Sasha Levin (1):
Linux 5.10.213-rc1
Shradha Gupta (1):
hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed
Steven Rostedt (Google) (1):
tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string
Toke Høiland-Jørgensen (1):
cpumap: Zero-initialise xdp_rxq_info struct before running XDP program
Yann Gautier (1):
mmc: mmci: stm32: use a buffer for unaligned DMA requests
Zhang Yi (2):
ext4: refactor ext4_da_map_blocks()
ext4: convert to exclusive lock while inserting delalloc extents
Makefile | 4 +-
arch/um/Kconfig | 13 +
arch/um/Makefile | 3 +-
arch/x86/Makefile.um | 2 +-
drivers/base/regmap/internal.h | 4 +
drivers/base/regmap/regmap.c | 77 +-
drivers/hv/channel.c | 174 +++-
drivers/hv/hyperv_vmbus.h | 3 +-
drivers/hv/ring_buffer.c | 28 +-
drivers/mmc/host/mmci_stm32_sdmmc.c | 112 ++-
drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
drivers/net/ethernet/intel/ice/ice_main.c | 2 +
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 +-
drivers/net/geneve.c | 18 +-
drivers/net/hyperv/hyperv_net.h | 13 +
drivers/net/hyperv/netvsc.c | 55 +-
drivers/net/hyperv/netvsc_drv.c | 107 +-
drivers/net/hyperv/rndis_filter.c | 1 +
drivers/net/usb/lan78xx.c | 910 ++++++++++++++----
drivers/tty/serial/Kconfig | 1 +
drivers/tty/serial/max310x.c | 378 ++++++--
drivers/usb/host/xhci-ring.c | 143 ++-
drivers/usb/host/xhci.h | 3 +
fs/ext4/extents.c | 5 +-
fs/ext4/extents_status.c | 14 +-
fs/ext4/extents_status.h | 6 +-
fs/ext4/inode.c | 65 +-
fs/hugetlbfs/inode.c | 17 +-
include/linux/filter.h | 3 +-
include/linux/hugetlb.h | 2 +-
include/linux/hyperv.h | 23 +
include/linux/lsm_hook_defs.h | 6 +-
include/linux/lsm_hooks.h | 4 +-
include/linux/regmap.h | 19 +
include/linux/security.h | 11 +-
include/linux/sockptr.h | 5 +
include/trace/events/qdisc.h | 20 +-
kernel/bpf/cpumap.c | 2 +-
kernel/exit.c | 12 +-
kernel/sys.c | 91 +-
mm/hugetlb.c | 37 +-
net/core/filter.c | 5 +-
net/core/sock.c | 52 +-
net/ipv6/route.c | 21 +-
net/netfilter/nf_conntrack_h323_asn1.c | 4 +
net/netfilter/nft_ct.c | 11 +-
net/netrom/af_netrom.c | 14 +-
net/netrom/nr_dev.c | 2 +-
net/netrom/nr_in.c | 6 +-
net/netrom/nr_out.c | 2 +-
net/netrom/nr_route.c | 8 +-
net/netrom/nr_subr.c | 5 +-
net/rds/rdma.c | 3 +
net/rds/send.c | 6 +-
security/apparmor/lsm.c | 29 +-
security/security.c | 35 +-
security/selinux/hooks.c | 13 +-
security/smack/smack_lsm.c | 19 +-
.../selftests/vm/charge_reserved_hugetlb.sh | 2 +-
tools/testing/selftests/vm/map_hugetlb.c | 7 +
.../selftests/vm/write_hugetlb_memory.sh | 2 +-
61 files changed, 1986 insertions(+), 711 deletions(-)
--
2.43.0
Hi,
Ben Hutchings reported in https://bugs.debian.org/1064035 a problem
with the kernel-doc builds once 3080ea5553cc ("stddef: Introduce
DECLARE_FLEX_ARRAY() helper") got applied in 5.10.210 (as
prerequisite of another fix in 5.10.y):
> The backport of commit 3080ea5553cc "stddef: Introduce
> DECLARE_FLEX_ARRAY() helper" modified scripts/kernel-doc and
> introduced a syntax error:
>
> Global symbol "$args" requires explicit package name (did you forget to declare "my $args"?) at ./scripts/kernel-doc line 1236.
> Global symbol "$args" requires explicit package name (did you forget to declare "my $args"?) at ./scripts/kernel-doc line 1236.
> Execution of ./scripts/kernel-doc aborted due to compilation errors.
>
> This doesn't stop the documentation build process, but causes the
> documentation that should be extracted by kernel-doc to be missing
> from linux-doc-5.10.
>
> We should be able to fix this by either backporting commit
> e86bdb24375a "scripts: kernel-doc: reduce repeated regex expressions
> into variables" or replacing /$args/ with /([^,)]+)/.
>
> Ben.
What would be preferred here from the stable maintainers' point of view?
AFAICS e86bdb24375a ("scripts: kernel-doc: reduce repeated regex
expressions into variables") won't apply cleanly and needs some
refactoring. The alternative pointed out by Ben would be to replace
the /$args/ with /([^,)]+)/.
# 5.10.y specific regression
#regzbot introduced: 443b16ee3d9ce0a3ece0e3526a5af883e5b16eaf
#regzbot link: https://bugs.debian.org/1064035
Regards,
Salvatore
There is a bug when setting the RSS options in virtio_net that can break
the whole machine, getting the kernel into an infinite loop.
Running the following command in any QEMU virtual machine with virtionet
will reproduce this problem:
# ethtool -X eth0 hfunc toeplitz
This is how the problem happens:
1) ethtool_set_rxfh() calls virtnet_set_rxfh()
2) virtnet_set_rxfh() calls virtnet_commit_rss_command()
3) virtnet_commit_rss_command() populates 4 entries for the rss
scatter-gather
4) Since the command above does not have a key, then the last
scatter-gather entry will be zeroed, since rss_key_size == 0.
sg_buf_size = vi->rss_key_size;
5) This buffer is passed to qemu, but qemu is not happy with a buffer
with zero length, and does the following in virtqueue_map_desc() (QEMU
function):
if (!sz) {
virtio_error(vdev, "virtio: zero sized buffers are not allowed");
6) virtio_error() (also QEMU function) set the device as broken
vdev->broken = true;
7) Qemu bails out, and does not respond to this crazy kernel.
8) The kernel is waiting for the response to come back (function
virtnet_send_command())
9) The kernel is waiting doing the following :
while (!virtqueue_get_buf(vi->cvq, &tmp) &&
!virtqueue_is_broken(vi->cvq))
cpu_relax();
10) Neither of the functions above returns true, thus, the kernel
loops here forever. Keeping in mind that virtqueue_is_broken() does
not look at the qemu `vdev->broken`, so, it never realizes that the
virtio is broken at QEMU side.
Fix it by not sending RSS commands if the feature is not available in
the device.
Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.")
Cc: stable(a)vger.kernel.org
Cc: qemu-devel(a)nongnu.org
Signed-off-by: Breno Leitao <leitao(a)debian.org>
---
drivers/net/virtio_net.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c640fdf28fc5..e6b0eaf08ac2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3809,6 +3809,9 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct virtnet_info *vi = netdev_priv(dev);
int i;
+ if (!vi->has_rss && !vi->has_rss_hash_report)
+ return -EOPNOTSUPP;
+
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 1b5078f01b953a43d6198180ca5b110017315672 Mon Sep 17 00:00:00 2001
From: Zhikai Zhai <zhikai.zhai(a)amd.com>
Date: Mon, 29 Jan 2024 17:02:18 +0800
Subject: [PATCH] drm/amd/display: Add align done check
[WHY]
We Double-check link status if training successful,
but miss the lane align status.
[HOW]
Add the lane align status check
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Wenjing Liu <wenjing.liu(a)amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Zhikai Zhai <zhikai.zhai(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
.../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index e06d3c2d89102..e538c67d3ed91 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status(
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
union lane_status lane_status;
+ union lane_align_status_updated dpcd_lane_status_updated;
uint8_t dpcd_buf[6] = {0};
uint32_t lane;
@@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status(
* check lanes status
*/
lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+ dpcd_lane_status_updated.raw = dpcd_buf[4];
if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
!lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
+ !lane_status.bits.SYMBOL_LOCKED_0 ||
+ !dp_is_interlane_aligned(dpcd_lane_status_updated)) {
/* if one of the channel equalization, clock
* recovery or symbol lock is dropped
* consider it as (link has been
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 002bf2fbc00e5c4b95fb167287e2ae7d1973281e Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka(a)linux.intel.com>
Date: Mon, 12 Feb 2024 13:01:35 +0100
Subject: [PATCH] PCI/AER: Block runtime suspend when handling errors
PM runtime can be done simultaneously with AER error handling. Avoid that
by using pm_runtime_get_sync() before and pm_runtime_put() after reset in
pcie_do_recovery() for all recovering devices.
pm_runtime_get_sync() will increase dev->power.usage_count counter to
prevent any possible future request to runtime suspend a device. It will
also resume a device, if it was previously in D3hot state.
I tested with igc device by doing simultaneous aer_inject and rpm
suspend/resume via /sys/bus/pci/devices/PCI_ID/power/control and can
reproduce:
igc 0000:02:00.0: not ready 65535ms after bus reset; giving up
pcieport 0000:00:1c.2: AER: Root Port link has been reset (-25)
pcieport 0000:00:1c.2: AER: subordinate device reset failed
pcieport 0000:00:1c.2: AER: device recovery failed
igc 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible
The problem disappears when this patch is applied.
Link: https://lore.kernel.org/r/20240212120135.146068-1-stanislaw.gruszka@linux.i…
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka(a)linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
---
drivers/pci/pcie/err.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 59c90d04a609a..705893b5f7b09 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -13,6 +13,7 @@
#define dev_fmt(fmt) "AER: " fmt
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev,
return 0;
}
+static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data)
+{
+ pm_runtime_get_sync(&pdev->dev);
+ return 0;
+}
+
+static int pci_pm_runtime_put(struct pci_dev *pdev, void *data)
+{
+ pm_runtime_put(&pdev->dev);
+ return 0;
+}
+
static int report_frozen_detected(struct pci_dev *dev, void *data)
{
return report_error_detected(dev, pci_channel_io_frozen, data);
@@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
else
bridge = pci_upstream_bridge(dev);
+ pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL);
+
pci_dbg(bridge, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
pci_walk_bridge(bridge, report_frozen_detected, &status);
@@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pcie_clear_device_status(dev);
pci_aer_clear_nonfatal_status(dev);
}
+
+ pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
+
pci_info(bridge, "device recovery successful\n");
return status;
failed:
+ pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
+
pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
/* TODO: Should kernel panic here? */
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 3a6a32b31a111f6e66526fb2d3cb13a876465076 Mon Sep 17 00:00:00 2001
From: Gabe Teeger <gabe.teeger(a)amd.com>
Date: Mon, 29 Jan 2024 13:31:44 -0500
Subject: [PATCH] Revert "drm/amd/display: Send DTBCLK disable message on first
commit"
This reverts commit f341055b10bd8be55c3c995dff5f770b236b8ca9.
System hang observed, this commit is thought to be the
regression point.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Ovidiu Bunea <ovidiu.bunea(a)amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Gabe Teeger <gabe.teeger(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 06edca50a8fa1..36e5bb611fb10 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -414,7 +414,6 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
- clk_mgr->clks.dtbclk_en = true;
clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 3b84525544be4ca0481110263a6d73eb00741cf3 Mon Sep 17 00:00:00 2001
From: Wayne Lin <Wayne.Lin(a)amd.com>
Date: Tue, 2 Jan 2024 14:20:37 +0800
Subject: [PATCH] drm/amd/display: Align the returned error code with legacy DP
[Why]
For usb4 connector, AUX transaction is handled by dmub utilizing a different
code path comparing to legacy DP connector. If the usb4 DP connector is
disconnected, AUX access will report EBUSY and cause igt@kms_dp_aux_dev
fail.
[How]
Align the error code with the one reported by legacy DP as EIO.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index eaf8d9f482446..85b7f58a7f35a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -979,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
+ if (!link->hpd_status) {
+ *operation_result = AUX_RET_ERROR_HPD_DISCON;
+ return -1;
+ }
+
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
operation_result);
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 3d066f9547dd58329b526db44f42c487a7974703 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Date: Wed, 21 Feb 2024 12:27:31 -0500
Subject: [PATCH] drm/amd/display: Fix idle check for shared firmware state
[WHY]
We still had an instance of get_idle_state checking the PMFW scratch
register instead of the actual idle allow signal.
[HOW]
Replace it with the SW state check for whether we had allowed idle
through notify_idle.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Duncan Ma <duncan.ma(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 613d09c42f3b9..958552a8605ff 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -4847,22 +4847,16 @@ void dc_exit_ips_for_hw_access(struct dc *dc)
bool dc_dmub_is_ips_idle_state(struct dc *dc)
{
- uint32_t idle_state = 0;
-
if (dc->debug.disable_idle_power_optimizations)
return false;
if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
return false;
- if (dc->hwss.get_idle_state)
- idle_state = dc->hwss.get_idle_state(dc);
-
- if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
- !(idle_state & DMUB_IPS2_ALLOW_MASK))
- return true;
+ if (!dc->ctx->dmub_srv)
+ return false;
- return false;
+ return dc->ctx->dmub_srv->idle_allowed;
}
/* set min and max memory clock to lowest and highest DPM level, respectively */
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 4fbf8bc733d14bceb16dda46a3f5e19c6a9621c5 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1(a)huawei.com>
Date: Thu, 1 Feb 2024 22:18:45 +0800
Subject: [PATCH] ext4: correct best extent lstart adjustment logic
When yangerkun review commit 93cdf49f6eca ("ext4: Fix best extent lstart
adjustment logic in ext4_mb_new_inode_pa()"), it was found that the best
extent did not completely cover the original request after adjusting the
best extent lstart in ext4_mb_new_inode_pa() as follows:
original request: 2/10(8)
normalized request: 0/64(64)
best extent: 0/9(9)
When we check if best ex can be kept at start of goal, ac_o_ex.fe_logical
is 2 less than the adjusted best extent logical end 9, so we think the
adjustment is done. But obviously 0/9(9) doesn't cover 2/10(8), so we
should determine here if the original request logical end is less than or
equal to the adjusted best extent logical end.
In addition, add a comment stating when adjusted best_ex will not cover
the original request, and remove the duplicate assertion because adjusting
lstart makes no change to b_ex.fe_len.
Link: https://lore.kernel.org/r/3630fa7f-b432-7afd-5f79-781bc3b2c5ea@huawei.com
Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()")
Cc: <stable(a)kernel.org>
Signed-off-by: yangerkun <yangerkun(a)huawei.com>
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Ojaswin Mujoo <ojaswin(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20240201141845.1879253-1-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
---
fs/ext4/mballoc.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bce82e1e792fd..bd26f2a907512 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5172,10 +5172,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
.fe_len = ac->ac_orig_goal_len,
};
loff_t orig_goal_end = extent_logical_end(sbi, &ex);
+ loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex);
- /* we can't allocate as much as normalizer wants.
- * so, found space must get proper lstart
- * to cover original request */
+ /*
+ * We can't allocate as much as normalizer wants, so we try
+ * to get proper lstart to cover the original request, except
+ * when the goal doesn't cover the original request as below:
+ *
+ * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048
+ * best_ex:0/200(200) -> adjusted: 1848/2048(200)
+ */
BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
@@ -5187,7 +5193,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
* 1. Check if best ex can be kept at end of goal (before
* cr_best_avail trimmed it) and still cover original start
* 2. Else, check if best ex can be kept at start of goal and
- * still cover original start
+ * still cover original end
* 3. Else, keep the best ex at start of original request.
*/
ex.fe_len = ac->ac_b_ex.fe_len;
@@ -5197,7 +5203,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
goto adjust_bex;
ex.fe_logical = ac->ac_g_ex.fe_logical;
- if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
+ if (o_ex_end <= extent_logical_end(sbi, &ex))
goto adjust_bex;
ex.fe_logical = ac->ac_o_ex.fe_logical;
@@ -5205,7 +5211,6 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
ac->ac_b_ex.fe_logical = ex.fe_logical;
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
- BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 6c6064cbe58b43533e3451ad6a8ba9736c109ac3 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang(a)amd.com>
Date: Mon, 11 Mar 2024 18:07:34 -0400
Subject: [PATCH] drm/amdgpu: amdgpu_ttm_gart_bind set gtt bound flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Otherwise, after the GTT bo is released, the GTT and gart space is freed
but amdgpu_ttm_backend_unbind will not clear the gart page table entry,
leaving a valid mapping entry pointing to the stale system page. Then,
if the GPU accesses the gart address mistakenly, it will read an undefined
value instead of page faulting, making the real issue harder to debug and
reproduce.
Cc: stable(a)vger.kernel.org
Signed-off-by: Philip Yang <Philip.Yang(a)amd.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e5..fc418e670fdae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -864,6 +864,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags);
}
+ gtt->bound = true;
}
/*
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 6e7132ed3c07bd8a6ce3db4bb307ef2852b322dc Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka(a)redhat.com>
Date: Wed, 20 Mar 2024 18:43:11 +0100
Subject: [PATCH] dm snapshot: fix lockup in dm_exception_table_exit
A lockup was reported when exiting a snapshot with many exceptions.
Fix this by adding "cond_resched" to the loop that frees the exceptions.
Reported-by: John Pittman <jpittman(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka(a)redhat.com>
Signed-off-by: Mike Snitzer <snitzer(a)kernel.org>
---
drivers/md/dm-snap.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index bf7a574499a34..0ace06d1bee38 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -684,8 +684,10 @@ static void dm_exception_table_exit(struct dm_exception_table *et,
for (i = 0; i < size; i++) {
slot = et->table + i;
- hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
+ hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) {
kmem_cache_free(mem, ex);
+ cond_resched();
+ }
}
kvfree(et->table);
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 7af03e688792293ba33149fb8df619a8dff90e80 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 8 Mar 2024 18:03:39 +0200
Subject: [PATCH] drm/probe-helper: warn about negative .get_modes()
The .get_modes() callback is supposed to return the number of modes,
never a negative error code. If a negative value is returned, it'll just
be interpreted as a negative count, and added to previous calculations.
Document the rules, but handle the negative values gracefully with an
error message.
Cc: stable(a)vger.kernel.org
Acked-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/50208c866facc33226a3c77b82bb9…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/drm_probe_helper.c | 7 +++++++
include/drm/drm_modeset_helper_vtables.h | 3 ++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 19ecb749704be..75f84753f6ee3 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -422,6 +422,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector)
count = connector_funcs->get_modes(connector);
+ /* The .get_modes() callback should not return negative values. */
+ if (count < 0) {
+ drm_err(connector->dev, ".get_modes() returned %pe\n",
+ ERR_PTR(count));
+ count = 0;
+ }
+
/*
* Fallback for when DDC probe failed in drm_get_edid() and thus skipped
* override/firmware EDID.
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index 881b03e4dc288..9ed42469540eb 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -898,7 +898,8 @@ struct drm_connector_helper_funcs {
*
* RETURNS:
*
- * The number of modes added by calling drm_mode_probed_add().
+ * The number of modes added by calling drm_mode_probed_add(). Return 0
+ * on failures (no modes) instead of negative error codes.
*/
int (*get_modes)(struct drm_connector *connector);
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 7fb19d9510937121a1f285894cffd30bc96572e3 Mon Sep 17 00:00:00 2001
From: Josip Pavic <josip.pavic(a)amd.com>
Date: Fri, 9 Feb 2024 16:05:18 -0500
Subject: [PATCH] drm/amd/display: Allow dirty rects to be sent to dmub when
abm is active
[WHY]
It's beneficial for ABM to know when new frame data are available.
[HOW]
Add new condition to allow dirty rects to be sent to DMUB when ABM is
active. ABM will use this as a signal that a new frame has arrived.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Anthony Koo <anthony.koo(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Josip Pavic <josip.pavic(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 5211c1c0f3c0c..613d09c42f3b9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3270,6 +3270,9 @@ static bool dc_dmub_should_send_dirty_rect_cmd(struct dc *dc, struct dc_stream_s
if (stream->link->replay_settings.config.replay_supported)
return true;
+ if (stream->ctx->dce_version >= DCN_VERSION_3_5 && stream->abm_level)
+ return true;
+
return false;
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 9c68ece8b2a5c5ff9b2fcaea923dd73efeb174cd Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai(a)loongson.cn>
Date: Tue, 19 Mar 2024 15:50:34 +0800
Subject: [PATCH] LoongArch: Define the __io_aw() hook as mmiowb()
Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of
mmiowb()") removed all mmiowb() calls in drivers, but it says:
"NOTE: mmiowb() has only ever guaranteed ordering in conjunction with
spin_unlock(). However, pairing each mmiowb() removal in this patch with
the corresponding call to spin_unlock() is not at all trivial, so there
is a small chance that this change may regress any drivers incorrectly
relying on mmiowb() to order MMIO writes between CPUs using lock-free
synchronisation."
The mmio in radeon_ring_commit() is protected by a mutex rather than a
spinlock, but in the mutex fastpath it behaves similarly to a spinlock. We
could add mmiowb() calls in the radeon driver, but the maintainer says he
doesn't like such a workaround, and radeon is not the only example of
mutex-protected mmio.
So we should extend the mmiowb tracking system from spinlock to mutex,
and maybe other locking primitives. This is not easy and is error prone, so
we solve it in the architectural code, by simply defining the __io_aw()
hook as mmiowb(). And we no longer need to override queued_spin_unlock(),
so use the generic definition.
Without this, we get errors such as the following when running 'glxgears'
on weakly ordered architectures such as LoongArch:
radeon 0000:04:00.0: ring 0 stalled for more than 10324msec
radeon 0000:04:00.0: ring 3 stalled for more than 10240msec
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3)
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.…
Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loon…
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/Kbuild | 1 +
arch/loongarch/include/asm/io.h | 2 ++
arch/loongarch/include/asm/qspinlock.h | 18 ------------------
3 files changed, 3 insertions(+), 18 deletions(-)
delete mode 100644 arch/loongarch/include/asm/qspinlock.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index a97c0edbb866a..2dbec7853ae86 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
+generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += segment.h
generic-y += user.h
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b662..4a8adcca329b8 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#define __io_aw() mmiowb()
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad5912..0000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- compiletime_assert_atomic_type(lock->locked);
- c_sync();
- WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 13c0a74747cb7fdadf58c5d3a7d52cfca2d51736 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad(a)microsoft.com>
Date: Wed, 13 Mar 2024 10:40:41 +0000
Subject: [PATCH] cifs: make sure server interfaces are requested only for
SMB3+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some code paths for querying server interfaces make a false
assumption that they will only get called for SMB3+. Since this
function can now get called from generic code paths, the correct
thing to do is to have a specific handler for this functionality
per SMB dialect, and call this handler.
This change adds such a handler and implements this handler only
for SMB 3.0 and 3.1.1.
Cc: stable(a)vger.kernel.org
Cc: Jan Čermák <sairon(a)sairon.cz>
Reported-by: Paulo Alcantara <pc(a)manguebit.com>
Signed-off-by: Shyam Prasad N <sprasad(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
---
fs/smb/client/cifsglob.h | 3 +++
fs/smb/client/connect.c | 6 +++++-
fs/smb/client/smb2ops.c | 2 ++
fs/smb/client/smb2pdu.c | 5 +++--
4 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 8be62ed053a25..3da625d532359 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -355,6 +355,9 @@ struct smb_version_operations {
/* informational QFS call */
void (*qfs_tcon)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *);
+ /* query for server interfaces */
+ int (*query_server_interfaces)(const unsigned int, struct cifs_tcon *,
+ bool);
/* check if a path is accessible or not */
int (*is_path_accessible)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *);
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 86ae578904a26..4cbb79418e506 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -123,12 +123,16 @@ static void smb2_query_server_interfaces(struct work_struct *work)
struct cifs_tcon *tcon = container_of(work,
struct cifs_tcon,
query_interfaces.work);
+ struct TCP_Server_Info *server = tcon->ses->server;
/*
* query server network interfaces, in case they change
*/
+ if (!server->ops->query_server_interfaces)
+ return;
+
xid = get_xid();
- rc = SMB3_request_interfaces(xid, tcon, false);
+ rc = server->ops->query_server_interfaces(xid, tcon, false);
free_xid(xid);
if (rc) {
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 6ee22d0dbc006..2ed456948f34c 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -5290,6 +5290,7 @@ struct smb_version_operations smb30_operations = {
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
.qfs_tcon = smb3_qfs_tcon,
+ .query_server_interfaces = SMB3_request_interfaces,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -5405,6 +5406,7 @@ struct smb_version_operations smb311_operations = {
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
.qfs_tcon = smb3_qfs_tcon,
+ .query_server_interfaces = SMB3_request_interfaces,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index e5e6b14f8cae3..3ea688558e6c9 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -409,14 +409,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
spin_unlock(&ses->ses_lock);
if (!rc &&
- (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) &&
+ server->ops->query_server_interfaces) {
mutex_unlock(&ses->session_mutex);
/*
* query server network interfaces, in case they change
*/
xid = get_xid();
- rc = SMB3_request_interfaces(xid, tcon, false);
+ rc = server->ops->query_server_interfaces(xid, tcon, false);
free_xid(xid);
if (rc == -EOPNOTSUPP && ses->chan_count > 1) {
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 16a57d7681110b25708c7042688412238e6f73a9 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad(a)microsoft.com>
Date: Wed, 13 Mar 2024 10:40:40 +0000
Subject: [PATCH] cifs: reduce warning log level for server not advertising
interfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Several users have reported this log getting dumped too regularly to
kernel log. The likely root cause has been identified, and it suggests
that this situation is expected for some configurations
(for example SMB2.1).
Since the function returns appropriately even for such cases, it is
fairly harmless to make this a debug log. When needed, the verbosity
can be increased to capture this log.
Cc: stable(a)vger.kernel.org
Reported-by: Jan Čermák <sairon(a)sairon.cz>
Signed-off-by: Shyam Prasad N <sprasad(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
---
fs/smb/client/sess.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 8f37373fd3334..3216f786908fb 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -230,7 +230,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+ cifs_dbg(ONCE, "server %s does not advertise interfaces\n",
ses->server->hostname);
break;
}
@@ -396,7 +396,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ cifs_dbg(ONCE, "server %s does not advertise interfaces\n", ses->server->hostname);
return;
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 16c4770c75b1223998adbeb7286f9a15c65fba73 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Tue, 5 Mar 2024 15:23:02 +0800
Subject: [PATCH] dm-raid: really frozen sync_thread during suspend
1) commit f52f5c71f3d4 ("md: fix stopping sync thread") removed
MD_RECOVERY_FROZEN from __md_stop_writes() without realizing that
dm-raid relies on __md_stop_writes() to freeze the sync_thread
indirectly. Fix this problem by adding MD_RECOVERY_FROZEN in
md_stop_writes(), and since stop_sync_thread() is only used for
dm-raid in this case, also move stop_sync_thread() to
md_stop_writes().
2) The flag MD_RECOVERY_FROZEN doesn't mean that the sync thread is frozen;
it only prevents a new sync_thread from starting, and it can't stop the
running sync thread. In order to freeze the sync_thread, after setting the
flag, stop_sync_thread() should be used.
3) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, so using
it as the condition for md_stop_writes() in raid_postsuspend() doesn't
look correct. Considering that a reentrant stop_sync_thread() does nothing,
always call md_stop_writes() in raid_postsuspend().
4) raid_message can set/clear the flag MD_RECOVERY_FROZEN at any time,
and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
a new sync_thread can start unexpectedly. Fix this by disallowing
raid_message() from changing the sync_thread status during suspend.
Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
test shell/lvconvert-raid-reshape.sh started to hang in stop_sync_thread(),
and with the previous fixes, the test won't hang there anymore; however, the
test will still fail and complain that ext4 is corrupted. With this
patch, the test no longer hangs due to stop_sync_thread() or fails due to
ext4 corruption. However, there is still a deadlock related to
dm-raid456 that will be fixed in following patches.
Reported-by: Mikulas Patocka <mpatocka(a)redhat.com>
Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
Cc: stable(a)vger.kernel.org # v6.7+
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Signed-off-by: Xiao Ni <xni(a)redhat.com>
Acked-by: Mike Snitzer <snitzer(a)kernel.org>
Signed-off-by: Song Liu <song(a)kernel.org>
Link: https://lore.kernel.org/r/20240305072306.2562024-6-yukuai1@huaweicloud.com
---
drivers/md/dm-raid.c | 25 +++++++++++++++----------
drivers/md/md.c | 3 ++-
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index eb009d6bb03a1..e2d7a73c0f874 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
rs->md.ro = 1;
rs->md.in_sync = 1;
- /* Keep array frozen until resume. */
- set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
-
/* Has to be held on running the array */
mddev_suspend_and_lock_nointr(&rs->md);
+
+ /* Keep array frozen until resume. */
+ md_frozen_sync_thread(&rs->md);
+
r = md_run(&rs->md);
rs->md.in_sync = 0; /* Assume already marked dirty */
if (r) {
@@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
+ if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+ return -EBUSY;
+
if (!strcasecmp(argv[0], "frozen"))
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
else
@@ -3796,10 +3800,11 @@ static void raid_postsuspend(struct dm_target *ti)
struct raid_set *rs = ti->private;
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
- /* Writes have to be stopped before suspending to avoid deadlocks. */
- if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
- md_stop_writes(&rs->md);
-
+ /*
+ * sync_thread must be stopped during suspend, and writes have
+ * to be stopped before suspending to avoid deadlocks.
+ */
+ md_stop_writes(&rs->md);
mddev_suspend(&rs->md, false);
}
}
@@ -4012,8 +4017,6 @@ static int raid_preresume(struct dm_target *ti)
}
/* Check for any resize/reshape on @rs and adjust/initiate */
- /* Be prepared for mddev_resume() in raid_resume() */
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
mddev->resync_min = mddev->recovery_cp;
@@ -4055,10 +4058,12 @@ static void raid_resume(struct dm_target *ti)
if (mddev->delta_disks < 0)
rs_set_capacity(rs);
+ WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
+ WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
mddev_lock_nointr(mddev);
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
mddev->ro = 0;
mddev->in_sync = 0;
+ md_unfrozen_sync_thread(mddev);
mddev_unlock_and_resume(mddev);
}
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 55ecc05c17c65..167db77442392 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6364,7 +6364,6 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
- stop_sync_thread(mddev, true, false);
del_timer_sync(&mddev->safemode_timer);
if (mddev->pers && mddev->pers->quiesce) {
@@ -6389,6 +6388,8 @@ static void __md_stop_writes(struct mddev *mddev)
void md_stop_writes(struct mddev *mddev)
{
mddev_lock_nointr(mddev);
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ stop_sync_thread(mddev, true, false);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 17f46b803d4f23c66cacce81db35fef3adb8f2af Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Fri, 1 Mar 2024 11:49:57 -0500
Subject: [PATCH] nfs: fix UAF in direct writes
In production we have been hitting the following warning consistently
------------[ cut here ]------------
refcount_t: underflow; use-after-free.
WARNING: CPU: 17 PID: 1800359 at lib/refcount.c:28 refcount_warn_saturate+0x9c/0xe0
Workqueue: nfsiod nfs_direct_write_schedule_work [nfs]
RIP: 0010:refcount_warn_saturate+0x9c/0xe0
PKRU: 55555554
Call Trace:
<TASK>
? __warn+0x9f/0x130
? refcount_warn_saturate+0x9c/0xe0
? report_bug+0xcc/0x150
? handle_bug+0x3d/0x70
? exc_invalid_op+0x16/0x40
? asm_exc_invalid_op+0x16/0x20
? refcount_warn_saturate+0x9c/0xe0
nfs_direct_write_schedule_work+0x237/0x250 [nfs]
process_one_work+0x12f/0x4a0
worker_thread+0x14e/0x3b0
? ZSTD_getCParams_internal+0x220/0x220
kthread+0xdc/0x120
? __btf_name_valid+0xa0/0xa0
ret_from_fork+0x1f/0x30
This is because we're completing the nfs_direct_request twice in a row.
The source of this is when we have our commit requests to submit, we
process them and send them off, and then in the completion path for the
commit requests we have
if (nfs_commit_end(cinfo.mds))
nfs_direct_write_complete(dreq);
However since we're submitting asynchronous requests we sometimes have
one that completes before we submit the next one, so we end up calling
complete on the nfs_direct_request twice.
The only other place we use nfs_generic_commit_list() is in
__nfs_commit_inode, which wraps this call in a
nfs_commit_begin();
nfs_commit_end();
Which is a common pattern for this style of completion handling, one
that is also repeated in the direct code with get_dreq()/put_dreq()
calls around where we process events as well as in the completion paths.
Fix this by using the same pattern for the commit requests.
Before with my 200 node rocksdb stress running this warning would pop
every 10ish minutes. With my patch the stress test has been running for
several hours without popping.
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
---
fs/nfs/direct.c | 11 +++++++++--
fs/nfs/write.c | 2 +-
include/linux/nfs_fs.h | 1 +
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index befcc167e25fe..6b8798d01e3a1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -672,10 +672,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
LIST_HEAD(mds_list);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_commit_begin(cinfo.mds);
nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
- if (res < 0) /* res == -ENOMEM */
- nfs_direct_write_reschedule(dreq);
+ if (res < 0) { /* res == -ENOMEM */
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ }
+ if (nfs_commit_end(cinfo.mds))
+ nfs_direct_write_complete(dreq);
}
static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 58adbb7709ba7..15359bbfa56bc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1646,7 +1646,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
!atomic_read(&cinfo->rpcs_out));
}
-static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
{
atomic_inc(&cinfo->rpcs_out);
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f5ce7b1011461..d59116ac82099 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -611,6 +611,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio);
extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_commit_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_commit_data *data);
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo);
bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
static inline bool nfs_have_writebacks(const struct inode *inode)
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 18c198c96a815c962adc2b9b77909eec0be7df4d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Fri, 8 Mar 2024 16:05:25 -0700
Subject: [PATCH] vfio/pci: Create persistent INTx handler
A vulnerability exists where the eventfd for INTx signaling can be
deconfigured, which unregisters the IRQ handler but still allows
eventfds to be signaled with a NULL context through the SET_IRQS ioctl
or through unmask irqfd if the device interrupt is pending.
Ideally this could be solved with some additional locking; the igate
mutex serializes the ioctl and config space accesses, and the interrupt
handler is unregistered relative to the trigger, but the irqfd path
runs asynchronous to those. The igate mutex cannot be acquired from the
atomic context of the eventfd wake function. Disabling the irqfd
relative to the eventfd registration is potentially incompatible with
existing userspace.
As a result, the solution implemented here moves configuration of the
INTx interrupt handler to track the lifetime of the INTx context object
and irq_type configuration, rather than registration of a particular
trigger eventfd. Synchronization is added between the ioctl path and
eventfd_signal() wrapper such that the eventfd trigger can be
dynamically updated relative to in-flight interrupts or irqfd callbacks.
Cc: <stable(a)vger.kernel.org>
Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver")
Reported-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Eric Auger <eric.auger(a)redhat.com>
Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
---
drivers/vfio/pci/vfio_pci_intrs.c | 145 ++++++++++++++++--------------
1 file changed, 78 insertions(+), 67 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 75c85eec21b3c..fb5392b749fff 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
struct vfio_pci_irq_ctx *ctx;
+ struct eventfd_ctx *trigger;
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
return;
- eventfd_signal(ctx->trigger);
+
+ trigger = READ_ONCE(ctx->trigger);
+ if (likely(trigger))
+ eventfd_signal(trigger);
}
}
@@ -253,100 +257,100 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
return ret;
}
-static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
+static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *trigger)
{
+ struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx;
+ unsigned long irqflags;
+ char *name;
+ int ret;
if (!is_irq_none(vdev))
return -EINVAL;
- if (!vdev->pdev->irq)
+ if (!pdev->irq)
return -ENODEV;
+ name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
ctx = vfio_irq_ctx_alloc(vdev, 0);
if (!ctx)
return -ENOMEM;
+ ctx->name = name;
+ ctx->trigger = trigger;
+
/*
- * If the virtual interrupt is masked, restore it. Devices
- * supporting DisINTx can be masked at the hardware level
- * here, non-PCI-2.3 devices will have to wait until the
- * interrupt is enabled.
+ * Fill the initial masked state based on virq_disabled. After
+ * enable, changing the DisINTx bit in vconfig directly changes INTx
+ * masking. igate prevents races during setup, once running masked
+ * is protected via irqlock.
+ *
+ * Devices supporting DisINTx also reflect the current mask state in
+ * the physical DisINTx bit, which is not affected during IRQ setup.
+ *
+ * Devices without DisINTx support require an exclusive interrupt.
+ * IRQ masking is performed at the IRQ chip. Again, igate protects
+ * against races during setup and IRQ handlers and irqfds are not
+ * yet active, therefore masked is stable and can be used to
+ * conditionally auto-enable the IRQ.
+ *
+ * irq_type must be stable while the IRQ handler is registered,
+ * therefore it must be set before request_irq().
*/
ctx->masked = vdev->virq_disabled;
- if (vdev->pci_2_3)
- pci_intx(vdev->pdev, !ctx->masked);
+ if (vdev->pci_2_3) {
+ pci_intx(pdev, !ctx->masked);
+ irqflags = IRQF_SHARED;
+ } else {
+ irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
+ }
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
+ ret = request_irq(pdev->irq, vfio_intx_handler,
+ irqflags, ctx->name, vdev);
+ if (ret) {
+ vdev->irq_type = VFIO_PCI_NUM_IRQS;
+ kfree(name);
+ vfio_irq_ctx_free(vdev, ctx, 0);
+ return ret;
+ }
+
return 0;
}
-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
+static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *trigger)
{
struct pci_dev *pdev = vdev->pdev;
- unsigned long irqflags = IRQF_SHARED;
struct vfio_pci_irq_ctx *ctx;
- struct eventfd_ctx *trigger;
- unsigned long flags;
- int ret;
+ struct eventfd_ctx *old;
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
return -EINVAL;
- if (ctx->trigger) {
- free_irq(pdev->irq, vdev);
- kfree(ctx->name);
- eventfd_ctx_put(ctx->trigger);
- ctx->trigger = NULL;
- }
-
- if (fd < 0) /* Disable only */
- return 0;
-
- ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
- pci_name(pdev));
- if (!ctx->name)
- return -ENOMEM;
-
- trigger = eventfd_ctx_fdget(fd);
- if (IS_ERR(trigger)) {
- kfree(ctx->name);
- return PTR_ERR(trigger);
- }
+ old = ctx->trigger;
- ctx->trigger = trigger;
+ WRITE_ONCE(ctx->trigger, trigger);
- /*
- * Devices without DisINTx support require an exclusive interrupt,
- * IRQ masking is performed at the IRQ chip. The masked status is
- * protected by vdev->irqlock. Setup the IRQ without auto-enable and
- * unmask as necessary below under lock. DisINTx is unmodified by
- * the IRQ configuration and may therefore use auto-enable.
- */
- if (!vdev->pci_2_3)
- irqflags = IRQF_NO_AUTOEN;
-
- ret = request_irq(pdev->irq, vfio_intx_handler,
- irqflags, ctx->name, vdev);
- if (ret) {
- ctx->trigger = NULL;
- kfree(ctx->name);
- eventfd_ctx_put(trigger);
- return ret;
+ /* Releasing an old ctx requires synchronizing in-flight users */
+ if (old) {
+ synchronize_irq(pdev->irq);
+ vfio_virqfd_flush_thread(&ctx->unmask);
+ eventfd_ctx_put(old);
}
- spin_lock_irqsave(&vdev->irqlock, flags);
- if (!vdev->pci_2_3 && !ctx->masked)
- enable_irq(pdev->irq);
- spin_unlock_irqrestore(&vdev->irqlock, flags);
-
return 0;
}
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
+ struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx;
ctx = vfio_irq_ctx_get(vdev, 0);
@@ -354,10 +358,13 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
if (ctx) {
vfio_virqfd_disable(&ctx->unmask);
vfio_virqfd_disable(&ctx->mask);
+ free_irq(pdev->irq, vdev);
+ if (ctx->trigger)
+ eventfd_ctx_put(ctx->trigger);
+ kfree(ctx->name);
+ vfio_irq_ctx_free(vdev, ctx, 0);
}
- vfio_intx_set_signal(vdev, -1);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
- vfio_irq_ctx_free(vdev, ctx, 0);
}
/*
@@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ struct eventfd_ctx *trigger = NULL;
int32_t fd = *(int32_t *)data;
int ret;
- if (is_intx(vdev))
- return vfio_intx_set_signal(vdev, fd);
+ if (fd >= 0) {
+ trigger = eventfd_ctx_fdget(fd);
+ if (IS_ERR(trigger))
+ return PTR_ERR(trigger);
+ }
- ret = vfio_intx_enable(vdev);
- if (ret)
- return ret;
+ if (is_intx(vdev))
+ ret = vfio_intx_set_signal(vdev, trigger);
+ else
+ ret = vfio_intx_enable(vdev, trigger);
- ret = vfio_intx_set_signal(vdev, fd);
- if (ret)
- vfio_intx_disable(vdev);
+ if (ret && trigger)
+ eventfd_ctx_put(trigger);
return ret;
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 21afc872fbc29cd68cfde816d1df4d55848c3f61 Mon Sep 17 00:00:00 2001
From: Ivan Lipski <ivlipski(a)amd.com>
Date: Fri, 1 Dec 2023 06:25:16 -0700
Subject: [PATCH] drm/amd/display: Add monitor patch for specific eDP
[WHY]
Some eDP panels do not write an initial value to their sink ext caps, so
the value of dpcd_addr(0x317) is random. As a result, the eDP panel will
sometimes report that it is OLED, miniLED, etc., which makes the backlight
control interface incorrect.
[HOW]
Add a new panel patch to remove sink ext caps (HDR, OLED, etc.).
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Reviewed-by: Sun peng Li <sunpeng.li(a)amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira(a)amd.com>
Signed-off-by: Ivan Lipski <ivlipski(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index b4696ec621c45..eaf8d9f482446 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -64,6 +64,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id);
edid_caps->panel_patch.disable_fams = true;
break;
+ /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+ DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.remove_sink_ext_caps = true;
+ break;
default:
return;
}
--
2.43.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 34cd86b6632718b7df3999d96f51e63de41c5e4f Mon Sep 17 00:00:00 2001
From: Marios Makassikis <mmakassikis(a)freebox.fr>
Date: Thu, 22 Feb 2024 10:58:21 +0100
Subject: [PATCH] ksmbd: retrieve number of blocks using vfs_getattr in
set_file_allocation_info
Use vfs_getattr() to retrieve stat information, rather than make
assumptions about how a filesystem fills inode structs.
Cc: stable(a)vger.kernel.org
Signed-off-by: Marios Makassikis <mmakassikis(a)freebox.fr>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
---
fs/smb/server/smb2pdu.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index f6cc5d2730ffb..199c31c275e5b 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -5809,15 +5809,21 @@ static int set_file_allocation_info(struct ksmbd_work *work,
loff_t alloc_blks;
struct inode *inode;
+ struct kstat stat;
int rc;
if (!(fp->daccess & FILE_WRITE_DATA_LE))
return -EACCES;
+ rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (rc)
+ return rc;
+
alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9;
inode = file_inode(fp->filp);
- if (alloc_blks > inode->i_blocks) {
+ if (alloc_blks > stat.blocks) {
smb_break_all_levII_oplock(work, fp, 1);
rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0,
alloc_blks * 512);
@@ -5825,7 +5831,7 @@ static int set_file_allocation_info(struct ksmbd_work *work,
pr_err("vfs_fallocate is failed : %d\n", rc);
return rc;
}
- } else if (alloc_blks < inode->i_blocks) {
+ } else if (alloc_blks < stat.blocks) {
loff_t size;
/*
--
2.43.0