__split_huge_pmd_locked() can be called for a present THP, devmap or
(non-present) migration entry. It calls pmdp_invalidate()
unconditionally on the pmdp and only determines if it is present or not
based on the returned old pmd.
But arm64's pmd_mkinvalid(), called by pmdp_invalidate(),
unconditionally sets the PMD_PRESENT_INVALID flag, which causes future
pmd_present() calls to return true - even for a swap pmd. Therefore any
lockless pgtable walker could see the migration entry pmd in this state
and start interpretting the fields (e.g. pmd_pfn()) as if it were
present, leading to BadThings (TM). GUP-fast appears to be one such
lockless pgtable walker.
While the obvious fix is for core-mm to avoid such calls for non-present
pmds (pmdp_invalidate() will also issue TLBI which is not necessary for
this case either), all other arches that implement pmd_mkinvalid() do it
in such a way that it is robust to being called with a non-present pmd.
So it is simpler and safer to make arm64 robust too. This approach means
we can even add tests to debug_vm_pgtable.c to validate the required
behaviour.
This is a theoretical bug found during code review. I don't have any
test case to trigger it in practice.
Cc: stable(a)vger.kernel.org
Fixes: 53fa117bb33c ("arm64/mm: Enable THP migration")
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
Hi all,
v1 of this fix [1] took the approach of fixing core-mm to never call
pmdp_invalidate() on a non-present pmd. But Zi Yan highlighted that only arm64
suffers this problem; all other arches are robust. So his suggestion was to
instead make arm64 robust in the same way and add tests to validate it. Despite
my stated reservations in the context of the v1 discussion, having thought on it
for a bit, I now agree with Zi Yan. Hence this post.
Andrew has v1 in mm-unstable at the moment, so probably the best thing to do is
remove it from there and have this go in through the arm64 tree? Assuming there
is agreement that this approach is right one.
This applies on top of v6.9-rc5. Passes all the mm selftests on arm64.
[1] https://lore.kernel.org/linux-mm/20240425170704.3379492-1-ryan.roberts@arm.…
Thanks,
Ryan
arch/arm64/include/asm/pgtable.h | 12 +++++--
mm/debug_vm_pgtable.c | 61 ++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index afdd56d26ad7..7d580271a46d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -511,8 +511,16 @@ static inline int pmd_trans_huge(pmd_t pmd)
static inline pmd_t pmd_mkinvalid(pmd_t pmd)
{
- pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
- pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
+ /*
+ * If not valid then either we are already present-invalid or we are
+ * not-present (i.e. none or swap entry). We must not convert
+ * not-present to present-invalid. Unbelievably, the core-mm may call
+ * pmd_mkinvalid() for a swap entry and all other arches can handle it.
+ */
+ if (pmd_valid(pmd)) {
+ pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
+ pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
+ }
return pmd;
}
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 65c19025da3d..7e9c387d06b0 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -956,6 +956,65 @@ static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if !defined(__HAVE_ARCH_PMDP_INVALIDATE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION)
+static void __init swp_pmd_mkinvalid_tests(struct pgtable_debug_args *args)
+{
+ unsigned long max_swap_offset;
+ swp_entry_t swp_set, swp_clear, swp_convert;
+ pmd_t pmd_set, pmd_clear;
+
+ /*
+ * See generic_max_swapfile_size(): probe the maximum offset, then
+ * create swap entry will all possible bits set and a swap entry will
+ * all bits clear.
+ */
+ max_swap_offset = swp_offset(pmd_to_swp_entry(swp_entry_to_pmd(swp_entry(0, ~0UL))));
+ swp_set = swp_entry((1 << MAX_SWAPFILES_SHIFT) - 1, max_swap_offset);
+ swp_clear = swp_entry(0, 0);
+
+ /* Convert to pmd. */
+ pmd_set = swp_entry_to_pmd(swp_set);
+ pmd_clear = swp_entry_to_pmd(swp_clear);
+
+ /*
+ * Sanity check that the pmds are not-present, not-huge and swap entry
+ * is recoverable without corruption.
+ */
+ WARN_ON(pmd_present(pmd_set));
+ WARN_ON(pmd_trans_huge(pmd_set));
+ swp_convert = pmd_to_swp_entry(pmd_set);
+ WARN_ON(swp_type(swp_set) != swp_type(swp_convert));
+ WARN_ON(swp_offset(swp_set) != swp_offset(swp_convert));
+ WARN_ON(pmd_present(pmd_clear));
+ WARN_ON(pmd_trans_huge(pmd_clear));
+ swp_convert = pmd_to_swp_entry(pmd_clear);
+ WARN_ON(swp_type(swp_clear) != swp_type(swp_convert));
+ WARN_ON(swp_offset(swp_clear) != swp_offset(swp_convert));
+
+ /* Now invalidate the pmd. */
+ pmd_set = pmd_mkinvalid(pmd_set);
+ pmd_clear = pmd_mkinvalid(pmd_clear);
+
+ /*
+ * Since its a swap pmd, invalidation should effectively be a noop and
+ * the checks we already did should give the same answer. Check the
+ * invalidation didn't corrupt any fields.
+ */
+ WARN_ON(pmd_present(pmd_set));
+ WARN_ON(pmd_trans_huge(pmd_set));
+ swp_convert = pmd_to_swp_entry(pmd_set);
+ WARN_ON(swp_type(swp_set) != swp_type(swp_convert));
+ WARN_ON(swp_offset(swp_set) != swp_offset(swp_convert));
+ WARN_ON(pmd_present(pmd_clear));
+ WARN_ON(pmd_trans_huge(pmd_clear));
+ swp_convert = pmd_to_swp_entry(pmd_clear);
+ WARN_ON(swp_type(swp_clear) != swp_type(swp_convert));
+ WARN_ON(swp_offset(swp_clear) != swp_offset(swp_convert));
+}
+#else
+static void __init swp_pmd_mkinvalid_tests(struct pgtable_debug_args *args) { }
+#endif /* !__HAVE_ARCH_PMDP_INVALIDATE && CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
static void __init pmd_thp_tests(struct pgtable_debug_args *args)
{
pmd_t pmd;
@@ -982,6 +1041,8 @@ static void __init pmd_thp_tests(struct pgtable_debug_args *args)
WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd))));
WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))));
#endif /* __HAVE_ARCH_PMDP_INVALIDATE */
+
+ swp_pmd_mkinvalid_tests(args);
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
--
2.25.1
Change 'sent' to 'send'
Signed-off-by: Tim Bird <tim.bird(a)sony.com>
---
Documentation/process/stable-kernel-rules.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 1704f1c686d0..3178bef6fca3 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -78,7 +78,7 @@ in the sign-off area. Once the patch is mainlined it will be applied to the
stable tree without anything else needing to be done by the author or
subsystem maintainer.
-To sent additional instructions to the stable team, use a shell-style inline
+To send additional instructions to the stable team, use a shell-style inline
comment:
* To specify any additional patch prerequisites for cherry picking use the
--
2.25.1
After a recent discussion regarding "do we need a 'nobackport' tag" I
set out to create one change for stable-kernel-rules.rst. This is now
the last patch in the series, which links to that discussion with
all the details; the other stuff is fine-tuning that happened along the
way.
Ciao, Thorsten
---
v1->v2:
* Add reviewed-by tag from Greg to the first patch.
* Change the backport example in 2 as suggested by Greg.
* Improve description of patch 3 while also making the change remove a
level of indenting.
* Add patch explaining stable(a)kernel.org (w/o @vger.)
* Move the patch adding a 'make AUTOSEL et. al. ignore a change' flag to
the end of the series and use stable+noautosel(a)kernel.org as
suggested my Konstantin and ACKed by Greg.
v1: https://lore.kernel.org/all/cover.1712812895.git.linux@leemhuis.info/
Thorsten Leemhuis (5):
docs: stable-kernel-rules: reduce redundancy
docs: stable-kernel-rules: call mainline by its name and change
example
docs: stable-kernel-rules: remove code-labels tags and a indention
level
docs: stable-kernel-rules: explain use of stable(a)kernel.org (w/o
@vger.)
docs: stable-kernel-rules: create special tag to flag 'no backporting'
Documentation/process/stable-kernel-rules.rst | 234 ++++++++----------
1 file changed, 110 insertions(+), 124 deletions(-)
base-commit: 5eb4573ea63d0c83bf58fb7c243fc2c2b6966c02
--
2.44.0
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
When using a high speed clock with a low baud rate, the 4x prescaler is
automatically selected if required. In that case, sc16is7xx_set_baud()
properly configures the chip registers, but returns an incorrect baud
rate by not taking into account the prescaler value. This incorrect baud
rate is then fed to uart_update_timeout().
For example, with an input clock of 80MHz, and a selected baud rate of 50,
sc16is7xx_set_baud() will return 200 instead of 50.
Fix this by first changing the prescaler variable to hold the selected
prescaler value instead of the MCR bitfield. Then properly take into
account the selected prescaler value in the return value computation.
Also add better documentation about the divisor value computation.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
---
Changes for V2:
- Change prescaler type to "unsigned int" (Jiri S.)
---
drivers/tty/serial/sc16is7xx.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 03cf30e20b75..bf0065d1c8e9 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -555,16 +555,28 @@ static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg)
return reg == SC16IS7XX_RHR_REG;
}
+/*
+ * Configure programmable baud rate generator (divisor) according to the
+ * desired baud rate.
+ *
+ * From the datasheet, the divisor is computed according to:
+ *
+ * XTAL1 input frequency
+ * -----------------------
+ * prescaler
+ * divisor = ---------------------------
+ * baud-rate x sampling-rate
+ */
static int sc16is7xx_set_baud(struct uart_port *port, int baud)
{
struct sc16is7xx_one *one = to_sc16is7xx_one(port, port);
u8 lcr;
- u8 prescaler = 0;
+ unsigned int prescaler = 1;
unsigned long clk = port->uartclk, div = clk / 16 / baud;
if (div >= BIT(16)) {
- prescaler = SC16IS7XX_MCR_CLKSEL_BIT;
- div /= 4;
+ prescaler = 4;
+ div /= prescaler;
}
/* Enable enhanced features */
@@ -574,9 +586,10 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud)
SC16IS7XX_EFR_ENABLE_BIT);
sc16is7xx_efr_unlock(port);
+ /* If bit MCR_CLKSEL is set, the divide by 4 prescaler is activated. */
sc16is7xx_port_update(port, SC16IS7XX_MCR_REG,
SC16IS7XX_MCR_CLKSEL_BIT,
- prescaler);
+ prescaler == 1 ? 0 : SC16IS7XX_MCR_CLKSEL_BIT);
/* Backup LCR and access special register set (DLL/DLH) */
lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG);
@@ -592,7 +605,7 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud)
/* Restore LCR and access to general register set */
sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr);
- return DIV_ROUND_CLOSEST(clk / 16, div);
+ return DIV_ROUND_CLOSEST((clk / prescaler) / 16, div);
}
static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen,
base-commit: 660a708098569a66a47d0abdad998e29e1259de6
--
2.39.2