With hardware dirty bit management, calling pte_wrprotect() on a writable, dirty PTE will lose the dirty state and return a read-only, clean entry.
Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that the dirty bit is preserved for writable entries, as this is required for soft-dirty bit management if we enable it in the future.
Cc: stable@vger.kernel.org Signed-off-by: Will Deacon will@kernel.org --- arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdf51f01e73..a155551863c9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -162,13 +162,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) return pmd; }
-static inline pte_t pte_wrprotect(pte_t pte) -{ - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; -} - static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -194,6 +187,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; }
+static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -843,12 +850,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte));
On Fri, Nov 20, 2020 at 02:35:53PM +0000, Will Deacon wrote:
With hardware dirty bit management, calling pte_wrprotect() on a writable, dirty PTE will lose the dirty state and return a read-only, clean entry.
Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that the dirty bit is preserved for writable entries, as this is required for soft-dirty bit management if we enable it in the future.
Is this stable material if it would be a problem once ARM64 supports softdirty in future?
Signed-off-by: Will Deacon will@kernel.org
arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdf51f01e73..a155551863c9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -162,13 +162,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) return pmd; } -static inline pte_t pte_wrprotect(pte_t pte) -{
- pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
- pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
- return pte;
-}
static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -194,6 +187,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{
+ /*
+ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+ * clear), set the PTE_DIRTY bit.
+ */
+ if (pte_hw_dirty(pte))
+ pte = pte_mkdirty(pte);
+ pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
+}
static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -843,12 +850,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte;
- /*
- * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
- * clear), set the PTE_DIRTY bit.
- */
- if (pte_hw_dirty(pte))
- pte = pte_mkdirty(pte);
pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte));
-- 2.29.2.454.gaff20da3a2-goog
On Fri, Nov 20, 2020 at 09:09:03AM -0800, Minchan Kim wrote:
On Fri, Nov 20, 2020 at 02:35:53PM +0000, Will Deacon wrote:
With hardware dirty bit management, calling pte_wrprotect() on a writable, dirty PTE will lose the dirty state and return a read-only, clean entry.
Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that the dirty bit is preserved for writable entries, as this is required for soft-dirty bit management if we enable it in the future.
Is this stable material if it would be a problem once ARM64 supports softdirty in future?
I don't think so. Arm64 did not have a hardware dirty mechanism from the start; it was added later, but in such a way as to coexist with other CPUs or peripherals that don't support it. So instead of setting a PTE_DIRTY bit as one would expect, the CPU clears the PTE_RDONLY bit on a write access to a writable PTE (one with the PTE_DBM/PTE_WRITE bit set). Our pte_wrprotect() therefore needs to set PTE_RDONLY and clear PTE_DBM (PTE_WRITE), but !PTE_RDONLY is our only record of a pte having been dirtied, so we have to transfer it to the software PTE_DIRTY bit. This is different from a soft-dirty pte bit, should we add one in the future.
On Fri, Nov 20, 2020 at 02:35:53PM +0000, Will Deacon wrote:
With hardware dirty bit management, calling pte_wrprotect() on a writable, dirty PTE will lose the dirty state and return a read-only, clean entry.
My assumption at the time was that the caller of pte_wrprotect() already moved the 'dirty' information to the underlying page. Most pte_wrprotect() calls also do a pte_mkclean(). However, it doesn't seem to always be the case (soft-dirty but we don't support it yet).
I was worried that we may inadvertently set the dirty bit when doing a pte_wrprotect() on a freshly created pte (not read from memory, for example __split_huge_pmd_locked()) but I think all our __P* and __S* attributes start with a PTE_RDONLY, therefore the pte_hw_dirty() returns false. A test for mm/debug_vm_pgtable.c, something like:
for (i = 0; i < ARRAY_SIZE(protection_map); i++) { pte = pfn_pte(pfn, protection_map[i]); WARN_ON(pte_dirty(pte_wrprotect(pte))); }
(I'll leave this to Anshuman ;))
Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that the dirty bit is preserved for writable entries, as this is required for soft-dirty bit management if we enable it in the future.
Cc: stable@vger.kernel.org Signed-off-by: Will Deacon will@kernel.org
I think this could go back as far as the hardware AF/DBM support (v4.3):
Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits")
If you limit this fix to 4.14, you probably don't need additional commits. Otherwise, at least this one:
3bbf7157ac66 ("arm64: Convert pte handling from inline asm to using (cmp)xchg")
and a slightly more intrusive:
73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()")
We also had some attempts at fixing ptep_set_wrprotect():
64c26841b349 ("arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect()")
Fixed subsequently by:
8781bcbc5e69 ("arm64: mm: Fix pte_mkclean, pte_mkdirty semantics")
I have a hope that at some point we'll understand how this all works ;).
For this patch:
Reviewed-by: Catalin Marinas catalin.marinas@arm.com
linux-stable-mirror@lists.linaro.org