mmu_interval_notifier_remove() cannot be called safely from inside the invalidate() callback because it sleeps waiting for in-progress invalidate callbacks to finish, so a caller that is itself running inside one of those callbacks would end up waiting on itself. A removal may nevertheless be needed there, for example when the invalidate() callback is for munmap() (i.e., the event type MMU_NOTIFY_UNMAP) and the interval being tracked is no longer needed.
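Add a new function, mmu_interval_notifier_put(), that is safe to call from the invalidate() callback. The ops->release() function will be called when all callbacks are finished and no CPUs are accessing the mmu_interval_notifier: either directly from mmu_interval_notifier_put() when the removal does not have to be deferred, or from mn_itree_inv_end() once the last in-progress invalidation ends. Users of mmu_interval_notifier_put() must therefore define ops->release().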
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com> --- include/linux/mmu_notifier.h | 6 +++ mm/mmu_notifier.c | 86 ++++++++++++++++++++++++++++-------- 2 files changed, 74 insertions(+), 18 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 027c9c8f3a69..6dcaa632eef7 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -233,11 +233,16 @@ struct mmu_notifier { * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. + * @release: This function should be defined when using + * mmu_interval_notifier_put(). It will be called when the + * mmu_interval_notifier is removed from the interval tree. + * No other callbacks will be generated after this returns. */ struct mmu_interval_notifier_ops { bool (*invalidate)(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq); + void (*release)(struct mmu_interval_notifier *mni); };
struct mmu_interval_notifier { @@ -304,6 +309,7 @@ int mmu_interval_notifier_insert_safe( unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops); void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni); +void mmu_interval_notifier_put(struct mmu_interval_notifier *mni);
/** * mmu_interval_set_seq - Save the invalidation sequence diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index a5ff19cd1bc5..40c837ae8d90 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -129,6 +129,7 @@ static void mn_itree_inv_end(struct mmu_notifier_mm *mmn_mm) { struct mmu_interval_notifier *mni; struct hlist_node *next; + struct hlist_head removed_list;
spin_lock(&mmn_mm->lock); if (--mmn_mm->active_invalidate_ranges || @@ -144,20 +145,35 @@ static void mn_itree_inv_end(struct mmu_notifier_mm *mmn_mm) * The inv_end incorporates a deferred mechanism like rtnl_unlock(). * Adds and removes are queued until the final inv_end happens then * they are progressed. This arrangement for tree updates is used to - * avoid using a blocking lock during invalidate_range_start. + * avoid using a blocking lock while walking the interval tree. */ + INIT_HLIST_HEAD(&removed_list); hlist_for_each_entry_safe(mni, next, &mmn_mm->deferred_list, deferred_item) { + hlist_del(&mni->deferred_item); if (RB_EMPTY_NODE(&mni->interval_tree.rb)) interval_tree_insert(&mni->interval_tree, &mmn_mm->itree); - else + else { interval_tree_remove(&mni->interval_tree, &mmn_mm->itree); - hlist_del(&mni->deferred_item); + if (mni->ops->release) + hlist_add_head(&mni->deferred_item, + &removed_list); + } } spin_unlock(&mmn_mm->lock);
+ hlist_for_each_entry_safe(mni, next, &removed_list, deferred_item) { + struct mm_struct *mm = mni->mm; + + hlist_del(&mni->deferred_item); + mni->ops->release(mni); + + /* pairs with mmgrab() in __mmu_interval_notifier_insert() */ + mmdrop(mm); + } + wake_up_all(&mmn_mm->wq); }
@@ -1006,24 +1022,13 @@ int mmu_interval_notifier_insert_safe( } EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_safe);
-/** - * mmu_interval_notifier_remove - Remove a interval notifier - * @mni: Interval notifier to unregister - * - * This function must be paired with mmu_interval_notifier_insert(). It cannot - * be called from any ops callback. - * - * Once this returns ops callbacks are no longer running on other CPUs and - * will not be called in future. - */ -void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni) +static unsigned long __mmu_interval_notifier_put( + struct mmu_interval_notifier *mni) { struct mm_struct *mm = mni->mm; struct mmu_notifier_mm *mmn_mm = mm->mmu_notifier_mm; unsigned long seq = 0;
- might_sleep(); - spin_lock(&mmn_mm->lock); if (mn_itree_is_invalidating(mmn_mm)) { /* @@ -1043,6 +1048,28 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni) } spin_unlock(&mmn_mm->lock);
+ return seq; +} + +/** + * mmu_interval_notifier_remove - Remove an interval notifier + * @mni: Interval notifier to unregister + * + * This function must be paired with one of the mmu_interval_notifier_insert() + * functions. It cannot be called from any ops callback. + * Once this returns, ops callbacks are no longer running on other CPUs and + * will not be called in future. + */ +void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni) +{ + struct mm_struct *mm = mni->mm; + struct mmu_notifier_mm *mmn_mm = mm->mmu_notifier_mm; + unsigned long seq; + + might_sleep(); + + seq = __mmu_interval_notifier_put(mni); + /* * The possible sleep on progress in the invalidation requires the * caller not hold any locks held by invalidation callbacks. @@ -1053,11 +1080,34 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni) wait_event(mmn_mm->wq, READ_ONCE(mmn_mm->invalidate_seq) != seq);
- /* pairs with mmgrab in mmu_interval_notifier_insert() */ - mmdrop(mm); + /* pairs with mmgrab() in __mmu_interval_notifier_insert() */ + if (!mni->ops->release) + mmdrop(mm); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_remove);
+/** + * mmu_interval_notifier_put - Unregister an interval notifier + * @mni: Interval notifier to unregister + * + * This function must be paired with one of the mmu_interval_notifier_insert() + * functions. It is safe to call from the invalidate() callback. + * Once this returns, ops callbacks may still be running on other CPUs and + * the release() callback will be called when they finish. + */ +void mmu_interval_notifier_put(struct mmu_interval_notifier *mni) +{ + struct mm_struct *mm = mni->mm; + + if (!__mmu_interval_notifier_put(mni)) { + mni->ops->release(mni); + + /* pairs with mmgrab() in __mmu_interval_notifier_insert() */ + mmdrop(mm); + } +} +EXPORT_SYMBOL_GPL(mmu_interval_notifier_put); + /** * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed *