On Thu, Feb 22, 2024 at 04:34:10PM +0800, Yi Liu wrote:
It doesn't mean that the S2 is globally shared across all the nesting translations (like ARM does), and you still have to iterate over every nesting DID.
In light of that this design seems to have gone a bit off..
A domain should have a list of places that need invalidation, specifically a list of DIDs and ATCs that need an invalidation to be issued.
Instead we now somehow have 4 different lists in the domain the invalidation code iterates over?
So I would think this:
struct dmar_domain { struct xarray iommu_array; /* Attached IOMMU array */ struct list_head devices; /* all devices' list */ struct list_head dev_pasids; /* all attached pasids */ struct list_head s1_domains;
Would make sense to be collapsed into one logical list of attached devices:
struct intel_iommu_domain_attachment { unsigned int did; ioasid_t pasid; struct device_domain_info *info; list_head item; };
When you attach a S1/S2 nest you allocate two of the above structs and one is linked on the S1 and one is linked on the S2..
yes, this shall work. ATC flushing should be fine. But there can be a drawback when flushing DIDs. VT-d side, if there are multiple devices behind the same iommu unit, just need one DID flushing is enough. But after the above change, the number of DID flushing would increase per the number of devices. Although no functional issue, but it submits duplicated invalidation.
At least the three server drivers all have this same problem, I would like to seem some core code to help solve it, since it is tricky and the RCU idea is good..
Collapsing invalidations can be done by sorting, I think this was Robin's suggestion. But we could also easially maintain multiple list threads hidden inside the API, or allocate a multi-level list.
Something very approximately like this:
Jason
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d14413916f93a0..7b2de139e7c437 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -38,6 +38,8 @@
#include "iommu-sva.h"
+#include <linux/iommu-alist.h> + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); static DEFINE_IDA(iommu_global_pasid_ida); diff --git a/include/linux/iommu-alist.h b/include/linux/iommu-alist.h new file mode 100644 index 00000000000000..7a9243f8b2b5f8 --- /dev/null +++ b/include/linux/iommu-alist.h @@ -0,0 +1,126 @@ +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/iommu.h> + +/* + * Place in an iommu_domain to keep track of what devices have been attached to + * it. + */ +struct iommu_attach_list { + spinlock_t lock; + struct list_head all; +}; + +/* + * Allocate for every time a device is attached to a domain + */ +struct iommu_attach_elm { + /* + * Note that this pointer is accessed under RCU, the driver has to be + * careful to rcu free it. + */ + struct iommu_device *iommu_device; + ioasid_t pasid; + u8 using_atc; + struct list_head all_item; + + /* May not be accessed under RCU! */ + struct device *device; +}; + +void iommu_attach_init(struct iommu_attach_list *alist) +{ + spin_lock_init(&alist->lock); +} + +int iommu_attach_add(struct iommu_attach_list *alist, + struct iommu_attach_elm *elm) +{ + struct list_head *insert_after; + + spin_lock(&alist->lock); + insert_after = list_find_sorted_insertion(alist, elm, cmp); + list_add_rcu(&elm->all_item, insert_after); + spin_unlock(&alist->lock); +} + +void iommu_attach_del_free(struct iommu_attach_list *alist, struct iommu_attach_elm *elm) +{ + spin_lock(&alist->lock); + list_del_rcu(&elm->all_item); + spin_unlock(&alist->lock); + /* assumes 0 offset */ + kfree_rcu_mightsleep(elm); +} + +static struct iommu_attach_elm * +__iommu_attach_next_iommu(struct iommu_attach_list *alist, + struct iommu_attach_elm *pos, + struct iommu_attach_elm *last) +{ + struct iommu_attach_elm *next; + + do { + next = list_next_or_null_rcu(&alist->all, &pos->all_item, + struct iommu_attach_elm, all_item); + if (!next) + return NULL; + if (!last) + return next; + } while (pos->iommu_device == next->iommu_device); + return next; +} + +/* assumes 0 offset */ +#define iommu_attach_next_iommu(alist, pos, last, member) \ + container_of(__iommu_attach_next_iommu(alist, &(pos)->member, \ + &(last)->member), \ + typeof(*pos), member) + +#define iommu_attach_for_each_iommu(pos, last, alist, member) \ + for (({ \ + RCU_LOCKDEP_WARN( \ + !rcu_read_lock_any_held(), \ + "RCU-list traversed in non-reader section!"); \ + }), \ + pos = list_first_or_null_rcu(&(alist)->all, typeof(*pos), \ + member.all_item), \ + last = NULL; \ + pos; pos = iommu_attach_next_iommu(alist, pos, last, member)) + +/* Example */ +struct driver_domain { + struct iommu_domain domain; + struct iommu_attach_list alist; +}; + +struct driver_attach_elm { + struct iommu_attach_elm aelm; + unsigned int cache_tag; +}; + +static void example(struct driver_domain *domain) +{ + struct driver_attach_elm *elm; + struct driver_attach_elm *pos, *last; + bool need_atc; + + iommu_attach_init(&domain->alist); + + elm = kzalloc(sizeof(*elm), GFP_KERNEL); + iommu_attach_add(&domain->alist, &elm->aelm); + + rcu_read_lock(); + iommu_attach_for_each_iommu(pos, last, &domain->alist, aelm) { + invalidate_iommu(elm); + need_atc |= elm->aelm.using_atc; + } + if (need_atc) { + list_for_each_entry_rcu(pos, &domain->alist.all, + aelm.all_item) { + if (pos->aelm.using_atc) + invalidate_atc(elm); + } + } + rcu_read_unlock(); +}