A few unrelated devmem TCP fixes bundled in a series for some convenience (if that's ok).
Patch 1-2: fix naming and provide page_pool_alloc_netmem for fragged netmem.
Patch 3-4: fix issues with dma-buf dma addresses being potentially passed to dma_sync_for_* helpers.
Patch 5-6: fix syzbot SO_DEVMEM_DONTNEED issue and add test for this case.
Mina Almasry (6): net: page_pool: rename page_pool_alloc_netmem to *_netmems net: page_pool: create page_pool_alloc_netmem page_pool: disable sync for cpu for dmabuf memory provider netmem: add netmem_prefetch net: fix SO_DEVMEM_DONTNEED looping too long ncdevmem: add test for too many token_count
Samiullah Khawaja (1): page_pool: Set `dma_sync` to false for devmem memory provider
include/net/netmem.h | 7 ++++ include/net/page_pool/helpers.h | 50 ++++++++++++++++++-------- include/net/page_pool/types.h | 2 +- net/core/devmem.c | 9 +++-- net/core/page_pool.c | 11 +++--- net/core/sock.c | 46 ++++++++++++++---------- tools/testing/selftests/net/ncdevmem.c | 11 ++++++ 7 files changed, 93 insertions(+), 43 deletions(-)
page_pool_alloc_netmem (without an s) was the mirror of page_pool_alloc_pages (with an s), which was confusing.
Rename to page_pool_alloc_netmems so it's the mirror of page_pool_alloc_pages.
Signed-off-by: Mina Almasry almasrymina@google.com --- include/net/page_pool/types.h | 2 +- net/core/page_pool.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index c022c410abe3..8f564fe9eb9a 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -242,7 +242,7 @@ struct page_pool { };
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a813d30d2135..77de79c1933b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -574,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { netmem_ref netmem;
@@ -590,11 +590,11 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; } -EXPORT_SYMBOL(page_pool_alloc_netmem); +EXPORT_SYMBOL(page_pool_alloc_netmems);
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { - return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); + return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); @@ -956,7 +956,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, }
if (!netmem) { - netmem = page_pool_alloc_netmem(pool, gfp); + netmem = page_pool_alloc_netmems(pool, gfp); if (unlikely(!netmem)) { pool->frag_page = 0; return 0;
Create page_pool_alloc_netmem to be the mirror of page_pool_alloc.
This enables drivers that currently use page_pool_alloc to transition to netmem by converting the call sites to page_pool_alloc_netmem.
Signed-off-by: Mina Almasry almasrymina@google.com --- include/net/page_pool/helpers.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 793e6fd78bc5..8e548ff3044c 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -116,22 +116,22 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, return page_pool_alloc_frag(pool, offset, size, gfp); }
-static inline struct page *page_pool_alloc(struct page_pool *pool, - unsigned int *offset, - unsigned int *size, gfp_t gfp) +static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page; + netmem_ref netmem;
if ((*size << 1) > max_size) { *size = max_size; *offset = 0; - return page_pool_alloc_pages(pool, gfp); + return page_pool_alloc_netmems(pool, gfp); }
- page = page_pool_alloc_frag(pool, offset, *size, gfp); - if (unlikely(!page)) - return NULL; + netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp); + if (unlikely(!netmem)) + return 0;
/* There is very likely not enough space for another fragment, so append * the remaining size to the current fragment to avoid truesize @@ -142,7 +142,14 @@ static inline struct page *page_pool_alloc(struct page_pool *pool, pool->frag_offset = max_size; }
- return page; + return netmem; +} + +static inline struct page *page_pool_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_netmem(pool, offset, size, gfp)); }
/**
On 2024/10/30 4:45, Mina Almasry wrote:
Create page_pool_alloc_netmem to be the mirror of page_pool_alloc.
This enables drivers that currently use page_pool_alloc to transition to netmem by converting the call sites to page_pool_alloc_netmem.
For old API, page_pool_alloc_pages() always returns a whole page, and page_pool_alloc() returns a whole page or a page fragment based on the requested size.
For new netmem API, page_pool_alloc_netmems() always returns a whole netmem, and page_pool_alloc_netmem() returns a whole netmem or a netmem fragment based on the requested size.
Isn't it a little odd that old and new are not following the same pattern?
On Fri, Nov 1, 2024 at 4:14 AM Yunsheng Lin linyunsheng@huawei.com wrote:
On 2024/10/30 4:45, Mina Almasry wrote:
Create page_pool_alloc_netmem to be the mirror of page_pool_alloc.
This enables drivers that currently use page_pool_alloc to transition to netmem by converting the call sites to page_pool_alloc_netmem.
For old API, page_pool_alloc_pages() always return a whole page, and page_pool_alloc() returns a whole page or a page fragment based on the requested size.
For new netmem API, page_pool_alloc_netmems() always return a whole netmem, and page_pool_alloc_netmem() returns a whole netmem or a netmem fragment based on the requested size.
Isn't it a little odd that old and new are not following the same pattern?
Hi Yunsheng,
The intention is that page_pool_alloc_pages is mirrored by page_pool_alloc_netmems.
And page_pool_alloc is mirrored by page_pool_alloc_netmem.
From your description, the behavior is the same for each function and its mirror. What is the gap in the pattern that you see?
On 11/1/2024 9:10 PM, Mina Almasry wrote:
...
Isn't it a little odd that old and new are not following the same pattern?
Hi Yunsheng,
The intention is that page_pool_alloc_pages is mirrored by page_pool_alloc_netmems.
And page_pool_alloc is mirrored by page_pool_alloc_netmem.
From your description, the behavior is the same for each function and
its mirror. What is the gap in the pattern that you see?
I was mostly referring to the API naming pattern.
Isn't it better if page_pool_alloc is mirrored by netmem_pool_alloc and netmem_pool_alloc_netmems is mirrored by page_pool_alloc_pages() from API naming perspective?
And maybe page_pool_alloc_frag can be mirrored by netmem_pool_alloc_frag in the future?
Also, it would be good to update Documentation/networking/page_pool.rst for those new netmem APIs, or create a new doc file for them.
On Sun, Nov 3, 2024 at 6:35 AM Yunsheng Lin yunshenglin0825@gmail.com wrote:
On 11/1/2024 9:10 PM, Mina Almasry wrote:
...
Isn't it a little odd that old and new are not following the same pattern?
Hi Yunsheng,
The intention is that page_pool_alloc_pages is mirrored by page_pool_alloc_netmems.
And page_pool_alloc is mirrored by page_pool_alloc_netmem.
From your description, the behavior is the same for each function and
its mirror. What is the gap in the pattern that you see?
I was mostly referring to the API naming pattern.
Isn't it better if page_pool_alloc is mirrored by netmem_pool_alloc and netmem_pool_alloc_netmems is mirrored by page_pool_alloc_pages() from API naming perspective?
I've been treating the page_pool_* prefix to all the page_pool functions as constant in all the renames so far. I replace 'page' with 'netmem' when available, or add a _netmem postfix when available.
And maybe page_pool_alloc_frag can be mirrored by netmem_pool_alloc_frag in the future?
Also, it would be good to update Documentation/networking/page_pool.rst for those new netmem APIs, or create a new doc file for them.
Heard. I do have an action item to update the docs. Currently, outside of drivers looking to immediately adopt devmem tcp, there is no need yet to use the netmem APIs, but I do hope to make them more widespread (and perhaps deprecate the page APIs when it's time to do so).
From: Samiullah Khawaja skhawaja@google.com
Move the `dma_map` and `dma_sync` checks to `page_pool_init` to make them generic. Set dma_sync to false for devmem memory provider because the dma_sync APIs should not be used for dma_buf backed devmem memory provider.
Cc: Jason Gunthorpe jgg@ziepe.ca Signed-off-by: Samiullah Khawaja skhawaja@google.com Signed-off-by: Mina Almasry almasrymina@google.com
--- net/core/devmem.c | 9 ++++----- net/core/page_pool.c | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/core/devmem.c b/net/core/devmem.c index 11b91c12ee11..826d0b00159f 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -331,11 +331,10 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) if (!binding) return -EINVAL;
- if (!pool->dma_map) - return -EOPNOTSUPP; - - if (pool->dma_sync) - return -EOPNOTSUPP; + /* dma-buf dma addresses should not be used with + * dma_sync_for_cpu/device. Force disable dma_sync. + */ + pool->dma_sync = false;
if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 77de79c1933b..528dd4d18eab 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -287,6 +287,9 @@ static int page_pool_init(struct page_pool *pool, }
if (pool->mp_priv) { + if (!pool->dma_map || !pool->dma_sync) + return -EOPNOTSUPP; + err = mp_dmabuf_devmem_init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__,
dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically its the driver responsibility to dma_sync for CPU, but the driver should not dma_sync for CPU if the netmem is actually coming from a dmabuf memory provider.
The page_pool already exposes a helper for dma_sync_for_cpu: page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle netmem, and have it skip dma_sync if the memory is from a dmabuf memory provider. Drivers should migrate to using this helper when adding support for netmem.
Cc: Jason Gunthorpe jgg@ziepe.ca Signed-off-by: Mina Almasry almasrymina@google.com
--- include/net/page_pool/helpers.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 8e548ff3044c..ad4fed4a791c 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -429,9 +429,10 @@ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) }
/** - * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW + * page_pool_dma_sync_netmem_for_cpu - sync Rx page for CPU after it's written + * by HW * @pool: &page_pool the @page belongs to - * @page: page to sync + * @netmem: netmem to sync * @offset: offset from page start to "hard" start if using PP frags * @dma_sync_size: size of the data written to the page * @@ -440,16 +441,28 @@ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) * Note that this version performs DMA sync unconditionally, even if the * associated PP doesn't perform sync-for-device. */ -static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, - const struct page *page, - u32 offset, u32 dma_sync_size) +static inline void +page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, + const netmem_ref netmem, u32 offset, + u32 dma_sync_size) { + if (pool->mp_priv) + return; + dma_sync_single_range_for_cpu(pool->p.dev, - page_pool_get_dma_addr(page), + page_pool_get_dma_addr_netmem(netmem), offset + pool->p.offset, dma_sync_size, page_pool_get_dma_dir(pool)); }
+static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, + struct page *page, u32 offset, + u32 dma_sync_size) +{ + page_pool_dma_sync_netmem_for_cpu(pool, page_to_netmem(page), offset, + dma_sync_size); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt);
prefetch(page) is a common thing to be called from drivers. Add netmem_prefetch that can be called on generic netmem. Skips the prefetch for net_iovs.
Signed-off-by: Mina Almasry almasrymina@google.com --- include/net/netmem.h | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/include/net/netmem.h b/include/net/netmem.h index 8a6e20be4b9d..923c47147aa8 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -171,4 +171,11 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) return __netmem_clear_lsb(netmem)->dma_addr; }
+static inline void netmem_prefetch(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return; + + prefetch(netmem_to_page(netmem)); +} #endif /* _NET_NETMEM_H */
linux-kselftest-mirror@lists.linaro.org