A bit more than expected: apart from the 9 failed-to-apply patches there
are lots of dependencies on them, but for the most part they merged
automatically.
Hao Xu (1):
  io_uring: fix flush cqring overflow list while TASK_INTERRUPTIBLE

Jens Axboe (2):
  io_uring: account io_uring internal files as REQ_F_INFLIGHT
  io_uring: if we see flush on exit, cancel related tasks

Pavel Begunkov (13):
  io_uring: simplify io_task_match()
  io_uring: add a {task,files} pair matching helper
  io_uring: don't iterate io_uring_cancel_files()
  io_uring: pass files into kill timeouts/poll
  io_uring: always batch cancel in *cancel_files()
  io_uring: fix files cancellation
  io_uring: fix __io_uring_files_cancel() with TASK_UNINTERRUPTIBLE
  io_uring: replace inflight_wait with tctx->wait
  io_uring: fix cancellation taking mutex while TASK_UNINTERRUPTIBLE
  io_uring: fix list corruption for splice file_get
  io_uring: fix sqo ownership false positive warning
  io_uring: reinforce cancel on flush during exit
  io_uring: drop mm/files between task_work_submit

 fs/io-wq.c    |  10 --
 fs/io-wq.h    |   1 -
 fs/io_uring.c | 360 ++++++++++++++++++++------------------------------
 3 files changed, 141 insertions(+), 230 deletions(-)
[ Upstream commit 06de5f5973c641c7ae033f133ecfaaf64fe633a6 ]
If IORING_SETUP_SQPOLL is set, all requests belong to the corresponding SQPOLL task, so skip the task check in that case and always match.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 907ecaffc338..510a860f8bdf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1472,11 +1472,7 @@ static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
 
         if (!tsk || req->task == tsk)
                 return true;
-        if (ctx->flags & IORING_SETUP_SQPOLL) {
-                if (ctx->sq_data && req->task == ctx->sq_data->thread)
-                        return true;
-        }
-        return false;
+        return (ctx->flags & IORING_SETUP_SQPOLL);
 }
 
 /*
[ Upstream commit 08d23634643c239ddae706758f54d3a8e0c24962 ]
Add io_match_task() that matches both task and files.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 63 ++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 31 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 510a860f8bdf..71bdd288c396 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -997,6 +997,36 @@ static inline void io_clean_op(struct io_kiocb *req)
                 __io_clean_op(req);
 }
 
+static inline bool __io_match_files(struct io_kiocb *req,
+                                    struct files_struct *files)
+{
+        return ((req->flags & REQ_F_WORK_INITIALIZED) &&
+                (req->work.flags & IO_WQ_WORK_FILES)) &&
+                req->work.identity->files == files;
+}
+
+static bool io_match_task(struct io_kiocb *head,
+                          struct task_struct *task,
+                          struct files_struct *files)
+{
+        struct io_kiocb *link;
+
+        if (task && head->task != task)
+                return false;
+        if (!files)
+                return true;
+        if (__io_match_files(head, files))
+                return true;
+        if (head->flags & REQ_F_LINK_HEAD) {
+                list_for_each_entry(link, &head->link_list, link_list) {
+                        if (__io_match_files(link, files))
+                                return true;
+                }
+        }
+        return false;
+}
+
 static void io_sq_thread_drop_mm(void)
 {
         struct mm_struct *mm = current->mm;
@@ -1612,32 +1642,6 @@ static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
         }
 }
 
-static inline bool __io_match_files(struct io_kiocb *req,
-                                    struct files_struct *files)
-{
-        return ((req->flags & REQ_F_WORK_INITIALIZED) &&
-                (req->work.flags & IO_WQ_WORK_FILES)) &&
-                req->work.identity->files == files;
-}
-
-static bool io_match_files(struct io_kiocb *req,
-                           struct files_struct *files)
-{
-        struct io_kiocb *link;
-
-        if (!files)
-                return true;
-        if (__io_match_files(req, files))
-                return true;
-        if (req->flags & REQ_F_LINK_HEAD) {
-                list_for_each_entry(link, &req->link_list, link_list) {
-                        if (__io_match_files(link, files))
-                                return true;
-                }
-        }
-        return false;
-}
-
 /* Returns true if there are no backlogged entries after the flush */
 static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
                                        struct task_struct *tsk,
@@ -1659,9 +1663,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
 
         cqe = NULL;
         list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
-                if (tsk && req->task != tsk)
-                        continue;
-                if (!io_match_files(req, files))
+                if (!io_match_task(req, tsk, files))
                         continue;
 
                 cqe = io_get_cqring(ctx);
@@ -8635,8 +8637,7 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
 
         spin_lock_irq(&ctx->completion_lock);
         list_for_each_entry_reverse(de, &ctx->defer_list, list) {
-                if (io_task_match(de->req, task) &&
-                    io_match_files(de->req, files)) {
+                if (io_match_task(de->req, task, files)) {
                         list_cut_position(&list, &ctx->defer_list, &de->list);
                         break;
                 }
[ Upstream commit b52fda00dd9df8b4a6de5784df94f9617f6133a1 ]
io_uring_cancel_files() guarantees to cancel all matching requests, so there is no need to call it in a loop. Move it up the callchain into io_uring_cancel_task_requests().
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 71bdd288c396..b8c413830722 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8654,16 +8654,10 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
         }
 }
 
-/*
- * Returns true if we found and killed one or more files pinning requests
- */
-static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
+static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                                   struct task_struct *task,
                                   struct files_struct *files)
 {
-        if (list_empty_careful(&ctx->inflight_list))
-                return false;
-
         while (!list_empty_careful(&ctx->inflight_list)) {
                 struct io_kiocb *cancel_req = NULL, *req;
                 DEFINE_WAIT(wait);
@@ -8698,8 +8692,6 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
                 schedule();
                 finish_wait(&ctx->inflight_wait, &wait);
         }
-
-        return true;
 }
 
 static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
@@ -8710,15 +8702,12 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
         return io_task_match(req, task);
 }
 
-static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
-                                            struct task_struct *task,
-                                            struct files_struct *files)
+static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
+                                            struct task_struct *task)
 {
-        bool ret;
-
-        ret = io_uring_cancel_files(ctx, task, files);
-        if (!files) {
+        while (1) {
                 enum io_wq_cancel cret;
+                bool ret = false;
 
                 cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true);
                 if (cret != IO_WQ_CANCEL_NOTFOUND)
@@ -8734,9 +8723,11 @@ static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 
                 ret |= io_poll_remove_all(ctx, task);
                 ret |= io_kill_timeouts(ctx, task);
+                if (!ret)
+                        break;
+                io_run_task_work();
+                cond_resched();
         }
-
-        return ret;
 }
 
 static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
@@ -8771,11 +8762,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 
         io_cancel_defer_files(ctx, task, files);
         io_cqring_overflow_flush(ctx, true, task, files);
+        io_uring_cancel_files(ctx, task, files);
 
-        while (__io_uring_cancel_task_requests(ctx, task, files)) {
-                io_run_task_work();
-                cond_resched();
-        }
+        if (!files)
+                __io_uring_cancel_task_requests(ctx, task);
 
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
                 atomic_dec(&task->io_uring->in_idle);
[ Upstream commit 6b81928d4ca8668513251f9c04cdcb9d38ef51c7 ]
Make io_poll_remove_all() and io_kill_timeouts() match against files as well. A preparation patch; the new argument is effectively unused for now.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b8c413830722..0a9f938ac3a1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1508,14 +1508,15 @@ static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
 /*
  * Returns true if we found and killed one or more timeouts
  */
-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+                             struct files_struct *files)
 {
         struct io_kiocb *req, *tmp;
         int canceled = 0;
 
         spin_lock_irq(&ctx->completion_lock);
         list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
-                if (io_task_match(req, tsk)) {
+                if (io_match_task(req, tsk, files)) {
                         io_kill_timeout(req);
                         canceled++;
                 }
@@ -5312,7 +5313,8 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 /*
  * Returns true if we found and killed one or more poll requests
  */
-static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
+static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
+                               struct files_struct *files)
 {
         struct hlist_node *tmp;
         struct io_kiocb *req;
@@ -5324,7 +5326,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
 
                 list = &ctx->cancel_hash[i];
                 hlist_for_each_entry_safe(req, tmp, list, hash_node) {
-                        if (io_task_match(req, tsk))
+                        if (io_match_task(req, tsk, files))
                                 posted += io_poll_remove_one(req);
                 }
         }
@@ -8485,8 +8487,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
         __io_cqring_overflow_flush(ctx, true, NULL, NULL);
         mutex_unlock(&ctx->uring_lock);
 
-        io_kill_timeouts(ctx, NULL);
-        io_poll_remove_all(ctx, NULL);
+        io_kill_timeouts(ctx, NULL, NULL);
+        io_poll_remove_all(ctx, NULL, NULL);
 
         if (ctx->io_wq)
                 io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
@@ -8721,8 +8723,8 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
                 }
         }
 
-                ret |= io_poll_remove_all(ctx, task);
-                ret |= io_kill_timeouts(ctx, task);
+                ret |= io_poll_remove_all(ctx, task, NULL);
+                ret |= io_kill_timeouts(ctx, task, NULL);
                 if (!ret)
                         break;
                 io_run_task_work();
[ Upstream commit f6edbabb8359798c541b0776616c5eab3a840d3d ]
Instead of iterating over each request and cancelling it individually in io_uring_cancel_files(), try to cancel all matching requests and use ->inflight_list only to check if there is anything left.
In many cases it should be faster, and we can reuse a lot of code from task cancellation.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c    |  10 ----
 fs/io-wq.h    |   1 -
 fs/io_uring.c | 139 ++++++++------------------------------------
 3 files changed, 20 insertions(+), 130 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index b53c055bea6a..f72d53848dcb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1078,16 +1078,6 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
         return IO_WQ_CANCEL_NOTFOUND;
 }
 
-static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
-{
-        return work == data;
-}
-
-enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
-{
-        return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false);
-}
-
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
         int ret = -ENOMEM, node;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index aaa363f35891..75113bcd5889 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -130,7 +130,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work)
 }
 
 void io_wq_cancel_all(struct io_wq *wq);
-enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
 
 typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0a9f938ac3a1..44b859456ef6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1496,15 +1496,6 @@ static void io_kill_timeout(struct io_kiocb *req)
         }
 }
 
-static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
-{
-        struct io_ring_ctx *ctx = req->ctx;
-
-        if (!tsk || req->task == tsk)
-                return true;
-        return (ctx->flags & IORING_SETUP_SQPOLL);
-}
-
 /*
  * Returns true if we found and killed one or more timeouts
  */
@@ -8524,112 +8515,31 @@ static int io_uring_release(struct inode *inode, struct file *file)
         return 0;
 }
 
-/*
- * Returns true if 'preq' is the link parent of 'req'
- */
-static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
-{
-        struct io_kiocb *link;
-
-        if (!(preq->flags & REQ_F_LINK_HEAD))
-                return false;
-
-        list_for_each_entry(link, &preq->link_list, link_list) {
-                if (link == req)
-                        return true;
-        }
-
-        return false;
-}
-
-/*
- * We're looking to cancel 'req' because it's holding on to our files, but
- * 'req' could be a link to another request. See if it is, and cancel that
- * parent request if so.
- */
-static bool io_poll_remove_link(struct io_ring_ctx *ctx, struct io_kiocb *req)
-{
-        struct hlist_node *tmp;
-        struct io_kiocb *preq;
-        bool found = false;
-        int i;
-
-        spin_lock_irq(&ctx->completion_lock);
-        for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
-                struct hlist_head *list;
-
-                list = &ctx->cancel_hash[i];
-                hlist_for_each_entry_safe(preq, tmp, list, hash_node) {
-                        found = io_match_link(preq, req);
-                        if (found) {
-                                io_poll_remove_one(preq);
-                                break;
-                        }
-                }
-        }
-        spin_unlock_irq(&ctx->completion_lock);
-        return found;
-}
-
-static bool io_timeout_remove_link(struct io_ring_ctx *ctx,
-                                   struct io_kiocb *req)
-{
-        struct io_kiocb *preq;
-        bool found = false;
-
-        spin_lock_irq(&ctx->completion_lock);
-        list_for_each_entry(preq, &ctx->timeout_list, timeout.list) {
-                found = io_match_link(preq, req);
-                if (found) {
-                        __io_timeout_cancel(preq);
-                        break;
-                }
-        }
-        spin_unlock_irq(&ctx->completion_lock);
-        return found;
-}
+struct io_task_cancel {
+        struct task_struct *task;
+        struct files_struct *files;
+};
 
-static bool io_cancel_link_cb(struct io_wq_work *work, void *data)
+static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
 {
         struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+        struct io_task_cancel *cancel = data;
         bool ret;
 
-        if (req->flags & REQ_F_LINK_TIMEOUT) {
+        if (cancel->files && (req->flags & REQ_F_LINK_TIMEOUT)) {
                 unsigned long flags;
                 struct io_ring_ctx *ctx = req->ctx;
 
                 /* protect against races with linked timeouts */
                 spin_lock_irqsave(&ctx->completion_lock, flags);
-                ret = io_match_link(req, data);
+                ret = io_match_task(req, cancel->task, cancel->files);
                 spin_unlock_irqrestore(&ctx->completion_lock, flags);
         } else {
-                ret = io_match_link(req, data);
+                ret = io_match_task(req, cancel->task, cancel->files);
         }
         return ret;
 }
 
-static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
-{
-        enum io_wq_cancel cret;
-
-        /* cancel this particular work, if it's running */
-        cret = io_wq_cancel_work(ctx->io_wq, &req->work);
-        if (cret != IO_WQ_CANCEL_NOTFOUND)
-                return;
-
-        /* find links that hold this pending, cancel those */
-        cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true);
-        if (cret != IO_WQ_CANCEL_NOTFOUND)
-                return;
-
-        /* if we have a poll link holding this pending, cancel that */
-        if (io_poll_remove_link(ctx, req))
-                return;
-
-        /* final option, timeout link is holding this req pending */
-        io_timeout_remove_link(ctx, req);
-}
-
 static void io_cancel_defer_files(struct io_ring_ctx *ctx,
                                   struct task_struct *task,
                                   struct files_struct *files)
@@ -8661,8 +8571,10 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                                   struct files_struct *files)
 {
         while (!list_empty_careful(&ctx->inflight_list)) {
-                struct io_kiocb *cancel_req = NULL, *req;
+                struct io_task_cancel cancel = { .task = task, .files = NULL, };
+                struct io_kiocb *req;
                 DEFINE_WAIT(wait);
+                bool found = false;
 
                 spin_lock_irq(&ctx->inflight_lock);
                 list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
@@ -8670,25 +8582,21 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                             (req->work.flags & IO_WQ_WORK_FILES) &&
                             req->work.identity->files != files)
                                 continue;
-                        /* req is being completed, ignore */
-                        if (!refcount_inc_not_zero(&req->refs))
-                                continue;
-                        cancel_req = req;
+                        found = true;
                         break;
                 }
-                if (cancel_req)
+                if (found)
                         prepare_to_wait(&ctx->inflight_wait, &wait,
                                         TASK_UNINTERRUPTIBLE);
                 spin_unlock_irq(&ctx->inflight_lock);
 
                 /* We need to keep going until we don't find a matching req */
-                if (!cancel_req)
+                if (!found)
                         break;
-                /* cancel this request, or head link requests */
-                io_attempt_cancel(ctx, cancel_req);
-                io_cqring_overflow_flush(ctx, true, task, files);
 
-                io_put_req(cancel_req);
+                io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
+                io_poll_remove_all(ctx, task, files);
+                io_kill_timeouts(ctx, task, files);
                 /* cancellations _may_ trigger task work */
                 io_run_task_work();
                 schedule();
@@ -8696,22 +8604,15 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
         }
 }
 
-static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
-{
-        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-        struct task_struct *task = data;
-
-        return io_task_match(req, task);
-}
-
 static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
                                             struct task_struct *task)
 {
         while (1) {
+                struct io_task_cancel cancel = { .task = task, .files = NULL, };
                 enum io_wq_cancel cret;
                 bool ret = false;
 
-                cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true);
+                cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
                 if (cret != IO_WQ_CANCEL_NOTFOUND)
                         ret = true;
[ Upstream commit bee749b187ac57d1faf00b2ab356ff322230fce8 ]
io_uring_cancel_files()'s task check condition mistakenly got flipped.
1. There can't be a request in the inflight list without IO_WQ_WORK_FILES,
   so kill this check to keep the whole condition simpler.
2. Also, don't call the function for files==NULL, avoiding such a check;
   all that stuff is already handled well by its counterpart,
   __io_uring_cancel_task_requests().
With that, just flip the task check.

Also, since it now iowq-cancels all requests of the current task there, don't forget to set the right ->files in struct io_task_cancel.
Fixes: c1973b38bf639 ("io_uring: cancel only requests of current task")
Reported-by: syzbot+c0d52d0b3c0c3ffb9525@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 44b859456ef6..a40ee81e6438 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8571,15 +8571,14 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                                   struct files_struct *files)
 {
         while (!list_empty_careful(&ctx->inflight_list)) {
-                struct io_task_cancel cancel = { .task = task, .files = NULL, };
+                struct io_task_cancel cancel = { .task = task, .files = files };
                 struct io_kiocb *req;
                 DEFINE_WAIT(wait);
                 bool found = false;
 
                 spin_lock_irq(&ctx->inflight_lock);
                 list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
-                        if (req->task == task &&
-                            (req->work.flags & IO_WQ_WORK_FILES) &&
+                        if (req->task != task ||
                             req->work.identity->files != files)
                                 continue;
                         found = true;
                         break;
@@ -8665,10 +8664,11 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 
         io_cancel_defer_files(ctx, task, files);
         io_cqring_overflow_flush(ctx, true, task, files);
-        io_uring_cancel_files(ctx, task, files);
 
         if (!files)
                 __io_uring_cancel_task_requests(ctx, task);
+        else
+                io_uring_cancel_files(ctx, task, files);
 
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
                 atomic_dec(&task->io_uring->in_idle);
From: Jens Axboe <axboe@kernel.dk>
[ Upstream commit 02a13674fa0e8dd326de8b9f4514b41b03d99003 ]
We need to actively cancel anything that introduces a potential circular loop, where io_uring holds a reference to itself. If the file in question is an io_uring file, then add the request to the inflight list.
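For illustration, here is a minimal userspace sketch of the kind of self-reference being guarded against (a hedged example, not part of the patch; it assumes liburing and its standard API):

        #include <poll.h>
        #include <liburing.h>

        /*
         * Ring 'a' submits a poll on ring 'b's fd: until that poll
         * completes, 'a' holds a reference to an io_uring file. This is
         * the kind of request the patch marks REQ_F_INFLIGHT so it can
         * be actively cancelled instead of pinning a ring indefinitely.
         */
        int main(void)
        {
                struct io_uring a, b;
                struct io_uring_sqe *sqe;

                io_uring_queue_init(8, &a, 0);
                io_uring_queue_init(8, &b, 0);

                sqe = io_uring_get_sqe(&a);
                io_uring_prep_poll_add(sqe, b.ring_fd, POLLIN);
                io_uring_submit(&a);
                return 0;
        }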
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a40ee81e6438..9801fa9b00ba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1000,6 +1000,9 @@ static inline void io_clean_op(struct io_kiocb *req)
 static inline bool __io_match_files(struct io_kiocb *req,
                                     struct files_struct *files)
 {
+        if (req->file && req->file->f_op == &io_uring_fops)
+                return true;
+
         return ((req->flags & REQ_F_WORK_INITIALIZED) &&
                 (req->work.flags & IO_WQ_WORK_FILES)) &&
                 req->work.identity->files == files;
@@ -1398,11 +1401,14 @@ static bool io_grab_identity(struct io_kiocb *req)
                         return false;
                 atomic_inc(&id->files->count);
                 get_nsproxy(id->nsproxy);
-                req->flags |= REQ_F_INFLIGHT;
 
-                spin_lock_irq(&ctx->inflight_lock);
-                list_add(&req->inflight_entry, &ctx->inflight_list);
-                spin_unlock_irq(&ctx->inflight_lock);
+                if (!(req->flags & REQ_F_INFLIGHT)) {
+                        req->flags |= REQ_F_INFLIGHT;
+
+                        spin_lock_irq(&ctx->inflight_lock);
+                        list_add(&req->inflight_entry, &ctx->inflight_list);
+                        spin_unlock_irq(&ctx->inflight_lock);
+                }
                 req->work.flags |= IO_WQ_WORK_FILES;
         }
         if (!(req->work.flags & IO_WQ_WORK_MM) &&
@@ -5886,8 +5892,10 @@ static void io_req_drop_files(struct io_kiocb *req)
         struct io_ring_ctx *ctx = req->ctx;
         unsigned long flags;
 
-        put_files_struct(req->work.identity->files);
-        put_nsproxy(req->work.identity->nsproxy);
+        if (req->work.flags & IO_WQ_WORK_FILES) {
+                put_files_struct(req->work.identity->files);
+                put_nsproxy(req->work.identity->nsproxy);
+        }
         spin_lock_irqsave(&ctx->inflight_lock, flags);
         list_del(&req->inflight_entry);
         spin_unlock_irqrestore(&ctx->inflight_lock, flags);
@@ -6159,6 +6167,15 @@ static struct file *io_file_get(struct io_submit_state *state,
                 file = __io_file_get(state, fd);
         }
 
+        if (file && file->f_op == &io_uring_fops) {
+                io_req_init_async(req);
+                req->flags |= REQ_F_INFLIGHT;
+
+                spin_lock_irq(&ctx->inflight_lock);
+                list_add(&req->inflight_entry, &ctx->inflight_list);
+                spin_unlock_irq(&ctx->inflight_lock);
+        }
+
         return file;
 }
 
@@ -8578,8 +8595,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 
                 spin_lock_irq(&ctx->inflight_lock);
                 list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
-                        if (req->task != task ||
-                            req->work.identity->files != files)
+                        if (!io_match_task(req, task, files))
                                 continue;
                         found = true;
                         break;
From: Jens Axboe <axboe@kernel.dk>
[ Upstream commit 84965ff8a84f0368b154c9b367b62e59c1193f30 ]
Ensure we match tasks that belong to a dead or dying task as well, as we need to reap those in addition to those belonging to the exiting task.
Cc: stable@vger.kernel.org # 5.9+
Reported-by: Josef Grieb <josef.grieb@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9801fa9b00ba..95faa3d913b1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1014,8 +1014,12 @@ static bool io_match_task(struct io_kiocb *head,
 {
         struct io_kiocb *link;
 
-        if (task && head->task != task)
+        if (task && head->task != task) {
+                /* in terms of cancelation, always match if req task is dead */
+                if (head->task->flags & PF_EXITING)
+                        return true;
                 return false;
+        }
         if (!files)
                 return true;
         if (__io_match_files(head, files))
@@ -8850,6 +8854,9 @@ static int io_uring_flush(struct file *file, void *data)
         struct io_uring_task *tctx = current->io_uring;
         struct io_ring_ctx *ctx = file->private_data;
 
+        if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+                io_uring_cancel_task_requests(ctx, NULL);
+
         if (!tctx)
                 return 0;
[ Upstream commit a1bb3cd58913338e1b627ea6b8c03c2ae82d293f ]
If the tctx inflight number hasn't changed because of cancellation, __io_uring_task_cancel() will continue, leaving the task in the TASK_UNINTERRUPTIBLE state, which is not expected by __io_uring_files_cancel(). Ensure we always call finish_wait() before retrying.
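Schematically, the loop shape after the fix looks like this (a simplified sketch of __io_uring_task_cancel(), not the verbatim kernel code):

        do {
                /* sample the completion count before deciding to sleep */
                inflight = tctx_inflight(tctx);
                if (!inflight)
                        break;
                prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
                /* only sleep if nothing completed since the sample */
                if (inflight == tctx_inflight(tctx))
                        schedule();
                /* always pair with finish_wait(): back to TASK_RUNNING */
                finish_wait(&tctx->wait, &wait);
        } while (1);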
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 95faa3d913b1..170f980c3243 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8835,15 +8835,15 @@ void __io_uring_task_cancel(void)
                 prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 
                 /*
-                 * If we've seen completions, retry. This avoids a race where
-                 * a completion comes in before we did prepare_to_wait().
+                 * If we've seen completions, retry without waiting. This
+                 * avoids a race where a completion comes in before we did
+                 * prepare_to_wait().
                  */
-                if (inflight != tctx_inflight(tctx))
-                        continue;
-                schedule();
+                if (inflight == tctx_inflight(tctx))
+                        schedule();
+                finish_wait(&tctx->wait, &wait);
         } while (1);
 
-        finish_wait(&tctx->wait, &wait);
         atomic_dec(&tctx->in_idle);
 
         io_uring_remove_task_files(tctx);
[ Upstream commit c98de08c990e190fc7cc3aaf8079b4a0674c6425 ]
As tasks now cancel only their own requests, and inflight_wait is awaited only in io_uring_cancel_files(), which should be called with ->in_idle set, use tctx->wait instead of keeping a separate inflight_wait.

That adds some spurious wakeups but is actually safer in terms of not hanging the task.
e.g.
task1                               |  IRQ
                                    |
*start* io_complete_rw_common(link) |
                                    |  link: req1 -> req2 -> req3(with files)
*cancel_files()                     |
io_wq_cancel(), etc.                |
                                    |  put_req(link), adds to io-wq req2
schedule()                          |
So, task1 will never try to cancel req2 or req3. If req2 is long-standing (e.g. read(empty_pipe)), this may hang.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 170f980c3243..e3ae0daf97f7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -286,7 +286,6 @@ struct io_ring_ctx {
                 struct list_head timeout_list;
                 struct list_head cq_overflow_list;
 
-                wait_queue_head_t inflight_wait;
                 struct io_uring_sqe *sq_sqes;
         } ____cacheline_aligned_in_smp;
 
@@ -1220,7 +1219,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
         INIT_LIST_HEAD(&ctx->iopoll_list);
         INIT_LIST_HEAD(&ctx->defer_list);
         INIT_LIST_HEAD(&ctx->timeout_list);
-        init_waitqueue_head(&ctx->inflight_wait);
         spin_lock_init(&ctx->inflight_lock);
         INIT_LIST_HEAD(&ctx->inflight_list);
         INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work);
@@ -5894,6 +5892,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 static void io_req_drop_files(struct io_kiocb *req)
 {
         struct io_ring_ctx *ctx = req->ctx;
+        struct io_uring_task *tctx = req->task->io_uring;
         unsigned long flags;
 
         if (req->work.flags & IO_WQ_WORK_FILES) {
@@ -5905,8 +5904,8 @@ static void io_req_drop_files(struct io_kiocb *req)
         spin_unlock_irqrestore(&ctx->inflight_lock, flags);
         req->flags &= ~REQ_F_INFLIGHT;
         req->work.flags &= ~IO_WQ_WORK_FILES;
-        if (waitqueue_active(&ctx->inflight_wait))
-                wake_up(&ctx->inflight_wait);
+        if (atomic_read(&tctx->in_idle))
+                wake_up(&tctx->wait);
 }
 
 static void __io_clean_op(struct io_kiocb *req)
@@ -8605,8 +8604,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                         break;
                 }
                 if (found)
-                        prepare_to_wait(&ctx->inflight_wait, &wait,
-                                        TASK_UNINTERRUPTIBLE);
+                        prepare_to_wait(&task->io_uring->wait, &wait,
+                                        TASK_UNINTERRUPTIBLE);
                 spin_unlock_irq(&ctx->inflight_lock);
 
                 /* We need to keep going until we don't find a matching req */
@@ -8619,7 +8618,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                 /* cancellations _may_ trigger task work */
                 io_run_task_work();
                 schedule();
-                finish_wait(&ctx->inflight_wait, &wait);
+                finish_wait(&task->io_uring->wait, &wait);
         }
 }
[ Upstream commit ca70f00bed6cb255b7a9b91aa18a2717c9217f70 ]
do not call blocking ops when !TASK_RUNNING; state=2 set at [<00000000ced9dbfc>] prepare_to_wait+0x1f4/0x3b0 kernel/sched/wait.c:262
WARNING: CPU: 1 PID: 19888 at kernel/sched/core.c:7853 __might_sleep+0xed/0x100 kernel/sched/core.c:7848
RIP: 0010:__might_sleep+0xed/0x100 kernel/sched/core.c:7848
Call Trace:
 __mutex_lock_common+0xc4/0x2ef0 kernel/locking/mutex.c:935
 __mutex_lock kernel/locking/mutex.c:1103 [inline]
 mutex_lock_nested+0x1a/0x20 kernel/locking/mutex.c:1118
 io_wq_submit_work+0x39a/0x720 fs/io_uring.c:6411
 io_run_cancel fs/io-wq.c:856 [inline]
 io_wqe_cancel_pending_work fs/io-wq.c:990 [inline]
 io_wq_cancel_cb+0x614/0xcb0 fs/io-wq.c:1027
 io_uring_cancel_files fs/io_uring.c:8874 [inline]
 io_uring_cancel_task_requests fs/io_uring.c:8952 [inline]
 __io_uring_files_cancel+0x115d/0x19e0 fs/io_uring.c:9038
 io_uring_files_cancel include/linux/io_uring.h:51 [inline]
 do_exit+0x2e6/0x2490 kernel/exit.c:780
 do_group_exit+0x168/0x2d0 kernel/exit.c:922
 get_signal+0x16b5/0x2030 kernel/signal.c:2770
 arch_do_signal_or_restart+0x8e/0x6a0 arch/x86/kernel/signal.c:811
 handle_signal_work kernel/entry/common.c:147 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
 exit_to_user_mode_prepare+0xac/0x1e0 kernel/entry/common.c:201
 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
 syscall_exit_to_user_mode+0x48/0x190 kernel/entry/common.c:302
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Rewrite io_uring_cancel_files() to mimic __io_uring_task_cancel()'s counting scheme, so it does all the heavy work before setting TASK_UNINTERRUPTIBLE.
Cc: stable@vger.kernel.org # 5.9+
Reported-by: syzbot+f655445043a26a7cfab8@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
[axboe: fix inverted task check]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e3ae0daf97f7..03c1185270d1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8586,30 +8586,31 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
         }
 }
 
+static int io_uring_count_inflight(struct io_ring_ctx *ctx,
+                                   struct task_struct *task,
+                                   struct files_struct *files)
+{
+        struct io_kiocb *req;
+        int cnt = 0;
+
+        spin_lock_irq(&ctx->inflight_lock);
+        list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
+                cnt += io_match_task(req, task, files);
+        spin_unlock_irq(&ctx->inflight_lock);
+        return cnt;
+}
+
 static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                                   struct task_struct *task,
                                   struct files_struct *files)
 {
         while (!list_empty_careful(&ctx->inflight_list)) {
                 struct io_task_cancel cancel = { .task = task, .files = files };
-                struct io_kiocb *req;
                 DEFINE_WAIT(wait);
-                bool found = false;
-
-                spin_lock_irq(&ctx->inflight_lock);
-                list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
-                        if (!io_match_task(req, task, files))
-                                continue;
-                        found = true;
-                        break;
-                }
-                if (found)
-                        prepare_to_wait(&task->io_uring->wait, &wait,
-                                        TASK_UNINTERRUPTIBLE);
-                spin_unlock_irq(&ctx->inflight_lock);
+                int inflight;
 
-                /* We need to keep going until we don't find a matching req */
-                if (!found)
+                inflight = io_uring_count_inflight(ctx, task, files);
+                if (!inflight)
                         break;
 
                 io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
@@ -8617,7 +8618,11 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                 io_kill_timeouts(ctx, task, files);
                 /* cancellations _may_ trigger task work */
                 io_run_task_work();
-                schedule();
+
+                prepare_to_wait(&task->io_uring->wait, &wait,
+                                TASK_UNINTERRUPTIBLE);
+                if (inflight == io_uring_count_inflight(ctx, task, files))
+                        schedule();
                 finish_wait(&task->io_uring->wait, &wait);
         }
 }
From: Hao Xu <haoxu@linux.alibaba.com>
[ Upstream commit 6195ba09822c87cad09189bbf550d0fbe714687a ]
Abaci reported the following warning:
[   27.073425] do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait_exclusive+0x3a/0xc0
[   27.075805] WARNING: CPU: 0 PID: 951 at kernel/sched/core.c:7853 __might_sleep+0x80/0xa0
[   27.077604] Modules linked in:
[   27.078379] CPU: 0 PID: 951 Comm: a.out Not tainted 5.11.0-rc3+ #1
[   27.079637] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[   27.080852] RIP: 0010:__might_sleep+0x80/0xa0
[   27.081835] Code: 65 48 8b 04 25 80 71 01 00 48 8b 90 c0 15 00 00 48 8b 70 18 48 c7 c7 08 39 95 82 c6 05 f9 5f de 08 01 48 89 d1 e8 00 c6 fa ff 0b eb bf 41 0f b6 f5 48 c7 c7 40 23 c9 82 e8 f3 48 ec 00 eb a7
[   27.084521] RSP: 0018:ffffc90000fe3ce8 EFLAGS: 00010286
[   27.085350] RAX: 0000000000000000 RBX: ffffffff82956083 RCX: 0000000000000000
[   27.086348] RDX: ffff8881057a0000 RSI: ffffffff8118cc9e RDI: ffff88813bc28570
[   27.087598] RBP: 00000000000003a7 R08: 0000000000000001 R09: 0000000000000001
[   27.088819] R10: ffffc90000fe3e00 R11: 00000000fffef9f0 R12: 0000000000000000
[   27.089819] R13: 0000000000000000 R14: ffff88810576eb80 R15: ffff88810576e800
[   27.091058] FS:  00007f7b144cf740(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
[   27.092775] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   27.093796] CR2: 00000000022da7b8 CR3: 000000010b928002 CR4: 00000000003706f0
[   27.094778] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   27.095780] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   27.097011] Call Trace:
[   27.097685]  __mutex_lock+0x5d/0xa30
[   27.098565]  ? prepare_to_wait_exclusive+0x71/0xc0
[   27.099412]  ? io_cqring_overflow_flush.part.101+0x6d/0x70
[   27.100441]  ? lockdep_hardirqs_on_prepare+0xe9/0x1c0
[   27.101537]  ? _raw_spin_unlock_irqrestore+0x2d/0x40
[   27.102656]  ? trace_hardirqs_on+0x46/0x110
[   27.103459]  ? io_cqring_overflow_flush.part.101+0x6d/0x70
[   27.104317]  io_cqring_overflow_flush.part.101+0x6d/0x70
[   27.105113]  io_cqring_wait+0x36e/0x4d0
[   27.105770]  ? find_held_lock+0x28/0xb0
[   27.106370]  ? io_uring_remove_task_files+0xa0/0xa0
[   27.107076]  __x64_sys_io_uring_enter+0x4fb/0x640
[   27.107801]  ? rcu_read_lock_sched_held+0x59/0xa0
[   27.108562]  ? lockdep_hardirqs_on_prepare+0xe9/0x1c0
[   27.109684]  ? syscall_enter_from_user_mode+0x26/0x70
[   27.110731]  do_syscall_64+0x2d/0x40
[   27.111296]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   27.112056] RIP: 0033:0x7f7b13dc8239
[   27.112663] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 3d 01 f0 ff ff 73 01 c3 48 8b 0d 27 ec 2c 00 f7 d8 64 89 01 48
[   27.115113] RSP: 002b:00007ffd6d7f5c88 EFLAGS: 00000286 ORIG_RAX: 00000000000001aa
[   27.116562] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7b13dc8239
[   27.117961] RDX: 000000000000478e RSI: 0000000000000000 RDI: 0000000000000003
[   27.118925] RBP: 00007ffd6d7f5cb0 R08: 0000000020000040 R09: 0000000000000008
[   27.119773] R10: 0000000000000001 R11: 0000000000000286 R12: 0000000000400480
[   27.120614] R13: 00007ffd6d7f5d90 R14: 0000000000000000 R15: 0000000000000000
[   27.121490] irq event stamp: 5635
[   27.121946] hardirqs last enabled at (5643): [] console_unlock+0x5c4/0x740
[   27.123476] hardirqs last disabled at (5652): [] console_unlock+0x4e7/0x740
[   27.125192] softirqs last enabled at (5272): [] __do_softirq+0x3c5/0x5aa
[   27.126430] softirqs last disabled at (5267): [] asm_call_irq_on_stack+0xf/0x20
[   27.127634] ---[ end trace 289d7e28fa60f928 ]---
This is caused by calling io_cqring_overflow_flush(), which may sleep, after calling prepare_to_wait_exclusive(), which sets the task state to TASK_INTERRUPTIBLE.
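In other words, the rule being restored (sketched here; the actual change is in the diff below) is that nothing which may sleep can run between prepare_to_wait_exclusive() and schedule(); any early retry must first call finish_wait():

        prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE);
        ...
        if (test_bit(0, &ctx->cq_check_overflow)) {
                /* back to TASK_RUNNING before the flush can take a mutex */
                finish_wait(&ctx->wait, &iowq.wq);
                continue;
        }
        schedule();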
Reported-by: Abaci <abaci@linux.alibaba.com>
Fixes: 6c503150ae33 ("io_uring: patch up IOPOLL overflow_flush sync")
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 03c1185270d1..b09a59edf6aa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7000,14 +7000,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                                                 TASK_INTERRUPTIBLE);
                 /* make sure we run task_work before checking for signals */
                 ret = io_run_task_work_sig();
-                if (ret > 0)
+                if (ret > 0) {
+                        finish_wait(&ctx->wait, &iowq.wq);
                         continue;
+                }
                 else if (ret < 0)
                         break;
                 if (io_should_wake(&iowq))
                         break;
-                if (test_bit(0, &ctx->cq_check_overflow))
+                if (test_bit(0, &ctx->cq_check_overflow)) {
+                        finish_wait(&ctx->wait, &iowq.wq);
                         continue;
+                }
                 schedule();
         } while (1);
         finish_wait(&ctx->wait, &iowq.wq);
[ Upstream commit f609cbb8911e40e15f9055e8f945f926ac906924 ]
kernel BUG at lib/list_debug.c:29!
Call Trace:
 __list_add include/linux/list.h:67 [inline]
 list_add include/linux/list.h:86 [inline]
 io_file_get+0x8cc/0xdb0 fs/io_uring.c:6466
 __io_splice_prep+0x1bc/0x530 fs/io_uring.c:3866
 io_splice_prep fs/io_uring.c:3920 [inline]
 io_req_prep+0x3546/0x4e80 fs/io_uring.c:6081
 io_queue_sqe+0x609/0x10d0 fs/io_uring.c:6628
 io_submit_sqe fs/io_uring.c:6705 [inline]
 io_submit_sqes+0x1495/0x2720 fs/io_uring.c:6953
 __do_sys_io_uring_enter+0x107d/0x1f30 fs/io_uring.c:9353
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
io_file_get() may be called from splice, and so REQ_F_INFLIGHT may already be set.
Fixes: 02a13674fa0e8 ("io_uring: account io_uring internal files as REQ_F_INFLIGHT")
Cc: stable@vger.kernel.org # 5.9+
Reported-by: syzbot+6879187cf57845801267@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b09a59edf6aa..296b4cb44c7d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6170,7 +6170,8 @@ static struct file *io_file_get(struct io_submit_state *state,
                 file = __io_file_get(state, fd);
         }
 
-        if (file && file->f_op == &io_uring_fops) {
+        if (file && file->f_op == &io_uring_fops &&
+            !(req->flags & REQ_F_INFLIGHT)) {
                 io_req_init_async(req);
                 req->flags |= REQ_F_INFLIGHT;
[ Upstream commit 70b2c60d3797bffe182dddb9bb55975b9be5889a ]
WARNING: CPU: 0 PID: 21359 at fs/io_uring.c:9042 io_uring_cancel_task_requests+0xe55/0x10c0 fs/io_uring.c:9042
Call Trace:
 io_uring_flush+0x47b/0x6e0 fs/io_uring.c:9227
 filp_close+0xb4/0x170 fs/open.c:1295
 close_files fs/file.c:403 [inline]
 put_files_struct fs/file.c:418 [inline]
 put_files_struct+0x1cc/0x350 fs/file.c:415
 exit_files+0x7e/0xa0 fs/file.c:435
 do_exit+0xc22/0x2ae0 kernel/exit.c:820
 do_group_exit+0x125/0x310 kernel/exit.c:922
 get_signal+0x427/0x20f0 kernel/signal.c:2773
 arch_do_signal_or_restart+0x2a8/0x1eb0 arch/x86/kernel/signal.c:811
 handle_signal_work kernel/entry/common.c:147 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
 exit_to_user_mode_prepare+0x148/0x250 kernel/entry/common.c:201
 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
 syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Now that io_uring_cancel_task_requests() can be called directly, and not only through file notes, remove a WARN_ONCE() there that gives us false positives. The check is not very important, and we catch the problem in other places.
Fixes: 84965ff8a84f0 ("io_uring: if we see flush on exit, cancel related tasks")
Cc: stable@vger.kernel.org # 5.9+
Reported-by: syzbot+3e3d9bd0c6ce9efbc3ef@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 --
 1 file changed, 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 296b4cb44c7d..0bda8cc25845 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8683,8 +8683,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
         struct task_struct *task = current;
 
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
-                /* for SQPOLL only sqo_task has task notes */
-                WARN_ON_ONCE(ctx->sqo_task != current);
                 io_disable_sqo_submit(ctx);
                 task = ctx->sq_data->thread;
                 atomic_inc(&task->io_uring->in_idle);
[ Upstream commit 3a7efd1ad269ccaf9c1423364d97c9661ba6dafa ]
What 84965ff8a84f0 ("io_uring: if we see flush on exit, cancel related tasks") really wants is to cancel all relevant REQ_F_INFLIGHT requests reliably. That can be achieved by io_uring_cancel_files(), but we'll miss it when io_uring_flush() calls io_uring_cancel_task_requests(files=NULL), because that path goes through __io_uring_cancel_task_requests().

Just always call io_uring_cancel_files() during cancel; it's good enough for now.
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0bda8cc25845..5eaf3a7badcc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8692,10 +8692,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
         io_cancel_defer_files(ctx, task, files);
         io_cqring_overflow_flush(ctx, true, task, files);
 
+        io_uring_cancel_files(ctx, task, files);
         if (!files)
                 __io_uring_cancel_task_requests(ctx, task);
-        else
-                io_uring_cancel_files(ctx, task, files);
 
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
                 atomic_dec(&task->io_uring->in_idle);
[ Upstream commit aec18a57edad562d620f7d19016de1fc0cc2208c ]
Since the SQPOLL task can be shared, its task_work entries can be a mix from different rings, so we need to drop mm and files before trying to issue the next request.
Cc: stable@vger.kernel.org # 5.10+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5eaf3a7badcc..3c9bb7673da5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2084,6 +2084,9 @@ static void __io_req_task_submit(struct io_kiocb *req)
         else
                 __io_req_task_cancel(req, -EFAULT);
         mutex_unlock(&ctx->uring_lock);
+
+        if (ctx->flags & IORING_SETUP_SQPOLL)
+                io_sq_thread_drop_mm();
 }
 
 static void io_req_task_submit(struct callback_head *cb)
On Tue, Feb 09, 2021 at 04:47:34AM +0000, Pavel Begunkov wrote:
> A bit more than expected: apart from the 9 failed-to-apply patches there
> are lots of dependencies on them, but for the most part they merged
> automatically.
>
> Hao Xu (1):
>   io_uring: fix flush cqring overflow list while TASK_INTERRUPTIBLE
>
> Jens Axboe (2):
>   io_uring: account io_uring internal files as REQ_F_INFLIGHT
>   io_uring: if we see flush on exit, cancel related tasks
>
> Pavel Begunkov (13):
>   io_uring: simplify io_task_match()
>   io_uring: add a {task,files} pair matching helper
>   io_uring: don't iterate io_uring_cancel_files()
>   io_uring: pass files into kill timeouts/poll
>   io_uring: always batch cancel in *cancel_files()
>   io_uring: fix files cancellation
>   io_uring: fix __io_uring_files_cancel() with TASK_UNINTERRUPTIBLE
>   io_uring: replace inflight_wait with tctx->wait
>   io_uring: fix cancellation taking mutex while TASK_UNINTERRUPTIBLE
>   io_uring: fix list corruption for splice file_get
>   io_uring: fix sqo ownership false positive warning
>   io_uring: reinforce cancel on flush during exit
>   io_uring: drop mm/files between task_work_submit
>
>  fs/io-wq.c    |  10 --
>  fs/io-wq.h    |   1 -
>  fs/io_uring.c | 360 ++++++++++++++++++++------------------------------
>  3 files changed, 141 insertions(+), 230 deletions(-)
All now queued up, thanks!
greg k-h