On 5/7/20 4:06 PM, Al Viro wrote:
On Thu, May 07, 2020 at 02:53:30PM -0600, Jens Axboe wrote:
I think the patch is correct as-is, I took a good look at how we're currently handling it. None of those three ops should fiddle with the fd at all, and all of them do forbid the use of fixed files (the descriptor table look-alikes), so that part is fine, too.
There's some low hanging fruit around optimizing and improving it, I'm including an updated version below. Max, can you double check with your testing?
<looks>
Could you explain WTF io_issue_sqe() is doing in the case of IORING_OP_CLOSE? Specifically, what is the value of this check in io_close_prep():

	req->close.fd = READ_ONCE(sqe->fd);
	if (req->file->f_op == &io_uring_fops ||
	    req->close.fd == req->ctx->ring_fd)
		return -EBADF;

And what happens if some joker does dup2() of something with io_uring_fops into our slot right after that check? Before the subsequent

	ret = __close_fd_get_file(req->close.fd, &req->close.put_file);

that is.
I agree, there's a gap there. We should do the check in the op handler, and under the files_struct lock. How about something like the below?
diff --git a/fs/file.c b/fs/file.c
index c8a4e4c86e55..50ee73b76d17 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -646,18 +646,13 @@ int __close_fd(struct files_struct *files, unsigned fd)
 }
 EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
-/*
- * variant of __close_fd that gets a ref on the file for later fput.
- * The caller must ensure that filp_close() called on the file, and then
- * an fput().
- */
-int __close_fd_get_file(unsigned int fd, struct file **res)
+int __close_fd_get_file_locked(struct files_struct *files, unsigned int fd,
+			       struct file **res)
+	__releases(&files->file_lock)
 {
-	struct files_struct *files = current->files;
 	struct file *file;
 	struct fdtable *fdt;
-	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
 	if (fd >= fdt->max_fds)
 		goto out_unlock;
@@ -677,6 +672,19 @@ int __close_fd_get_file(unsigned int fd, struct file **res)
 	return -ENOENT;
 }
+/*
+ * variant of __close_fd that gets a ref on the file for later fput.
+ * The caller must ensure that filp_close() called on the file, and then
+ * an fput().
+ */
+int __close_fd_get_file(unsigned int fd, struct file **res)
+{
+	struct files_struct *files = current->files;
+
+	spin_lock(&files->file_lock);
+	return __close_fd_get_file_locked(files, fd, res);
+}
+
 void do_close_on_exec(struct files_struct *files)
 {
 	unsigned i;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 979d9f977409..740547106717 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3399,10 +3399,6 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EBADF;
 	req->close.fd = READ_ONCE(sqe->fd);
-	if (req->file->f_op == &io_uring_fops ||
-	    req->close.fd == req->ctx->ring_fd)
-		return -EBADF;
-
 	return 0;
 }
@@ -3430,10 +3426,19 @@ static void io_close_finish(struct io_wq_work **workptr)
 static int io_close(struct io_kiocb *req, bool force_nonblock)
 {
+	struct files_struct *files = current->files;
 	int ret;
 	req->close.put_file = NULL;
-	ret = __close_fd_get_file(req->close.fd, &req->close.put_file);
+	spin_lock(&files->file_lock);
+	if (req->file->f_op == &io_uring_fops ||
+	    req->close.fd == req->ctx->ring_fd) {
+		spin_unlock(&files->file_lock);
+		return -EBADF;
+	}
+
+	ret = __close_fd_get_file_locked(files, req->close.fd,
+					 &req->close.put_file);
 	if (ret < 0)
 		return ret;
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index f07c55ea0c22..11d19303af46 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -122,6 +122,8 @@ extern void __fd_install(struct files_struct *files,
 extern int __close_fd(struct files_struct *files, unsigned int fd);
 extern int __close_fd_get_file(unsigned int fd, struct file **res);
+extern int __close_fd_get_file_locked(struct files_struct *files,
+				      unsigned int fd, struct file **res);
extern struct kmem_cache *files_cachep;