epoll is a mess, and does various invalid things in the name of performance.
Let's try to rein it in a bit. Something like this, perhaps?
Not-yet-signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
This is entirely untested, thus the "Not-yet-signed-off-by". But I think this may be kind of the right path forward.
I suspect the ->poll() call is the main case that matters, but there are other places where eventpoll just looks up the file pointer without then being very careful about it. The sock_from_file(epi->ffd.file) uses in particular should probably also use this to look up the file.
Comments?
 fs/eventpoll.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 882b89edc52a..bffa8083ff36 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -285,6 +285,30 @@ static inline void free_ephead(struct epitems_head *head)
 	kmem_cache_free(ephead_cache, head);
 }
 
+/*
+ * The ffd.file pointer may be in the process of
+ * being torn down due to being closed, but we
+ * may not have finished eventpoll_release() yet.
+ *
+ * Technically, even with the atomic_long_inc_not_zero,
+ * the file may have been free'd and then gotten
+ * re-allocated to something else (since files are
+ * not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
+ *
+ * But for epoll, we don't much care.
+ */
+static struct file *epi_fget(const struct epitem *epi)
+{
+	struct file *file;
+
+	rcu_read_lock();
+	file = epi->ffd.file;
+	if (!atomic_long_inc_not_zero(&file->f_count))
+		file = NULL;
+	rcu_read_unlock();
+	return file;
+}
+
 static void list_file(struct file *file)
 {
 	struct epitems_head *head;
@@ -987,14 +1011,18 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
 static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
 				 int depth)
 {
-	struct file *file = epi->ffd.file;
+	struct file *file = epi_fget(epi);
 	__poll_t res;
 
+	if (!file)
+		return 0;
+
 	pt->_key = epi->event.events;
 	if (!is_file_epoll(file))
 		res = vfs_poll(file, pt);
 	else
 		res = __ep_eventpoll_poll(file, pt, depth);
+	fput(file);
 	return res & epi->event.events;
 }
 