Commit 0ef625bba6fb ("vfs: support statx(..., NULL, AT_EMPTY_PATH, ...)") added support for passing in NULL when AT_EMPTY_PATH is given, improving performance when statx is used for fetching stat information from a given fd, which is especially important for 32-bit platforms. This commit also improved the performance when an empty string is given by short-circuiting the handling of such paths.
This series is based on the commits in Linus’ tree. Compared to the original patches, the helper vfs_empty_path() is moved to stat.c from linux/fs.h, because get_user() is only available in fs.h since v5.7, where commit 80fbaf1c3f29 ('rcuwait: Add @state argument to rcuwait_wait_event()') added linux/sched/signal.h to rcuwait.h, and uaccess.h finally got its way to fs.h along the path uaccess.h -> sched/task.h -> sched/signal.h -> rcuwait.h -> percpu-rwsem.h -> fs.h. uaccess.h cannot be directly included in fs.h before v5.7, where commit df23e2be3d24 ('acpi: Remove header dependency') removed proc_fs.h from acpi/acpi_bus.h, preventing arch/x86/boot/compressed/cmdline.c from indirectly including fs.h. Otherwise, the function set_fs() defined in asm/uaccess.h will get into cmdline.c, which contains another set_fs(), resulting in conflicting function definitions. There are no users of vfs_empty_path() except stat.c, and as a result, putting it in stat.c is acceptable.
The existing vfs_statx_fd(), which was removed upstream in v5.10, is utilized to implement short-circuit handling of NULL and "" paths, instead of introducing vfs_statx_path(), simplifying the implementation.
Tested-by: Xi Ruoyao xry111@xry111.site Signed-off-by: Miao Wang shankerwangmiao@gmail.com --- Christian Brauner (2): fs: new helper vfs_empty_path() stat: use vfs_empty_path() helper
Christoph Hellwig (2): fs: implement vfs_stat and vfs_lstat in terms of vfs_fstatat fs: move vfs_fstatat out of line
Linus Torvalds (1): vfs: mostly undo glibc turning 'fstat()' into 'fstatat(AT_EMPTY_PATH)'
Mateusz Guzik (1): vfs: support statx(..., NULL, AT_EMPTY_PATH, ...)
fs/stat.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/fs.h | 26 ++++++++++-------------- 2 files changed, 63 insertions(+), 21 deletions(-) --- base-commit: 661f109c057497c8baf507a2562ceb9f9fb3cbc2 change-id: 20240918-statx-stable-linux-5-4-y-a79d4268600d
Best regards,
From: Christoph Hellwig hch@lst.de
commit 0b2c669 upstream.
Go through vfs_fstatat instead of duplicating the *stat to statx mapping three times.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Al Viro viro@zeniv.linux.org.uk
Cc: stable@vger.kernel.org # 4.19.x-5.4.x Signed-off-by: Miao Wang shankerwangmiao@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site --- include/linux/fs.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h index d4f5fcc60744..2db4e5f7d00b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3262,16 +3262,6 @@ extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int);
-static inline int vfs_stat(const char __user *filename, struct kstat *stat) -{ - return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); -} -static inline int vfs_lstat(const char __user *name, struct kstat *stat) -{ - return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); -} static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { @@ -3283,6 +3273,14 @@ static inline int vfs_fstat(int fd, struct kstat *stat) return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); }
+static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, filename, stat, 0); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); +}
extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int);
From: Christoph Hellwig hch@lst.de
commit 09f1bde upstream.
This allows keeping vfs_statx static in fs/stat.c to prepare for the following changes.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Al Viro viro@zeniv.linux.org.uk
Cc: stable@vger.kernel.org # 4.19.x-5.4.x Signed-off-by: Miao Wang shankerwangmiao@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site --- fs/stat.c | 9 +++++++-- include/linux/fs.h | 10 +++------- 2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/fs/stat.c b/fs/stat.c index 268c9eb89656..b09a0e2a6681 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -165,7 +165,7 @@ EXPORT_SYMBOL(vfs_statx_fd); * * 0 will be returned on success, and a -ve error code if unsuccessful. */ -int vfs_statx(int dfd, const char __user *filename, int flags, +static int vfs_statx(int dfd, const char __user *filename, int flags, struct kstat *stat, u32 request_mask) { struct path path; @@ -197,8 +197,13 @@ int vfs_statx(int dfd, const char __user *filename, int flags, out: return error; } -EXPORT_SYMBOL(vfs_statx);
+int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); +}
#ifdef __ARCH_WANT_OLD_STAT
diff --git a/include/linux/fs.h b/include/linux/fs.h index 2db4e5f7d00b..952f103be4a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3259,20 +3259,16 @@ extern const struct inode_operations simple_symlink_inode_operations;
extern int iterate_dir(struct file *, struct dir_context *);
-extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int);
-static inline int vfs_fstatat(int dfd, const char __user *filename, - struct kstat *stat, int flags) -{ - return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); -} static inline int vfs_fstat(int fd, struct kstat *stat) { return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); }
+int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, + int flags); + static inline int vfs_stat(const char __user *filename, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, filename, stat, 0);
From: Linus Torvalds torvalds@linux-foundation.org
commit 9013c51 upstream.
Mateusz reports that glibc turns 'fstat()' calls into 'fstatat()', and that seems to have been going on for quite a long time due to glibc having tried to simplify its stat logic into just one point.
This turns out to cause completely unnecessary overhead, where we then go off and allocate the kernel side pathname, and actually look up the empty path. Sure, our path lookup is quite optimized, but it still causes a fair bit of allocation overhead and a couple of completely unnecessary rounds of lockref accesses etc.
This is all hopefully getting fixed in user space, and there is a patch floating around for just having glibc use the native fstat() system call. But even with the current situation we can at least improve on things by catching the situation and short-circuiting it.
Note that this is still measurably slower than just a plain 'fstat()', since just checking that the filename is actually empty is somewhat expensive due to inevitable user space access overhead from the kernel (ie verifying pointers, and SMAP on x86). But it's still quite a bit faster than actually looking up the path for real.
To quote numbers from Mateusz: "Sapphire Rapids, will-it-scale, ops/s
stock fstat 5088199 patched fstat 7625244 (+49%) real fstat 8540383 (+67% / +12%)"
where that 'stock fstat' is the glibc translation of fstat into fstatat() with an empty path, the 'patched fstat' is with this short circuiting of the path lookup, and the 'real fstat' is the actual native fstat() system call with none of this overhead.
Link: https://lore.kernel.org/lkml/20230903204858.lv7i3kqvw6eamhgz@f/ Reported-by: Mateusz Guzik mjguzik@gmail.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Cc: stable@vger.kernel.org # 4.19.x-5.4.x Signed-off-by: Miao Wang shankerwangmiao@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site --- fs/stat.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/fs/stat.c b/fs/stat.c index b09a0e2a6681..526fa0801cad 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -201,6 +201,22 @@ static int vfs_statx(int dfd, const char __user *filename, int flags, int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { + /* + * Work around glibc turning fstat() into fstatat(AT_EMPTY_PATH) + * + * If AT_EMPTY_PATH is set, we expect the common case to be that + * empty path, and avoid doing all the extra pathname work. + */ + if (dfd >= 0 && flags == AT_EMPTY_PATH) { + char c; + + ret = get_user(c, filename); + if (unlikely(ret)) + return ret; + + if (likely(!c)) + return vfs_fstat(dfd, stat); + } return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); }
From: Christian Brauner brauner@kernel.org
commit 1bc6d44 upstream.
Make it possible to quickly check whether AT_EMPTY_PATH is valid. Note, after some discussion we decided to also allow NULL to be passed instead of requiring the empty string.
Signed-off-by: Christian Brauner brauner@kernel.org
Cc: stable@vger.kernel.org # 4.19.x-5.4.x Signed-off-by: Miao Wang shankerwangmiao@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site --- fs/stat.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/fs/stat.c b/fs/stat.c index 526fa0801cad..3ae958308e48 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -150,6 +150,23 @@ int vfs_statx_fd(unsigned int fd, struct kstat *stat, } EXPORT_SYMBOL(vfs_statx_fd);
+static inline bool vfs_empty_path(int dfd, const char __user *path) +{ + char c; + + if (dfd < 0) + return false; + + /* We now allow NULL to be used for empty path. */ + if (!path) + return true; + + if (unlikely(get_user(c, path))) + return false; + + return !c; +} + /** * vfs_statx - Get basic and extra attributes by filename * @dfd: A file descriptor representing the base dir for a relative filename
From: Christian Brauner brauner@kernel.org
commit 27a2d0c upstream.
Use the newly added helper for this.
Signed-off-by: Christian Brauner brauner@kernel.org
Cc: stable@vger.kernel.org # 4.19.x-5.4.x Signed-off-by: Miao Wang shankerwangmiao@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site --- fs/stat.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/fs/stat.c b/fs/stat.c index 3ae958308e48..1aaa5d847db8 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -224,16 +224,9 @@ int vfs_fstatat(int dfd, const char __user *filename, * If AT_EMPTY_PATH is set, we expect the common case to be that * empty path, and avoid doing all the extra pathname work. */ - if (dfd >= 0 && flags == AT_EMPTY_PATH) { - char c; + if (flags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename)) + return vfs_fstat(dfd, stat);
- ret = get_user(c, filename); - if (unlikely(ret)) - return ret; - - if (likely(!c)) - return vfs_fstat(dfd, stat); - } return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); }
From: Mateusz Guzik mjguzik@gmail.com
commit 0ef625b upstream.
The newly used helper also checks for empty ("") paths.
NULL paths with any flag value other than AT_EMPTY_PATH go the usual route and end up with -EFAULT to retain compatibility (Rust is abusing calls of the sort to detect availability of statx).
This avoids path lookup code, lockref management, memory allocation and in case of NULL path userspace memory access (which can be quite expensive with SMAP on x86_64).
Benchmarked with statx(..., AT_EMPTY_PATH, ...) running on Sapphire Rapids, with the "" path for the first two cases and NULL for the last one.
Results in ops/s: stock: 4231237 pre-check: 5944063 (+40%) NULL path: 6601619 (+11%/+56%)
Signed-off-by: Mateusz Guzik mjguzik@gmail.com Link: https://lore.kernel.org/r/20240625151807.620812-1-mjguzik@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site [brauner: use path_mounted() and other tweaks] Signed-off-by: Christian Brauner brauner@kernel.org
Cc: stable@vger.kernel.org # 4.19.x-5.4.x Signed-off-by: Miao Wang shankerwangmiao@gmail.com Tested-by: Xi Ruoyao xry111@xry111.site --- fs/stat.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/fs/stat.c b/fs/stat.c index 1aaa5d847db8..111443789ced 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -590,13 +590,14 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. - * @filename: File to stat or "" with AT_EMPTY_PATH + * @filename: File to stat or either NULL or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * * Note that fstat() can be emulated by setting dfd to the fd of interest, - * supplying "" as the filename and setting AT_EMPTY_PATH in the flags. + * supplying "" (or preferably NULL) as the filename and setting AT_EMPTY_PATH + * in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, @@ -605,13 +606,29 @@ SYSCALL_DEFINE5(statx, { struct kstat stat; int error; + unsigned lflags;
if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL;
- error = vfs_statx(dfd, filename, flags, &stat, mask); + /* + * Short-circuit handling of NULL and "" paths. + * + * For a NULL path we require and accept only the AT_EMPTY_PATH flag + * (possibly |'d with AT_STATX flags). + * + * However, glibc on 32-bit architectures implements fstatat as statx + * with the "" pathname and AT_NO_AUTOMOUNT | AT_EMPTY_PATH flags. + * Supporting this results in the uglification below. + */ + lflags = flags & ~(AT_NO_AUTOMOUNT | AT_STATX_SYNC_TYPE); + if (lflags == AT_EMPTY_PATH && vfs_empty_path(dfd, filename)) + error = vfs_statx_fd(dfd, &stat, mask, + flags & ~(AT_NO_AUTOMOUNT | AT_EMPTY_PATH)); + else + error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error;
linux-stable-mirror@lists.linaro.org