Hi,
The goal of this patch series is to reduce the performance impact of walking through a lot of files while being landlocked. Indeed, because of the unprivileged nature of Landlock, each file access implies to check access granted to each directory of the path, which slows down open time.
Currently, openat(2) calls spend more than 22% of their time in hook_file_open(). The performance impact for a common worth case scenario is significantly reduced thanks to this patch series, theoretically going from O(n) with n as the depth of a path, to O(1) (cf. benchmarks in the caching patch).
This series adds a new security hook (resolve_path_at) and uses it to implement access caching in Landlock. I'm planning to build on top of that for other improvements (using task's working directory and task's root directory) but that will require other hook changes.
This new hook is also a first step to be able to securely restrict file descriptors used for path resolution (e.g. dirfd in openat2).
Caching may be difficult to get right especially for security checks. I extended the current tests and I'm still working on new ones. If you have test/attack scenarios, please share them. I would really appreciate constructive reviews for these critical changes. This series can be applied on top of v5.13 .
Regards,
Mickaël Salaün (4): fs,security: Add resolve_path_at() hook landlock: Add filesystem rule caching selftests/landlock: Work in a temporary directory selftests/landlock: Check all possible intermediate directories
fs/namei.c | 9 + include/linux/lsm_hook_defs.h | 2 + include/linux/lsm_hooks.h | 8 + include/linux/security.h | 9 + security/landlock/cache.h | 77 +++++++ security/landlock/cred.c | 15 +- security/landlock/cred.h | 20 +- security/landlock/fs.c | 224 +++++++++++++++++++-- security/landlock/fs.h | 29 +++ security/landlock/setup.c | 2 + security/security.c | 6 + tools/testing/selftests/landlock/fs_test.c | 205 ++++++++++++++----- 12 files changed, 544 insertions(+), 62 deletions(-) create mode 100644 security/landlock/cache.h
base-commit: 62fb9874f5da54fdb243003b386128037319b219
From: Mickaël Salaün mic@linux.microsoft.com
This new resolve_path_at() security hook enables checking absolute and relative pathname resolutions according to the task's root directory, the task's working directory, or an open file descriptor (e.g. thanks to *at() syscalls).
It is useful to (efficiently) restrict access to file hierarchies (e.g. leveraging openat2()'s resolve flags) according to file descriptors, and to improve path access check time, which is used by Landlock in the following commit.
Cc: Al Viro viro@zeniv.linux.org.uk Cc: James Morris jmorris@namei.org Cc: Jann Horn jannh@google.com Cc: Kees Cook keescook@chromium.org Cc: Serge Hallyn serge@hallyn.com Signed-off-by: Mickaël Salaün mic@linux.microsoft.com Link: https://lore.kernel.org/r/20210630224856.1313928-2-mic@digikod.net --- fs/namei.c | 9 +++++++++ include/linux/lsm_hook_defs.h | 2 ++ include/linux/lsm_hooks.h | 8 ++++++++ include/linux/security.h | 9 +++++++++ security/security.c | 6 ++++++ 5 files changed, 34 insertions(+)
diff --git a/fs/namei.c b/fs/namei.c index 79b0ff9b151e..6efdd0b8e2b1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2331,6 +2331,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags) error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); + error = security_resolve_path_at(&nd->path, NULL, flags); + if (error) + return ERR_PTR(error); return s; }
@@ -2350,6 +2353,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags) get_fs_pwd(current->fs, &nd->path); nd->inode = nd->path.dentry->d_inode; } + error = security_resolve_path_at(&nd->path, NULL, flags); + if (error) + return ERR_PTR(error); } else { /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(nd->dfd); @@ -2373,7 +2379,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags) path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } + error = security_resolve_path_at(&nd->path, f.file, flags); fdput(f); + if (error) + return ERR_PTR(error); }
/* For scoped-lookups we need to set the root to the dirfd as well. */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 04c01794de83..1db6d53b1dfb 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -178,6 +178,8 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, resolve_path_at, const struct path *path_at, + struct file *file_at, int lookup_flags) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 5c4c5c0602cb..2ef130e36309 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -600,6 +600,14 @@ * Save open-time permission checking state for later use upon * file_permission, and recheck access if anything has changed * since inode_permission. + * @resolve_path_at: + * Check path resolution from a file descriptor, or the current working + * directory, or the current root directory. + * Can be called in RCU read-side critical section. + * @path_at points to the base path. + * @file_at can point to the file descriptor used to resolve the path, or + * be NULL for AT_FDCWD. + * @lookup_flags contains the lookup options (e.g. LOOKUP_IS_SCOPED). * * Security hooks for task operations. * diff --git a/include/linux/security.h b/include/linux/security.h index 06f7c50ce77f..e3ae93b9189d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -391,6 +391,8 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_resolve_path_at(const struct path *path_at, struct file *file_at, + int lookup_flags); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); @@ -1011,6 +1013,13 @@ static inline int security_file_open(struct file *file) return 0; }
+static inline int security_resolve_path_at(const struct path *path_at, + struct file *file_at, + int lookup_flags) +{ + return 0; +} + static inline int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { diff --git a/security/security.c b/security/security.c index b38155b2de83..98161f5919ee 100644 --- a/security/security.c +++ b/security/security.c @@ -1637,6 +1637,12 @@ int security_file_open(struct file *file) return fsnotify_perm(file, MAY_OPEN); }
+int security_resolve_path_at(const struct path *path_at, struct file *file_at, + int lookup_flags) +{ + return call_int_hook(resolve_path_at, 0, path_at, file_at, lookup_flags); +} + int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { int rc = lsm_task_alloc(task);
From: Mickaël Salaün mic@linux.microsoft.com
Because of the unprivileged nature of Landlock, the main issue with the current implementation is that path access checks require to walk from the leaf file to the (real) root directory for each open-like syscall. To limit this path walk, leverage the new resolve_path_at() hook to reuse cached accesses collected for files at open time, when these files are then used for relative path resolution (e.g. dirfd in openat()-like syscalls).
Indeed, in practice, software walking through the filesystem (e.g. "find" tool) trigger a lot of access checks. However, efficient walk implementations reuse opened directory file descriptors to continue the walk to children directories. Being able to cache such access checks leads to significant performance improvements.
This cache mechanism is implemented thanks to new Landlock security blobs attached to tasks and files. Each landlocked task keeps a (weak) reference to the last file used to open a path. If the referenced base path matches a directory in the current path walk and if the access request was already granted to the base path, then the path walk stops early and access is granted.
Each file opened by a landlocked task gets a cache that will be valid as long as the backing struct file exists. This cache also references the domain for which the cached accesses are valid.
Running the "find" tool on a directory with Linux source files (version 5.13, which contains a depth of less than 10 nested source directories) is used as a metric because it is a real-life worse case that requires to walk through a lot of files in different directories. To highlight the measured performance impact, we put these source files in directories of different depth: 1, 10 and 20. Here are the times taken by Landlock for an open syscall after 10,000 iterations with the mainline kernel and the caching changes:
Depth of 1: /0/ - v5.13: 28% - this patch: 22%
Depth of 10: /0/1/.../9/ - v5.13: 43% - this patch: 25%
Depth of 20: /0/1/.../19/ - v5.13: 54% - this patch: 30%
The performance impact for such worth case is significantly reduced: theoretically going from O(n) with n as the depth of a path, to O(1), but as we can see there is still an increased penalty with the depth. This may be explained with the relative ".." resolutions that don't benefit from this caching. As a side note, the getdents64 syscall (not impacted by Landlock) takes 18 times longer than the openat syscall in is this scenario.
Cc: James Morris jmorris@namei.org Cc: Jann Horn jannh@google.com Cc: Kees Cook keescook@chromium.org Cc: Serge Hallyn serge@hallyn.com Signed-off-by: Mickaël Salaün mic@linux.microsoft.com Link: https://lore.kernel.org/r/20210630224856.1313928-3-mic@digikod.net --- security/landlock/cache.h | 77 +++++++++++++ security/landlock/cred.c | 15 ++- security/landlock/cred.h | 20 +++- security/landlock/fs.c | 224 +++++++++++++++++++++++++++++++++++--- security/landlock/fs.h | 29 +++++ security/landlock/setup.c | 2 + 6 files changed, 350 insertions(+), 17 deletions(-) create mode 100644 security/landlock/cache.h
diff --git a/security/landlock/cache.h b/security/landlock/cache.h new file mode 100644 index 000000000000..7ceaa3b15671 --- /dev/null +++ b/security/landlock/cache.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Landlock LSM - Generic cache management + * + * Copyright © 2021 Microsoft Corporation + */ + +#ifndef _SECURITY_LANDLOCK_CACHE_H +#define _SECURITY_LANDLOCK_CACHE_H + +#include <linux/compiler.h> +#include <linux/refcount.h> +#include <linux/types.h> + +#include "ruleset.h" + +/** + * struct landlock_cache - Generic access cache for an object + * + * Store cached access rights for a Landlock object (i.e. tied to specific + * domain). Allowed accesses are set once (e.g. at file opening) and never + * change after that. As a side effect, this means that such cache created + * before a domain transition will not get an up to date allowed accesses. + * This implies to always check a cached domain against the current domain + * thanks to landlock_cache_is_valid(). + * + * This struct is part of a typed cache (e.g. &struct landlock_fs_cache.core) + * that identifies the tied object. + */ +struct landlock_cache { + /** + * @dangling_domain: If not NULL, points to the domain for which + * @allowed_accesses is valid. This brings two constraints: + * - @dangling_domain must only be read with READ_ONCE() and written + * with WRITE_ONCE() (except at initialization). + * - @dangling_domain can only be safely dereferenced by the cache + * owner (e.g. with landlock_disable_cache() when the underlying file + * is being closed). + */ + void *dangling_domain; + /** + * @usage: This counter is used to keep a cache alive while it can + * still be checked against. + */ + refcount_t usage; + /** + * @allowed_accesses: Mask of absolute known-to-be allowed accesses to + * an object at creation-time (e.g. at open-time for the file hierarchy + * of a file descriptor). A bit not set doesn't mean that the related + * access is denied. The type of access is inferred from the type of + * the related object. The task domain may not be the same as the + * cached one and they must then be checked against each other when + * evaluating @allowed_accesses thanks to landlock_cache_is_valid(). + */ + u16 allowed_accesses; +}; + +static inline void landlock_disable_cache(struct landlock_cache *cache) +{ + struct landlock_ruleset *const dom = cache->dangling_domain; + + /* Atomically marks the cache as disabled. */ + WRITE_ONCE(cache->dangling_domain, NULL); + /* + * There is no need for other synchronisation mechanism because the + * domain is never dereferenced elsewhere. + */ + landlock_put_ruleset(dom); +} + +static inline bool landlock_cache_is_valid(const struct landlock_cache *cache, + const struct landlock_ruleset *domain) +{ + return (domain && domain == READ_ONCE(cache->dangling_domain)); +} + +#endif /* _SECURITY_LANDLOCK_CACHE_H */ diff --git a/security/landlock/cred.c b/security/landlock/cred.c index 6725af24c684..8c9408dd46e1 100644 --- a/security/landlock/cred.c +++ b/security/landlock/cred.c @@ -1,16 +1,19 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Landlock LSM - Credential hooks + * Landlock LSM - Credential and task hooks * * Copyright © 2017-2020 Mickaël Salaün mic@digikod.net * Copyright © 2018-2020 ANSSI + * Copyright © 2021 Microsoft Corporation */
#include <linux/cred.h> #include <linux/lsm_hooks.h> +#include <linux/sched.h>
#include "common.h" #include "cred.h" +#include "fs.h" #include "ruleset.h" #include "setup.h"
@@ -34,9 +37,19 @@ static void hook_cred_free(struct cred *const cred) landlock_put_ruleset_deferred(dom); }
+static void hook_task_free(struct task_struct *const task) +{ + struct landlock_fs_cache *const last_at_cache = + landlock_task(task)->cache.last_at; + + landlock_put_fs_cache(last_at_cache); +} + static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(cred_prepare, hook_cred_prepare), LSM_HOOK_INIT(cred_free, hook_cred_free), + + LSM_HOOK_INIT(task_free, hook_task_free), };
__init void landlock_add_cred_hooks(void) diff --git a/security/landlock/cred.h b/security/landlock/cred.h index 5f99d3decade..0734a14933c1 100644 --- a/security/landlock/cred.h +++ b/security/landlock/cred.h @@ -1,9 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Landlock LSM - Credential hooks + * Landlock LSM - Credential and task hooks * * Copyright © 2019-2020 Mickaël Salaün mic@digikod.net * Copyright © 2019-2020 ANSSI + * Copyright © 2021 Microsoft Corporation */
#ifndef _SECURITY_LANDLOCK_CRED_H @@ -13,6 +14,7 @@ #include <linux/init.h> #include <linux/rcupdate.h>
+#include "fs.h" #include "ruleset.h" #include "setup.h"
@@ -20,13 +22,27 @@ struct landlock_cred_security { struct landlock_ruleset *domain; };
+struct landlock_task_cache { + struct landlock_fs_cache *last_at; +}; + +struct landlock_task_security { + struct landlock_task_cache cache; +}; + static inline struct landlock_cred_security *landlock_cred( const struct cred *cred) { return cred->security + landlock_blob_sizes.lbs_cred; }
-static inline const struct landlock_ruleset *landlock_get_current_domain(void) +static inline struct landlock_task_security *landlock_task( + const struct task_struct *task) +{ + return task->security + landlock_blob_sizes.lbs_task; +} + +static inline struct landlock_ruleset *landlock_get_current_domain(void) { return landlock_cred(current_cred())->domain; } diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 97b8e421f617..f9ea08f4078f 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -4,6 +4,7 @@ * * Copyright © 2016-2020 Mickaël Salaün mic@digikod.net * Copyright © 2018-2020 ANSSI + * Copyright © 2021 Microsoft Corporation */
#include <linux/atomic.h> @@ -20,6 +21,7 @@ #include <linux/lsm_hooks.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/overflow.h> #include <linux/path.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> @@ -29,6 +31,7 @@ #include <linux/workqueue.h> #include <uapi/linux/landlock.h>
+#include "cache.h" #include "common.h" #include "cred.h" #include "fs.h" @@ -37,6 +40,78 @@ #include "ruleset.h" #include "setup.h"
+/* Cache management */ + +/** + * struct landlock_fs_cache - Landlock access cache for the filesystem + * + * Cached accesses for a filesystem object relative to a specific domain. + */ +struct landlock_fs_cache { + /** + * @dangling_path: Enables identifying a path (same mnt and dentry + * fields), iff the other path is held and + * READ_ONCE(core->dangling_domain) is not NULL. Because there is no + * synchronization mechanism, @dangling_path can only be safely + * derefenced by the cache owner (e.g. with create_fs_cache()). + */ + struct path dangling_path; + /** + * @core: Stores the generic caching. + */ + struct landlock_cache core; +}; + +static void build_check_cache(void) +{ + const struct landlock_cache cache = { + .allowed_accesses = ~0, + }; + struct landlock_layer *layer; + struct landlock_ruleset *ruleset; + + BUILD_BUG_ON(cache.allowed_accesses < LANDLOCK_MASK_ACCESS_FS); + BUILD_BUG_ON(!__same_type(cache.allowed_accesses, layer->access)); + BUILD_BUG_ON(!__same_type(cache.allowed_accesses, + ruleset->fs_access_masks[0])); +} + +static struct landlock_fs_cache *create_fs_cache( + struct landlock_ruleset *domain, const u16 allowed_accesses, + struct path *const path) +{ + struct landlock_fs_cache *cache; + + build_check_cache(); + + /* Only creates useful cache. */ + if (!allowed_accesses) + return NULL; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL_ACCOUNT); + if (!cache) + return NULL; + + landlock_get_ruleset(domain); + cache->core.dangling_domain = domain; + cache->core.allowed_accesses = allowed_accesses; + path_get(path); + cache->dangling_path = *path; + refcount_set(&cache->core.usage, 1); + return cache; +} + +static void landlock_get_fs_cache(struct landlock_fs_cache *cache) +{ + refcount_inc(&cache->core.usage); +} + +void landlock_put_fs_cache(struct landlock_fs_cache *cache) +{ + if (cache && refcount_dec_and_test(&cache->core.usage)) + kfree(cache); +} + /* Underlying object management */
static void release_inode(struct landlock_object *const object) @@ -180,10 +255,42 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
/* Access-control management */
+static inline bool is_allowed_in_cache( + const struct landlock_ruleset *const domain, + const struct path *const path, const u32 access_request, + u16 (*const denied_cache)[]) +{ + size_t i; + struct landlock_fs_cache *const fs_cache = + landlock_task(current)->cache.last_at; + + if (!fs_cache) + return false; + /* + * This domain check protects against race conditions when checking if + * @path is equal to fs_cache->dangling_path because we own @path and + * fs_cache->dangling_path is only reset after the cache is disabled. + */ + if (!landlock_cache_is_valid(&fs_cache->core, domain)) + return false; + /* path_equal() doesn't dereference the mnt and dentry fields. */ + if (!path_equal(path, &fs_cache->dangling_path)) + return false; + + /* Fills the returned cache as much as possible. */ + for (i = 0; i < domain->num_layers; i++) + /* Removes allowed accesses from the denied lists. */ + (*denied_cache)[i] &= ~fs_cache->core.allowed_accesses; + + if ((access_request & fs_cache->core.allowed_accesses) != access_request) + return false; + return true; +} + static inline u64 unmask_layers( const struct landlock_ruleset *const domain, const struct path *const path, const u32 access_request, - u64 layer_mask) + u64 layer_mask, u16 (*const denied_cache)[]) { const struct landlock_rule *rule; const struct inode *inode; @@ -212,19 +319,20 @@ static inline u64 unmask_layers( const u64 layer_level = BIT_ULL(layer->level - 1);
/* Checks that the layer grants access to the full request. */ - if ((layer->access & access_request) == access_request) { + if ((layer->access & access_request) == access_request) layer_mask &= ~layer_level;
- if (layer_mask == 0) - return layer_mask; - } + /* Removes allowed accesses from the denied lists. */ + (*denied_cache)[layer->level - 1] &= ~layer->access; } return layer_mask; }
static int check_access_path(const struct landlock_ruleset *const domain, - const struct path *const path, u32 access_request) + const struct path *const path, const u32 access_request, + u16 *const allowed_accesses) { + typeof(*allowed_accesses) denied_cache[LANDLOCK_MAX_NUM_LAYERS]; bool allowed = false; struct path walker_path; u64 layer_mask; @@ -252,6 +360,8 @@ static int check_access_path(const struct landlock_ruleset *const domain, /* Saves all layers handling a subset of requested accesses. */ layer_mask = 0; for (i = 0; i < domain->num_layers; i++) { + /* Stores potentially denied accesses. */ + denied_cache[i] = domain->fs_access_masks[i]; if (domain->fs_access_masks[i] & access_request) layer_mask |= BIT_ULL(i); } @@ -268,8 +378,15 @@ static int check_access_path(const struct landlock_ruleset *const domain, while (true) { struct dentry *parent_dentry;
+ if (is_allowed_in_cache(domain, &walker_path, + access_request, &denied_cache)) { + /* Stops when access request is allowed by a cache. */ + allowed = true; + break; + } + layer_mask = unmask_layers(domain, &walker_path, - access_request, layer_mask); + access_request, layer_mask, &denied_cache); if (layer_mask == 0) { /* Stops when a rule from each layer grants access. */ allowed = true; @@ -304,6 +421,15 @@ static int check_access_path(const struct landlock_ruleset *const domain, walker_path.dentry = parent_dentry; } path_put(&walker_path); + + if (allowed_accesses) { + typeof(*allowed_accesses) allowed_cache = + LANDLOCK_MASK_ACCESS_FS; + + for (i = 0; i < domain->num_layers; i++) + allowed_cache &= ~denied_cache[i]; + *allowed_accesses = allowed_cache; + } return allowed ? 0 : -EACCES; }
@@ -315,7 +441,7 @@ static inline int current_check_access_path(const struct path *const path,
if (!dom) return 0; - return check_access_path(dom, path, access_request); + return check_access_path(dom, path, access_request, NULL); }
/* Inode hooks */ @@ -560,7 +686,8 @@ static int hook_path_link(struct dentry *const old_dentry, if (unlikely(d_is_negative(old_dentry))) return -ENOENT; return check_access_path(dom, new_dir, - get_mode_access(d_backing_inode(old_dentry)->i_mode)); + get_mode_access(d_backing_inode(old_dentry)->i_mode), + NULL); }
static inline u32 maybe_remove(const struct dentry *const dentry) @@ -590,7 +717,8 @@ static int hook_path_rename(const struct path *const old_dir, /* RENAME_EXCHANGE is handled because directories are the same. */ return check_access_path(dom, old_dir, maybe_remove(old_dentry) | maybe_remove(new_dentry) | - get_mode_access(d_backing_inode(old_dentry)->i_mode)); + get_mode_access(d_backing_inode(old_dentry)->i_mode), + NULL); }
static int hook_path_mkdir(const struct path *const dir, @@ -608,7 +736,7 @@ static int hook_path_mknod(const struct path *const dir,
if (!dom) return 0; - return check_access_path(dom, dir, get_mode_access(mode)); + return check_access_path(dom, dir, get_mode_access(mode), NULL); }
static int hook_path_symlink(const struct path *const dir, @@ -651,17 +779,83 @@ static inline u32 get_file_access(const struct file *const file)
static int hook_file_open(struct file *const file) { - const struct landlock_ruleset *const dom = - landlock_get_current_domain(); + int ret; + u16 allowed_accesses = 0; + struct landlock_ruleset *const dom = landlock_get_current_domain();
if (!dom) return 0; + /* * Because a file may be opened with O_PATH, get_file_access() may * return 0. This case will be handled with a future Landlock * evolution. */ - return check_access_path(dom, &file->f_path, get_file_access(file)); + ret = check_access_path(dom, &file->f_path, get_file_access(file), + &allowed_accesses); + + /* + * Ignores (accounted) memory allocation errors: just don't + * create a cache. + */ + landlock_file(file)->cache = create_fs_cache(dom, + allowed_accesses, &file->f_path); + return ret; +} + +static void hook_file_free_security(struct file *const file) +{ + struct landlock_fs_cache *const file_cache = + landlock_file(file)->cache; + + if (!file_cache) + return; + + /* + * If there is a cache, it is well alive because it was created at this + * file opening and is owned by this file. Disables the cache and puts + * the (dangling) object reference; the order doesn't matter. There is + * no access-control race-condition in is_allowed_in_cache() because + * the dangling path can only match if it is also owned by the caller. + */ + landlock_disable_cache(&file_cache->core); + /* + * Releases dangling_path; it may be freed at the end of file_free(). + * Even if it is not required, resets dangling_path as a safety + * measure. + */ + path_put_init(&file_cache->dangling_path); + landlock_put_fs_cache(file_cache); +} + +static int hook_resolve_path_at(const struct path *path_at, + struct file *file_at, int lookup_flags) +{ + const struct landlock_ruleset *const dom = + landlock_get_current_domain(); + struct landlock_fs_cache **const current_at_cache = + &landlock_task(current)->cache.last_at; + + if (!dom) + return 0; + + /* Clean up previous cache, if any. */ + landlock_put_fs_cache(*current_at_cache); + *current_at_cache = NULL; + + if (file_at) { + struct landlock_fs_cache *const file_at_cache = + landlock_file(file_at)->cache; + + if (!file_at_cache) + return 0; + /* Don't update existing cached accesses. */ + if (!landlock_cache_is_valid(&file_at_cache->core, dom)) + return 0; + landlock_get_fs_cache(file_at_cache); + *current_at_cache = file_at_cache; + } + return 0; }
static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = { @@ -683,6 +877,8 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
LSM_HOOK_INIT(file_open, hook_file_open), + LSM_HOOK_INIT(file_free_security, hook_file_free_security), + LSM_HOOK_INIT(resolve_path_at, hook_resolve_path_at), };
__init void landlock_add_fs_hooks(void) diff --git a/security/landlock/fs.h b/security/landlock/fs.h index 187284b421c9..2c57406f73b4 100644 --- a/security/landlock/fs.h +++ b/security/landlock/fs.h @@ -4,6 +4,7 @@ * * Copyright © 2017-2020 Mickaël Salaün mic@digikod.net * Copyright © 2018-2020 ANSSI + * Copyright © 2021 Microsoft Corporation */
#ifndef _SECURITY_LANDLOCK_FS_H @@ -12,7 +13,9 @@ #include <linux/fs.h> #include <linux/init.h> #include <linux/rcupdate.h> +#include <linux/refcount.h>
+#include "object.h" #include "ruleset.h" #include "setup.h"
@@ -50,6 +53,24 @@ struct landlock_superblock_security { atomic_long_t inode_refs; };
+struct landlock_fs_cache; + +/** + * struct landlock_file_security - File security blob + * + * Cached access right for an open file. + */ +struct landlock_file_security { + /** + * @cache: This cache is set once at the file opening and never change + * after that (except when freeing the file). This means that a file + * open before a domain transition will not get an updated cache, but + * the domain tied to the cache must always be checked with + * landlock_cache_is_valid(). + */ + struct landlock_fs_cache *cache; +}; + static inline struct landlock_inode_security *landlock_inode( const struct inode *const inode) { @@ -62,8 +83,16 @@ static inline struct landlock_superblock_security *landlock_superblock( return superblock->s_security + landlock_blob_sizes.lbs_superblock; }
+static inline struct landlock_file_security *landlock_file( + const struct file *const file) +{ + return file->f_security + landlock_blob_sizes.lbs_file; +} + __init void landlock_add_fs_hooks(void);
+void landlock_put_fs_cache(struct landlock_fs_cache *cache); + int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, const struct path *const path, u32 access_hierarchy);
diff --git a/security/landlock/setup.c b/security/landlock/setup.c index f8e8e980454c..42d381dd0f0c 100644 --- a/security/landlock/setup.c +++ b/security/landlock/setup.c @@ -19,8 +19,10 @@ bool landlock_initialized __lsm_ro_after_init = false;
struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = { .lbs_cred = sizeof(struct landlock_cred_security), + .lbs_file = sizeof(struct landlock_file_security), .lbs_inode = sizeof(struct landlock_inode_security), .lbs_superblock = sizeof(struct landlock_superblock_security), + .lbs_task = sizeof(struct landlock_task_security), };
static int __init landlock_init(void)
From: Mickaël Salaün mic@linux.microsoft.com
To be able to test the current working directory, run all tests in a temporary directory instead of in its parent directory. This is required for the following commit.
Cc: James Morris jmorris@namei.org Cc: Jann Horn jannh@google.com Cc: Kees Cook keescook@chromium.org Cc: Serge Hallyn serge@hallyn.com Cc: Shuah Khan shuah@kernel.org Signed-off-by: Mickaël Salaün mic@linux.microsoft.com Link: https://lore.kernel.org/r/20210630224856.1313928-4-mic@digikod.net --- tools/testing/selftests/landlock/fs_test.c | 65 ++++++++++++---------- 1 file changed, 35 insertions(+), 30 deletions(-)
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 10c9a1e4ebd9..403c8255311f 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -23,31 +23,31 @@ #include "common.h"
#define TMP_DIR "tmp" -#define BINARY_PATH "./true" +#define BINARY_PATH "../true"
/* Paths (sibling number and depth) */ -static const char dir_s1d1[] = TMP_DIR "/s1d1"; -static const char file1_s1d1[] = TMP_DIR "/s1d1/f1"; -static const char file2_s1d1[] = TMP_DIR "/s1d1/f2"; -static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2"; -static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1"; -static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2"; -static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3"; -static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1"; -static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2"; - -static const char dir_s2d1[] = TMP_DIR "/s2d1"; -static const char file1_s2d1[] = TMP_DIR "/s2d1/f1"; -static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2"; -static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1"; -static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3"; -static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1"; -static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2"; - -static const char dir_s3d1[] = TMP_DIR "/s3d1"; +static const char dir_s1d1[] = "./s1d1"; +static const char file1_s1d1[] = "./s1d1/f1"; +static const char file2_s1d1[] = "./s1d1/f2"; +static const char dir_s1d2[] = "./s1d1/s1d2"; +static const char file1_s1d2[] = "./s1d1/s1d2/f1"; +static const char file2_s1d2[] = "./s1d1/s1d2/f2"; +static const char dir_s1d3[] = "./s1d1/s1d2/s1d3"; +static const char file1_s1d3[] = "./s1d1/s1d2/s1d3/f1"; +static const char file2_s1d3[] = "./s1d1/s1d2/s1d3/f2"; + +static const char dir_s2d1[] = "./s2d1"; +static const char file1_s2d1[] = "./s2d1/f1"; +static const char dir_s2d2[] = "./s2d1/s2d2"; +static const char file1_s2d2[] = "./s2d1/s2d2/f1"; +static const char dir_s2d3[] = "./s2d1/s2d2/s2d3"; +static const char file1_s2d3[] = "./s2d1/s2d2/s2d3/f1"; +static const char file2_s2d3[] = "./s2d1/s2d2/s2d3/f2"; + +static const char dir_s3d1[] = "./s3d1"; /* dir_s3d2 is a mount point. */ -static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; -static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; +static const char dir_s3d2[] = "./s3d1/s3d2"; +static const char dir_s3d3[] = "./s3d1/s3d2/s3d3";
/* * layout1 hierarchy: @@ -140,11 +140,12 @@ static int remove_path(const char *const path) walker[i] = '\0'; ret = rmdir(walker); if (ret) { - if (errno != ENOTEMPTY && errno != EBUSY) + if (errno != ENOTEMPTY && errno != EBUSY + && errno != EINVAL) err = errno; goto out; } - if (strcmp(walker, TMP_DIR) == 0) + if (strcmp(walker, ".") == 0) goto out; }
@@ -168,10 +169,14 @@ static void prepare_layout(struct __test_metadata *const _metadata) ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700")); ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL)); clear_cap(_metadata, CAP_SYS_ADMIN); + + ASSERT_EQ(0, chdir(TMP_DIR)); }
static void cleanup_layout(struct __test_metadata *const _metadata) { + EXPECT_EQ(0, chdir("..")); + set_cap(_metadata, CAP_SYS_ADMIN); EXPECT_EQ(0, umount(TMP_DIR)); clear_cap(_metadata, CAP_SYS_ADMIN); @@ -1370,7 +1375,7 @@ static void test_relative_path(struct __test_metadata *const _metadata, */ const struct rule layer1_base[] = { { - .path = TMP_DIR, + .path = ".", .access = ACCESS_RO, }, {} @@ -2095,8 +2100,8 @@ FIXTURE_TEARDOWN(layout1_bind) cleanup_layout(_metadata); }
-static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3"; -static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1"; +static const char bind_dir_s1d3[] = "./s2d1/s2d2/s1d3"; +static const char bind_file1_s1d3[] = "./s2d1/s2d2/s1d3/f1";
/* * layout1_bind hierarchy: @@ -2282,7 +2287,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file) ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); }
-#define LOWER_BASE TMP_DIR "/lower" +#define LOWER_BASE "./lower" #define LOWER_DATA LOWER_BASE "/data" static const char lower_fl1[] = LOWER_DATA "/fl1"; static const char lower_dl1[] = LOWER_DATA "/dl1"; @@ -2309,7 +2314,7 @@ static const char (*lower_sub_files[])[] = { NULL };
-#define UPPER_BASE TMP_DIR "/upper" +#define UPPER_BASE "./upper" #define UPPER_DATA UPPER_BASE "/data" #define UPPER_WORK UPPER_BASE "/work" static const char upper_fu1[] = UPPER_DATA "/fu1"; @@ -2337,7 +2342,7 @@ static const char (*upper_sub_files[])[] = { NULL };
-#define MERGE_BASE TMP_DIR "/merge" +#define MERGE_BASE "./merge" #define MERGE_DATA MERGE_BASE "/data" static const char merge_fl1[] = MERGE_DATA "/fl1"; static const char merge_dl1[] = MERGE_DATA "/dl1";
From: Mickaël Salaün mic@linux.microsoft.com
Open and store file descriptors for the initial test directory at layout initialization and at sandbox creation. This enables checking relative path opening with different scenarios. To be sure to test all combinations, the intermediate path checks are of exponential complexity, which is OK for these use case though. All meaningful tests now also check relative paths.
Use some macros to avoid useless helper call rewrites with a context tied to each test.
Cc: James Morris jmorris@namei.org Cc: Jann Horn jannh@google.com Cc: Kees Cook keescook@chromium.org Cc: Serge Hallyn serge@hallyn.com Cc: Shuah Khan shuah@kernel.org Signed-off-by: Mickaël Salaün mic@linux.microsoft.com Link: https://lore.kernel.org/r/20210630224856.1313928-5-mic@digikod.net --- tools/testing/selftests/landlock/fs_test.c | 140 ++++++++++++++++++--- 1 file changed, 125 insertions(+), 15 deletions(-)
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 403c8255311f..b1a91fdd0f88 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -154,7 +154,13 @@ static int remove_path(const char *const path) return err; }
-static void prepare_layout(struct __test_metadata *const _metadata) +struct layout_context { + int init_layout_cwd_fd; + int init_sandbox_cwd_fd; +}; + +static void prepare_layout(struct __test_metadata *const _metadata, + struct layout_context *const ctx) { disable_caps(_metadata); umask(0077); @@ -171,10 +177,18 @@ static void prepare_layout(struct __test_metadata *const _metadata) clear_cap(_metadata, CAP_SYS_ADMIN);
ASSERT_EQ(0, chdir(TMP_DIR)); + ctx->init_layout_cwd_fd = open(".", O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, ctx->init_layout_cwd_fd); + ctx->init_sandbox_cwd_fd = -1; }
-static void cleanup_layout(struct __test_metadata *const _metadata) +static void cleanup_layout(struct __test_metadata *const _metadata, + const struct layout_context *const ctx) { + EXPECT_EQ(0, close(ctx->init_layout_cwd_fd)); + if (ctx->init_sandbox_cwd_fd != -1) { + EXPECT_EQ(0, close(ctx->init_sandbox_cwd_fd)); + } EXPECT_EQ(0, chdir(".."));
set_cap(_metadata, CAP_SYS_ADMIN); @@ -227,11 +241,12 @@ static void remove_layout1(struct __test_metadata *const _metadata) }
FIXTURE(layout1) { + struct layout_context ctx; };
FIXTURE_SETUP(layout1) { - prepare_layout(_metadata); + prepare_layout(_metadata, &self->ctx);
create_layout1(_metadata); } @@ -240,7 +255,7 @@ FIXTURE_TEARDOWN(layout1) { remove_layout1(_metadata);
- cleanup_layout(_metadata); + cleanup_layout(_metadata, &self->ctx); }
/* @@ -264,11 +279,84 @@ static int test_open_rel(const int dirfd, const char *const path, const int flag return 0; }
-static int test_open(const char *const path, const int flags) +static int _test_open_all(const int base_fd, char *const path, const int flags) +{ + int ret_child, i; + + ret_child = test_open_rel(base_fd, path, flags); + if (ret_child == ENOENT) + return ret_child; + + for (i = strlen(path); i > 0; i--) { + int inter_fd, ret_parent; + + if (path[i] != '/') + continue; + path[i] = '\0'; + /* Using a non-O_PATH FD fills the cache. */ + inter_fd = openat(base_fd, path, O_CLOEXEC | O_DIRECTORY); + path[i] = '/'; + if (inter_fd < 0) { + if (errno != EACCES) + return -1; + /* If the sandbox denies access, then use O_PATH. */ + path[i] = '\0'; + inter_fd = openat(base_fd, path, O_CLOEXEC | + O_DIRECTORY | O_PATH); + path[i] = '/'; + if (inter_fd < 0) + return -1; + } + + /* + * Checks all possible inter_fd combinations with + * recursive calls. + */ + ret_parent = _test_open_all(inter_fd, path + i + 1, flags); + close(inter_fd); + + /* Checks inconsistencies that may come from the cache. */ + if (ret_parent != ret_child) + return -1; + } + return ret_child; +} + +static int _test_open_ctx(const struct layout_context *const ctx, + const char *const path, const int flags) { - return test_open_rel(AT_FDCWD, path, flags); + char *walker; + int ret_init, ret_next, i; + /* + * Checks with a FD opened without sandbox, and another FD opened + * within a sandbox (but maybe not the same). + */ + const int extra_dirfd[] = { + ctx->init_layout_cwd_fd, + ctx->init_sandbox_cwd_fd + }; + + walker = strdup(path); + if (!walker) + return ENOMEM; + + ret_init = _test_open_all(AT_FDCWD, walker, flags); + for (i = 0; i < ARRAY_SIZE(extra_dirfd); i++) { + const int dirfd = extra_dirfd[i]; + + if (dirfd != -1) { + ret_next = _test_open_all(dirfd, walker, flags); + if (ret_next != ret_init) + return -1; + } + } + + free(walker); + return ret_init; }
+#define test_open(path, flags) _test_open_ctx(&self->ctx, path, flags) + TEST_F_FORK(layout1, no_restriction) { ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); @@ -493,15 +581,28 @@ static int create_ruleset(struct __test_metadata *const _metadata, return ruleset_fd; }
-static void enforce_ruleset(struct __test_metadata *const _metadata, - const int ruleset_fd) +static void _enforce_ruleset(struct layout_context *const ctx, + struct __test_metadata *const _metadata, const int ruleset_fd) { ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) { TH_LOG("Failed to enforce ruleset: %s", strerror(errno)); } + + if (ctx->init_sandbox_cwd_fd == -1) { + ctx->init_sandbox_cwd_fd = open(".", O_DIRECTORY | O_CLOEXEC); + if (ctx->init_sandbox_cwd_fd == -1) { + ASSERT_EQ(EACCES, errno); + ctx->init_sandbox_cwd_fd = open(".", O_DIRECTORY | + O_CLOEXEC | O_PATH); + ASSERT_LE(0, ctx->init_sandbox_cwd_fd); + } + } }
+#define enforce_ruleset(_metadata, ruleset_fd) \ + _enforce_ruleset(&self->ctx, _metadata, ruleset_fd) + TEST_F_FORK(layout1, proc_nsfs) { const struct rule rules[] = { @@ -1265,7 +1366,6 @@ TEST_F_FORK(layout1, rule_inside_mount_ns) enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd));
- ASSERT_EQ(0, test_open("s3d3", O_RDONLY)); ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); }
@@ -1366,7 +1466,8 @@ enum relative_access { REL_CHROOT_CHDIR, };
-static void test_relative_path(struct __test_metadata *const _metadata, +static void _test_relative_path(struct __test_metadata *const _metadata, + FIXTURE_DATA(layout1) *const self, const enum relative_access rel) { /* @@ -1479,6 +1580,8 @@ static void test_relative_path(struct __test_metadata *const _metadata, ASSERT_EQ(0, close(ruleset_fd)); }
+#define test_relative_path(_metadata, rel) _test_relative_path(_metadata, self, rel) + TEST_F_FORK(layout1, relative_open) { test_relative_path(_metadata, REL_OPEN); @@ -1809,7 +1912,8 @@ TEST_F_FORK(layout1, remove_file) ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0)); }
-static void test_make_file(struct __test_metadata *const _metadata, +static void _test_make_file(struct __test_metadata *const _metadata, + FIXTURE_DATA(layout1) *self, const __u64 access, const mode_t mode, const dev_t dev) { const struct rule rules[] = { @@ -1860,6 +1964,10 @@ static void test_make_file(struct __test_metadata *const _metadata, ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); }
+#define test_make_file(_metadata, access, mode, dev) \ + _test_make_file(_metadata, self, access, mode, dev) + + TEST_F_FORK(layout1, make_char) { /* Creates a /dev/null device. */ @@ -2076,11 +2184,12 @@ TEST_F_FORK(layout1, proc_pipe) }
FIXTURE(layout1_bind) { + struct layout_context ctx; };
FIXTURE_SETUP(layout1_bind) { - prepare_layout(_metadata); + prepare_layout(_metadata, &self->ctx);
create_layout1(_metadata);
@@ -2097,7 +2206,7 @@ FIXTURE_TEARDOWN(layout1_bind)
remove_layout1(_metadata);
- cleanup_layout(_metadata); + cleanup_layout(_metadata, &self->ctx); }
static const char bind_dir_s1d3[] = "./s2d1/s2d2/s1d3"; @@ -2417,11 +2526,12 @@ static const char (*merge_sub_files[])[] = { */
FIXTURE(layout2_overlay) { + struct layout_context ctx; };
FIXTURE_SETUP(layout2_overlay) { - prepare_layout(_metadata); + prepare_layout(_metadata, &self->ctx);
create_directory(_metadata, LOWER_BASE); set_cap(_metadata, CAP_SYS_ADMIN); @@ -2484,7 +2594,7 @@ FIXTURE_TEARDOWN(layout2_overlay) clear_cap(_metadata, CAP_SYS_ADMIN); EXPECT_EQ(0, remove_path(MERGE_DATA));
- cleanup_layout(_metadata); + cleanup_layout(_metadata, &self->ctx); }
TEST_F_FORK(layout2_overlay, no_restriction)
linux-kselftest-mirror@lists.linaro.org