Hi,
This patch series fixes read-only issue when non-empty workdir occurred in overlayfs, the non-empty workdir could be easily reproduced in power-failure test during write operations.
These patches have passed basic test in unionmount-testsuite.
Antonio Murdaca (1): ovl: override creds with the ones from the superblock mounter
Miklos Szeredi (2): ovl: rename is_merge to is_lowest ovl: proper cleanup of workdir
fs/overlayfs/copy_up.c | 26 +---------- fs/overlayfs/dir.c | 67 +++-------------------------- fs/overlayfs/overlayfs.h | 3 ++ fs/overlayfs/readdir.c | 93 +++++++++++++++++++++++++++++++--------- fs/overlayfs/super.c | 20 ++++++++- 5 files changed, 100 insertions(+), 109 deletions(-)
From: Miklos Szeredi mszeredi@redhat.com
commit 56656e960b555cb98bc414382566dcb59aae99a2 upstream
The 'is_merge' is an historical naming from when only a single lower layer could exist. With the introduction of multiple lower layers the meaning of this flag was changed to mean only the "lowest layer" (while all lower layers were being merged).
So now 'is_merge' is inaccurate and hence renaming to 'is_lowest'
Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: SZ Lin (林上智) sz.lin@moxa.com --- fs/overlayfs/readdir.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0c59955c4653..42f2612bfd98 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -36,7 +36,7 @@ struct ovl_dir_cache {
struct ovl_readdir_data { struct dir_context ctx; - bool is_merge; + bool is_lowest; struct rb_root root; struct list_head *list; struct list_head middle; @@ -140,9 +140,9 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; }
-static int ovl_fill_lower(struct ovl_readdir_data *rdd, - const char *name, int namelen, - loff_t offset, u64 ino, unsigned int d_type) +static int ovl_fill_lowest(struct ovl_readdir_data *rdd, + const char *name, int namelen, + loff_t offset, u64 ino, unsigned int d_type) { struct ovl_cache_entry *p;
@@ -194,10 +194,10 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, container_of(ctx, struct ovl_readdir_data, ctx);
rdd->count++; - if (!rdd->is_merge) + if (!rdd->is_lowest) return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); else - return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); + return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); }
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) @@ -290,7 +290,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) .ctx.actor = ovl_fill_merge, .list = list, .root = RB_ROOT, - .is_merge = false, + .is_lowest = false, }; int idx, next;
@@ -307,7 +307,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) * allows offsets to be reasonably constant */ list_add(&rdd.middle, rdd.list); - rdd.is_merge = true; + rdd.is_lowest = true; err = ovl_dir_read(&realpath, &rdd); list_del(&rdd.middle); }
From: Antonio Murdaca amurdaca@redhat.com
commit 3fe6e52f062643676eb4518d68cee3bc1272091b upstream
In user namespace the whiteout creation fails with -EPERM because the current process isn't capable(CAP_SYS_ADMIN) when setting xattr.
A simple reproducer:
$ mkdir upper lower work merged lower/dir $ sudo mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged $ unshare -m -p -f -U -r bash
Now as root in the user namespace:
# touch merged/dir/{1,2,3} # this will force a copy up of lower/dir # rm -fR merged/*
This ends up failing with -EPERM after the files in dir has been correctly deleted:
unlinkat(4, "2", 0) = 0 unlinkat(4, "1", 0) = 0 unlinkat(4, "3", 0) = 0 close(4) = 0 unlinkat(AT_FDCWD, "merged/dir", AT_REMOVEDIR) = -1 EPERM (Operation not permitted)
Interestingly, if you don't place files in merged/dir you can remove it, meaning if upper/dir does not exist, creating the char device file works properly in that same location.
This patch uses ovl_sb_creator_cred() to get the cred struct from the superblock mounter and override the old cred with these new ones so that the whiteout creation is possible because overlay is wrong in assuming that the creds it will get with prepare_creds will be in the initial user namespace. The old cap_raise game is removed in favor of just overriding the old cred struct.
This patch also drops from ovl_copy_up_one() the following two lines:
override_cred->fsuid = stat->uid; override_cred->fsgid = stat->gid;
This is because the correct uid and gid are taken directly with the stat struct and correctly set with ovl_set_attr().
Signed-off-by: Antonio Murdaca runcom@redhat.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Signed-off-by: SZ Lin (林上智) sz.lin@moxa.com --- fs/overlayfs/copy_up.c | 26 +--------------- fs/overlayfs/dir.c | 67 +++------------------------------------- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/readdir.c | 14 ++------- fs/overlayfs/super.c | 18 ++++++++++- 5 files changed, 27 insertions(+), 99 deletions(-)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 63a0d0ba36de..64c5386d0c1b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -317,7 +317,6 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *upperdir; struct dentry *upperdentry; const struct cred *old_cred; - struct cred *override_cred; char *link = NULL;
if (WARN_ON(!workdir)) @@ -336,28 +335,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return PTR_ERR(link); }
- err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_free_link; - - override_cred->fsuid = stat->uid; - override_cred->fsgid = stat->gid; - /* - * CAP_SYS_ADMIN for copying up extended attributes - * CAP_DAC_OVERRIDE for create - * CAP_FOWNER for chmod, timestamp update - * CAP_FSETID for chmod - * CAP_CHOWN for chown - * CAP_MKNOD for mknod - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - cap_raise(override_cred->cap_effective, CAP_FSETID); - cap_raise(override_cred->cap_effective, CAP_CHOWN); - cap_raise(override_cred->cap_effective, CAP_MKNOD); - old_cred = override_creds(override_cred); + old_cred = ovl_override_creds(dentry->d_sb);
err = -EIO; if (lock_rename(workdir, upperdir) != NULL) { @@ -380,9 +358,7 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, out_unlock: unlock_rename(workdir, upperdir); revert_creds(old_cred); - put_cred(override_cred);
-out_free_link: if (link) free_page((unsigned long) link);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 327177df03a5..f8aa54272121 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -408,28 +408,13 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, err = ovl_create_upper(dentry, inode, &stat, link, hardlink); } else { const struct cred *old_cred; - struct cred *override_cred;
- err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_iput; - - /* - * CAP_SYS_ADMIN for setting opaque xattr - * CAP_DAC_OVERRIDE for create in workdir, rename - * CAP_FOWNER for removing whiteout from sticky dir - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - old_cred = override_creds(override_cred); + old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_create_over_whiteout(dentry, inode, &stat, link, hardlink);
revert_creds(old_cred); - put_cred(override_cred); }
if (!err) @@ -659,32 +644,11 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (OVL_TYPE_PURE_UPPER(type)) { err = ovl_remove_upper(dentry, is_dir); } else { - const struct cred *old_cred; - struct cred *override_cred; - - err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_drop_write; - - /* - * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir - * CAP_DAC_OVERRIDE for create in workdir, rename - * CAP_FOWNER for removing whiteout from sticky dir - * CAP_FSETID for chmod of opaque dir - * CAP_CHOWN for chown of opaque dir - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - cap_raise(override_cred->cap_effective, CAP_FSETID); - cap_raise(override_cred->cap_effective, CAP_CHOWN); - old_cred = override_creds(override_cred); + const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_remove_and_whiteout(dentry, is_dir);
revert_creds(old_cred); - put_cred(override_cred); } out_drop_write: ovl_drop_write(dentry); @@ -723,7 +687,6 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, bool new_is_dir = false; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; - struct cred *override_cred = NULL;
err = -EINVAL; if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) @@ -792,26 +755,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, old_opaque = !OVL_TYPE_PURE_UPPER(old_type); new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
- if (old_opaque || new_opaque) { - err = -ENOMEM; - override_cred = prepare_creds(); - if (!override_cred) - goto out_drop_write; - - /* - * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir - * CAP_DAC_OVERRIDE for create in workdir - * CAP_FOWNER for removing whiteout from sticky dir - * CAP_FSETID for chmod of opaque dir - * CAP_CHOWN for chown of opaque dir - */ - cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - cap_raise(override_cred->cap_effective, CAP_FOWNER); - cap_raise(override_cred->cap_effective, CAP_FSETID); - cap_raise(override_cred->cap_effective, CAP_CHOWN); - old_cred = override_creds(override_cred); - } + if (old_opaque || new_opaque) + old_cred = ovl_override_creds(old->d_sb);
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) { opaquedir = ovl_check_empty_and_clear(new); @@ -942,10 +887,8 @@ out_dput_old: out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: - if (old_opaque || new_opaque) { + if (old_opaque || new_opaque) revert_creds(old_cred); - put_cred(override_cred); - } out_drop_write: ovl_drop_write(old); out: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 28316b292b8a..6d01bd46880c 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -150,6 +150,7 @@ void ovl_drop_write(struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); bool ovl_is_whiteout(struct dentry *dentry); +const struct cred *ovl_override_creds(struct super_block *sb); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 42f2612bfd98..7613041231fc 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -36,6 +36,7 @@ struct ovl_dir_cache {
struct ovl_readdir_data { struct dir_context ctx; + struct dentry *dentry; bool is_lowest; struct rb_root root; struct list_head *list; @@ -206,17 +207,8 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) struct ovl_cache_entry *p; struct dentry *dentry; const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) - return -ENOMEM;
- /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); + old_cred = ovl_override_creds(rdd->dentry->d_sb);
err = mutex_lock_killable(&dir->d_inode->i_mutex); if (!err) { @@ -232,7 +224,6 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) mutex_unlock(&dir->d_inode->i_mutex); } revert_creds(old_cred); - put_cred(override_cred);
return err; } @@ -288,6 +279,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) struct path realpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, + .dentry = dentry, .list = list, .root = RB_ROOT, .is_lowest = false, diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0035cb80ecd1..e9a382b94a23 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -42,6 +42,8 @@ struct ovl_fs { long lower_namelen; /* pathnames of lower and upper dirs, for show_options */ struct ovl_config config; + /* creds of process who forced instantiation of super block */ + const struct cred *creator_cred; };
struct ovl_dir_cache; @@ -246,6 +248,13 @@ bool ovl_is_whiteout(struct dentry *dentry) return inode && IS_WHITEOUT(inode); }
+const struct cred *ovl_override_creds(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return override_creds(ofs->creator_cred); +} + static bool ovl_is_opaquedir(struct dentry *dentry) { int res; @@ -587,6 +596,7 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs->config.lowerdir); kfree(ufs->config.upperdir); kfree(ufs->config.workdir); + put_cred(ufs->creator_cred); kfree(ufs); }
@@ -1107,10 +1117,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations;
+ ufs->creator_cred = prepare_creds(); + if (!ufs->creator_cred) + goto out_put_lower_mnt; + err = -ENOMEM; oe = ovl_alloc_entry(numlower); if (!oe) - goto out_put_lower_mnt; + goto out_put_cred;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe)); if (!root_dentry) @@ -1143,6 +1157,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
out_free_oe: kfree(oe); +out_put_cred: + put_cred(ufs->creator_cred); out_put_lower_mnt: for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]);
From: Miklos Szeredi mszeredi@redhat.com
commit eea2fb4851e9dcbab6b991aaf47e2e024f1f55a0 upstream
When mounting overlayfs it needs a clean "work" directory under the supplied workdir.
Previously the mount code removed this directory if it already existed and created a new one. If the removal failed (e.g. directory was not empty) then it fell back to a read-only mount not using the workdir.
While this has never been reported, it is possible to get a non-empty "work" dir from a previous mount of overlayfs in case of crash in the middle of an operation using the work directory.
In this case the left over state should be discarded and the overlay filesystem will be consistent, guaranteed by the atomicity of operations on moving to/from the workdir to the upper layer.
This patch implements cleaning out any files left in workdir. It is implemented using real recursion for simplicity, but the depth is limited to 2, because the worst case is that of a directory containing whiteouts under "work".
Signed-off-by: Miklos Szeredi mszeredi@redhat.com Cc: stable@vger.kernel.org Tested-by: Wes Huang (黃淵河) wes.huang@moxa.com Signed-off-by: SZ Lin (林上智) sz.lin@moxa.com --- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/readdir.c | 63 +++++++++++++++++++++++++++++++++++++++- fs/overlayfs/super.c | 2 +- 3 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6d01bd46880c..27a42975d7cd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -165,6 +165,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); void ovl_cache_free(struct list_head *list); int ovl_check_d_type_supported(struct path *realpath); +void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, + struct dentry *dentry, int level);
/* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7613041231fc..da999e73c97a 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath, err = rdd->err; } while (!err && rdd->count);
- if (!err && rdd->first_maybe_whiteout) + if (!err && rdd->first_maybe_whiteout && rdd->dentry) err = ovl_check_whiteouts(realpath->dentry, rdd);
fput(realfile); @@ -610,3 +610,64 @@ int ovl_check_d_type_supported(struct path *realpath)
return rdd.d_type_supported; } + +static void ovl_workdir_cleanup_recurse(struct path *path, int level) +{ + int err; + struct inode *dir = path->dentry->d_inode; + LIST_HEAD(list); + struct ovl_cache_entry *p; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .dentry = NULL, + .list = &list, + .root = RB_ROOT, + .is_lowest = false, + }; + + err = ovl_dir_read(path, &rdd); + if (err) + goto out; + + inode_lock_nested(dir, I_MUTEX_PARENT); + list_for_each_entry(p, &list, l_node) { + struct dentry *dentry; + + if (p->name[0] == '.') { + if (p->len == 1) + continue; + if (p->len == 2 && p->name[1] == '.') + continue; + } + dentry = lookup_one_len(p->name, path->dentry, p->len); + if (IS_ERR(dentry)) + continue; + if (dentry->d_inode) + ovl_workdir_cleanup(dir, path->mnt, dentry, level); + dput(dentry); + } + inode_unlock(dir); +out: + ovl_cache_free(&list); +} + +void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, + struct dentry *dentry, int level) +{ + int err; + + if (!d_is_dir(dentry) || level > 1) { + ovl_cleanup(dir, dentry); + return; + } + + err = ovl_do_rmdir(dir, dentry); + if (err) { + struct path path = { .mnt = mnt, .dentry = dentry }; + + inode_unlock(dir); + ovl_workdir_cleanup_recurse(&path, level + 1); + inode_lock_nested(dir, I_MUTEX_PARENT); + ovl_cleanup(dir, dentry); + } +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e9a382b94a23..fa20c95bd456 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -784,7 +784,7 @@ retry: goto out_dput;
retried = true; - ovl_cleanup(dir, work); + ovl_workdir_cleanup(dir, mnt, work, 0); dput(work); goto retry; }
On Tue, Aug 28, 2018 at 06:40:29PM +0800, SZ Lin (林上智) wrote:
Hi,
This patch series fixes read-only issue when non-empty workdir occurred in overlayfs, the non-empty workdir could be easily reproduced in power-failure test during write operations.
These patches have passed basic test in unionmount-testsuite.
All now applied, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org