August 2018 - Linux-stable-mirror

FAILED: patch "[PATCH] Btrfs: fix send failure when root has deleted files still" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana(a)suse.com> Date: Tue, 24 Jul 2018 11:54:04 +0100 Subject: [PATCH] Btrfs: fix send failure when root has deleted files still open The more common use case of send involves creating a RO snapshot and then use it for a send operation. In this case it's not possible to have inodes in the snapshot that have a link count of zero (inode with an orphan item) since during snapshot creation we do the orphan cleanup. However, other less common use cases for send can end up seeing inodes with a link count of zero and in this case the send operation fails with a ENOENT error because any attempt to generate a path for the inode, with the purpose of creating it or updating it at the receiver, fails since there are no inode reference items. One use case it to use a regular subvolume for a send operation after turning it to RO mode or turning a RW snapshot into RO mode and then using it for a send operation. In both cases, if a file gets all its hard links deleted while there is an open file descriptor before turning the subvolume/snapshot into RO mode, the send operation will encounter an inode with a link count of zero and then fail with errno ENOENT. Example using a full send with a subvolume: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar # keep an open file descriptor on file bar $ exec 73</mnt/sv1/bar $ unlink /mnt/sv1/bar # Turn the subvolume to RO mode and use it for a full send, while # holding the open file descriptor. $ btrfs property set /mnt/sv1 ro true $ btrfs send -f /tmp/full.send /mnt/sv1 At subvol /mnt/sv1 ERROR: send ioctl failed with -2: No such file or directory Example using an incremental send with snapshots: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1 $ echo "hello world" >> /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2 # Turn the second snapshot to RW mode and delete file foo while # holding an open file descriptor on it. $ btrfs property set /mnt/snap2 ro false $ exec 73</mnt/snap2/foo $ unlink /mnt/snap2/foo # Set the second snapshot back to RO mode and do an incremental send. $ btrfs property set /mnt/snap2 ro true $ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2 At subvol /mnt/snap2 ERROR: send ioctl failed with -2: No such file or directory So fix this by ignoring inodes with a link count of zero if we are either doing a full send or if they do not exist in the parent snapshot (they are new in the send snapshot), and unlink all paths found in the parent snapshot when doing an incremental send (and ignoring all other inode items, such as xattrs and extents). A test case for fstests follows soon. CC: stable(a)vger.kernel.org # 4.4+ Reported-by: Martin Wilck <martin.wilck(a)suse.com> Signed-off-by: Filipe Manana <fdmanana(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 42e04cd3cd95..551294a6c9e2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -100,6 +100,7 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; + bool ignore_cur_inode; u64 send_progress; @@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) int pending_move = 0; int refs_processed = 0; + if (sctx->ignore_cur_inode) + return 0; + ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, &refs_processed); if (ret < 0) @@ -5914,6 +5918,93 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) return ret; } +struct parent_paths_ctx { + struct list_head *refs; + struct send_ctx *sctx; +}; + +static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name, + void *ctx) +{ + struct parent_paths_ctx *ppctx = ctx; + + return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx, + ppctx->refs); +} + +/* + * Issue unlink operations for all paths of the current inode found in the + * parent snapshot. + */ +static int btrfs_unlink_all_paths(struct send_ctx *sctx) +{ + LIST_HEAD(deleted_refs); + struct btrfs_path *path; + struct btrfs_key key; + struct parent_paths_ctx ctx; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + ctx.refs = &deleted_refs; + ctx.sctx = sctx; + + while (true) { + struct extent_buffer *eb = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(sctx->parent_root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.objectid != sctx->cur_ino) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + ret = iterate_inode_ref(sctx->parent_root, path, &key, 1, + record_parent_ref, &ctx); + if (ret < 0) + goto out; + + path->slots[0]++; + } + + while (!list_empty(&deleted_refs)) { + struct recorded_ref *ref; + + ref = list_first_entry(&deleted_refs, struct recorded_ref, list); + ret = send_unlink(sctx, ref->full_path); + if (ret < 0) + goto out; + fs_path_free(ref->full_path); + list_del(&ref->list); + kfree(ref); + } + ret = 0; +out: + btrfs_free_path(path); + if (ret) + __free_recorded_refs(&deleted_refs); + return ret; +} + static int changed_inode(struct send_ctx *sctx, enum btrfs_compare_tree_result result) { @@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 0; sctx->cur_inode_last_extent = (u64)-1; sctx->cur_inode_next_write_offset = 0; + sctx->ignore_cur_inode = false; /* * Set send_progress to current inode. This will tell all get_cur_xxx @@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 1; } + /* + * Normally we do not find inodes with a link count of zero (orphans) + * because the most common case is to create a snapshot and use it + * for a send operation. However other less common use cases involve + * using a subvolume and send it after turning it to RO mode just + * after deleting all hard links of a file while holding an open + * file descriptor against it or turning a RO snapshot into RW mode, + * keep an open file descriptor against a file, delete it and then + * turn the snapshot back to RO mode before using it for a send + * operation. So if we find such cases, ignore the inode and all its + * items completely if it's a new inode, or if it's a changed inode + * make sure all its previous paths (from the parent snapshot) are all + * unlinked and all other the inode items are ignored. + */ + if (result == BTRFS_COMPARE_TREE_NEW || + result == BTRFS_COMPARE_TREE_CHANGED) { + u32 nlinks; + + nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii); + if (nlinks == 0) { + sctx->ignore_cur_inode = true; + if (result == BTRFS_COMPARE_TREE_CHANGED) + ret = btrfs_unlink_all_paths(sctx); + goto out; + } + } + if (result == BTRFS_COMPARE_TREE_NEW) { sctx->cur_inode_gen = left_gen; sctx->cur_inode_new = 1; @@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path, key->objectid == BTRFS_FREE_SPACE_OBJECTID) goto out; - if (key->type == BTRFS_INODE_ITEM_KEY) + if (key->type == BTRFS_INODE_ITEM_KEY) { ret = changed_inode(sctx, result); - else if (key->type == BTRFS_INODE_REF_KEY || - key->type == BTRFS_INODE_EXTREF_KEY) - ret = changed_ref(sctx, result); - else if (key->type == BTRFS_XATTR_ITEM_KEY) - ret = changed_xattr(sctx, result); - else if (key->type == BTRFS_EXTENT_DATA_KEY) - ret = changed_extent(sctx, result); + } else if (!sctx->ignore_cur_inode) { + if (key->type == BTRFS_INODE_REF_KEY || + key->type == BTRFS_INODE_EXTREF_KEY) + ret = changed_ref(sctx, result); + else if (key->type == BTRFS_XATTR_ITEM_KEY) + ret = changed_xattr(sctx, result); + else if (key->type == BTRFS_EXTENT_DATA_KEY) + ret = changed_extent(sctx, result); + } out: return ret;

7 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: fix send failure when root has deleted files still" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana(a)suse.com> Date: Tue, 24 Jul 2018 11:54:04 +0100 Subject: [PATCH] Btrfs: fix send failure when root has deleted files still open The more common use case of send involves creating a RO snapshot and then use it for a send operation. In this case it's not possible to have inodes in the snapshot that have a link count of zero (inode with an orphan item) since during snapshot creation we do the orphan cleanup. However, other less common use cases for send can end up seeing inodes with a link count of zero and in this case the send operation fails with a ENOENT error because any attempt to generate a path for the inode, with the purpose of creating it or updating it at the receiver, fails since there are no inode reference items. One use case it to use a regular subvolume for a send operation after turning it to RO mode or turning a RW snapshot into RO mode and then using it for a send operation. In both cases, if a file gets all its hard links deleted while there is an open file descriptor before turning the subvolume/snapshot into RO mode, the send operation will encounter an inode with a link count of zero and then fail with errno ENOENT. Example using a full send with a subvolume: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar # keep an open file descriptor on file bar $ exec 73</mnt/sv1/bar $ unlink /mnt/sv1/bar # Turn the subvolume to RO mode and use it for a full send, while # holding the open file descriptor. $ btrfs property set /mnt/sv1 ro true $ btrfs send -f /tmp/full.send /mnt/sv1 At subvol /mnt/sv1 ERROR: send ioctl failed with -2: No such file or directory Example using an incremental send with snapshots: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1 $ echo "hello world" >> /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2 # Turn the second snapshot to RW mode and delete file foo while # holding an open file descriptor on it. $ btrfs property set /mnt/snap2 ro false $ exec 73</mnt/snap2/foo $ unlink /mnt/snap2/foo # Set the second snapshot back to RO mode and do an incremental send. $ btrfs property set /mnt/snap2 ro true $ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2 At subvol /mnt/snap2 ERROR: send ioctl failed with -2: No such file or directory So fix this by ignoring inodes with a link count of zero if we are either doing a full send or if they do not exist in the parent snapshot (they are new in the send snapshot), and unlink all paths found in the parent snapshot when doing an incremental send (and ignoring all other inode items, such as xattrs and extents). A test case for fstests follows soon. CC: stable(a)vger.kernel.org # 4.4+ Reported-by: Martin Wilck <martin.wilck(a)suse.com> Signed-off-by: Filipe Manana <fdmanana(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 42e04cd3cd95..551294a6c9e2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -100,6 +100,7 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; + bool ignore_cur_inode; u64 send_progress; @@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) int pending_move = 0; int refs_processed = 0; + if (sctx->ignore_cur_inode) + return 0; + ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, &refs_processed); if (ret < 0) @@ -5914,6 +5918,93 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) return ret; } +struct parent_paths_ctx { + struct list_head *refs; + struct send_ctx *sctx; +}; + +static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name, + void *ctx) +{ + struct parent_paths_ctx *ppctx = ctx; + + return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx, + ppctx->refs); +} + +/* + * Issue unlink operations for all paths of the current inode found in the + * parent snapshot. + */ +static int btrfs_unlink_all_paths(struct send_ctx *sctx) +{ + LIST_HEAD(deleted_refs); + struct btrfs_path *path; + struct btrfs_key key; + struct parent_paths_ctx ctx; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + ctx.refs = &deleted_refs; + ctx.sctx = sctx; + + while (true) { + struct extent_buffer *eb = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(sctx->parent_root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.objectid != sctx->cur_ino) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + ret = iterate_inode_ref(sctx->parent_root, path, &key, 1, + record_parent_ref, &ctx); + if (ret < 0) + goto out; + + path->slots[0]++; + } + + while (!list_empty(&deleted_refs)) { + struct recorded_ref *ref; + + ref = list_first_entry(&deleted_refs, struct recorded_ref, list); + ret = send_unlink(sctx, ref->full_path); + if (ret < 0) + goto out; + fs_path_free(ref->full_path); + list_del(&ref->list); + kfree(ref); + } + ret = 0; +out: + btrfs_free_path(path); + if (ret) + __free_recorded_refs(&deleted_refs); + return ret; +} + static int changed_inode(struct send_ctx *sctx, enum btrfs_compare_tree_result result) { @@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 0; sctx->cur_inode_last_extent = (u64)-1; sctx->cur_inode_next_write_offset = 0; + sctx->ignore_cur_inode = false; /* * Set send_progress to current inode. This will tell all get_cur_xxx @@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 1; } + /* + * Normally we do not find inodes with a link count of zero (orphans) + * because the most common case is to create a snapshot and use it + * for a send operation. However other less common use cases involve + * using a subvolume and send it after turning it to RO mode just + * after deleting all hard links of a file while holding an open + * file descriptor against it or turning a RO snapshot into RW mode, + * keep an open file descriptor against a file, delete it and then + * turn the snapshot back to RO mode before using it for a send + * operation. So if we find such cases, ignore the inode and all its + * items completely if it's a new inode, or if it's a changed inode + * make sure all its previous paths (from the parent snapshot) are all + * unlinked and all other the inode items are ignored. + */ + if (result == BTRFS_COMPARE_TREE_NEW || + result == BTRFS_COMPARE_TREE_CHANGED) { + u32 nlinks; + + nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii); + if (nlinks == 0) { + sctx->ignore_cur_inode = true; + if (result == BTRFS_COMPARE_TREE_CHANGED) + ret = btrfs_unlink_all_paths(sctx); + goto out; + } + } + if (result == BTRFS_COMPARE_TREE_NEW) { sctx->cur_inode_gen = left_gen; sctx->cur_inode_new = 1; @@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path, key->objectid == BTRFS_FREE_SPACE_OBJECTID) goto out; - if (key->type == BTRFS_INODE_ITEM_KEY) + if (key->type == BTRFS_INODE_ITEM_KEY) { ret = changed_inode(sctx, result); - else if (key->type == BTRFS_INODE_REF_KEY || - key->type == BTRFS_INODE_EXTREF_KEY) - ret = changed_ref(sctx, result); - else if (key->type == BTRFS_XATTR_ITEM_KEY) - ret = changed_xattr(sctx, result); - else if (key->type == BTRFS_EXTENT_DATA_KEY) - ret = changed_extent(sctx, result); + } else if (!sctx->ignore_cur_inode) { + if (key->type == BTRFS_INODE_REF_KEY || + key->type == BTRFS_INODE_EXTREF_KEY) + ret = changed_ref(sctx, result); + else if (key->type == BTRFS_XATTR_ITEM_KEY) + ret = changed_xattr(sctx, result); + else if (key->type == BTRFS_EXTENT_DATA_KEY) + ret = changed_extent(sctx, result); + } out: return ret;

7 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: fix send failure when root has deleted files still" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana(a)suse.com> Date: Tue, 24 Jul 2018 11:54:04 +0100 Subject: [PATCH] Btrfs: fix send failure when root has deleted files still open The more common use case of send involves creating a RO snapshot and then use it for a send operation. In this case it's not possible to have inodes in the snapshot that have a link count of zero (inode with an orphan item) since during snapshot creation we do the orphan cleanup. However, other less common use cases for send can end up seeing inodes with a link count of zero and in this case the send operation fails with a ENOENT error because any attempt to generate a path for the inode, with the purpose of creating it or updating it at the receiver, fails since there are no inode reference items. One use case it to use a regular subvolume for a send operation after turning it to RO mode or turning a RW snapshot into RO mode and then using it for a send operation. In both cases, if a file gets all its hard links deleted while there is an open file descriptor before turning the subvolume/snapshot into RO mode, the send operation will encounter an inode with a link count of zero and then fail with errno ENOENT. Example using a full send with a subvolume: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar # keep an open file descriptor on file bar $ exec 73</mnt/sv1/bar $ unlink /mnt/sv1/bar # Turn the subvolume to RO mode and use it for a full send, while # holding the open file descriptor. $ btrfs property set /mnt/sv1 ro true $ btrfs send -f /tmp/full.send /mnt/sv1 At subvol /mnt/sv1 ERROR: send ioctl failed with -2: No such file or directory Example using an incremental send with snapshots: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ btrfs subvolume create /mnt/sv1 $ touch /mnt/sv1/foo $ touch /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1 $ echo "hello world" >> /mnt/sv1/bar $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2 # Turn the second snapshot to RW mode and delete file foo while # holding an open file descriptor on it. $ btrfs property set /mnt/snap2 ro false $ exec 73</mnt/snap2/foo $ unlink /mnt/snap2/foo # Set the second snapshot back to RO mode and do an incremental send. $ btrfs property set /mnt/snap2 ro true $ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2 At subvol /mnt/snap2 ERROR: send ioctl failed with -2: No such file or directory So fix this by ignoring inodes with a link count of zero if we are either doing a full send or if they do not exist in the parent snapshot (they are new in the send snapshot), and unlink all paths found in the parent snapshot when doing an incremental send (and ignoring all other inode items, such as xattrs and extents). A test case for fstests follows soon. CC: stable(a)vger.kernel.org # 4.4+ Reported-by: Martin Wilck <martin.wilck(a)suse.com> Signed-off-by: Filipe Manana <fdmanana(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 42e04cd3cd95..551294a6c9e2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -100,6 +100,7 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; + bool ignore_cur_inode; u64 send_progress; @@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) int pending_move = 0; int refs_processed = 0; + if (sctx->ignore_cur_inode) + return 0; + ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, &refs_processed); if (ret < 0) @@ -5914,6 +5918,93 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) return ret; } +struct parent_paths_ctx { + struct list_head *refs; + struct send_ctx *sctx; +}; + +static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name, + void *ctx) +{ + struct parent_paths_ctx *ppctx = ctx; + + return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx, + ppctx->refs); +} + +/* + * Issue unlink operations for all paths of the current inode found in the + * parent snapshot. + */ +static int btrfs_unlink_all_paths(struct send_ctx *sctx) +{ + LIST_HEAD(deleted_refs); + struct btrfs_path *path; + struct btrfs_key key; + struct parent_paths_ctx ctx; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + ctx.refs = &deleted_refs; + ctx.sctx = sctx; + + while (true) { + struct extent_buffer *eb = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(sctx->parent_root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.objectid != sctx->cur_ino) + break; + if (key.type != BTRFS_INODE_REF_KEY && + key.type != BTRFS_INODE_EXTREF_KEY) + break; + + ret = iterate_inode_ref(sctx->parent_root, path, &key, 1, + record_parent_ref, &ctx); + if (ret < 0) + goto out; + + path->slots[0]++; + } + + while (!list_empty(&deleted_refs)) { + struct recorded_ref *ref; + + ref = list_first_entry(&deleted_refs, struct recorded_ref, list); + ret = send_unlink(sctx, ref->full_path); + if (ret < 0) + goto out; + fs_path_free(ref->full_path); + list_del(&ref->list); + kfree(ref); + } + ret = 0; +out: + btrfs_free_path(path); + if (ret) + __free_recorded_refs(&deleted_refs); + return ret; +} + static int changed_inode(struct send_ctx *sctx, enum btrfs_compare_tree_result result) { @@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 0; sctx->cur_inode_last_extent = (u64)-1; sctx->cur_inode_next_write_offset = 0; + sctx->ignore_cur_inode = false; /* * Set send_progress to current inode. This will tell all get_cur_xxx @@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_new_gen = 1; } + /* + * Normally we do not find inodes with a link count of zero (orphans) + * because the most common case is to create a snapshot and use it + * for a send operation. However other less common use cases involve + * using a subvolume and send it after turning it to RO mode just + * after deleting all hard links of a file while holding an open + * file descriptor against it or turning a RO snapshot into RW mode, + * keep an open file descriptor against a file, delete it and then + * turn the snapshot back to RO mode before using it for a send + * operation. So if we find such cases, ignore the inode and all its + * items completely if it's a new inode, or if it's a changed inode + * make sure all its previous paths (from the parent snapshot) are all + * unlinked and all other the inode items are ignored. + */ + if (result == BTRFS_COMPARE_TREE_NEW || + result == BTRFS_COMPARE_TREE_CHANGED) { + u32 nlinks; + + nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii); + if (nlinks == 0) { + sctx->ignore_cur_inode = true; + if (result == BTRFS_COMPARE_TREE_CHANGED) + ret = btrfs_unlink_all_paths(sctx); + goto out; + } + } + if (result == BTRFS_COMPARE_TREE_NEW) { sctx->cur_inode_gen = left_gen; sctx->cur_inode_new = 1; @@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path, key->objectid == BTRFS_FREE_SPACE_OBJECTID) goto out; - if (key->type == BTRFS_INODE_ITEM_KEY) + if (key->type == BTRFS_INODE_ITEM_KEY) { ret = changed_inode(sctx, result); - else if (key->type == BTRFS_INODE_REF_KEY || - key->type == BTRFS_INODE_EXTREF_KEY) - ret = changed_ref(sctx, result); - else if (key->type == BTRFS_XATTR_ITEM_KEY) - ret = changed_xattr(sctx, result); - else if (key->type == BTRFS_EXTENT_DATA_KEY) - ret = changed_extent(sctx, result); + } else if (!sctx->ignore_cur_inode) { + if (key->type == BTRFS_INODE_REF_KEY || + key->type == BTRFS_INODE_EXTREF_KEY) + ret = changed_ref(sctx, result); + else if (key->type == BTRFS_XATTR_ITEM_KEY) + ret = changed_xattr(sctx, result); + else if (key->type == BTRFS_EXTENT_DATA_KEY) + ret = changed_extent(sctx, result); + } out: return ret;

7 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock" failed to apply to 3.18-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 3.18-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001 From: Josef Bacik <jbacik(a)fb.com> Date: Fri, 20 Jul 2018 11:46:10 -0700 Subject: [PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock We recently ran into the following deadlock involving btrfs_write_inode(): [ +0.005066] __schedule+0x38e/0x8c0 [ +0.007144] schedule+0x36/0x80 [ +0.006447] bit_wait+0x11/0x60 [ +0.006446] __wait_on_bit+0xbe/0x110 [ +0.007487] ? bit_wait_io+0x60/0x60 [ +0.007319] __inode_wait_for_writeback+0x96/0xc0 [ +0.009568] ? autoremove_wake_function+0x40/0x40 [ +0.009565] inode_wait_for_writeback+0x21/0x30 [ +0.009224] evict+0xb0/0x190 [ +0.006099] iput+0x1a8/0x210 [ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0 [ +0.009047] btrfs_commit_transaction+0x799/0x8c0 [ +0.009567] btrfs_write_inode+0x81/0xb0 [ +0.008008] __writeback_single_inode+0x267/0x320 [ +0.009569] writeback_sb_inodes+0x25b/0x4e0 [ +0.008702] wb_writeback+0x102/0x2d0 [ +0.007487] wb_workfn+0xa4/0x310 [ +0.006794] ? wb_workfn+0xa4/0x310 [ +0.007143] process_one_work+0x150/0x410 [ +0.008179] worker_thread+0x6d/0x520 [ +0.007490] kthread+0x12c/0x160 [ +0.006620] ? put_pwq_unlocked+0x80/0x80 [ +0.008185] ? kthread_park+0xa0/0xa0 [ +0.007484] ? do_syscall_64+0x53/0x150 [ +0.007837] ret_from_fork+0x29/0x40 Writeback calls: btrfs_write_inode btrfs_commit_transaction btrfs_run_delayed_iputs If iput() is called on that same inode, evict() will wait for writeback forever. btrfs_write_inode() was originally added way back in 4730a4bc5bf3 ("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode() hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so btrfs_write_inode() is actually unnecessary (and leads to a bunch of unnecessary commits). Get rid of it, which also gets rid of the deadlock. CC: stable(a)vger.kernel.org # 3.2+ Signed-off-by: Josef Bacik <jbacik(a)fb.com> [Omar: new commit message] Signed-off-by: Omar Sandoval <osandov(a)fb.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4955e04da4c8..472457795486 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6021,32 +6021,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) return ret; } -int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - bool nolock = false; - - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) - return 0; - - if (btrfs_fs_closing(root->fs_info) && - btrfs_is_free_space_inode(BTRFS_I(inode))) - nolock = true; - - if (wbc->sync_mode == WB_SYNC_ALL) { - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans); - } - return ret; -} - /* * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index efe8b03ce380..67de3c0fc85b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = { .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, - .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs,

7 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001 From: Josef Bacik <jbacik(a)fb.com> Date: Fri, 20 Jul 2018 11:46:10 -0700 Subject: [PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock We recently ran into the following deadlock involving btrfs_write_inode(): [ +0.005066] __schedule+0x38e/0x8c0 [ +0.007144] schedule+0x36/0x80 [ +0.006447] bit_wait+0x11/0x60 [ +0.006446] __wait_on_bit+0xbe/0x110 [ +0.007487] ? bit_wait_io+0x60/0x60 [ +0.007319] __inode_wait_for_writeback+0x96/0xc0 [ +0.009568] ? autoremove_wake_function+0x40/0x40 [ +0.009565] inode_wait_for_writeback+0x21/0x30 [ +0.009224] evict+0xb0/0x190 [ +0.006099] iput+0x1a8/0x210 [ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0 [ +0.009047] btrfs_commit_transaction+0x799/0x8c0 [ +0.009567] btrfs_write_inode+0x81/0xb0 [ +0.008008] __writeback_single_inode+0x267/0x320 [ +0.009569] writeback_sb_inodes+0x25b/0x4e0 [ +0.008702] wb_writeback+0x102/0x2d0 [ +0.007487] wb_workfn+0xa4/0x310 [ +0.006794] ? wb_workfn+0xa4/0x310 [ +0.007143] process_one_work+0x150/0x410 [ +0.008179] worker_thread+0x6d/0x520 [ +0.007490] kthread+0x12c/0x160 [ +0.006620] ? put_pwq_unlocked+0x80/0x80 [ +0.008185] ? kthread_park+0xa0/0xa0 [ +0.007484] ? do_syscall_64+0x53/0x150 [ +0.007837] ret_from_fork+0x29/0x40 Writeback calls: btrfs_write_inode btrfs_commit_transaction btrfs_run_delayed_iputs If iput() is called on that same inode, evict() will wait for writeback forever. btrfs_write_inode() was originally added way back in 4730a4bc5bf3 ("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode() hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so btrfs_write_inode() is actually unnecessary (and leads to a bunch of unnecessary commits). Get rid of it, which also gets rid of the deadlock. CC: stable(a)vger.kernel.org # 3.2+ Signed-off-by: Josef Bacik <jbacik(a)fb.com> [Omar: new commit message] Signed-off-by: Omar Sandoval <osandov(a)fb.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4955e04da4c8..472457795486 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6021,32 +6021,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) return ret; } -int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - bool nolock = false; - - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) - return 0; - - if (btrfs_fs_closing(root->fs_info) && - btrfs_is_free_space_inode(BTRFS_I(inode))) - nolock = true; - - if (wbc->sync_mode == WB_SYNC_ALL) { - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans); - } - return ret; -} - /* * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index efe8b03ce380..67de3c0fc85b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = { .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, - .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs,

7 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001 From: Josef Bacik <jbacik(a)fb.com> Date: Fri, 20 Jul 2018 11:46:10 -0700 Subject: [PATCH] Btrfs: fix btrfs_write_inode vs delayed iput deadlock We recently ran into the following deadlock involving btrfs_write_inode(): [ +0.005066] __schedule+0x38e/0x8c0 [ +0.007144] schedule+0x36/0x80 [ +0.006447] bit_wait+0x11/0x60 [ +0.006446] __wait_on_bit+0xbe/0x110 [ +0.007487] ? bit_wait_io+0x60/0x60 [ +0.007319] __inode_wait_for_writeback+0x96/0xc0 [ +0.009568] ? autoremove_wake_function+0x40/0x40 [ +0.009565] inode_wait_for_writeback+0x21/0x30 [ +0.009224] evict+0xb0/0x190 [ +0.006099] iput+0x1a8/0x210 [ +0.006103] btrfs_run_delayed_iputs+0x73/0xc0 [ +0.009047] btrfs_commit_transaction+0x799/0x8c0 [ +0.009567] btrfs_write_inode+0x81/0xb0 [ +0.008008] __writeback_single_inode+0x267/0x320 [ +0.009569] writeback_sb_inodes+0x25b/0x4e0 [ +0.008702] wb_writeback+0x102/0x2d0 [ +0.007487] wb_workfn+0xa4/0x310 [ +0.006794] ? wb_workfn+0xa4/0x310 [ +0.007143] process_one_work+0x150/0x410 [ +0.008179] worker_thread+0x6d/0x520 [ +0.007490] kthread+0x12c/0x160 [ +0.006620] ? put_pwq_unlocked+0x80/0x80 [ +0.008185] ? kthread_park+0xa0/0xa0 [ +0.007484] ? do_syscall_64+0x53/0x150 [ +0.007837] ret_from_fork+0x29/0x40 Writeback calls: btrfs_write_inode btrfs_commit_transaction btrfs_run_delayed_iputs If iput() is called on that same inode, evict() will wait for writeback forever. btrfs_write_inode() was originally added way back in 4730a4bc5bf3 ("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode() hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so btrfs_write_inode() is actually unnecessary (and leads to a bunch of unnecessary commits). Get rid of it, which also gets rid of the deadlock. CC: stable(a)vger.kernel.org # 3.2+ Signed-off-by: Josef Bacik <jbacik(a)fb.com> [Omar: new commit message] Signed-off-by: Omar Sandoval <osandov(a)fb.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4955e04da4c8..472457795486 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6021,32 +6021,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) return ret; } -int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - bool nolock = false; - - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) - return 0; - - if (btrfs_fs_closing(root->fs_info) && - btrfs_is_free_space_inode(BTRFS_I(inode))) - nolock = true; - - if (wbc->sync_mode == WB_SYNC_ALL) { - if (nolock) - trans = btrfs_join_transaction_nolock(root); - else - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans); - } - return ret; -} - /* * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index efe8b03ce380..67de3c0fc85b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = { .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, - .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs,

7 years, 3 months

1
0
0 0

[gregkh@linuxfoundation.org: Patch "drm: re-enable error handling" has been added to the 3.18-stable tree]

by Nicholas Mc Guire

Hi ! this is also the wrong version of the patch - the proper version is below. This has been posted to lkml https://lkml.org/lkml/2018/7/18/191 "[PATCH V3] drm: handle error values properly" but there was no review yet The version you have here though is for sure broken. So maybe this should be simply dropped until the above, presumably correct fix, is confirmed. thx! hofrat ----- Forwarded message from gregkh(a)linuxfoundation.org ----- Date: Tue, 28 Aug 2018 16:09:59 +0200 From: gregkh(a)linuxfoundation.org To: 1531571532-22733-1-git-send-email-hofrat(a)osadl.org, alexander.levin(a)microsoft.com, gregkh(a)linuxfoundation.org, hofrat(a)osadl.org, seanpaul(a)chromium.org Cc: stable-commits(a)vger.kernel.org Subject: Patch "drm: re-enable error handling" has been added to the 3.18-stable tree This is a note to let you know that I've just added the patch titled drm: re-enable error handling to the 3.18-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum… The filename of the patch is: drm-re-enable-error-handling.patch and it can be found in the queue-3.18 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable(a)vger.kernel.org> know about it. >From foo@baz Tue Aug 28 16:08:28 CEST 2018 From: Nicholas Mc Guire <hofrat(a)osadl.org> Date: Sat, 14 Jul 2018 14:32:12 +0200 Subject: drm: re-enable error handling From: Nicholas Mc Guire <hofrat(a)osadl.org> [ Upstream commit d530b5f1ca0bb66958a2b714bebe40a1248b9c15 ] drm_legacy_ctxbitmap_next() returns idr_alloc() which can return -ENOMEM, -EINVAL or -ENOSPC none of which are -1 . but the call sites of drm_legacy_ctxbitmap_next() seem to be assuming that the error case would be -1 (original return of drm_ctxbitmap_next() prior to 2.6.23 was actually -1). Thus reenable error handling by checking for < 0. Signed-off-by: Nicholas Mc Guire <hofrat(a)osadl.org> Fixes: 62968144e673 ("drm: convert drm context code to use Linux idr") Signed-off-by: Sean Paul <seanpaul(a)chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/1531571532-22733-1-git-send-e… Signed-off-by: Sasha Levin <alexander.levin(a)microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/gpu/drm/drm_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -341,7 +341,7 @@ int drm_legacy_addctx(struct drm_device ctx->handle = drm_legacy_ctxbitmap_next(dev); } DRM_DEBUG("%d\n", ctx->handle); - if (ctx->handle == -1) { + if (ctx->handle < 0) { DRM_DEBUG("Not enough free contexts.\n"); /* Should this return -EBUSY instead? */ return -ENOMEM; Patches currently in stable-queue which might be from hofrat(a)osadl.org are queue-3.18/drm-re-enable-error-handling.patch queue-3.18/can-mpc5xxx_can-check-of_iomap-return-before-use.patch ----- End forwarded message -----

7 years, 3 months

2
1
0 0

[PATCH] x86/nmi: Fix some races in NMI uaccess

by Andy Lutomirski

In NMI context, we might be in the middle of context switching or in the middle of switch_mm_irqs_off(). In either case, CR3 might not match current->mm, which could cause copy_from_user_nmi() and friends to read the wrong memory. Fix it by adding a new nmi_uaccess_okay() helper and checking it in copy_from_user_nmi() and in __copy_from_user_nmi()'s callers. Cc: stable(a)vger.kernel.org Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Nadav Amit <nadav.amit(a)gmail.com> Signed-off-by: Andy Lutomirski <luto(a)kernel.org> --- The 0day bot is still chewing on this, but I've tested it a bit locally and it seems to do the right thing. I've never observed the bug it fixes, but it does appear to fix a bug unless I've missed something. It's also a prerequisite for Nadav's fixmap bugfix. arch/x86/events/core.c | 2 +- arch/x86/include/asm/tlbflush.h | 16 ++++++++++++++++ arch/x86/lib/usercopy.c | 5 +++++ arch/x86/mm/tlb.c | 3 +++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 5f4829f10129..dfb2f7c0d019 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs perf_callchain_store(entry, regs->ip); - if (!current->mm) + if (!nmi_uaccess_okay()) return; if (perf_callchain_user32(regs, entry)) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 89a73bc31622..b23b2625793b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -230,6 +230,22 @@ struct tlb_state { }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or + * switching the loaded mm. It can also be dangerous if we + * interrupted some kernel code that was temporarily using a + * different mm. + */ +static inline bool nmi_uaccess_okay(void) +{ + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); + struct mm_struct *current_mm = current->mm; + + return current_mm && loaded_mm == current_mm && + loaded_mm->pgd == __va(read_cr3_pa()); +} + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index c8c6ad0d58b8..3f435d7fca5e 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include <linux/uaccess.h> #include <linux/export.h> +#include <asm/tlbflush.h> + /* * We rely on the nested NMI work to allow atomic faults from the NMI path; the * nested NMI paths are careful to preserve CR2. @@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) if (__range_not_ok(from, n, TASK_SIZE)) return n; + if (!nmi_uaccess_okay()) + return n; + /* * Even though this function is typically called from NMI/IRQ context * disable pagefaults so that its behaviour is consistent even when diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 457b281b9339..f4b41d5a93dd 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -345,6 +345,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { + /* Let NMI code know that CR3 may not match expectations. */ + this_cpu_write(cpu_tlbstate.loaded_mm, NULL); + /* The new ASID is already up to date. */ load_new_mm_cr3(next->pgd, new_asid, false); -- 2.17.1

7 years, 3 months

3
12
0 0

Applied "regulator: bd71837: Disable voltage monitoring for LDO3/4" to the regulator tree

by Mark Brown

The patch regulator: bd71837: Disable voltage monitoring for LDO3/4 has been applied to the regulator tree at https://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git All being well this means that it will be integrated into the linux-next tree (usually sometime in the next 24 hours) and sent to Linus during the next merge window (or sooner if it is a bug fix), however if problems are discovered then the patch may be dropped or reverted. You may get further e-mails resulting from automated or manual testing and review of the tree, please engage with people reporting problems and send followup patches addressing any issues that are reported if needed. If any updates are required or you are submitting further changes they should be sent as incremental updates against current git, existing patches will not be replaced. Please add any relevant lists and maintainers to the CCs when replying to this mail. Thanks, Mark >From 823f18f8b860526fc099c222619a126d57d2ad8c Mon Sep 17 00:00:00 2001 From: Matti Vaittinen <matti.vaittinen(a)fi.rohmeurope.com> Date: Wed, 29 Aug 2018 15:36:10 +0300 Subject: [PATCH] regulator: bd71837: Disable voltage monitoring for LDO3/4 There is a HW quirk in BD71837. The shutdown sequence timings for bucks/LDOs which are enabled via register interface are changed. At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the beginning of shut-down sequence. This causes LDO5/6 voltage monitoring to detect under voltage and force PMIC to emergency state instead of poweroff. Disable voltage monitoring for LDO5 and LDO6 at probe to avoid this. Signed-off-by: Matti Vaittinen <matti.vaittinen(a)fi.rohmeurope.com> Signed-off-by: Mark Brown <broonie(a)kernel.org> Cc: stable(a)vger.kernel.org --- drivers/regulator/bd71837-regulator.c | 19 +++++++++++++++ include/linux/mfd/rohm-bd718x7.h | 33 ++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c index 0f8ac8dec3e1..a1bd8aaf4d98 100644 --- a/drivers/regulator/bd71837-regulator.c +++ b/drivers/regulator/bd71837-regulator.c @@ -569,6 +569,25 @@ static int bd71837_probe(struct platform_device *pdev) BD71837_REG_REGLOCK); } + /* + * There is a HW quirk in BD71837. The shutdown sequence timings for + * bucks/LDOs which are controlled via register interface are changed. + * At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the + * beginning of shut-down sequence. As bucks 6 and 7 are parent + * supplies for LDO5 and LDO6 - this causes LDO5/6 voltage + * monitoring to errorneously detect under voltage and force PMIC to + * emergency state instead of poweroff. In order to avoid this we + * disable voltage monitoring for LDO5 and LDO6 + */ + err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_MVRFLTMASK2, + BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80, + BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80); + if (err) { + dev_err(&pmic->pdev->dev, + "Failed to disable voltage monitoring\n"); + goto err; + } + for (i = 0; i < ARRAY_SIZE(pmic_regulator_inits); i++) { struct regulator_desc *desc; diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h index a528747f8aed..e8338e5dc10b 100644 --- a/include/linux/mfd/rohm-bd718x7.h +++ b/include/linux/mfd/rohm-bd718x7.h @@ -78,9 +78,9 @@ enum { BD71837_REG_TRANS_COND0 = 0x1F, BD71837_REG_TRANS_COND1 = 0x20, BD71837_REG_VRFAULTEN = 0x21, - BD71837_REG_MVRFLTMASK0 = 0x22, - BD71837_REG_MVRFLTMASK1 = 0x23, - BD71837_REG_MVRFLTMASK2 = 0x24, + BD718XX_REG_MVRFLTMASK0 = 0x22, + BD718XX_REG_MVRFLTMASK1 = 0x23, + BD718XX_REG_MVRFLTMASK2 = 0x24, BD71837_REG_RCVCFG = 0x25, BD71837_REG_RCVNUM = 0x26, BD71837_REG_PWRONCONFIG0 = 0x27, @@ -159,6 +159,33 @@ enum { #define BUCK8_MASK 0x3F #define BUCK8_DEFAULT 0x1E +/* BD718XX Voltage monitoring masks */ +#define BD718XX_BUCK1_VRMON80 0x1 +#define BD718XX_BUCK1_VRMON130 0x2 +#define BD718XX_BUCK2_VRMON80 0x4 +#define BD718XX_BUCK2_VRMON130 0x8 +#define BD718XX_1ST_NODVS_BUCK_VRMON80 0x1 +#define BD718XX_1ST_NODVS_BUCK_VRMON130 0x2 +#define BD718XX_2ND_NODVS_BUCK_VRMON80 0x4 +#define BD718XX_2ND_NODVS_BUCK_VRMON130 0x8 +#define BD718XX_3RD_NODVS_BUCK_VRMON80 0x10 +#define BD718XX_3RD_NODVS_BUCK_VRMON130 0x20 +#define BD718XX_4TH_NODVS_BUCK_VRMON80 0x40 +#define BD718XX_4TH_NODVS_BUCK_VRMON130 0x80 +#define BD718XX_LDO1_VRMON80 0x1 +#define BD718XX_LDO2_VRMON80 0x2 +#define BD718XX_LDO3_VRMON80 0x4 +#define BD718XX_LDO4_VRMON80 0x8 +#define BD718XX_LDO5_VRMON80 0x10 +#define BD718XX_LDO6_VRMON80 0x20 + +/* BD71837 specific voltage monitoring masks */ +#define BD71837_BUCK3_VRMON80 0x10 +#define BD71837_BUCK3_VRMON130 0x20 +#define BD71837_BUCK4_VRMON80 0x40 +#define BD71837_BUCK4_VRMON130 0x80 +#define BD71837_LDO7_VRMON80 0x40 + /* BD71837_REG_IRQ bits */ #define IRQ_SWRST 0x40 #define IRQ_PWRON_S 0x20 -- 2.18.0

7 years, 3 months

1
0
0 0

[PATCH v3 2/2] btrfs: Ensure btrfs_trim_fs can trim the whole fs

by Qu Wenruo

[BUG] fstrim on some btrfs only trims the unallocated space, not trimming any space in existing block groups. [CAUSE] Before fstrim_range passed to btrfs_trim_fs(), it get truncated to range [0, super->total_bytes). So later btrfs_trim_fs() will only be able to trim block groups in range [0, super->total_bytes). While for btrfs, any bytenr aligned to sector size is valid, since btrfs use its logical address space, there is nothing limiting the location where we put block groups. For btrfs with routine balance, it's quite easy to relocate all block groups and bytenr of block groups will start beyond super->total_bytes. In that case, btrfs will not trim existing block groups. [FIX] Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs() can get the unmodified range, which is normally set to [0, U64_MAX]. Reported-by: Chris Murphy <lists(a)colorremedies.com> Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl") Cc: <stable(a)vger.kernel.org> # v4.0+ Signed-off-by: Qu Wenruo <wqu(a)suse.com> --- fs/btrfs/extent-tree.c | 10 +--------- fs/btrfs/ioctl.c | 11 +++++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7768f206196a..d1478d66c7a5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10851,21 +10851,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); u64 bg_failed = 0; u64 dev_failed = 0; int bg_ret = 0; int dev_ret = 0; int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. - */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - + cache = btrfs_lookup_first_block_group(fs_info, range->start); for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 63600dc2ac4c..8165a4bfa579 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -491,7 +491,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -515,11 +514,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return -EOPNOTSUPP; if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; - if (range.start > total_bytes || - range.len < fs_info->sb->s_blocksize) + + /* + * NOTE: Don't truncate the range using super->total_bytes. + * Bytenr of btrfs block group is in btrfs logical address space, + * which can be any sector size aligned bytenr in [0, U64_MAX]. + */ + if (range.len < fs_info->sb->s_blocksize) return -EINVAL; - range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(fs_info, &range); if (ret < 0) -- 2.18.0

7 years, 3 months

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror August 2018