- Linux-stable-mirror - lists.linaro.org

FAILED: patch "[PATCH] btrfs: protect space cache inode alloc with GFP_NOFS" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 84de76a2fb217dc1b6bc2965cc397d1648aa1404 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef(a)toxicpanda.com> Date: Fri, 28 Sep 2018 07:17:49 -0400 Subject: [PATCH] btrfs: protect space cache inode alloc with GFP_NOFS If we're allocating a new space cache inode it's likely going to be under a transaction handle, so we need to use memalloc_nofs_save() in order to avoid deadlocks, and more importantly lockdep messages that make xfstests fail. CC: stable(a)vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval <osandov(a)fb.com> Signed-off-by: Josef Bacik <josef(a)toxicpanda.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ed097ff023e8..a1f379e8ad13 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -10,6 +10,7 @@ #include <linux/math64.h> #include <linux/ratelimit.h> #include <linux/error-injection.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -47,6 +48,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct inode *inode = NULL; + unsigned nofs_flag; int ret; key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -68,7 +70,13 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, btrfs_disk_key_to_cpu(&location, &disk_key); btrfs_release_path(path); + /* + * We are often under a trans handle at this point, so we need to make + * sure NOFS is set to keep us from deadlocking. + */ + nofs_flag = memalloc_nofs_save(); inode = btrfs_iget(fs_info->sb, &location, root, NULL); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) return inode;

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: protect space cache inode alloc with GFP_NOFS" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 84de76a2fb217dc1b6bc2965cc397d1648aa1404 Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef(a)toxicpanda.com> Date: Fri, 28 Sep 2018 07:17:49 -0400 Subject: [PATCH] btrfs: protect space cache inode alloc with GFP_NOFS If we're allocating a new space cache inode it's likely going to be under a transaction handle, so we need to use memalloc_nofs_save() in order to avoid deadlocks, and more importantly lockdep messages that make xfstests fail. CC: stable(a)vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval <osandov(a)fb.com> Signed-off-by: Josef Bacik <josef(a)toxicpanda.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ed097ff023e8..a1f379e8ad13 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -10,6 +10,7 @@ #include <linux/math64.h> #include <linux/ratelimit.h> #include <linux/error-injection.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -47,6 +48,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct inode *inode = NULL; + unsigned nofs_flag; int ret; key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -68,7 +70,13 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, btrfs_disk_key_to_cpu(&location, &disk_key); btrfs_release_path(path); + /* + * We are often under a trans handle at this point, so we need to make + * sure NOFS is set to keep us from deadlocking. + */ + nofs_flag = memalloc_nofs_save(); inode = btrfs_iget(fs_info->sb, &location, root, NULL); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) return inode;

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: release metadata before running delayed refs" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From f45c752b65af46bf42963295c332865d95f97fff Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef(a)toxicpanda.com> Date: Fri, 28 Sep 2018 07:17:48 -0400 Subject: [PATCH] btrfs: release metadata before running delayed refs We want to release the unused reservation we have since it refills the delayed refs reserve, which will make everything go smoother when running the delayed refs if we're short on our reservation. CC: stable(a)vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval <osandov(a)fb.com> Reviewed-by: Liu Bo <bo.liu(a)linux.alibaba.com> Reviewed-by: Nikolay Borisov <nborisov(a)suse.com> Signed-off-by: Josef Bacik <josef(a)toxicpanda.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cadc747292d9..e7f618b17b07 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1932,6 +1932,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } + btrfs_trans_release_metadata(trans); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ @@ -1941,9 +1944,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } - btrfs_trans_release_metadata(trans); - trans->block_rsv = NULL; - cur_trans = trans->transaction; /*

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: release metadata before running delayed refs" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From f45c752b65af46bf42963295c332865d95f97fff Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef(a)toxicpanda.com> Date: Fri, 28 Sep 2018 07:17:48 -0400 Subject: [PATCH] btrfs: release metadata before running delayed refs We want to release the unused reservation we have since it refills the delayed refs reserve, which will make everything go smoother when running the delayed refs if we're short on our reservation. CC: stable(a)vger.kernel.org # 4.4+ Reviewed-by: Omar Sandoval <osandov(a)fb.com> Reviewed-by: Liu Bo <bo.liu(a)linux.alibaba.com> Reviewed-by: Nikolay Borisov <nborisov(a)suse.com> Signed-off-by: Josef Bacik <josef(a)toxicpanda.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cadc747292d9..e7f618b17b07 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1932,6 +1932,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } + btrfs_trans_release_metadata(trans); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ @@ -1941,9 +1944,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; } - btrfs_trans_release_metadata(trans); - trans->block_rsv = NULL; - cur_trans = trans->transaction; /*

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: don't clean dirty pages during buffered writes" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 7703bdd8d23e6ef057af3253958a793ec6066b28 Mon Sep 17 00:00:00 2001 From: Chris Mason <clm(a)fb.com> Date: Wed, 20 Jun 2018 07:56:11 -0700 Subject: [PATCH] Btrfs: don't clean dirty pages during buffered writes During buffered writes, we follow this basic series of steps: again: lock all the pages wait for writeback on all the pages Take the extent range lock wait for ordered extents on the whole range clean all the pages if (copy_from_user_in_atomic() hits a fault) { drop our locks goto again; } dirty all the pages release all the locks The extra waiting, cleaning and locking are there to make sure we don't modify pages in flight to the drive, after they've been crc'd. If some of the pages in the range were already dirty when the write began, and we need to goto again, we create a window where a dirty page has been cleaned and unlocked. It may be reclaimed before we're able to lock it again, which means we'll read the old contents off the drive and lose any modifications that had been pending writeback. We don't actually need to clean the pages. All of the other locking in place makes sure we don't start IO on the pages, so we can just leave them dirty for the duration of the write. Fixes: 73d59314e6ed (the original btrfs merge) CC: stable(a)vger.kernel.org # v4.4+ Signed-off-by: Chris Mason <clm(a)fb.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d254cf94545f..15b925142793 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -531,6 +531,14 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, end_of_last_block = start_pos + num_bytes - 1; + /* + * The pages may have already been dirty, clear out old accounting so + * we can set things up properly + */ + clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); + if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { if (start_pos >= isize && !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { @@ -1500,18 +1508,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } if (ordered) btrfs_put_ordered_extent(ordered); - clear_extent_bit(&inode->io_tree, start_pos, last_pos, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, cached_state); + *lockstart = start_pos; *lockend = last_pos; ret = 1; } + /* + * It's possible the pages are dirty right now, but we don't want + * to clean them yet because copy_from_user may catch a page fault + * and we might have to fall back to one page at a time. If that + * happens, we'll unlock these pages and we'd have a window where + * reclaim could sneak in and drop the once-dirty page on the floor + * without writing it. + * + * We have the pages locked and the extent range locked, so there's + * no way someone can start IO on any dirty pages in this range. + * + * We'll call btrfs_dirty_pages() later on, and that will flip around + * delalloc bits and dirty the pages as required. + */ for (i = 0; i < num_pages; i++) { - if (clear_page_dirty_for_io(pages[i])) - account_page_redirty(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); }

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] Btrfs: don't clean dirty pages during buffered writes" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 7703bdd8d23e6ef057af3253958a793ec6066b28 Mon Sep 17 00:00:00 2001 From: Chris Mason <clm(a)fb.com> Date: Wed, 20 Jun 2018 07:56:11 -0700 Subject: [PATCH] Btrfs: don't clean dirty pages during buffered writes During buffered writes, we follow this basic series of steps: again: lock all the pages wait for writeback on all the pages Take the extent range lock wait for ordered extents on the whole range clean all the pages if (copy_from_user_in_atomic() hits a fault) { drop our locks goto again; } dirty all the pages release all the locks The extra waiting, cleaning and locking are there to make sure we don't modify pages in flight to the drive, after they've been crc'd. If some of the pages in the range were already dirty when the write began, and we need to goto again, we create a window where a dirty page has been cleaned and unlocked. It may be reclaimed before we're able to lock it again, which means we'll read the old contents off the drive and lose any modifications that had been pending writeback. We don't actually need to clean the pages. All of the other locking in place makes sure we don't start IO on the pages, so we can just leave them dirty for the duration of the write. Fixes: 73d59314e6ed (the original btrfs merge) CC: stable(a)vger.kernel.org # v4.4+ Signed-off-by: Chris Mason <clm(a)fb.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d254cf94545f..15b925142793 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -531,6 +531,14 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, end_of_last_block = start_pos + num_bytes - 1; + /* + * The pages may have already been dirty, clear out old accounting so + * we can set things up properly + */ + clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); + if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { if (start_pos >= isize && !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { @@ -1500,18 +1508,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } if (ordered) btrfs_put_ordered_extent(ordered); - clear_extent_bit(&inode->io_tree, start_pos, last_pos, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, cached_state); + *lockstart = start_pos; *lockend = last_pos; ret = 1; } + /* + * It's possible the pages are dirty right now, but we don't want + * to clean them yet because copy_from_user may catch a page fault + * and we might have to fall back to one page at a time. If that + * happens, we'll unlock these pages and we'd have a window where + * reclaim could sneak in and drop the once-dirty page on the floor + * without writing it. + * + * We have the pages locked and the extent range locked, so there's + * no way someone can start IO on any dirty pages in this range. + * + * We'll call btrfs_dirty_pages() later on, and that will flip around + * delalloc bits and dirty the pages as required. + */ for (i = 0; i < num_pages; i++) { - if (clear_page_dirty_for_io(pages[i])) - account_page_redirty(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); }

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: keep trim from interfering with transaction commits" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From fee7acc361314df6561208c2d3c0882d663dd537 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney <jeffm(a)suse.com> Date: Thu, 6 Sep 2018 17:18:16 -0400 Subject: [PATCH] btrfs: keep trim from interfering with transaction commits Commit 499f377f49f08 (btrfs: iterate over unused chunk space in FITRIM) fixed free space trimming, but introduced latency when it was running. This is due to it pinning the transaction using both a incremented refcount and holding the commit root sem for the duration of a single trim operation. This was to ensure safety but it's unnecessary. We already hold the the chunk mutex so we know that the chunk we're using can't be allocated while we're trimming it. In order to check against chunks allocated already in this transaction, we need to check the pending chunks list. To to that safely without joining the transaction (or attaching than then having to commit it) we need to ensure that the dev root's commit root doesn't change underneath us and the pending chunk lists stays around until we're done with it. We can ensure the former by holding the commit root sem and the latter by pinning the transaction. We do this now, but the critical section covers the trim operation itself and we don't need to do that. This patch moves the pinning and unpinning logic into helpers and unpins the transaction after performing the search and check for pending chunks. Limiting the critical section of the transaction pinning improves the latency substantially on slower storage (e.g. image files over NFS). Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable(a)vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney <jeffm(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1c2a22874f..f0524bdf49b3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10755,14 +10755,16 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * We don't want a transaction for this since the discard may take a * substantial amount of time. We don't require that a transaction be * running, but we do need to take a running transaction into account - * to ensure that we're not discarding chunks that were released in - * the current transaction. + * to ensure that we're not discarding chunks that were released or + * allocated in the current transaction. * * Holding the chunks lock will prevent other threads from allocating * or releasing chunks, but it won't prevent a running transaction * from committing and releasing the memory that the pending chunks * list head uses. For that, we need to take a reference to the - * transaction. + * transaction and hold the commit root sem. We only need to hold + * it while performing the free space search since we have already + * held back allocations. */ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 minlen, u64 *trimmed) @@ -10793,9 +10795,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, ret = mutex_lock_interruptible(&fs_info->chunk_mutex); if (ret) - return ret; + break; - down_read(&fs_info->commit_root_sem); + ret = down_read_killable(&fs_info->commit_root_sem); + if (ret) { + mutex_unlock(&fs_info->chunk_mutex); + break; + } spin_lock(&fs_info->trans_lock); trans = fs_info->running_transaction; @@ -10803,13 +10809,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, refcount_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); + if (!trans) + up_read(&fs_info->commit_root_sem); + ret = find_free_dev_extent_start(trans, device, minlen, start, &start, &len); - if (trans) + if (trans) { + up_read(&fs_info->commit_root_sem); btrfs_put_transaction(trans); + } if (ret) { - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret == -ENOSPC) ret = 0; @@ -10817,7 +10827,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, } ret = btrfs_issue_discard(device->bdev, start, len, &bytes); - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret)

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: keep trim from interfering with transaction commits" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From fee7acc361314df6561208c2d3c0882d663dd537 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney <jeffm(a)suse.com> Date: Thu, 6 Sep 2018 17:18:16 -0400 Subject: [PATCH] btrfs: keep trim from interfering with transaction commits Commit 499f377f49f08 (btrfs: iterate over unused chunk space in FITRIM) fixed free space trimming, but introduced latency when it was running. This is due to it pinning the transaction using both a incremented refcount and holding the commit root sem for the duration of a single trim operation. This was to ensure safety but it's unnecessary. We already hold the the chunk mutex so we know that the chunk we're using can't be allocated while we're trimming it. In order to check against chunks allocated already in this transaction, we need to check the pending chunks list. To to that safely without joining the transaction (or attaching than then having to commit it) we need to ensure that the dev root's commit root doesn't change underneath us and the pending chunk lists stays around until we're done with it. We can ensure the former by holding the commit root sem and the latter by pinning the transaction. We do this now, but the critical section covers the trim operation itself and we don't need to do that. This patch moves the pinning and unpinning logic into helpers and unpins the transaction after performing the search and check for pending chunks. Limiting the critical section of the transaction pinning improves the latency substantially on slower storage (e.g. image files over NFS). Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable(a)vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney <jeffm(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1c2a22874f..f0524bdf49b3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10755,14 +10755,16 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * We don't want a transaction for this since the discard may take a * substantial amount of time. We don't require that a transaction be * running, but we do need to take a running transaction into account - * to ensure that we're not discarding chunks that were released in - * the current transaction. + * to ensure that we're not discarding chunks that were released or + * allocated in the current transaction. * * Holding the chunks lock will prevent other threads from allocating * or releasing chunks, but it won't prevent a running transaction * from committing and releasing the memory that the pending chunks * list head uses. For that, we need to take a reference to the - * transaction. + * transaction and hold the commit root sem. We only need to hold + * it while performing the free space search since we have already + * held back allocations. */ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 minlen, u64 *trimmed) @@ -10793,9 +10795,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, ret = mutex_lock_interruptible(&fs_info->chunk_mutex); if (ret) - return ret; + break; - down_read(&fs_info->commit_root_sem); + ret = down_read_killable(&fs_info->commit_root_sem); + if (ret) { + mutex_unlock(&fs_info->chunk_mutex); + break; + } spin_lock(&fs_info->trans_lock); trans = fs_info->running_transaction; @@ -10803,13 +10809,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, refcount_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); + if (!trans) + up_read(&fs_info->commit_root_sem); + ret = find_free_dev_extent_start(trans, device, minlen, start, &start, &len); - if (trans) + if (trans) { + up_read(&fs_info->commit_root_sem); btrfs_put_transaction(trans); + } if (ret) { - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret == -ENOSPC) ret = 0; @@ -10817,7 +10827,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, } ret = btrfs_issue_discard(device->bdev, start, len, &bytes); - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret)

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: keep trim from interfering with transaction commits" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From fee7acc361314df6561208c2d3c0882d663dd537 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney <jeffm(a)suse.com> Date: Thu, 6 Sep 2018 17:18:16 -0400 Subject: [PATCH] btrfs: keep trim from interfering with transaction commits Commit 499f377f49f08 (btrfs: iterate over unused chunk space in FITRIM) fixed free space trimming, but introduced latency when it was running. This is due to it pinning the transaction using both a incremented refcount and holding the commit root sem for the duration of a single trim operation. This was to ensure safety but it's unnecessary. We already hold the the chunk mutex so we know that the chunk we're using can't be allocated while we're trimming it. In order to check against chunks allocated already in this transaction, we need to check the pending chunks list. To to that safely without joining the transaction (or attaching than then having to commit it) we need to ensure that the dev root's commit root doesn't change underneath us and the pending chunk lists stays around until we're done with it. We can ensure the former by holding the commit root sem and the latter by pinning the transaction. We do this now, but the critical section covers the trim operation itself and we don't need to do that. This patch moves the pinning and unpinning logic into helpers and unpins the transaction after performing the search and check for pending chunks. Limiting the critical section of the transaction pinning improves the latency substantially on slower storage (e.g. image files over NFS). Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM") CC: stable(a)vger.kernel.org # 4.4+ Signed-off-by: Jeff Mahoney <jeffm(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6e1c2a22874f..f0524bdf49b3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10755,14 +10755,16 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * We don't want a transaction for this since the discard may take a * substantial amount of time. We don't require that a transaction be * running, but we do need to take a running transaction into account - * to ensure that we're not discarding chunks that were released in - * the current transaction. + * to ensure that we're not discarding chunks that were released or + * allocated in the current transaction. * * Holding the chunks lock will prevent other threads from allocating * or releasing chunks, but it won't prevent a running transaction * from committing and releasing the memory that the pending chunks * list head uses. For that, we need to take a reference to the - * transaction. + * transaction and hold the commit root sem. We only need to hold + * it while performing the free space search since we have already + * held back allocations. */ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 minlen, u64 *trimmed) @@ -10793,9 +10795,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, ret = mutex_lock_interruptible(&fs_info->chunk_mutex); if (ret) - return ret; + break; - down_read(&fs_info->commit_root_sem); + ret = down_read_killable(&fs_info->commit_root_sem); + if (ret) { + mutex_unlock(&fs_info->chunk_mutex); + break; + } spin_lock(&fs_info->trans_lock); trans = fs_info->running_transaction; @@ -10803,13 +10809,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, refcount_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); + if (!trans) + up_read(&fs_info->commit_root_sem); + ret = find_free_dev_extent_start(trans, device, minlen, start, &start, &len); - if (trans) + if (trans) { + up_read(&fs_info->commit_root_sem); btrfs_put_transaction(trans); + } if (ret) { - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret == -ENOSPC) ret = 0; @@ -10817,7 +10827,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, } ret = btrfs_issue_discard(device->bdev, start, len, &bytes); - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret)

6 years, 9 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: Ensure btrfs_trim_fs can trim the whole filesystem" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 6ba9fc8e628becf0e3ec94083450d089b0dec5f5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo <wqu(a)suse.com> Date: Fri, 7 Sep 2018 14:16:24 +0800 Subject: [PATCH] btrfs: Ensure btrfs_trim_fs can trim the whole filesystem [BUG] fstrim on some btrfs only trims the unallocated space, not trimming any space in existing block groups. [CAUSE] Before fstrim_range passed to btrfs_trim_fs(), it gets truncated to range [0, super->total_bytes). So later btrfs_trim_fs() will only be able to trim block groups in range [0, super->total_bytes). While for btrfs, any bytenr aligned to sectorsize is valid, since btrfs uses its logical address space, there is nothing limiting the location where we put block groups. For filesystem with frequent balance, it's quite easy to relocate all block groups and bytenr of block groups will start beyond super->total_bytes. In that case, btrfs will not trim existing block groups. [FIX] Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs() can get the unmodified range, which is normally set to [0, U64_MAX]. Reported-by: Chris Murphy <lists(a)colorremedies.com> Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl") CC: <stable(a)vger.kernel.org> # v4.4+ Signed-off-by: Qu Wenruo <wqu(a)suse.com> Reviewed-by: Nikolay Borisov <nborisov(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5dbb3f713125..da3257585e29 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10851,21 +10851,13 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); u64 bg_failed = 0; u64 dev_failed = 0; int bg_ret = 0; int dev_ret = 0; int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. - */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - + cache = btrfs_lookup_first_block_group(fs_info, range->start); for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4905d13dee0a..a990a9045139 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -491,7 +491,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -515,11 +514,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return -EOPNOTSUPP; if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; - if (range.start > total_bytes || - range.len < fs_info->sb->s_blocksize) + + /* + * NOTE: Don't truncate the range using super->total_bytes. Bytenr of + * block group is in the logical address space, which can be any + * sectorsize aligned bytenr in the range [0, U64_MAX]. + */ + if (range.len < fs_info->sb->s_blocksize) return -EINVAL; - range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(fs_info, &range); if (ret < 0)

6 years, 9 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror