This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Cc: stable@vger.kernel.org Cc: Jens Axboe axboe@kernel.dk Cc: linux-block@vger.kernel.org Cc: Bart Van Assche bvanassche@acm.org Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Reported-by: Gwendal Grignou gwendal@chromium.org Reported-by: grygorii tertychnyi gtertych@cisco.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org --- drivers/block/loop.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 102d79575895..7c7d2d9c47d0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) kuid_t uid = current_uid(); struct block_device *bdev; bool partscan = false; + bool drop_caches = false;
err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) }
if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + lo->lo_sizelimit != info->lo_sizelimit) + drop_caches = true;
/* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { - /* kill_bdev should have truncated all the pages */ - if (lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) bdev = lo->lo_device; partscan = true; } + + /* truncate stale pages cached by previous operations */ + if (!err && drop_caches) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } out_unlock: mutex_unlock(&loop_ctl_mutex); if (partscan) @@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + bool drop_caches = false; int err = 0;
if (lo->lo_state != Lo_bound) @@ -1506,23 +1504,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size != arg) + drop_caches = true;
blk_mq_freeze_queue(lo->lo_queue); - - /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } - blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); @@ -1530,6 +1515,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue);
+ /* truncate stale pages cached by previous operations */ + if (drop_caches) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } return err; }
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Cc: stable@vger.kernel.org Cc: Jens Axboe axboe@kernel.dk Cc: linux-block@vger.kernel.org Cc: Bart Van Assche bvanassche@acm.org Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Reported-by: Gwendal Grignou gwendal@chromium.org Reported-by: grygorii tertychnyi gtertych@cisco.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org --- v2 from v1: - remove obsolete jump
drivers/block/loop.c | 45 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 28 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 102d79575895..42994de2dd12 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) kuid_t uid = current_uid(); struct block_device *bdev; bool partscan = false; + bool drop_caches = false;
err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) }
if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + lo->lo_sizelimit != info->lo_sizelimit) + drop_caches = true;
/* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { - /* kill_bdev should have truncated all the pages */ - if (lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) bdev = lo->lo_device; partscan = true; } + + /* truncate stale pages cached by previous operations */ + if (!err && drop_caches) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } out_unlock: mutex_unlock(&loop_ctl_mutex); if (partscan) @@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + bool drop_caches = false; int err = 0;
if (lo->lo_state != Lo_bound) @@ -1506,30 +1504,21 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size != arg) + drop_caches = true;
blk_mq_freeze_queue(lo->lo_queue); - - /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } - blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); -out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue);
+ /* truncate stale pages cached by previous operations */ + if (drop_caches) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } return err; }
Jens,
Any chance to get a review for this?
(Added Tested-by:)
On 05/17, Jaegeuk Kim wrote:
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Cc: stable@vger.kernel.org Cc: Jens Axboe axboe@kernel.dk Cc: linux-block@vger.kernel.org Cc: Bart Van Assche bvanassche@acm.org Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Reported-by: Gwendal Grignou gwendal@chromium.org Reported-by: grygorii tertychnyi gtertych@cisco.com
Tested-by: Francesco Ruggeri fruggeri@arista.com
Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org
v2 from v1:
- remove obsolete jump
drivers/block/loop.c | 45 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 28 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 102d79575895..42994de2dd12 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) kuid_t uid = current_uid(); struct block_device *bdev; bool partscan = false;
- bool drop_caches = false;
err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) } if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- }
lo->lo_sizelimit != info->lo_sizelimit)
drop_caches = true;
/* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) {
/* kill_bdev should have truncated all the pages */
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze;}
@@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) bdev = lo->lo_device; partscan = true; }
- /* truncate stale pages cached by previous operations */
- if (!err && drop_caches) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- }
out_unlock: mutex_unlock(&loop_ctl_mutex); if (partscan) @@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) {
- bool drop_caches = false; int err = 0;
if (lo->lo_state != Lo_bound) @@ -1506,30 +1504,21 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- }
- if (lo->lo_queue->limits.logical_block_size != arg)
drop_caches = true;
blk_mq_freeze_queue(lo->lo_queue);
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
- }
- blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo);
-out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue);
- /* truncate stale pages cached by previous operations */
- if (drop_caches) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- } return err;
} -- 2.19.0.605.g01d371f741-goog
Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Any chance to get a review on this patch?
We're running Singularity containers in an mpi environment where multiple concurrent container image loop mounts occur and we hit this issue as reported to Sylabs by us here:
https://github.com/sylabs/singularity/issues/3860
It is affecting our production. This email and any accompanying attachments are confidential. If you received this email by mistake, please delete it from your system. Any review, disclosure, copying, distribution, or use of the email by others is strictly prohibited.
On Mon, Nov 18, 2019 at 11:36:16AM -0700, Andrew Norrie wrote:
This email and any accompanying attachments are confidential. If you received this email by mistake, please delete it from your system. Any review, disclosure, copying, distribution, or use of the email by others is strictly prohibited.
Now deleted. This is not compatible with Linux mailing lists, sorry.
On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Please provide a more detailed commit description. What is wrong with the current implementation and why is the new behavior considered the correct behavior?
This patch moves draining code from before the following comment to after that comment:
/* I/O need to be drained during transfer transition */
Is that comment still correct or should it perhaps be updated?
Thanks,
Bart.
On 11/19, Bart Van Assche wrote:
On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Please provide a more detailed commit description. What is wrong with the current implementation and why is the new behavior considered the correct behavior?
Some history would be:
Original bug fix is: commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed"), which returns EAGAIN so that user land like Chrome would require enhancing their error handling routines.
So, this patch tries to avoid EAGAIN while addressing the original bug.
This patch moves draining code from before the following comment to after that comment:
/* I/O need to be drained during transfer transition */
Is that comment still correct or should it perhaps be updated?
IMHO, it's still valid.
Thanks,
Bart.
On 11/25/19 9:59 AM, Jaegeuk Kim wrote:
On 11/19, Bart Van Assche wrote:
On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Please provide a more detailed commit description. What is wrong with the current implementation and why is the new behavior considered the correct behavior?
Some history would be:
Original bug fix is: commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed"), which returns EAGAIN so that user land like Chrome would require enhancing their error handling routines.
So, this patch tries to avoid EAGAIN while addressing the original bug.
This patch moves draining code from before the following comment to after that comment:
/* I/O need to be drained during transfer transition */
Is that comment still correct or should it perhaps be updated?
IMHO, it's still valid.
Hi Jaegeuk,
Thank you for the additional and very helpful clarification. Can you have a look at the (totally untested) patch below? I prefer that version because it prevents concurrent processing of requests and syncing/killing the bdev.
Thanks,
Bart.
Subject: [PATCH] loop: Avoid EAGAIN if offset or block_size are changed
After sync_blockdev() and kill_bdev() have been called, more requests can be submitted to the loop device. These requests dirty additional pages, causing loop_set_status() to return -EAGAIN. Not all user space code that changes the offset and/or the block size handles -EAGAIN correctly. Hence make sure that loop_set_status() does not return -EAGAIN.
Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Reported-by: Gwendal Grignou gwendal@chromium.org Reported-by: grygorii tertychnyi gtertych@cisco.com Reported-by: Jaegeuk Kim jaegeuk@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche bvanassche@acm.org --- drivers/block/loop.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 739b372a5112..48cfc8b9c247 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1264,15 +1264,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; }
+ /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { sync_blockdev(lo->lo_device); kill_bdev(lo->lo_device); }
- /* I/O need to be drained during transfer transition */ - blk_mq_freeze_queue(lo->lo_queue); - err = loop_release_xfer(lo); if (err) goto out_unfreeze; @@ -1298,14 +1298,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { - /* kill_bdev should have truncated all the pages */ - if (lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1531,39 +1523,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { - int err = 0; - if (lo->lo_state != Lo_bound) return -ENXIO;
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
+ blk_mq_freeze_queue(lo->lo_queue); + if (lo->lo_queue->limits.logical_block_size != arg) { sync_blockdev(lo->lo_device); kill_bdev(lo->lo_device); } - - blk_mq_freeze_queue(lo->lo_queue); - - /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } - blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); -out_unfreeze: + blk_mq_unfreeze_queue(lo->lo_queue);
- return err; + return 0; }
static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
Hi Bart,
On 11/25, Bart Van Assche wrote:
On 11/25/19 9:59 AM, Jaegeuk Kim wrote:
On 11/19, Bart Van Assche wrote:
On 5/17/19 5:53 PM, Jaegeuk Kim wrote:
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Please provide a more detailed commit description. What is wrong with the current implementation and why is the new behavior considered the correct behavior?
Some history would be:
Original bug fix is: commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed"), which returns EAGAIN so that user land like Chrome would require enhancing their error handling routines.
So, this patch tries to avoid EAGAIN while addressing the original bug.
This patch moves draining code from before the following comment to after that comment:
/* I/O need to be drained during transfer transition */
Is that comment still correct or should it perhaps be updated?
IMHO, it's still valid.
Hi Jaegeuk,
Thank you for the additional and very helpful clarification. Can you have a look at the (totally untested) patch below? I prefer that version because it prevents concurrent processing of requests and syncing/killing the bdev.
Yeah, I thought this was much cleaner way, but wasn't sure it could be doable to sync|kill block device after freezing the queue. Is it okay?
Thanks,
Bart.
Subject: [PATCH] loop: Avoid EAGAIN if offset or block_size are changed
After sync_blockdev() and kill_bdev() have been called, more requests can be submitted to the loop device. These requests dirty additional pages, causing loop_set_status() to return -EAGAIN. Not all user space code that changes the offset and/or the block size handles -EAGAIN correctly. Hence make sure that loop_set_status() does not return -EAGAIN.
Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Reported-by: Gwendal Grignou gwendal@chromium.org Reported-by: grygorii tertychnyi gtertych@cisco.com Reported-by: Jaegeuk Kim jaegeuk@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche bvanassche@acm.org
drivers/block/loop.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 739b372a5112..48cfc8b9c247 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1264,15 +1264,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; }
- /* I/O need to be drained during transfer transition */
- blk_mq_freeze_queue(lo->lo_queue);
- if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { sync_blockdev(lo->lo_device); kill_bdev(lo->lo_device); }
- /* I/O need to be drained during transfer transition */
- blk_mq_freeze_queue(lo->lo_queue);
- err = loop_release_xfer(lo); if (err) goto out_unfreeze;
@@ -1298,14 +1298,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) {
/* kill_bdev should have truncated all the pages */
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze;}
@@ -1531,39 +1523,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
static int loop_set_block_size(struct loop_device *lo, unsigned long arg) {
int err = 0;
if (lo->lo_state != Lo_bound) return -ENXIO;
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
- blk_mq_freeze_queue(lo->lo_queue);
- if (lo->lo_queue->limits.logical_block_size != arg) { sync_blockdev(lo->lo_device); kill_bdev(lo->lo_device); }
- blk_mq_freeze_queue(lo->lo_queue);
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
- }
- blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo);
-out_unfreeze:
- blk_mq_unfreeze_queue(lo->lo_queue);
- return err;
- return 0;
}
static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
On 11/25/19 11:22 AM, Jaegeuk Kim wrote:
On 11/25, Bart Van Assche wrote:
Thank you for the additional and very helpful clarification. Can you have a look at the (totally untested) patch below? I prefer that version because it prevents concurrent processing of requests and syncing/killing the bdev.
Yeah, I thought this was much cleaner way, but wasn't sure it could be doable to sync|kill block device after freezing the queue. Is it okay?
Hi Jaegeuk,
That patch was based on an incorrect interpretation of the meaning of lo_device. After having taken another loop at the block driver, I don't think that calling sync after freezing the queue is OK. How about using the following call sequence: * sync_blockdev() * blk_mq_freeze_queue() * kill_bdev()
Thanks,
Bart.
Previously, there was a bug where user could see stale buffer cache (e.g, 512B) attached in the 4KB-sized pager cache, when the block size was changed from 512B to 4KB. That was fixed by: commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
But, there were some regression reports saying the fix returns EAGAIN easily. So, this patch removes previously added EAGAIN condition, nrpages != 0.
Instead, it changes the flow like this: - sync_blockdev() - blk_mq_freeze_queue() : change the loop configuration - blk_mq_unfreeze_queue() - sync_blockdev() - invalidate_bdev()
After invalidating the buffer cache, we must see the full valid 4KB page.
Additional concern came from Bart in which we can lose some data when changing the lo_offset. In that case, this patch adds: - sync_blockdev() - blk_set_queue_dying - blk_mq_freeze_queue() : change the loop configuration - blk_mq_unfreeze_queue() - blk_queue_flag_clear(QUEUE_FLAG_DYING); - sync_blockdev() - invalidate_bdev()
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
Cc: stable@vger.kernel.org Cc: Jens Axboe axboe@kernel.dk Cc: linux-block@vger.kernel.org Cc: Bart Van Assche bvanassche@acm.org Fixes: 5db470e229e2 ("loop: drop caches if offset or block_size are changed") Reported-by: Gwendal Grignou gwendal@chromium.org Reported-by: grygorii tertychnyi gtertych@cisco.com Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org --- drivers/block/loop.c | 65 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 32 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f6f77eaa7217..b583050d513a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1232,6 +1232,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) kuid_t uid = current_uid(); struct block_device *bdev; bool partscan = false; + bool drop_request = false; + bool drop_cache = false;
err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1251,14 +1253,21 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; }
+ if (lo->lo_offset != info->lo_offset) + drop_request = true; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + lo->lo_sizelimit != info->lo_sizelimit) + drop_cache = true;
- /* I/O need to be drained during transfer transition */ - blk_mq_freeze_queue(lo->lo_queue); + sync_blockdev(lo->lo_device); + + if (drop_request) { + blk_set_queue_dying(lo->lo_queue); + blk_mq_freeze_queue_wait(lo->lo_queue); + } else { + /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + }
err = loop_release_xfer(lo); if (err) @@ -1285,14 +1294,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { - /* kill_bdev should have truncated all the pages */ - if (lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1329,6 +1330,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); + if (drop_request) + blk_queue_flag_clear(QUEUE_FLAG_DYING, lo->lo_queue);
if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { @@ -1337,6 +1340,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) bdev = lo->lo_device; partscan = true; } + + /* truncate stale pages cached by previous operations */ + if (!err && drop_cache) { + sync_blockdev(lo->lo_device); + invalidate_bdev(lo->lo_device); + } out_unlock: mutex_unlock(&loop_ctl_mutex); if (partscan) @@ -1518,7 +1527,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { - int err = 0; + bool drop_cache = false;
if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1526,31 +1535,23 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size != arg) + drop_cache = true;
+ sync_blockdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); - - /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } - blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); -out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue);
- return err; + /* truncate stale pages cached by previous operations */ + if (drop_cache) { + sync_blockdev(lo->lo_device); + invalidate_bdev(lo->lo_device); + } + return 0; }
static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
On 11/27/19 10:18 AM, Jaegeuk Kim wrote:
Previously, there was a bug where user could see stale buffer cache (e.g, 512B) attached in the 4KB-sized pager cache, when the block size was changed from 512B to 4KB. That was fixed by: commit 5db470e229e2 ("loop: drop caches if offset or block_size are changed")
[ ... ]
Reviewed-by: Bart Van Assche bvanassche@acm.org
Hi,
Just checking again the status of this patch? It doesn't look like it's made it into the kernel yet?
On Fri 17-05-19 17:47:51, Jaegeuk Kim wrote:
This patch tries to avoid EAGAIN due to nrpages!=0 that was originally trying to drop stale pages resulting in wrong data access.
Report: https://bugs.chromium.org/p/chromium/issues/detail?id=938958#c38
...
drivers/block/loop.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 102d79575895..7c7d2d9c47d0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1212,6 +1212,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) kuid_t uid = current_uid(); struct block_device *bdev; bool partscan = false;
- bool drop_caches = false;
err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1232,10 +1233,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) } if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- }
lo->lo_sizelimit != info->lo_sizelimit)
drop_caches = true;
I don't think this solution of moving buffer cache invalidation after loop device is updated is really correct.
If there's any dirty data in the buffer cache, god knows where it ends up being written after the loop device is reconfigured. Think e.g. of a file offset being changed. It may not be even possible to write it if say block size increased and we have now improperly sized buffers in the buffer cache...
Frankly, I have rather limited sympathy to someone trying to reconfigure a loop device while it is in use. Is there any sane usecase? I'd be inclined to just use a similar trick as we did with LOOP_SET_FD and allow these changes only if the caller has the loop device open exclusively or we are able to upgrade to exclusive open. As otherwise say mounted filesystem on top of loop device being reconfigured is very likely to be in serious trouble (e.g. it's impossible to fully invalidate buffer cache in that case).
But that's probably somewhat tangential to the problem you have. For your case I don't really see a race-free way to invalidate buffer cache and update loop configuration - the best I can see is to flush & invalidate the cache, freeze the bdev so that new data cannot be read into the buffer cache, check the cache is still empty - if yes, go ahead. If not, unfreeze and try again...
Honza
/* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1265,14 +1264,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) {
/* kill_bdev should have truncated all the pages */
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze;}
@@ -1317,6 +1308,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) bdev = lo->lo_device; partscan = true; }
- /* truncate stale pages cached by previous operations */
- if (!err && drop_caches) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- }
out_unlock: mutex_unlock(&loop_ctl_mutex); if (partscan) @@ -1498,6 +1495,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) {
- bool drop_caches = false; int err = 0;
if (lo->lo_state != Lo_bound) @@ -1506,23 +1504,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL;
- if (lo->lo_queue->limits.logical_block_size != arg) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- }
- if (lo->lo_queue->limits.logical_block_size != arg)
drop_caches = true;
blk_mq_freeze_queue(lo->lo_queue);
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_queue->limits.logical_block_size != arg &&
lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
- }
- blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg);
@@ -1530,6 +1515,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue);
- /* truncate stale pages cached by previous operations */
- if (drop_caches) {
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
- } return err;
}
linux-stable-mirror@lists.linaro.org