The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7f11a2cc10a4ae3a70e2c73361f4a9a33503539b Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan(a)microsoft.com>
Date: Mon, 6 Apr 2020 08:53:27 -0700
Subject: [PATCH] x86/Hyper-V: Free hv_panic_page when fail to register kmsg
dump
If kmsg_dump_register() fails, hv_panic_page will not be used
anywhere. So free and reset it.
Fixes: 81b18bce48af ("Drivers: HV: Send one page worth of kmsg dump over Hyper-V during panic")
Reviewed-by: Michael Kelley <mikelley(a)microsoft.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan(a)microsoft.com>
Link: https://lore.kernel.org/r/20200406155331.2105-3-Tianyu.Lan@microsoft.com
Signed-off-by: Wei Liu <wei.liu(a)kernel.org>
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6478240d11ab..00a511f15926 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1385,9 +1385,13 @@ static int vmbus_bus_init(void)
hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
if (hv_panic_page) {
ret = kmsg_dump_register(&hv_kmsg_dumper);
- if (ret)
+ if (ret) {
pr_err("Hyper-V: kmsg dump register "
"error 0x%x\n", ret);
+ hv_free_hyperv_page(
+ (unsigned long)hv_panic_page);
+ hv_panic_page = NULL;
+ }
} else
pr_err("Hyper-V: panic message page memory "
"allocation failed");
@@ -1416,7 +1420,6 @@ static int vmbus_bus_init(void)
hv_remove_vmbus_irq();
bus_unregister(&hv_bus);
- hv_free_hyperv_page((unsigned long)hv_panic_page);
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
return ret;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7f11a2cc10a4ae3a70e2c73361f4a9a33503539b Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan(a)microsoft.com>
Date: Mon, 6 Apr 2020 08:53:27 -0700
Subject: [PATCH] x86/Hyper-V: Free hv_panic_page when fail to register kmsg
dump
If kmsg_dump_register() fails, hv_panic_page will not be used
anywhere. So free and reset it.
Fixes: 81b18bce48af ("Drivers: HV: Send one page worth of kmsg dump over Hyper-V during panic")
Reviewed-by: Michael Kelley <mikelley(a)microsoft.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan(a)microsoft.com>
Link: https://lore.kernel.org/r/20200406155331.2105-3-Tianyu.Lan@microsoft.com
Signed-off-by: Wei Liu <wei.liu(a)kernel.org>
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6478240d11ab..00a511f15926 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1385,9 +1385,13 @@ static int vmbus_bus_init(void)
hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
if (hv_panic_page) {
ret = kmsg_dump_register(&hv_kmsg_dumper);
- if (ret)
+ if (ret) {
pr_err("Hyper-V: kmsg dump register "
"error 0x%x\n", ret);
+ hv_free_hyperv_page(
+ (unsigned long)hv_panic_page);
+ hv_panic_page = NULL;
+ }
} else
pr_err("Hyper-V: panic message page memory "
"allocation failed");
@@ -1416,7 +1420,6 @@ static int vmbus_bus_init(void)
hv_remove_vmbus_irq();
bus_unregister(&hv_bus);
- hv_free_hyperv_page((unsigned long)hv_panic_page);
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
return ret;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29acfb65598f91671413869e0d0a1ec4e74ac705 Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand(a)sony.com>
Date: Thu, 16 Apr 2020 16:42:50 -0500
Subject: [PATCH] of: unittest: kmemleak in duplicate property update
kmemleak reports several memory leaks from devicetree unittest.
This is the fix for problem 5 of 5.
When overlay 'overlay_bad_add_dup_prop' is applied, the apply code
properly detects that a memory leak will occur if the overlay is removed
since the duplicate property is located in a base devicetree node and
reports via printk():
OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail
OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail
The overlay is removed when the apply code detects multiple changesets
modifying the same property. This is reported via printk():
OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/rpm_avail
As a result of this error, the overlay is removed resulting in the
expected memory leak.
Add another device node level to the overlay so that the duplicate
property is located in a node added by the overlay, thus no memory
leak will occur when the overlay is removed.
Thus users of kmemleak will not have to debug this leak in the future.
Fixes: 2fe0e8769df9 ("of: overlay: check prevents multiple fragments touching same property")
Reported-by: Erhard F. <erhard_f(a)mailbox.org>
Signed-off-by: Frank Rowand <frank.rowand(a)sony.com>
Signed-off-by: Rob Herring <robh(a)kernel.org>
diff --git a/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts b/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
index c190da54f175..6327d1ffb963 100644
--- a/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
+++ b/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
@@ -3,22 +3,37 @@
/plugin/;
/*
- * &electric_1/motor-1 and &spin_ctrl_1 are the same node:
- * /testcase-data-2/substation@100/motor-1
+ * &electric_1/motor-1/electric and &spin_ctrl_1/electric are the same node:
+ * /testcase-data-2/substation@100/motor-1/electric
*
* Thus the property "rpm_avail" in each fragment will
* result in an attempt to update the same property twice.
* This will result in an error and the overlay apply
* will fail.
+ *
+ * The previous version of this test did not include the extra
+ * level of node 'electric'. That resulted in the 'rpm_avail'
+ * property being located in the pre-existing node 'motor-1'.
+ * Modifying a property results in a WARNING that a memory leak
+ * will occur if the overlay is removed. Since the overlay apply
+ * fails, the memory leak does actually occur, and kmemleak will
+ * further report the memory leak if CONFIG_DEBUG_KMEMLEAK is
+ * enabled. Adding the overlay node 'electric' avoids the
+ * memory leak and thus people who use kmemleak will not
+ * have to debug this non-problem again.
*/
&electric_1 {
motor-1 {
- rpm_avail = < 100 >;
+ electric {
+ rpm_avail = < 100 >;
+ };
};
};
&spin_ctrl_1 {
- rpm_avail = < 100 200 >;
+ electric {
+ rpm_avail = < 100 200 >;
+ };
};
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index f238b7a3865d..398de04fd19c 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -3181,21 +3181,21 @@ static __init void of_unittest_overlay_high_level(void)
"OF: overlay: ERROR: multiple fragments add and/or delete node /testcase-data-2/substation@100/motor-1/controller");
EXPECT_BEGIN(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add and/or delete node /testcase-data-2/substation@100/motor-1/electric");
EXPECT_BEGIN(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/rpm_avail");
EXPECT_BEGIN(KERN_ERR,
- "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/name");
unittest(overlay_data_apply("overlay_bad_add_dup_prop", NULL),
"Adding overlay 'overlay_bad_add_dup_prop' failed\n");
EXPECT_END(KERN_ERR,
- "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/name");
EXPECT_END(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/rpm_avail");
EXPECT_END(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add and/or delete node /testcase-data-2/substation@100/motor-1/electric");
unittest(overlay_data_apply("overlay_bad_phandle", NULL),
"Adding overlay 'overlay_bad_phandle' failed\n");
The patch below does not apply to the 5.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 29acfb65598f91671413869e0d0a1ec4e74ac705 Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand(a)sony.com>
Date: Thu, 16 Apr 2020 16:42:50 -0500
Subject: [PATCH] of: unittest: kmemleak in duplicate property update
kmemleak reports several memory leaks from devicetree unittest.
This is the fix for problem 5 of 5.
When overlay 'overlay_bad_add_dup_prop' is applied, the apply code
properly detects that a memory leak will occur if the overlay is removed
since the duplicate property is located in a base devicetree node and
reports via printk():
OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail
OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail
The overlay is removed when the apply code detects multiple changesets
modifying the same property. This is reported via printk():
OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/rpm_avail
As a result of this error, the overlay is removed resulting in the
expected memory leak.
Add another device node level to the overlay so that the duplicate
property is located in a node added by the overlay, thus no memory
leak will occur when the overlay is removed.
Thus users of kmemleak will not have to debug this leak in the future.
Fixes: 2fe0e8769df9 ("of: overlay: check prevents multiple fragments touching same property")
Reported-by: Erhard F. <erhard_f(a)mailbox.org>
Signed-off-by: Frank Rowand <frank.rowand(a)sony.com>
Signed-off-by: Rob Herring <robh(a)kernel.org>
diff --git a/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts b/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
index c190da54f175..6327d1ffb963 100644
--- a/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
+++ b/drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
@@ -3,22 +3,37 @@
/plugin/;
/*
- * &electric_1/motor-1 and &spin_ctrl_1 are the same node:
- * /testcase-data-2/substation@100/motor-1
+ * &electric_1/motor-1/electric and &spin_ctrl_1/electric are the same node:
+ * /testcase-data-2/substation@100/motor-1/electric
*
* Thus the property "rpm_avail" in each fragment will
* result in an attempt to update the same property twice.
* This will result in an error and the overlay apply
* will fail.
+ *
+ * The previous version of this test did not include the extra
+ * level of node 'electric'. That resulted in the 'rpm_avail'
+ * property being located in the pre-existing node 'motor-1'.
+ * Modifying a property results in a WARNING that a memory leak
+ * will occur if the overlay is removed. Since the overlay apply
+ * fails, the memory leak does actually occur, and kmemleak will
+ * further report the memory leak if CONFIG_DEBUG_KMEMLEAK is
+ * enabled. Adding the overlay node 'electric' avoids the
+ * memory leak and thus people who use kmemleak will not
+ * have to debug this non-problem again.
*/
&electric_1 {
motor-1 {
- rpm_avail = < 100 >;
+ electric {
+ rpm_avail = < 100 >;
+ };
};
};
&spin_ctrl_1 {
- rpm_avail = < 100 200 >;
+ electric {
+ rpm_avail = < 100 200 >;
+ };
};
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index f238b7a3865d..398de04fd19c 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -3181,21 +3181,21 @@ static __init void of_unittest_overlay_high_level(void)
"OF: overlay: ERROR: multiple fragments add and/or delete node /testcase-data-2/substation@100/motor-1/controller");
EXPECT_BEGIN(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add and/or delete node /testcase-data-2/substation@100/motor-1/electric");
EXPECT_BEGIN(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/rpm_avail");
EXPECT_BEGIN(KERN_ERR,
- "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/name");
unittest(overlay_data_apply("overlay_bad_add_dup_prop", NULL),
"Adding overlay 'overlay_bad_add_dup_prop' failed\n");
EXPECT_END(KERN_ERR,
- "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/name");
EXPECT_END(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add, update, and/or delete property /testcase-data-2/substation@100/motor-1/electric/rpm_avail");
EXPECT_END(KERN_ERR,
- "OF: overlay: WARNING: memory leak will occur if overlay removed, property: /testcase-data-2/substation@100/motor-1/rpm_avail");
+ "OF: overlay: ERROR: multiple fragments add and/or delete node /testcase-data-2/substation@100/motor-1/electric");
unittest(overlay_data_apply("overlay_bad_phandle", NULL),
"Adding overlay 'overlay_bad_phandle' failed\n");
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 216830d2413cc61be3f76bc02ffd905e47d2439e Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand(a)sony.com>
Date: Thu, 16 Apr 2020 16:42:47 -0500
Subject: [PATCH] of: unittest: kmemleak in of_unittest_platform_populate()
kmemleak reports several memory leaks from devicetree unittest.
This is the fix for problem 2 of 5.
of_unittest_platform_populate() left an elevated reference count for
grandchild nodes (which are platform devices). Fix the platform
device reference counts so that the memory will be freed.
Fixes: fb2caa50fbac ("of/selftest: add testcase for nodes with same name and address")
Reported-by: Erhard F. <erhard_f(a)mailbox.org>
Signed-off-by: Frank Rowand <frank.rowand(a)sony.com>
Signed-off-by: Rob Herring <robh(a)kernel.org>
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 20ff2dfc3143..4c7818276857 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -1247,10 +1247,13 @@ static void __init of_unittest_platform_populate(void)
of_platform_populate(np, match, NULL, &test_bus->dev);
for_each_child_of_node(np, child) {
- for_each_child_of_node(child, grandchild)
- unittest(of_find_device_by_node(grandchild),
+ for_each_child_of_node(child, grandchild) {
+ pdev = of_find_device_by_node(grandchild);
+ unittest(pdev,
"Could not create device for node '%pOFn'\n",
grandchild);
+ of_dev_put(pdev);
+ }
}
of_platform_depopulate(&test_bus->dev);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 216830d2413cc61be3f76bc02ffd905e47d2439e Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand(a)sony.com>
Date: Thu, 16 Apr 2020 16:42:47 -0500
Subject: [PATCH] of: unittest: kmemleak in of_unittest_platform_populate()
kmemleak reports several memory leaks from devicetree unittest.
This is the fix for problem 2 of 5.
of_unittest_platform_populate() left an elevated reference count for
grandchild nodes (which are platform devices). Fix the platform
device reference counts so that the memory will be freed.
Fixes: fb2caa50fbac ("of/selftest: add testcase for nodes with same name and address")
Reported-by: Erhard F. <erhard_f(a)mailbox.org>
Signed-off-by: Frank Rowand <frank.rowand(a)sony.com>
Signed-off-by: Rob Herring <robh(a)kernel.org>
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 20ff2dfc3143..4c7818276857 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -1247,10 +1247,13 @@ static void __init of_unittest_platform_populate(void)
of_platform_populate(np, match, NULL, &test_bus->dev);
for_each_child_of_node(np, child) {
- for_each_child_of_node(child, grandchild)
- unittest(of_find_device_by_node(grandchild),
+ for_each_child_of_node(child, grandchild) {
+ pdev = of_find_device_by_node(grandchild);
+ unittest(pdev,
"Could not create device for node '%pOFn'\n",
grandchild);
+ of_dev_put(pdev);
+ }
}
of_platform_depopulate(&test_bus->dev);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 952c48b0ed18919bff7528501e9a3fff8a24f8cd Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Mon, 16 Mar 2020 15:52:54 +0100
Subject: [PATCH] rbd: call rbd_dev_unprobe() after unwatching and flushing
notifies
rbd_dev_unprobe() is supposed to undo most of rbd_dev_image_probe(),
including rbd_dev_header_info(), which means that rbd_dev_header_info()
isn't supposed to be called after rbd_dev_unprobe().
However, rbd_dev_image_release() calls rbd_dev_unprobe() before
rbd_unregister_watch(). This is racy because a header update notify
can sneak in:
"rbd unmap" thread ceph-watch-notify worker
rbd_dev_image_release()
rbd_dev_unprobe()
free and zero out header
rbd_watch_cb()
rbd_dev_refresh()
rbd_dev_header_info()
read in header
The same goes for "rbd map" because rbd_dev_image_probe() calls
rbd_dev_unprobe() on errors. In both cases this results in a memory
leak.
Fixes: fd22aef8b47c ("rbd: move rbd_unregister_watch() call into rbd_dev_image_release()")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Jason Dillaman <dillaman(a)redhat.com>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ff2377e6d12c..7aec8bc5df6e 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -6898,9 +6898,10 @@ static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
- rbd_dev_unprobe(rbd_dev);
if (rbd_dev->opts)
rbd_unregister_watch(rbd_dev);
+
+ rbd_dev_unprobe(rbd_dev);
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
rbd_dev->spec->image_id = NULL;
@@ -6950,7 +6951,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (ret) {
if (ret == -ENOENT && !need_watch)
rbd_print_dne(rbd_dev, false);
- goto err_out_watch;
+ goto err_out_probe;
}
/*
@@ -6995,12 +6996,11 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
return 0;
err_out_probe:
- rbd_dev_unprobe(rbd_dev);
-err_out_watch:
if (!depth)
up_write(&rbd_dev->header_rwsem);
if (need_watch)
rbd_unregister_watch(rbd_dev);
+ rbd_dev_unprobe(rbd_dev);
err_out_format:
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 952c48b0ed18919bff7528501e9a3fff8a24f8cd Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Mon, 16 Mar 2020 15:52:54 +0100
Subject: [PATCH] rbd: call rbd_dev_unprobe() after unwatching and flushing
notifies
rbd_dev_unprobe() is supposed to undo most of rbd_dev_image_probe(),
including rbd_dev_header_info(), which means that rbd_dev_header_info()
isn't supposed to be called after rbd_dev_unprobe().
However, rbd_dev_image_release() calls rbd_dev_unprobe() before
rbd_unregister_watch(). This is racy because a header update notify
can sneak in:
"rbd unmap" thread ceph-watch-notify worker
rbd_dev_image_release()
rbd_dev_unprobe()
free and zero out header
rbd_watch_cb()
rbd_dev_refresh()
rbd_dev_header_info()
read in header
The same goes for "rbd map" because rbd_dev_image_probe() calls
rbd_dev_unprobe() on errors. In both cases this results in a memory
leak.
Fixes: fd22aef8b47c ("rbd: move rbd_unregister_watch() call into rbd_dev_image_release()")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Jason Dillaman <dillaman(a)redhat.com>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ff2377e6d12c..7aec8bc5df6e 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -6898,9 +6898,10 @@ static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
- rbd_dev_unprobe(rbd_dev);
if (rbd_dev->opts)
rbd_unregister_watch(rbd_dev);
+
+ rbd_dev_unprobe(rbd_dev);
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
rbd_dev->spec->image_id = NULL;
@@ -6950,7 +6951,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (ret) {
if (ret == -ENOENT && !need_watch)
rbd_print_dne(rbd_dev, false);
- goto err_out_watch;
+ goto err_out_probe;
}
/*
@@ -6995,12 +6996,11 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
return 0;
err_out_probe:
- rbd_dev_unprobe(rbd_dev);
-err_out_watch:
if (!depth)
up_write(&rbd_dev->header_rwsem);
if (need_watch)
rbd_unregister_watch(rbd_dev);
+ rbd_dev_unprobe(rbd_dev);
err_out_format:
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0e4e1de5b63fa423b13593337a27fd2d2b0bcf77 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Fri, 13 Mar 2020 11:20:51 +0100
Subject: [PATCH] rbd: avoid a deadlock on header_rwsem when flushing notifies
rbd_unregister_watch() flushes notifies and therefore cannot be called
under header_rwsem because a header update notify takes header_rwsem to
synchronize with "rbd map". If mapping an image fails after the watch
is established and a header update notify sneaks in, we deadlock when
erroring out from rbd_dev_image_probe().
Move watch registration and unregistration out of the critical section.
The only reason they were put there was to make header_rwsem management
slightly more obvious.
Fixes: 811c66887746 ("rbd: fix rbd map vs notify races")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Jason Dillaman <dillaman(a)redhat.com>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1e0a6b19ae0d..ff2377e6d12c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4527,6 +4527,10 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
cancel_work_sync(&rbd_dev->unlock_work);
}
+/*
+ * header_rwsem must not be held to avoid a deadlock with
+ * rbd_dev_refresh() when flushing notifies.
+ */
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
{
cancel_tasks_sync(rbd_dev);
@@ -6907,6 +6911,9 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* device. If this image is the one being mapped (i.e., not a
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
+ *
+ * On success, returns with header_rwsem held for write if called
+ * with @depth == 0.
*/
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
@@ -6936,6 +6943,9 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
}
}
+ if (!depth)
+ down_write(&rbd_dev->header_rwsem);
+
ret = rbd_dev_header_info(rbd_dev);
if (ret) {
if (ret == -ENOENT && !need_watch)
@@ -6987,6 +6997,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
+ if (!depth)
+ up_write(&rbd_dev->header_rwsem);
if (need_watch)
rbd_unregister_watch(rbd_dev);
err_out_format:
@@ -7050,12 +7062,9 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_rbd_dev;
}
- down_write(&rbd_dev->header_rwsem);
rc = rbd_dev_image_probe(rbd_dev, 0);
- if (rc < 0) {
- up_write(&rbd_dev->header_rwsem);
+ if (rc < 0)
goto err_out_rbd_dev;
- }
if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
rbd_warn(rbd_dev, "alloc_size adjusted to %u",
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0e4e1de5b63fa423b13593337a27fd2d2b0bcf77 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Fri, 13 Mar 2020 11:20:51 +0100
Subject: [PATCH] rbd: avoid a deadlock on header_rwsem when flushing notifies
rbd_unregister_watch() flushes notifies and therefore cannot be called
under header_rwsem because a header update notify takes header_rwsem to
synchronize with "rbd map". If mapping an image fails after the watch
is established and a header update notify sneaks in, we deadlock when
erroring out from rbd_dev_image_probe().
Move watch registration and unregistration out of the critical section.
The only reason they were put there was to make header_rwsem management
slightly more obvious.
Fixes: 811c66887746 ("rbd: fix rbd map vs notify races")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Jason Dillaman <dillaman(a)redhat.com>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1e0a6b19ae0d..ff2377e6d12c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4527,6 +4527,10 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
cancel_work_sync(&rbd_dev->unlock_work);
}
+/*
+ * header_rwsem must not be held to avoid a deadlock with
+ * rbd_dev_refresh() when flushing notifies.
+ */
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
{
cancel_tasks_sync(rbd_dev);
@@ -6907,6 +6911,9 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* device. If this image is the one being mapped (i.e., not a
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
+ *
+ * On success, returns with header_rwsem held for write if called
+ * with @depth == 0.
*/
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
@@ -6936,6 +6943,9 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
}
}
+ if (!depth)
+ down_write(&rbd_dev->header_rwsem);
+
ret = rbd_dev_header_info(rbd_dev);
if (ret) {
if (ret == -ENOENT && !need_watch)
@@ -6987,6 +6997,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
+ if (!depth)
+ up_write(&rbd_dev->header_rwsem);
if (need_watch)
rbd_unregister_watch(rbd_dev);
err_out_format:
@@ -7050,12 +7062,9 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_rbd_dev;
}
- down_write(&rbd_dev->header_rwsem);
rc = rbd_dev_image_probe(rbd_dev, 0);
- if (rc < 0) {
- up_write(&rbd_dev->header_rwsem);
+ if (rc < 0)
goto err_out_rbd_dev;
- }
if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
rbd_warn(rbd_dev, "alloc_size adjusted to %u",