Hi Greg,
Upstream commit a0040c014594 ("ACPI / PCI: Bail early in
acpi_pci_add_bus() if there is no ACPI handle ").
This patch fixes a NULL pointer access when ACPI_HANDLE() is NULL.
acpi_evaluate_dsm() fails as follows while there is no DSM in the tables.
ACPI: \: failed to evaluate _DSM (0x1001)
Sinan
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 44de022c4382541cebdd6de4465d1f4f465ff1dd Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso(a)mit.edu>
Date: Sun, 8 Jul 2018 19:35:02 -0400
Subject: [PATCH] ext4: fix false negatives *and* false positives in
ext4_check_descriptors()
Ext4_check_descriptors() was getting called before s_gdb_count was
initialized. So for file systems w/o the meta_bg feature, allocation
bitmaps could overlap the block group descriptors and ext4 wouldn't
notice.
For file systems with the meta_bg feature enabled, there was a
fencepost error which would cause the ext4_check_descriptors() to
incorrectly believe that the block allocation bitmap overlaps with the
block group descriptor blocks, and it would reject the mount.
Fix both of these problems.
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba2396a7bd04..eff5c983e067 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2342,7 +2342,7 @@ static int ext4_check_descriptors(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t last_block;
- ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
+ ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
ext4_fsblk_t block_bitmap;
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
@@ -4085,14 +4085,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
+ sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED;
goto failed_mount2;
}
- sbi->s_gdb_count = db_count;
-
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
/* Register extent status tree shrinker */
Fix trivial conflict that caused the original commit to bounce from 4.4,
4.9, and 4.14.
-- >8 --
commit 44de022c4382541cebdd6de4465d1f4f465ff1dd upstream.
Ext4_check_descriptors() was getting called before s_gdb_count was
initialized. So for file systems w/o the meta_bg feature, allocation
bitmaps could overlap the block group descriptors and ext4 wouldn't
notice.
For file systems with the meta_bg feature enabled, there was a
fencepost error which would cause the ext4_check_descriptors() to
incorrectly believe that the block allocation bitmap overlaps with the
block group descriptor blocks, and it would reject the mount.
Fix both of these problems.
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)vger.kernel.org
Signed-off-by: Benjamin Gilbert <bgilbert(a)redhat.com>
---
fs/ext4/super.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fc32a67a7a19..a500f55d421a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2301,7 +2301,7 @@ static int ext4_check_descriptors(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t last_block;
- ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
+ ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
ext4_fsblk_t block_bitmap;
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
@@ -4044,13 +4044,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
+ sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED;
goto failed_mount2;
}
- sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
--
2.17.1
Did you receive my email yesterday?
Just want to check if you have needs for photo editing for our studio?
We normally edit 300 images within 12-24 hours.
We take care of different kinds of e-commerce photos, jewelry images, and
portrait model images.
Including cutting out and clipping path and others, we also give retouching
for your images.
You may drop us a photo if you want to check our quality, we will provide
testing.
Thanks,
Roy
Since commit 63347db0affa ("ACPI / scan: Use acpi_bus_get_status() to
initialize ACPI_TYPE_DEVICE devs") the status field of normal acpi_devices
gets set to 0 by acpi_bus_type_and_status() and filled with its actual
value later when acpi_add_single_object() calls acpi_bus_get_status().
This means that status is 0 when acpi_device_enumeration_by_parent()
gets called which makes the acpi_match_device_ids() call in
acpi_is_indirect_io_slave() always return -ENOENT.
This commit fixes this by making acpi_bus_type_and_status() set status to
ACPI_STA_DEFAULT until acpi_add_single_object() calls acpi_bus_get_status()
Fixes: dfda4492322e ("ACPI / scan: Do not enumerate Indirect IO host children")
Cc: stable(a)vger.kernel.org
Cc: John Garry <john.garry(a)huawei.com>
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: Dann Frazier <dann.frazier(a)canonical.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/acpi/scan.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 970dd87d347c..6799d00dd790 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1612,7 +1612,8 @@ static int acpi_add_single_object(struct acpi_device **child,
* Note this must be done before the get power-/wakeup_dev-flags calls.
*/
if (type == ACPI_BUS_TYPE_DEVICE)
- acpi_bus_get_status(device);
+ if (acpi_bus_get_status(device) < 0)
+ acpi_set_device_status(device, 0);
acpi_bus_get_power_flags(device);
acpi_bus_get_wakeup_device_flags(device);
@@ -1690,7 +1691,7 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type,
* acpi_add_single_object updates this once we've an acpi_device
* so that acpi_bus_get_status' quirk handling can be used.
*/
- *sta = 0;
+ *sta = ACPI_STA_DEFAULT;
break;
case ACPI_TYPE_PROCESSOR:
*type = ACPI_BUS_TYPE_PROCESSOR;
--
2.18.0
Commit 3ac6d8c787b8 ("x86/entry/64: Clear registers for
exceptions/interrupts, to reduce speculation attack surface") unintendedly
broke Xen PV virtual machines by clearing the %rbx register at the end of
xen_failsafe_callback before the latter jumps to error_exit.
error_exit expects the %rbx register to be a flag indicating whether
there should be a return to kernel mode.
This commit makes sure that the %rbx register is not cleared by
the PUSH_AND_CLEAR_REGS macro, when the macro in question is instantiated
by xen_failsafe_callback, to avoid the issue.
The impact of this bug includes (and most likely is not limited to) kernel
BUGs when wine is run in Xen PV virtual machines. One example is included
below. This example depicts the bug when wine is used to run TeamViewer
version 13's portable version under a Xen PV virtual machine using an
up-to-date version of Qubes OS R3.2.
[...........] kernel tried to execute NX-protected page - exploit attempt? (uid: 1000)
[ +0.000021] BUG: unable to handle kernel paging request at ffffffff82012480
[ +0.000020] IP: init_task+0x0/0x2ac0
[ +0.000009] PGD 200e067 P4D 200e067 PUD 200f067 PMD 2d5e067 PTE 8010000002012067
[ +0.000019] Oops: 0011 [#1] SMP DEBUG_PAGEALLOC NOPTI
[ +0.000015] Modules linked in: fuse bluetooth ecdh_generic rfkill ip6table_filter ip6_tables xt_conntrack ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c intel_rapl x86_pkg_temp_thermal crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel xen_netfront intel_rapl_perf pcspkr binfmt_misc dummy_hcd udc_core xen_gntdev xen_gntalloc u2mfn(O) xen_blkback xenfs xen_evtchn xen_privcmd xen_blkfront
[ +0.000091] CPU: 0 PID: 1350 Comm: TeamViewer.exe Tainted: G O 4.14.20-11.d10d0bb86d #15
[ +0.000018] task: ffff8800042fda00 task.stack: ffffc900006f8000
[ +0.000015] RIP: e030:init_task+0x0/0x2ac0
[ +0.000009] RSP: e02b:ffffffff82003df8 EFLAGS: 00010002
[ +0.000012] RAX: 0000000000000040 RBX: 0000000000000004 RCX: 0000000000000000
[ +0.000015] RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffffffff810011aa
[ +0.000015] RBP: 000000000033eb88 R08: 0000000000000000 R09: 0000000000000000
[ +0.000015] R10: 0000000000000000 R11: ffff88000d772600 R12: 0000000000000000
[ +0.000015] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ +0.000027] FS: 000000007ffd8000(0063) GS:ffff88000f400000(006b) knlGS:0000000000000000
[ +0.000016] CS: e033 DS: 002b ES: 002b CR0: 0000000080050033
[ +0.000014] CR2: ffffffff82012480 CR3: 0000000004880000 CR4: 0000000000042660
[ +0.000025] Call Trace:
[ +0.000007] Code: ff ff ff d0 5a 1e 81 ff ff ff ff 00 00 00 00 00 00 00 00 e0 d7 04 82 ff ff ff ff e8 98 c0 0d 00 88 ff ff 01 80 00 00 00 00 00 00 <00> 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ +0.000068] RIP: init_task+0x0/0x2ac0 RSP: ffffffff82003df8
[ +0.000011] CR2: ffffffff82012480
[ +0.000010] ---[ end trace 7fb0075d4247b2a2 ]---
The commit that introduces this bug was found with git-bisect in conjunction with
some scripts and Qubes OS's cross-VM RPCs for automation, and the correction was
prepared after a manual inspection of the changes introduced by the commit in
question.
To the best of my recollection, this issue is also reproducible in an Ubuntu
18.04 LTS dom0 instance with the version of the Xen hypervisor in Ubuntu's
package repositories.
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Cc: Dominik Brodowski <linux(a)dominikbrodowski.net>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: xen-devel(a)lists.xenproject.org
Cc: x86(a)kernel.org
Cc: stable(a)vger.kernel.org # for v4.14 and up
Fixes: 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface")
---
arch/x86/entry/calling.h | 4 +++-
arch/x86/entry/entry_64.S | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 20d0885b00fb..71795767da94 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 clear_rbx=1
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
@@ -127,7 +127,9 @@ For 32-bit we have the following conventions - kernel is built with
pushq %r11 /* pt_regs->r11 */
xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
+ .if \clear_rbx
xorl %ebx, %ebx /* nospec rbx*/
+ .endif
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c7449f377a77..96e8ff34129e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1129,7 +1129,7 @@ ENTRY(xen_failsafe_callback)
addq $0x30, %rsp
UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
- PUSH_AND_CLEAR_REGS
+ PUSH_AND_CLEAR_REGS clear_rbx=0
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
--
2.17.1
Dear Client,
Please confirm if you are still alive because two gentle men
walked into my office this morning to claim your inheritance
funds with our bank. They said that you are dead and that they
are your
representative. I got your email from the file of your relative
who is yet to be paid for the Contract he has executed before his
death several years ago.You the beneficiary of this fund has
not been in contact with the bank to claim your fund. The
gentlemen submitted an address where they want your VISA DEBIT
ATM CARD sent.
If you are still alive, please indicate by sending your full
contact details within 7 day of receiving this message, faliure
to do so, I will send the card to the address submitted by your
representatives.
Regards
Derek
Greetings,
I represent business group in Middle East looking for projects to
fund; we seek any business that will guaranty a safe and secure
return
on investments. Alternative powers, movies, start up companies
etc. We
are also looking for commercial building projects, hotels,
casino,
strip malls etc.
If you have a project that differs from these and current budget
is
over $40M, Write us and Provide Executive Summary and project
details.
(a)100% financing is available.
(b)Possible JV partnerships or 100% loans.
(c)Project must be over $20M total budget.
I look forward to your reply,
Sir Abraham Oded
Sahara Petrochems Ltd
odoabraham5(a)gmail.com
I'm sure I don't need to tell you that fb_helper's locking is a mess.
That being said; fb_helper's locking mess can seriously complicate the
runtime suspend/resume operations of drivers because it can invoke
atomic commits and connector probing from anywhere that calls
drm_fb_helper_hotplug_event(). Since most drivers use
drm_fb_helper_output_poll_changed() as their output_poll_changed
handler, this can happen in every single context that can fire off a
hotplug event. An example:
[ 246.669625] INFO: task kworker/4:0:37 blocked for more than 120 seconds.
[ 246.673398] Not tainted 4.18.0-rc5Lyude-Test+ #2
[ 246.675271] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 246.676527] kworker/4:0 D 0 37 2 0x80000000
[ 246.677580] Workqueue: events output_poll_execute [drm_kms_helper]
[ 246.678704] Call Trace:
[ 246.679753] __schedule+0x322/0xaf0
[ 246.680916] schedule+0x33/0x90
[ 246.681924] schedule_preempt_disabled+0x15/0x20
[ 246.683023] __mutex_lock+0x569/0x9a0
[ 246.684035] ? kobject_uevent_env+0x117/0x7b0
[ 246.685132] ? drm_fb_helper_hotplug_event.part.28+0x20/0xb0 [drm_kms_helper]
[ 246.686179] mutex_lock_nested+0x1b/0x20
[ 246.687278] ? mutex_lock_nested+0x1b/0x20
[ 246.688307] drm_fb_helper_hotplug_event.part.28+0x20/0xb0 [drm_kms_helper]
[ 246.689420] drm_fb_helper_output_poll_changed+0x23/0x30 [drm_kms_helper]
[ 246.690462] drm_kms_helper_hotplug_event+0x2a/0x30 [drm_kms_helper]
[ 246.691570] output_poll_execute+0x198/0x1c0 [drm_kms_helper]
[ 246.692611] process_one_work+0x231/0x620
[ 246.693725] worker_thread+0x214/0x3a0
[ 246.694756] kthread+0x12b/0x150
[ 246.695856] ? wq_pool_ids_show+0x140/0x140
[ 246.696888] ? kthread_create_worker_on_cpu+0x70/0x70
[ 246.697998] ret_from_fork+0x3a/0x50
[ 246.699034] INFO: task kworker/0:1:60 blocked for more than 120 seconds.
[ 246.700153] Not tainted 4.18.0-rc5Lyude-Test+ #2
[ 246.701182] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 246.702278] kworker/0:1 D 0 60 2 0x80000000
[ 246.703293] Workqueue: pm pm_runtime_work
[ 246.704393] Call Trace:
[ 246.705403] __schedule+0x322/0xaf0
[ 246.706439] ? wait_for_completion+0x104/0x190
[ 246.707393] schedule+0x33/0x90
[ 246.708375] schedule_timeout+0x3a5/0x590
[ 246.709289] ? mark_held_locks+0x58/0x80
[ 246.710208] ? _raw_spin_unlock_irq+0x2c/0x40
[ 246.711222] ? wait_for_completion+0x104/0x190
[ 246.712134] ? trace_hardirqs_on_caller+0xf4/0x190
[ 246.713094] ? wait_for_completion+0x104/0x190
[ 246.713964] wait_for_completion+0x12c/0x190
[ 246.714895] ? wake_up_q+0x80/0x80
[ 246.715727] ? get_work_pool+0x90/0x90
[ 246.716649] flush_work+0x1c9/0x280
[ 246.717483] ? flush_workqueue_prep_pwqs+0x1b0/0x1b0
[ 246.718442] __cancel_work_timer+0x146/0x1d0
[ 246.719247] cancel_delayed_work_sync+0x13/0x20
[ 246.720043] drm_kms_helper_poll_disable+0x1f/0x30 [drm_kms_helper]
[ 246.721123] nouveau_pmops_runtime_suspend+0x3d/0xb0 [nouveau]
[ 246.721897] pci_pm_runtime_suspend+0x6b/0x190
[ 246.722825] ? pci_has_legacy_pm_support+0x70/0x70
[ 246.723737] __rpm_callback+0x7a/0x1d0
[ 246.724721] ? pci_has_legacy_pm_support+0x70/0x70
[ 246.725607] rpm_callback+0x24/0x80
[ 246.726553] ? pci_has_legacy_pm_support+0x70/0x70
[ 246.727376] rpm_suspend+0x142/0x6b0
[ 246.728185] pm_runtime_work+0x97/0xc0
[ 246.728938] process_one_work+0x231/0x620
[ 246.729796] worker_thread+0x44/0x3a0
[ 246.730614] kthread+0x12b/0x150
[ 246.731395] ? wq_pool_ids_show+0x140/0x140
[ 246.732202] ? kthread_create_worker_on_cpu+0x70/0x70
[ 246.732878] ret_from_fork+0x3a/0x50
[ 246.733768] INFO: task kworker/4:2:422 blocked for more than 120 seconds.
[ 246.734587] Not tainted 4.18.0-rc5Lyude-Test+ #2
[ 246.735393] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 246.736113] kworker/4:2 D 0 422 2 0x80000080
[ 246.736789] Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper]
[ 246.737665] Call Trace:
[ 246.738490] __schedule+0x322/0xaf0
[ 246.739250] schedule+0x33/0x90
[ 246.739908] rpm_resume+0x19c/0x850
[ 246.740750] ? finish_wait+0x90/0x90
[ 246.741541] __pm_runtime_resume+0x4e/0x90
[ 246.742370] nv50_disp_atomic_commit+0x31/0x210 [nouveau]
[ 246.743124] drm_atomic_commit+0x4a/0x50 [drm]
[ 246.743775] restore_fbdev_mode_atomic+0x1c8/0x240 [drm_kms_helper]
[ 246.744603] restore_fbdev_mode+0x31/0x140 [drm_kms_helper]
[ 246.745373] drm_fb_helper_restore_fbdev_mode_unlocked+0x54/0xb0 [drm_kms_helper]
[ 246.746220] drm_fb_helper_set_par+0x2d/0x50 [drm_kms_helper]
[ 246.746884] drm_fb_helper_hotplug_event.part.28+0x96/0xb0 [drm_kms_helper]
[ 246.747675] drm_fb_helper_output_poll_changed+0x23/0x30 [drm_kms_helper]
[ 246.748544] drm_kms_helper_hotplug_event+0x2a/0x30 [drm_kms_helper]
[ 246.749439] nv50_mstm_hotplug+0x15/0x20 [nouveau]
[ 246.750111] drm_dp_send_link_address+0x177/0x1c0 [drm_kms_helper]
[ 246.750764] drm_dp_check_and_send_link_address+0xa8/0xd0 [drm_kms_helper]
[ 246.751602] drm_dp_mst_link_probe_work+0x51/0x90 [drm_kms_helper]
[ 246.752314] process_one_work+0x231/0x620
[ 246.752979] worker_thread+0x44/0x3a0
[ 246.753838] kthread+0x12b/0x150
[ 246.754619] ? wq_pool_ids_show+0x140/0x140
[ 246.755386] ? kthread_create_worker_on_cpu+0x70/0x70
[ 246.756162] ret_from_fork+0x3a/0x50
[ 246.756847]
Showing all locks held in the system:
[ 246.758261] 3 locks held by kworker/4:0/37:
[ 246.759016] #0: 00000000f8df4d2d ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620
[ 246.759856] #1: 00000000e6065461 ((work_completion)(&(&dev->mode_config.output_poll_work)->work)){+.+.}, at: process_one_work+0x1b3/0x620
[ 246.760670] #2: 00000000cb66735f (&helper->lock){+.+.}, at: drm_fb_helper_hotplug_event.part.28+0x20/0xb0 [drm_kms_helper]
[ 246.761516] 2 locks held by kworker/0:1/60:
[ 246.762274] #0: 00000000fff6be0f ((wq_completion)"pm"){+.+.}, at: process_one_work+0x1b3/0x620
[ 246.762982] #1: 000000005ab44fb4 ((work_completion)(&dev->power.work)){+.+.}, at: process_one_work+0x1b3/0x620
[ 246.763890] 1 lock held by khungtaskd/64:
[ 246.764664] #0: 000000008cb8b5c3 (rcu_read_lock){....}, at: debug_show_all_locks+0x23/0x185
[ 246.765588] 5 locks held by kworker/4:2/422:
[ 246.766440] #0: 00000000232f0959 ((wq_completion)"events_long"){+.+.}, at: process_one_work+0x1b3/0x620
[ 246.767390] #1: 00000000bb59b134 ((work_completion)(&mgr->work)){+.+.}, at: process_one_work+0x1b3/0x620
[ 246.768154] #2: 00000000cb66735f (&helper->lock){+.+.}, at: drm_fb_helper_restore_fbdev_mode_unlocked+0x4c/0xb0 [drm_kms_helper]
[ 246.768966] #3: 000000004c8f0b6b (crtc_ww_class_acquire){+.+.}, at: restore_fbdev_mode_atomic+0x4b/0x240 [drm_kms_helper]
[ 246.769921] #4: 000000004c34a296 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_backoff+0x8a/0x1b0 [drm]
[ 246.770839] 1 lock held by dmesg/1038:
[ 246.771739] 2 locks held by zsh/1172:
[ 246.772650] #0: 00000000836d0438 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x37/0x40
[ 246.773680] #1: 000000001f4f4d48 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0xc1/0x870
[ 246.775522] =============================================
Because of this, there's an unreasonable number of places that drm
drivers would need to insert special handling to prevent trying to
resume the device from all of these contexts that can deadlock. It's
difficult even to try synchronizing with fb_helper in these contexts as
well, since any of them could introduce a deadlock by waiting to acquire
the top-level fb_helper mutex, while it's being held by another thread
that might potentially call down to pm_runtime_get_sync().
Luckily-there's no actual reason we need to allow fb_helper to handle
hotplugging at all when runtime suspending a device. If a hotplug
happens during a runtime suspend operation, there's no reason the driver
can't just re-enable fbcon's hotplug handling and bring it up to speed
with hotplugging events it may have missed by calling
drm_fb_helper_hotplug_event().
So, let's make this easy and just add helpers to handle disabling and
enabling fb_helper connector probing() without having to potentially
wait on fb_helper to finish it's work. This will let us fix the runtime
suspend/resume deadlocks that we've been experiencing with nouveau,
along with being able to fix some of the incorrect runtime PM core
interaction that other DRM drivers currently perform to work around
these issues.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Cc: stable(a)vger.kernel.org
Cc: Lukas Wunner <lukas(a)wunner.de>
Cc: Karol Herbst <karolherbst(a)gmail.com>
---
drivers/gpu/drm/drm_fb_helper.c | 65 +++++++++++++++++++++++++++++++++
include/drm/drm_fb_helper.h | 20 ++++++++++
2 files changed, 85 insertions(+)
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 2ee1eaa66188..28d59befbc92 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2733,6 +2733,66 @@ int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel)
}
EXPORT_SYMBOL(drm_fb_helper_initial_config);
+/**
+ * drm_fb_helper_resume_hotplug - Allow fb_helper to handle probing new
+ * connectors again, and bring it up to date
+ * with the current device's connector status
+ * fb_helper: driver-allocated fbdev helper, can be NULL
+ *
+ * Allow fb_helper to react to connector status changes again after having
+ * been disabled through drm_fb_helper_suspend_hotplug(). This also schedules
+ * a fb_helper hotplug event to bring fb_helper up to date with the current
+ * status of the DRM device's connectors.
+ */
+void
+drm_fb_helper_resume_hotplug(struct drm_fb_helper *fb_helper)
+{
+ fb_helper->hotplug_suspended = false;
+ drm_fb_helper_hotplug_event(fb_helper);
+}
+EXPORT_SYMBOL(drm_fb_helper_resume_hotplug);
+
+/**
+ * drm_fb_helper_suspend_hotplug - Attempt to temporarily inhibit fb_helper's
+ * ability to respond to connector changes
+ * without waiting
+ * @fb_helper: driver-allocated fbdev helper, can be NULL
+ *
+ * Temporarily prevent fb_helper from being able to handle connector changes,
+ * but only if it isn't busy doing something else. The connector probing
+ * routines in fb_helper can potentially grab any modesetting lock imaginable,
+ * which dramatically complicates the runtime suspend process. This helper can
+ * be used to simplify the process of runtime suspend by allowing the driver
+ * to disable unpredictable fb_helper operations as early as possible, without
+ * requiring that we try waiting on fb_helper (as this could lead to very
+ * difficult to solve deadlocks with runtime suspend code if fb_helper ends up
+ * needing to acquire a runtime PM reference).
+ *
+ * This call should be put at the very start of a driver's runtime suspend
+ * operation if desired. The driver must be responsible for re-enabling
+ * fb_helper hotplug handling when normal hotplug detection becomes available
+ * on the device again. This will usually happen if runtime suspend is
+ * aborted, or when the device is runtime resumed.
+ *
+ * Returns: true if hotplug handling was disabled, false if disabling hotplug
+ * handling would mean waiting on fb_helper.
+ */
+bool
+drm_fb_helper_suspend_hotplug(struct drm_fb_helper *fb_helper)
+{
+ int ret;
+
+ ret = mutex_trylock(&fb_helper->lock);
+ if (!ret)
+ return false;
+
+ fb_helper->hotplug_suspended = true;
+ mutex_unlock(&fb_helper->lock);
+
+ return true;
+}
+EXPORT_SYMBOL(drm_fb_helper_suspend_hotplug);
+
/**
* drm_fb_helper_hotplug_event - respond to a hotplug notification by
* probing all the outputs attached to the fb
@@ -2774,6 +2834,11 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
return err;
}
+ if (fb_helper->hotplug_suspended) {
+ mutex_unlock(&fb_helper->lock);
+ return err;
+ }
+
DRM_DEBUG_KMS("\n");
drm_setup_crtcs(fb_helper, fb_helper->fb->width, fb_helper->fb->height);
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index b069433e7fc1..30881131075c 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -232,6 +232,14 @@ struct drm_fb_helper {
* See also: @deferred_setup
*/
int preferred_bpp;
+
+ /**
+ * @hotplug_suspended:
+ *
+ * Whether or not we can currently handle hotplug events, or if we
+ * need to wait for the DRM device to uninhibit us.
+ */
+ bool hotplug_suspended;
};
/**
@@ -330,6 +338,10 @@ void drm_fb_helper_fbdev_teardown(struct drm_device *dev);
void drm_fb_helper_lastclose(struct drm_device *dev);
void drm_fb_helper_output_poll_changed(struct drm_device *dev);
+
+void drm_fb_helper_resume_hotplug(struct drm_fb_helper *fb_helper);
+bool drm_fb_helper_suspend_hotplug(struct drm_fb_helper *fb_helper);
+
#else
static inline void drm_fb_helper_prepare(struct drm_device *dev,
struct drm_fb_helper *helper,
@@ -564,6 +576,14 @@ static inline void drm_fb_helper_output_poll_changed(struct drm_device *dev)
{
}
+static inline void
+drm_fb_helper_resume_hotplug(struct drm_fb_helper *fb_helper)
+{
+}
+static inline bool
+drm_fb_helper_suspend_hotplug(struct drm_fb_helper *fb_helper)
+{
+}
#endif
static inline int
--
2.17.1