July 2018 - Linux-stable-mirror

[PATCH v3 8/8] drm/nouveau: Call pm_runtime_get_noresume() from hpd handlers

by Lyude Paul

We can't and don't need to try resuming the device from our hotplug handlers, but hotplug events are generally something we'd like to keep the device awake for whenever possible. So, grab a PM ref safely in our hotplug handlers using pm_runtime_get_noresume() and mark the device as busy once we're finished. Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: stable(a)vger.kernel.org Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Karol Herbst <karolherbst(a)gmail.com> --- drivers/gpu/drm/nouveau/nouveau_connector.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 8409c3f2c3a1..5a8e8c1ad647 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1152,6 +1152,11 @@ nouveau_connector_hotplug(struct nvif_notify *notify) const char *name = connector->name; struct nouveau_encoder *nv_encoder; + /* Resuming the device here isn't possible; but the suspend PM ops + * will wait for us to finish our work before disabling us so this + * should be enough + */ + pm_runtime_get_noresume(drm->dev->dev); nv_connector->hpd_task = current; if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { @@ -1171,6 +1176,9 @@ nouveau_connector_hotplug(struct nvif_notify *notify) } nv_connector->hpd_task = NULL; + + pm_runtime_mark_last_busy(drm->dev->dev); + pm_runtime_put_autosuspend(drm->dev->dev); return NVIF_NOTIFY_KEEP; } -- 2.17.1

7 years

1
0
0 0

[PATCH v3 7/8] drm/nouveau: Fix deadlocks in nouveau_connector_detect()

by Lyude Paul

When we disable hotplugging on the GPU, we need to be able to synchronize with each connector's hotplug interrupt handler before the interrupt is finally disabled. This can be a problem however, since nouveau_connector_detect() currently grabs a runtime power reference when handling connector probing. This will deadlock the runtime suspend handler like so: [ 861.480896] INFO: task kworker/0:2:61 blocked for more than 120 seconds. [ 861.483290] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.485158] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.486332] kworker/0:2 D 0 61 2 0x80000000 [ 861.487044] Workqueue: events nouveau_display_hpd_work [nouveau] [ 861.487737] Call Trace: [ 861.488394] __schedule+0x322/0xaf0 [ 861.489070] schedule+0x33/0x90 [ 861.489744] rpm_resume+0x19c/0x850 [ 861.490392] ? finish_wait+0x90/0x90 [ 861.491068] __pm_runtime_resume+0x4e/0x90 [ 861.491753] nouveau_display_hpd_work+0x22/0x60 [nouveau] [ 861.492416] process_one_work+0x231/0x620 [ 861.493068] worker_thread+0x44/0x3a0 [ 861.493722] kthread+0x12b/0x150 [ 861.494342] ? wq_pool_ids_show+0x140/0x140 [ 861.494991] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.495648] ret_from_fork+0x3a/0x50 [ 861.496304] INFO: task kworker/6:2:320 blocked for more than 120 seconds. [ 861.496968] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.497654] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.498341] kworker/6:2 D 0 320 2 0x80000080 [ 861.499045] Workqueue: pm pm_runtime_work [ 861.499739] Call Trace: [ 861.500428] __schedule+0x322/0xaf0 [ 861.501134] ? wait_for_completion+0x104/0x190 [ 861.501851] schedule+0x33/0x90 [ 861.502564] schedule_timeout+0x3a5/0x590 [ 861.503284] ? mark_held_locks+0x58/0x80 [ 861.503988] ? _raw_spin_unlock_irq+0x2c/0x40 [ 861.504710] ? wait_for_completion+0x104/0x190 [ 861.505417] ? trace_hardirqs_on_caller+0xf4/0x190 [ 861.506136] ? wait_for_completion+0x104/0x190 [ 861.506845] wait_for_completion+0x12c/0x190 [ 861.507555] ? wake_up_q+0x80/0x80 [ 861.508268] flush_work+0x1c9/0x280 [ 861.508990] ? flush_workqueue_prep_pwqs+0x1b0/0x1b0 [ 861.509735] nvif_notify_put+0xb1/0xc0 [nouveau] [ 861.510482] nouveau_display_fini+0xbd/0x170 [nouveau] [ 861.511241] nouveau_display_suspend+0x67/0x120 [nouveau] [ 861.511969] nouveau_do_suspend+0x5e/0x2d0 [nouveau] [ 861.512715] nouveau_pmops_runtime_suspend+0x47/0xb0 [nouveau] [ 861.513435] pci_pm_runtime_suspend+0x6b/0x180 [ 861.514165] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.514897] __rpm_callback+0x7a/0x1d0 [ 861.515618] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.516313] rpm_callback+0x24/0x80 [ 861.517027] ? pci_has_legacy_pm_support+0x70/0x70 [ 861.517741] rpm_suspend+0x142/0x6b0 [ 861.518449] pm_runtime_work+0x97/0xc0 [ 861.519144] process_one_work+0x231/0x620 [ 861.519831] worker_thread+0x44/0x3a0 [ 861.520522] kthread+0x12b/0x150 [ 861.521220] ? wq_pool_ids_show+0x140/0x140 [ 861.521925] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.522622] ret_from_fork+0x3a/0x50 [ 861.523299] INFO: task kworker/6:0:1329 blocked for more than 120 seconds. [ 861.523977] Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.524644] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 861.525349] kworker/6:0 D 0 1329 2 0x80000000 [ 861.526073] Workqueue: events nvif_notify_work [nouveau] [ 861.526751] Call Trace: [ 861.527411] __schedule+0x322/0xaf0 [ 861.528089] schedule+0x33/0x90 [ 861.528758] rpm_resume+0x19c/0x850 [ 861.529399] ? finish_wait+0x90/0x90 [ 861.530073] __pm_runtime_resume+0x4e/0x90 [ 861.530798] nouveau_connector_detect+0x7e/0x510 [nouveau] [ 861.531459] ? ww_mutex_lock+0x47/0x80 [ 861.532097] ? ww_mutex_lock+0x47/0x80 [ 861.532819] ? drm_modeset_lock+0x88/0x130 [drm] [ 861.533481] drm_helper_probe_detect_ctx+0xa0/0x100 [drm_kms_helper] [ 861.534127] drm_helper_hpd_irq_event+0xa4/0x120 [drm_kms_helper] [ 861.534940] nouveau_connector_hotplug+0x98/0x120 [nouveau] [ 861.535556] nvif_notify_work+0x2d/0xb0 [nouveau] [ 861.536221] process_one_work+0x231/0x620 [ 861.536994] worker_thread+0x44/0x3a0 [ 861.537757] kthread+0x12b/0x150 [ 861.538463] ? wq_pool_ids_show+0x140/0x140 [ 861.539102] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.539815] ret_from_fork+0x3a/0x50 [ 861.540521] Showing all locks held in the system: [ 861.541696] 2 locks held by kworker/0:2/61: [ 861.542406] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.543071] #1: 0000000076868126 ((work_completion)(&drm->hpd_work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.543814] 1 lock held by khungtaskd/64: [ 861.544535] #0: 0000000059db4b53 (rcu_read_lock){....}, at: debug_show_all_locks+0x23/0x185 [ 861.545160] 3 locks held by kworker/6:2/320: [ 861.545896] #0: 00000000d9e1bc59 ((wq_completion)"pm"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.546702] #1: 00000000c9f92d84 ((work_completion)(&dev->power.work)){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.547443] #2: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: nouveau_display_fini+0x96/0x170 [nouveau] [ 861.548146] 1 lock held by dmesg/983: [ 861.548889] 2 locks held by zsh/1250: [ 861.549605] #0: 00000000348e3cf6 (&tty->ldisc_sem){++++}, at: ldsem_down_read+0x37/0x40 [ 861.550393] #1: 000000007009a7a8 (&ldata->atomic_read_lock){+.+.}, at: n_tty_read+0xc1/0x870 [ 861.551122] 6 locks held by kworker/6:0/1329: [ 861.551957] #0: 000000002dbf8af5 ((wq_completion)"events"){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.552765] #1: 00000000ddb499ad ((work_completion)(&notify->work)#2){+.+.}, at: process_one_work+0x1b3/0x620 [ 861.553582] #2: 000000006e013cbe (&dev->mode_config.mutex){+.+.}, at: drm_helper_hpd_irq_event+0x6c/0x120 [drm_kms_helper] [ 861.554357] #3: 000000004afc5de1 (drm_connector_list_iter){.+.+}, at: drm_helper_hpd_irq_event+0x78/0x120 [drm_kms_helper] [ 861.555227] #4: 0000000044f294d9 (crtc_ww_class_acquire){+.+.}, at: drm_helper_probe_detect_ctx+0x3d/0x100 [drm_kms_helper] [ 861.556133] #5: 00000000db193642 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_lock+0x4b/0x130 [drm] [ 861.557864] ============================================= [ 861.559507] NMI backtrace for cpu 2 [ 861.560363] CPU: 2 PID: 64 Comm: khungtaskd Tainted: G O 4.18.0-rc6Lyude-Test+ #1 [ 861.561197] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET78W (1.51 ) 05/18/2018 [ 861.561948] Call Trace: [ 861.562757] dump_stack+0x8e/0xd3 [ 861.563516] nmi_cpu_backtrace.cold.3+0x14/0x5a [ 861.564269] ? lapic_can_unplug_cpu.cold.27+0x42/0x42 [ 861.565029] nmi_trigger_cpumask_backtrace+0xa1/0xae [ 861.565789] arch_trigger_cpumask_backtrace+0x19/0x20 [ 861.566558] watchdog+0x316/0x580 [ 861.567355] kthread+0x12b/0x150 [ 861.568114] ? reset_hung_task_detector+0x20/0x20 [ 861.568863] ? kthread_create_worker_on_cpu+0x70/0x70 [ 861.569598] ret_from_fork+0x3a/0x50 [ 861.570370] Sending NMI from CPU 2 to CPUs 0-1,3-7: [ 861.571426] NMI backtrace for cpu 6 skipped: idling at intel_idle+0x7f/0x120 [ 861.571429] NMI backtrace for cpu 7 skipped: idling at intel_idle+0x7f/0x120 [ 861.571432] NMI backtrace for cpu 3 skipped: idling at intel_idle+0x7f/0x120 [ 861.571464] NMI backtrace for cpu 5 skipped: idling at intel_idle+0x7f/0x120 [ 861.571467] NMI backtrace for cpu 0 skipped: idling at intel_idle+0x7f/0x120 [ 861.571469] NMI backtrace for cpu 4 skipped: idling at intel_idle+0x7f/0x120 [ 861.571472] NMI backtrace for cpu 1 skipped: idling at intel_idle+0x7f/0x120 [ 861.572428] Kernel panic - not syncing: hung_task: blocked tasks So: fix this with a new trick; store the current task_struct that's executing in the nouveau_connector structure, then avoid attempting to runtime resume the device when we know that we're just running from the context of our hotplug interrupt handler. Since hpd interrupts are only enabled while the device is runtime active, this should be totally safe. Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: stable(a)vger.kernel.org Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Karol Herbst <karolherbst(a)gmail.com> --- drivers/gpu/drm/nouveau/nouveau_connector.c | 16 ++++++++++------ drivers/gpu/drm/nouveau/nouveau_connector.h | 1 + 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 9714e09f17db..8409c3f2c3a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -572,13 +572,14 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - /* Outputs are only polled while runtime active, so resuming the - * device here is unnecessary (and would deadlock upon runtime suspend - * because it waits for polling to finish). We do however, want to - * prevent the autosuspend timer from elapsing during this operation - * if possible. + /* Output polling and HPD only happens while we're runtime active, so + * resuming the device here is unnecessary (and would deadlock upon + * runtime suspend because it waits for polling to finish). We do + * however, want to prevent the autosuspend timer from elapsing during + * this operation if possible. */ - if (drm_kms_helper_is_poll_worker()) { + if (drm_kms_helper_is_poll_worker() || + nv_connector->hpd_task == current) { pm_runtime_get_noresume(dev->dev); } else { ret = pm_runtime_get_sync(dev->dev); @@ -1151,6 +1152,8 @@ nouveau_connector_hotplug(struct nvif_notify *notify) const char *name = connector->name; struct nouveau_encoder *nv_encoder; + nv_connector->hpd_task = current; + if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) { NV_DEBUG(drm, "service %s\n", name); if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) @@ -1167,6 +1170,7 @@ nouveau_connector_hotplug(struct nvif_notify *notify) nouveau_connector_hotplug_probe(nv_connector); } + nv_connector->hpd_task = NULL; return NVIF_NOTIFY_KEEP; } diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index 2d9d35a146a4..1964e682ba13 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -45,6 +45,7 @@ struct nouveau_connector { u8 *dcb; struct nvif_notify hpd; + struct task_struct *hpd_task; struct drm_dp_aux aux; -- 2.17.1

7 years

1
0
0 0

[PATCH v3 6/8] drm/nouveau: Respond to HPDs by probing one conn at a time

by Lyude Paul

There isn't actually any reason we need to call drm_hpd_irq_event() from our hotplug handler, as we already know which connector the hotplug event was fired for. We're also going to need to avoid probing all connectors needlessly from hotplug handlers anyway so that we can track when nouveau_connector_detect() is being called from the context of it's connector's hotplug handler in order to fix the next deadlocking issue. This is (slightly) faster anyway! Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: stable(a)vger.kernel.org Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Karol Herbst <karolherbst(a)gmail.com> --- drivers/gpu/drm/nouveau/nouveau_connector.c | 28 ++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 010d6db14cba..9714e09f17db 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1114,6 +1114,32 @@ nouveau_connector_funcs_lvds = { .atomic_get_property = nouveau_conn_atomic_get_property, }; +static void +nouveau_connector_hotplug_probe(struct nouveau_connector *nv_conn) +{ + struct drm_modeset_acquire_ctx ctx; + struct drm_connector *conn = &nv_conn->base; + enum drm_connector_status old_status; + struct drm_device *dev = conn->dev; + bool changed; + + mutex_lock(&dev->mode_config.mutex); + + drm_modeset_acquire_init(&ctx, 0); + drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + + old_status = conn->status; + conn->status = drm_helper_probe_detect(conn, &ctx, true); + changed = old_status != conn->status; + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + mutex_unlock(&dev->mode_config.mutex); + + if (changed) + drm_kms_helper_hotplug_event(dev); +} + static int nouveau_connector_hotplug(struct nvif_notify *notify) { @@ -1138,7 +1164,7 @@ nouveau_connector_hotplug(struct nvif_notify *notify) nv50_mstm_remove(nv_encoder->dp.mstm); } - drm_helper_hpd_irq_event(connector->dev); + nouveau_connector_hotplug_probe(nv_connector); } return NVIF_NOTIFY_KEEP; -- 2.17.1

7 years

1
0
0 0

[PATCH v3 5/8] drm/nouveau: Use pm_runtime_get_noresume() in connector_detect()

by Lyude Paul

It's true we can't resume the device from poll workers in nouveau_connector_detect(). We can however, prevent the autosuspend timer from elapsing immediately if it hasn't already without risking any sort of deadlock with the runtime suspend/resume operations. So do that instead of entirely avoiding grabbing a power reference. Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: stable(a)vger.kernel.org Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Karol Herbst <karolherbst(a)gmail.com> --- drivers/gpu/drm/nouveau/nouveau_connector.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 2a45b4c2ceb0..010d6db14cba 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -572,12 +572,16 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - /* Outputs are only polled while runtime active, so acquiring a - * runtime PM ref here is unnecessary (and would deadlock upon - * runtime suspend because it waits for polling to finish). + /* Outputs are only polled while runtime active, so resuming the + * device here is unnecessary (and would deadlock upon runtime suspend + * because it waits for polling to finish). We do however, want to + * prevent the autosuspend timer from elapsing during this operation + * if possible. */ - if (!drm_kms_helper_is_poll_worker()) { - ret = pm_runtime_get_sync(connector->dev->dev); + if (drm_kms_helper_is_poll_worker()) { + pm_runtime_get_noresume(dev->dev); + } else { + ret = pm_runtime_get_sync(dev->dev); if (ret < 0 && ret != -EACCES) return conn_status; } @@ -655,10 +659,8 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) out: - if (!drm_kms_helper_is_poll_worker()) { - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); - } + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); return conn_status; } -- 2.17.1

7 years

1
0
0 0

[PATCH v3 4/8] drm/nouveau: Fix deadlock with fb_helper using new helpers

by Lyude Paul

This removes the potential of deadlocking with fb_helper entirely by preventing it from handling hotplugs during the runtime suspend process as early as possible in the suspend process. If it turns out this is not possible, due to some fb_helper action having been queued up before we got a time to disable hotplugging, we simply return -EBUSY so that the runtime PM core attempts autosuspending the device again once fb_helper isn't doing anything. This fixes one of the issues causing deadlocks on runtime suspend/resume with nouveau on my P50. Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: stable(a)vger.kernel.org Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Karol Herbst <karolherbst(a)gmail.com> --- drivers/gpu/drm/nouveau/nouveau_drm.c | 8 ++++++++ drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index ee2546db09c9..d47cb5b2af98 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -836,6 +836,14 @@ nouveau_pmops_runtime_suspend(struct device *dev) return -EBUSY; } + /* There's no way for us to stop fb_helper work in reaction to + * hotplugs later in the RPM process. First off: we don't want to, + * fb_helper should be able to keep the GPU awake. Second off: it is + * capable of grabbing basically any lock in existence. + */ + if (!drm_fb_helper_suspend_hotplug(drm_dev->fb_helper)) + return -EBUSY; + nouveau_switcheroo_optimus_dsm(); ret = nouveau_do_suspend(drm_dev, true); pci_save_state(pdev); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 85c1f10bc2b6..963ba630fd04 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -466,6 +466,7 @@ nouveau_fbcon_set_suspend_work(struct work_struct *work) console_unlock(); if (state == FBINFO_STATE_RUNNING) { + drm_fb_helper_resume_hotplug(drm->dev->fb_helper); pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_sync(drm->dev->dev); } -- 2.17.1

7 years

1
0
0 0

[PATCH v3 2/8] drm/nouveau: Enable polling even if we have runtime PM

by Lyude Paul

Having runtime PM makes no difference on whether or not we want polling, and it's now safe to just enable polling unconditionally in drm_load() thanks to d61a5c106351 ("drm/nouveau: Fix deadlock on runtime suspend") Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Peter Ujfalusi <peter.ujfalusi(a)ti.com> Cc: stable(a)vger.kernel.org --- drivers/gpu/drm/nouveau/nouveau_drm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 5fdc1fbe2ee5..ee2546db09c9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -592,10 +592,11 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) pm_runtime_allow(dev->dev); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put(dev->dev); - } else { - /* enable polling for external displays */ - drm_kms_helper_poll_enable(dev); } + + /* enable polling for connectors without hpd */ + drm_kms_helper_poll_enable(dev); + return 0; fail_dispinit: -- 2.17.1

7 years

1
0
0 0

[PATCH v3 1/8] drm/nouveau: Fix bogus drm_kms_helper_poll_enable() placement

by Lyude Paul

Turns out this part is my fault for not noticing when reviewing 9a2eba337cace ("drm/nouveau: Fix drm poll_helper handling"). Currently we call drm_kms_helper_poll_enable() from nouveau_display_hpd_work(). This makes basically no sense however, because that means we're calling drm_kms_helper_poll_enable() every time we schedule the hotplug detection work. This is also against the advice mentioned in drm_kms_helper_poll_enable()'s documentation: Note that calls to enable and disable polling must be strictly ordered, which is automatically the case when they're only call from suspend/resume callbacks. Of course, hotplugs can't really be ordered. They could even happen immediately after we called drm_kms_helper_poll_disable() in nouveau_display_fini(), which can lead to all sorts of issues. Additionally; enabling polling /after/ we call drm_helper_hpd_irq_event() could also mean that we'd miss a hotplug event anyway, since drm_helper_hpd_irq_event() wouldn't bother trying to probe connectors so long as polling is disabled. So; simply move this back into nouveau_display_init() again. The race condition that both of these patches attempted to work around has already been fixed properly in d61a5c106351 ("drm/nouveau: Fix deadlock on runtime suspend") Fixes: 9a2eba337cace ("drm/nouveau: Fix drm poll_helper handling") Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: Lukas Wunner <lukas(a)wunner.de> Cc: Peter Ujfalusi <peter.ujfalusi(a)ti.com> Cc: stable(a)vger.kernel.org --- drivers/gpu/drm/nouveau/nouveau_display.c | 7 +++++-- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 - 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index ec7861457b84..1d36ab5d4796 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -355,8 +355,6 @@ nouveau_display_hpd_work(struct work_struct *work) pm_runtime_get_sync(drm->dev->dev); drm_helper_hpd_irq_event(drm->dev); - /* enable polling for external displays */ - drm_kms_helper_poll_enable(drm->dev); pm_runtime_mark_last_busy(drm->dev->dev); pm_runtime_put_sync(drm->dev->dev); @@ -411,6 +409,11 @@ nouveau_display_init(struct drm_device *dev) if (ret) return ret; + /* enable connector detection and polling for connectors without HPD + * support + */ + drm_kms_helper_poll_enable(dev); + /* enable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index c7ec86d6c3c9..5fdc1fbe2ee5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -835,7 +835,6 @@ nouveau_pmops_runtime_suspend(struct device *dev) return -EBUSY; } - drm_kms_helper_poll_disable(drm_dev); nouveau_switcheroo_optimus_dsm(); ret = nouveau_do_suspend(drm_dev, true); pci_save_state(pdev); -- 2.17.1

7 years

1
0
0 0

request for 4.14-stable: 520e18a5080d ("ACPI / APEI: Remove ghes_ioremap_area")

by Sudip Mukherjee

Hi Greg, This was missing in 4.14-stable. Though marked for stable, but not sure if it matches the stable rules. Please apply to your queue if it does. -- Regards Sudip

7 years

1
0
0 0

Missing signature-files for 4.9.116/4.14.59

by Rainer Fiebig

Hi! Seems the .sign-files for 4.9.116 and 4.14.59 are missing in https://mirrors.edge.kernel.org/pub/linux/kernel/v4.x/ - so my script complained. So long! Rainer Fiebig -- The truth always turns out to be simpler than you thought. Richard Feynman

7 years

2
2
0 0

[PATCH, RESEND] Avoid that SCSI device removal through sysfs triggers a deadlock

by Bart Van Assche

This patch avoids that self-removal triggers the following deadlock: ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc2-dbg+ #5 Not tainted ------------------------------------------------------ modprobe/6539 is trying to acquire lock: 000000008323c4cd (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x45/0x90 but task is already holding lock: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&shost->scan_mutex){+.+.}: __mutex_lock+0xfe/0xc70 mutex_lock_nested+0x1b/0x20 scsi_remove_device+0x26/0x40 [scsi_mod] sdev_store_delete+0x27/0x30 [scsi_mod] dev_attr_store+0x3e/0x50 sysfs_kf_write+0x87/0xa0 kernfs_fop_write+0x190/0x230 __vfs_write+0xd2/0x3b0 vfs_write+0x101/0x270 ksys_write+0xab/0x120 __x64_sys_write+0x43/0x50 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (kn->count#202){++++}: lock_acquire+0xd2/0x260 __kernfs_remove+0x424/0x4a0 kernfs_remove_by_name_ns+0x45/0x90 remove_files.isra.1+0x3a/0x90 sysfs_remove_group+0x5c/0xc0 sysfs_remove_groups+0x39/0x60 device_remove_attrs+0x82/0xb0 device_del+0x251/0x580 __scsi_remove_device+0x19f/0x1d0 [scsi_mod] scsi_forget_host+0x37/0xb0 [scsi_mod] scsi_remove_host+0x9b/0x150 [scsi_mod] sdebug_driver_remove+0x4b/0x150 [scsi_debug] device_release_driver_internal+0x241/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1bc/0x290 device_del+0x259/0x580 device_unregister+0x1a/0x70 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug] scsi_debug_exit+0x76/0xe8 [scsi_debug] __x64_sys_delete_module+0x1c1/0x280 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&shost->scan_mutex); lock(kn->count#202); lock(&shost->scan_mutex); lock(kn->count#202); *** DEADLOCK *** 2 locks held by modprobe/6539: #0: 00000000efaf9298 (&dev->mutex){....}, at: device_release_driver_internal+0x68/0x360 #1: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod] stack backtrace: CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_circular_bug.isra.34+0x213/0x221 __lock_acquire+0x1a7e/0x1b50 lock_acquire+0xd2/0x260 __kernfs_remove+0x424/0x4a0 kernfs_remove_by_name_ns+0x45/0x90 remove_files.isra.1+0x3a/0x90 sysfs_remove_group+0x5c/0xc0 sysfs_remove_groups+0x39/0x60 device_remove_attrs+0x82/0xb0 device_del+0x251/0x580 __scsi_remove_device+0x19f/0x1d0 [scsi_mod] scsi_forget_host+0x37/0xb0 [scsi_mod] scsi_remove_host+0x9b/0x150 [scsi_mod] sdebug_driver_remove+0x4b/0x150 [scsi_debug] device_release_driver_internal+0x241/0x360 device_release_driver+0x12/0x20 bus_remove_device+0x1bc/0x290 device_del+0x259/0x580 device_unregister+0x1a/0x70 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug] scsi_debug_exit+0x76/0xe8 [scsi_debug] __x64_sys_delete_module+0x1c1/0x280 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html. Suggested-by: Eric W. Biederman <ebiederm(a)xmission.com> Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of device_schedule_callback()") Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Eric W. Biederman <ebiederm(a)xmission.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: Hannes Reinecke <hare(a)suse.com> Cc: Johannes Thumshirn <jthumshirn(a)suse.de> Cc: Ingo Molnar <mingo(a)kernel.org> Cc: Oleg Nesterov <oleg(a)redhat.com> Cc: <stable(a)vger.kernel.org> --- drivers/scsi/scsi_sysfs.c | 48 +++++++++++++++++++++++++++++++++++---- kernel/task_work.c | 1 + 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index de122354d09a..c43f645900d4 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -12,6 +12,7 @@ #include <linux/blkdev.h> #include <linux/device.h> #include <linux/pm_runtime.h> +#include <linux/task_work.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -718,14 +719,53 @@ store_rescan_field (struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field); +struct remove_dev_work { + struct callback_head head; + struct scsi_device *sdev; +}; + +static void delete_sdev(struct callback_head *head) +{ + struct remove_dev_work *work = container_of(head, typeof(*work), head); + struct scsi_device *sdev = work->sdev; + + scsi_remove_device(sdev); + kfree(work); + scsi_device_put(sdev); +} + static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - if (device_remove_file_self(dev, attr)) - scsi_remove_device(to_scsi_device(dev)); - return count; -}; + struct scsi_device *sdev = to_scsi_device(dev); + struct remove_dev_work *work; + int ret; + + ret = scsi_device_get(sdev); + if (ret < 0) + goto out; + ret = -ENOMEM; + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + goto put; + work->head.func = delete_sdev; + work->sdev = sdev; + ret = task_work_add(current, &work->head, false); + if (ret < 0) + goto free; + ret = count; + +out: + return ret; + +free: + kfree(work); + +put: + scsi_device_put(sdev); + goto out; +} static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); static ssize_t diff --git a/kernel/task_work.c b/kernel/task_work.c index 0fef395662a6..75dc496b9997 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -40,6 +40,7 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify) set_notify_resume(task); return 0; } +EXPORT_SYMBOL_GPL(task_work_add); /** * task_work_cancel - cancel a pending work added by task_work_add() -- 2.18.0

7 years

7
14
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror July 2018