From: Jan Kara <jack(a)suse.cz>
commit 74dae4278546b897eb81784fdfcce872ddd8b2b8 upstream.
Competing overwrite DIO in dioread_nolock mode will just overwrite the
pointer to io_end in the inode. This may result in data corruption or in
extent conversion happening from the IO completion interrupt, because we
don't properly set buffer_defer_completion() when unlocked DIO races
with locked DIO to an unwritten extent.
Since unlocked DIO doesn't need io_end for anything, just avoid
allocating it, and thus avoid corrupting the pointer in the inode used
by locked DIO.
A cleaner fix would be to avoid these games with the io_end pointer in
the inode altogether, but that requires more intrusive changes, so we
leave that for later.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Nathan Chancellor <natechancellor(a)gmail.com>
---
fs/ext4/inode.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f0cabc8c96cb..fa99cae9552b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3256,29 +3256,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* case, we allocate an io_end structure to hook to the iocb.
*/
iocb->private = NULL;
- ext4_inode_aio_set(inode, NULL);
- if (!is_sync_kiocb(iocb)) {
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end) {
- ret = -ENOMEM;
- goto retake_lock;
- }
- /*
- * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
- */
- iocb->private = ext4_get_io_end(io_end);
- /*
- * we save the io structure for current async direct
- * IO, so that later ext4_map_blocks() could flag the
- * io structure whether there is a unwritten extents
- * needs to be converted when IO is completed.
- */
- ext4_inode_aio_set(inode, io_end);
- }
-
if (overwrite) {
get_block_func = ext4_get_block_write_nolock;
} else {
+ ext4_inode_aio_set(inode, NULL);
+ if (!is_sync_kiocb(iocb)) {
+ io_end = ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end) {
+ ret = -ENOMEM;
+ goto retake_lock;
+ }
+ /*
+ * Grab reference for DIO. Will be dropped in
+ * ext4_end_io_dio()
+ */
+ iocb->private = ext4_get_io_end(io_end);
+ /*
+ * we save the io structure for current async direct
+ * IO, so that later ext4_map_blocks() could flag the
+ * io structure whether there is a unwritten extents
+ * needs to be converted when IO is completed.
+ */
+ ext4_inode_aio_set(inode, io_end);
+ }
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
--
2.17.0
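The race fixed above is easiest to see outside the kernel: one submission
path publishes a per-inode io_end pointer that the completion path needs,
while a concurrent overwrite DIO, which needs no io_end at all, used to
clobber that same field. Below is a minimal userspace sketch of that
pattern in C; it is purely illustrative and not ext4 code, and the names
fake_inode, cur_aio, locked_dio and overwrite_dio are made up for the
example.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for ext4's io_end and the inode field set by ext4_inode_aio_set(). */
struct io_end { int need_conversion; };
struct fake_inode { struct io_end *cur_aio; };

static struct fake_inode inode;

/* Locked DIO to an unwritten extent: must publish its io_end in the inode. */
static void *locked_dio(void *arg)
{
	struct io_end *io = malloc(sizeof(*io));

	(void)arg;
	if (!io)
		return NULL;
	io->need_conversion = 1;
	inode.cur_aio = io;	/* the completion path will look here */
	return NULL;
}

/* Overwrite DIO: needs no io_end; the bug was that it still wrote the field. */
static void *overwrite_dio(void *arg)
{
	(void)arg;
	/* buggy version: inode.cur_aio = NULL;  clobbers the racer's io_end */
	/* fixed version: this path leaves inode.cur_aio alone */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, locked_dio, NULL);
	pthread_create(&b, NULL, overwrite_dio, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("io_end still published: %s\n",
	       inode.cur_aio ? "yes" : "no (lost, wrong completion handling)");
	free(inode.cur_aio);
	return 0;
}

The patch corresponds to the "fixed version" comment above: only the branch
that actually needs an io_end ever touches the shared inode field.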
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1c6ceeee6ebbcdf9839dbce00719162591180524 Mon Sep 17 00:00:00 2001
From: "Leo (Sunpeng) Li" <sunpeng.li(a)amd.com>
Date: Wed, 17 Jan 2018 12:51:08 +0100
Subject: [PATCH] drm/atomic: Fix memleak on ERESTARTSYS during non-blocking
commits
During a non-blocking commit, it is possible to return before the
commit_tail work is queued (-ERESTARTSYS, for example).
Since a reference on the crtc commit object is obtained for the pending
vblank event when preparing the commit, the above situation will leave
us with an extra reference.
Therefore, if the commit_tail worker has not consumed the event at the
end of a commit, release its reference.
Changes since v1:
- Also check for state->event->base.completion being set, to
handle the case where stall_checks() fails in setup_crtc_commit().
Changes since v2:
- Add a flag to drm_crtc_commit, to prevent dereferencing a freed event.
i915 may unreference the state in a worker.
Fixes: 24835e442f28 ("drm: reference count event->completion")
Cc: <stable(a)vger.kernel.org> # v4.11+
Signed-off-by: Leo (Sunpeng) Li <sunpeng.li(a)amd.com>
Acked-by: Harry Wentland <harry.wentland(a)amd.com> #v1
Signed-off-by: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180117115108.29608-1-maarte…
Reviewed-by: Sean Paul <seanpaul(a)chromium.org>
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index ab4032167094..ae3cbfe9e01c 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1878,6 +1878,8 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
new_crtc_state->event->base.completion = &commit->flip_done;
new_crtc_state->event->base.completion_release = release_crtc_commit;
drm_crtc_commit_get(commit);
+
+ commit->abort_completion = true;
}
for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
@@ -3421,8 +3423,21 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state);
void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state)
{
if (state->commit) {
+ /*
+ * In the event that a non-blocking commit returns
+ * -ERESTARTSYS before the commit_tail work is queued, we will
+ * have an extra reference to the commit object. Release it, if
+ * the event has not been consumed by the worker.
+ *
+ * state->event may be freed, so we can't directly look at
+ * state->event->base.completion.
+ */
+ if (state->event && state->commit->abort_completion)
+ drm_crtc_commit_put(state->commit);
+
kfree(state->commit->event);
state->commit->event = NULL;
+
drm_crtc_commit_put(state->commit);
}
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 1c27526c499e..cf13842a6dbd 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -134,6 +134,15 @@ struct drm_crtc_commit {
* &drm_pending_vblank_event pointer to clean up private events.
*/
struct drm_pending_vblank_event *event;
+
+ /**
+ * @abort_completion:
+ *
+ * A flag that's set after drm_atomic_helper_setup_commit takes a second
+ * reference for the completion of $drm_crtc_state.event. It's used by
+ * the free code to remove the second reference if commit fails.
+ */
+ bool abort_completion;
};
struct __drm_planes_state {
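The fix above follows a common pattern: take an extra reference when arming
the flip_done completion, remember that with the new abort_completion flag,
and have the teardown path drop the extra reference only when the worker
never consumed the event. A small userspace model of that pattern is
sketched below; it is not the DRM helper code, and commit_get, commit_put,
setup_commit and destroy_state are invented names for the illustration.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy refcounted object standing in for struct drm_crtc_commit. */
struct commit {
	int refcount;
	bool abort_completion;	/* mirrors the flag added by the patch */
};

static struct commit *commit_get(struct commit *c)
{
	c->refcount++;
	return c;
}

static void commit_put(struct commit *c)
{
	if (--c->refcount == 0) {
		printf("commit freed\n");
		free(c);
	}
}

/* Setup: arm the pending event and pin the commit for it. */
static void setup_commit(struct commit *c)
{
	commit_get(c);			/* reference owned by the pending event */
	c->abort_completion = true;	/* teardown may have to undo this */
}

/* Teardown: event_pending is true when the worker never ran. */
static void destroy_state(struct commit *c, bool event_pending)
{
	if (event_pending && c->abort_completion)
		commit_put(c);		/* drop the event's extra reference */
	commit_put(c);			/* drop the state's own reference */
}

int main(void)
{
	struct commit *c = calloc(1, sizeof(*c));

	if (!c)
		return 1;
	c->refcount = 1;		/* reference held by the atomic state */
	setup_commit(c);
	destroy_state(c, true);		/* e.g. the commit returned -ERESTARTSYS */
	return 0;
}

Without the event_pending check the object would be freed with a reference
still outstanding, and without the extra put it would leak, which is exactly
the imbalance the patch addresses.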
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a0a73b950d0b2618690488ad067f96ab703e05c2 Mon Sep 17 00:00:00 2001
From: Eric Huang <JinHuiEric.Huang(a)amd.com>
Date: Mon, 26 Feb 2018 17:36:19 -0500
Subject: [PATCH] drm/amd/powerplay: fix power over limit on Fiji
Power containment is disabled only on Fiji and only for the compute
power profile. This violates the PCIe spec and may cause power supply
failures. Enabling it fixes the issue, even though the fix drops the
performance of some compute tests.
Signed-off-by: Eric Huang <JinHuiEric.Huang(a)amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 535d786b79ae..731475b06be7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -4630,13 +4630,6 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr,
int tmp_result, result = 0;
uint32_t sclk_mask = 0, mclk_mask = 0;
- if (hwmgr->chip_id == CHIP_FIJI) {
- if (request->type == AMD_PP_GFX_PROFILE)
- smu7_enable_power_containment(hwmgr);
- else if (request->type == AMD_PP_COMPUTE_PROFILE)
- smu7_disable_power_containment(hwmgr);
- }
-
if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_AUTO)
return -EINVAL;
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9b3c001650984993dc96e09e1c59f57b905fd2d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Tue, 13 Feb 2018 14:26:54 -0500
Subject: [PATCH] drm/amd/powerplay/smu7: allow mclk switching with no displays
If there are no displays attached, there is no reason to disable
mclk switching.
Fixes mclks getting set to high when there are no displays attached.
Reviewed-by: Eric Huang <JinhuiEric.Huang(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 937971361b65..84600ff6f4de 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2904,10 +2904,13 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
- disable_mclk_switching = ((1 < info.display_count) ||
- disable_mclk_switching_for_frame_lock ||
- smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
- (mode_info.refresh_rate > 120));
+ if (info.display_count == 0)
+ disable_mclk_switching = false;
+ else
+ disable_mclk_switching = ((1 < info.display_count) ||
+ disable_mclk_switching_for_frame_lock ||
+ smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+ (mode_info.refresh_rate > 120));
sclk = smu7_ps->performance_levels[0].engine_clock;
mclk = smu7_ps->performance_levels[0].memory_clock;
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d6bca7e7146796002308988e913d493f0c67bec0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Tue, 13 Feb 2018 14:25:11 -0500
Subject: [PATCH] drm/amd/powerplay/vega10: allow mclk switching with no
displays
If there are no displays attached, there is no reason to disable
mclk switching.
Fixes mclks getting set to high when there are no displays attached.
Reviewed-by: Eric Huang <JinhuiEric.Huang(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 6a153ad1b942..1d442a498bf6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3171,10 +3171,13 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR);
force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh);
- disable_mclk_switching = (info.display_count > 1) ||
- disable_mclk_switching_for_frame_lock ||
- disable_mclk_switching_for_vr ||
- force_mclk_high;
+ if (info.display_count == 0)
+ disable_mclk_switching = false;
+ else
+ disable_mclk_switching = (info.display_count > 1) ||
+ disable_mclk_switching_for_frame_lock ||
+ disable_mclk_switching_for_vr ||
+ force_mclk_high;
sclk = vega10_ps->performance_levels[0].gfx_clock;
mclk = vega10_ps->performance_levels[0].mem_clock;
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0660b58c9038f5ef94b8fe33716449945ea786e5 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig(a)amd.com>
Date: Fri, 9 Mar 2018 14:44:32 +0100
Subject: [PATCH] drm/radeon: fix prime teardown order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We unmapped imported DMA-bufs when the GEM handle was dropped, not when the
hardware was done with the buffer.
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer(a)amd.com>
CC: stable(a)vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index a9962ffba720..27d8e7dd2d06 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -34,8 +34,6 @@ void radeon_gem_object_free(struct drm_gem_object *gobj)
struct radeon_bo *robj = gem_to_radeon_bo(gobj);
if (robj) {
- if (robj->gem_base.import_attach)
- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
radeon_mn_unregister(robj);
radeon_bo_unref(&robj);
}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 38431f682ed0..edbb4cd519fd 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -82,6 +82,8 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
mutex_unlock(&bo->rdev->gem.mutex);
radeon_bo_clear_surface_reg(bo);
WARN_ON_ONCE(!list_empty(&bo->va));
+ if (bo->gem_base.import_attach)
+ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
drm_gem_object_release(&bo->gem_base);
kfree(bo);
}
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9aff8b2ae71dcf7f02443821a894a736f40e4919 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Thu, 15 Feb 2018 08:40:30 -0500
Subject: [PATCH] Revert "drm/radeon/pm: autoswitch power state when in
balanced mode"
This reverts commit 1c331f75aa6ccbf64ebcc5a019183e617c9d818a.
Breaks resume on some systems.
Bug: https://bugs.freedesktop.org/show_bug.cgi?id=100759
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 326ad068c15a..4b6542538ff9 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -47,7 +47,6 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev);
static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish);
static void radeon_pm_update_profile(struct radeon_device *rdev);
static void radeon_pm_set_clocks(struct radeon_device *rdev);
-static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev);
int radeon_pm_get_type_index(struct radeon_device *rdev,
enum radeon_pm_state_type ps_type,
@@ -80,8 +79,6 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev)
radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power);
}
mutex_unlock(&rdev->pm.mutex);
- /* allow new DPM state to be picked */
- radeon_pm_compute_clocks_dpm(rdev);
} else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
if (rdev->pm.profile == PM_PROFILE_AUTO) {
mutex_lock(&rdev->pm.mutex);
@@ -885,8 +882,7 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
dpm_state = POWER_STATE_TYPE_INTERNAL_3DPERF;
/* balanced states don't exist at the moment */
if (dpm_state == POWER_STATE_TYPE_BALANCED)
- dpm_state = rdev->pm.dpm.ac_power ?
- POWER_STATE_TYPE_PERFORMANCE : POWER_STATE_TYPE_BATTERY;
+ dpm_state = POWER_STATE_TYPE_PERFORMANCE;
restart_search:
/* Pick the best power state based on current conditions */
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a3f73c8cf43858455a979d0f8354815e1443c496 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel.daenzer(a)amd.com>
Date: Wed, 14 Mar 2018 18:14:04 +0100
Subject: [PATCH] drm/radeon: Don't turn off DP sink when disconnected
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Turning off the sink in this case causes various issues, because
userspace expects it to stay on until it turns it off explicitly.
Instead, turn the sink off and back on when a display is connected
again. This dance seems necessary for link training to work correctly.
Bugzilla: https://bugs.freedesktop.org/105308
Cc: stable(a)vger.kernel.org
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 5012f5e47a1e..b108eaabb6df 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -90,25 +90,18 @@ void radeon_connector_hotplug(struct drm_connector *connector)
/* don't do anything if sink is not display port, i.e.,
* passive dp->(dvi|hdmi) adaptor
*/
- if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
- int saved_dpms = connector->dpms;
- /* Only turn off the display if it's physically disconnected */
- if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- } else if (radeon_dp_needs_link_train(radeon_connector)) {
- /* Don't try to start link training before we
- * have the dpcd */
- if (!radeon_dp_getdpcd(radeon_connector))
- return;
-
- /* set it to OFF so that drm_helper_connector_dpms()
- * won't return immediately since the current state
- * is ON at this point.
- */
- connector->dpms = DRM_MODE_DPMS_OFF;
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
- connector->dpms = saved_dpms;
+ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+ radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
+ radeon_dp_needs_link_train(radeon_connector)) {
+ /* Don't start link training before we have the DPCD */
+ if (!radeon_dp_getdpcd(radeon_connector))
+ return;
+
+ /* Turn the connector off and back on immediately, which
+ * will trigger link training
+ */
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
}
}
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c24a85d236eb2348d2e221993769fd93c168f65 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Date: Thu, 8 Feb 2018 17:46:01 +0800
Subject: [PATCH] drm/amdgpu: add new device to use atpx quirk
The affected system (0x0813) is pretty similar to another one (0x0812);
it also needs to use ATPX power control.
Signed-off-by: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index e2c3c5ec42d1..c53095b3b0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
/* HG _PR3 doesn't seem to work on this A+A weston board */
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ec7549df08c20ec7f046540f4372b646bde50fac Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu(a)amd.com>
Date: Tue, 6 Mar 2018 14:43:50 -0500
Subject: [PATCH] drm/amdgpu:Correct max uvd handles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Max uvd handles should use adev->uvd.max_handles instead of
AMDGPU_MAX_UVD_HANDLES here.
Signed-off-by: James Zhu <James.Zhu(a)amd.com>
Reviewed-by: Leo Liu <leo.liu(a)amd.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 7ad814d0a487..9d037cb3268a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -303,7 +303,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (atomic_read(&adev->uvd.handles[i]))
break;
- if (i == AMDGPU_MAX_UVD_HANDLES)
+ if (i == adev->uvd.max_handles)
return 0;
size = amdgpu_bo_size(adev->uvd.vcpu_bo);
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 589941e1a2d65f5425c91a5859a5454df64b6982 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu(a)amd.com>
Date: Tue, 27 Feb 2018 18:20:53 +0800
Subject: [PATCH] drm/amdgpu: Notify sbios device ready before send request
This is required if a platform supports PCIe root complex
core voltage reduction. After receiving this notification,
the SBIOS can apply the default PCIe root complex power policy.
Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 57afad79f55d..8fa850a070e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
size_t size;
u32 retry = 3;
+ if (amdgpu_acpi_pcie_notify_device_ready(adev))
+ return -EINVAL;
+
/* Get the device handle */
handle = ACPI_HANDLE(&adev->pdev->dev);
if (!handle)
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f6c3b601bd490eda08c27b03607448abd4b4841b Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu(a)amd.com>
Date: Tue, 6 Mar 2018 14:52:35 -0500
Subject: [PATCH] drm/amdgpu:Always save uvd vcpu_bo in VM Mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When UVD is in VM mode, no UVD handles are exchanged and uvd.handles
are always 0, so vcpu_bo always needs to be saved. Otherwise the amdgpu
driver will fail during suspend/resume.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105021
Signed-off-by: James Zhu <James.Zhu(a)amd.com>
Reviewed-by: Leo Liu <leo.liu(a)amd.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 9d037cb3268a..f3c459b7c0bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -299,12 +299,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&adev->uvd.idle_work);
- for (i = 0; i < adev->uvd.max_handles; ++i)
- if (atomic_read(&adev->uvd.handles[i]))
- break;
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.handles[i]))
+ break;
- if (i == adev->uvd.max_handles)
- return 0;
+ if (i == adev->uvd.max_handles)
+ return 0;
+ }
size = amdgpu_bo_size(adev->uvd.vcpu_bo);
ptr = adev->uvd.cpu_addr;
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fb919655ef3011b01bb114fef42107957860a Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig(a)amd.com>
Date: Fri, 9 Mar 2018 14:42:54 +0100
Subject: [PATCH] drm/amdgpu: fix prime teardown order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We unmapped imported DMA-bufs when the GEM handle was dropped, not when the
hardware was done with the buffer.
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer(a)amd.com>
CC: stable(a)vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index bb9b21266b67..46b9ea4e6103 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
if (robj) {
- if (robj->gem_base.import_attach)
- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
amdgpu_mn_unregister(robj);
amdgpu_bo_unref(&robj);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ec6ec1f8a085..fc720603b970 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -60,6 +60,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_kunmap(bo);
+ if (bo->gem_base.import_attach)
+ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
if (!list_empty(&bo->shadow_list)) {
Hi Greg and Ted,
I've been looking at ext4 history the past couple of days seeing if the
patches that you attempted to apply from 4.17-rc1 were relevant and I
noticed a couple from previous 4.x versions that seem like they should
be applied here, as they are clean picks and tagged for stable:
74dae4278546 ("ext4: fix crashes in dioread_nolock mode")
c755e251357a ("ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()")
I've been running them for the past day and have not had any issues (though I
didn't have any before). If there are any objections, let me know!
Thanks!
Nathan
These are all the ext4 patches that were tagged for -stable and failed
to apply to 3.18.y.
Patch e40ff2138985 ("ext4: force revalidation of directory pointer after seekdir(2)")
was Cc'd to stable as well but it requires commit ae5e165d855d
("fs: new API for handling inode->i_version") to be applied as well
which is neither a stable candidate nor under 100 lines so I've skipped e40ff2138985.
If somebody can suggest a backport of the commit which doesn't require ae5e165d855d, I'll
be glad.
Theodore Ts'o (3):
ext4: add validity checks for bitmap block numbers
ext4: fail ext4_iget for root directory if unallocated
ext4: don't allow r/w mounts if metadata blocks overlap the superblock
fs/ext4/balloc.c | 16 ++++++++++++++--
fs/ext4/ialloc.c | 8 +++++++-
fs/ext4/inode.c | 6 ++++++
fs/ext4/super.c | 6 ++++++
4 files changed, 33 insertions(+), 3 deletions(-)
--
2.15.0.2308.g658a28aa74af
Hi Greg,
Upstream commit cf0d53ba4947 ("vfio/pci: Virtualize Maximum Read Request
Size") and commit 523184972b28 ("vfio/pci: Virtualize Maximum Payload Size")
fix nasty PCIe virtualization issues on platforms that support a Maximum
Payload Size larger than 128 bytes.
The issue shows up when a device is assigned to the guest machine as a
passthrough device. The guest machine configures the MPS/MRRS settings to
values that are incompatible with the parent bridge device.
This causes PCIe transaction timeouts and AER errors to be spilled into the
host kernel log.
Please apply commit cf0d53ba4947 and 523184972b28 to all affected releases to
fix the resulting regression.
Thanks,
Sinan
--
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
These are all the ext4 patches that were tagged for -stable and failed
to apply to 3.18.y.
Side note: Patch e15dc99dbb9c ("ALSA: pcm: Fix endless loop for XRUN recovery in OSS emulation")
which was tagged for -stable is not required on 3.18.y so I have skipped
the backport.
Theodore Ts'o (4):
ext4: add validity checks for bitmap block numbers
ext4: fail ext4_iget for root directory if unallocated
ext4: don't allow r/w mounts if metadata blocks overlap the superblock
ext4: force revalidation of directory pointer after seekdir(2)
fs/ext4/balloc.c | 16 ++++++++++++++--
fs/ext4/dir.c | 8 +++++---
fs/ext4/ialloc.c | 7 +++++++
fs/ext4/inode.c | 6 ++++++
fs/ext4/super.c | 6 ++++++
5 files changed, 38 insertions(+), 5 deletions(-)
--
2.15.0.2308.g658a28aa74af
The blk-mq timeout handling code ignores completions that occur after
blk_mq_check_expired() has been called and before blk_mq_rq_timed_out()
has reset rq->aborted_gstate. If a block driver timeout handler always
returns BLK_EH_RESET_TIMER then the result will be that the request
never terminates.
Fix this race as follows:
- Use the deadline instead of the request generation to detect whether
or not a request timer fired after reinitialization of a request.
- Store the request state in the lowest two bits of the deadline instead
of the lowest two bits of 'gstate'.
- Rename MQ_RQ_STATE_MASK into RQ_STATE_MASK and change it from an
enumeration member into a #define such that its type can be changed
into unsigned long. That allows writing & ~RQ_STATE_MASK instead of
~(unsigned long)RQ_STATE_MASK.
- Remove all request member variables that became superfluous due to
this change: gstate, gstate_seq and aborted_gstate_sync.
- Remove the request state information that became superfluous due to this
patch, namely RQF_MQ_TIMEOUT_EXPIRED.
- Remove the code that became superfluous due to this change, namely
the RCU lock and unlock statements in blk_mq_complete_request() and
also the synchronize_rcu() call in the timeout handler.
Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: Sagi Grimberg <sagi(a)grimberg.me>
Cc: Israel Rukshin <israelr(a)mellanox.com>,
Cc: Max Gurtovoy <maxg(a)mellanox.com>
Cc: <stable(a)vger.kernel.org> # v4.16
---
Changes compared to v5:
- Restored the synchronize_rcu() call between marking a request for timeout
handling and the actual timeout handling to avoid that timeout handling
starts while .queue_rq() is still in progress if the timeout is very short.
- Only use cmpxchg() if another context could attempt to change the request
state concurrently. Use WRITE_ONCE() otherwise.
Changes compared to v4:
- Addressed multiple review comments from Christoph. The most important are
that atomic_long_cmpxchg() has been changed into cmpxchg() and also that
there is now a nice and clean split between the legacy and blk-mq versions
of blk_add_timer().
- Changed the patch name and modified the patch description because there is
disagreement about whether or not the v4.16 blk-mq core can complete a
single request twice. Kept the "Cc: stable" tag because of
https://bugzilla.kernel.org/show_bug.cgi?id=199077.
Changes compared to v3 (see also https://www.mail-archive.com/linux-block@vger.kernel.org/msg20073.html):
- Removed the spinlock again that was introduced to protect the request state.
v4 uses atomic_long_cmpxchg() instead.
- Split __deadline into two variables - one for the legacy block layer and one
for blk-mq.
Changes compared to v2 (https://www.mail-archive.com/linux-block@vger.kernel.org/msg18338.html):
- Rebased and retested on top of kernel v4.16.
Changes compared to v1 (https://www.mail-archive.com/linux-block@vger.kernel.org/msg18089.html):
- Removed the gstate and aborted_gstate members of struct request and used
the __deadline member to encode both the generation and state information.
block/blk-core.c | 6 --
block/blk-mq-debugfs.c | 1 -
block/blk-mq.c | 158 ++++++++++---------------------------------------
block/blk-mq.h | 85 +++++++++++++++++---------
block/blk-timeout.c | 89 ++++++++++++++++------------
block/blk.h | 13 ++--
include/linux/blkdev.h | 29 +++------
7 files changed, 154 insertions(+), 227 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index de90ecab61cd..730a8e3be7ce 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -200,12 +200,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->part = NULL;
- seqcount_init(&rq->gstate_seq);
- u64_stats_init(&rq->aborted_gstate_sync);
- /*
- * See comment of blk_mq_init_request
- */
- WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
}
EXPORT_SYMBOL(blk_rq_init);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index adb8d6f00098..529383841b3b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -346,7 +346,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STATS),
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
- RQF_NAME(MQ_TIMEOUT_EXPIRED),
RQF_NAME(MQ_POLL_SLEPT),
};
#undef RQF_NAME
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb7f59d319fa..6f20845827f4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -481,7 +481,8 @@ void blk_mq_free_request(struct request *rq)
if (blk_rq_rl(rq))
blk_put_rl(blk_rq_rl(rq));
- blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
+ if (!blk_mq_change_rq_state(rq, blk_mq_rq_state(rq), MQ_RQ_IDLE))
+ WARN_ON_ONCE(true);
if (rq->tag != -1)
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
if (sched_tag != -1)
@@ -527,8 +528,7 @@ static void __blk_mq_complete_request(struct request *rq)
bool shared = false;
int cpu;
- WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
- blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+ WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_COMPLETE);
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);
@@ -577,36 +577,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
*srcu_idx = srcu_read_lock(hctx->srcu);
}
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
-{
- unsigned long flags;
-
- /*
- * blk_mq_rq_aborted_gstate() is used from the completion path and
- * can thus be called from irq context. u64_stats_fetch in the
- * middle of update on the same CPU leads to lockup. Disable irq
- * while updating.
- */
- local_irq_save(flags);
- u64_stats_update_begin(&rq->aborted_gstate_sync);
- rq->aborted_gstate = gstate;
- u64_stats_update_end(&rq->aborted_gstate_sync);
- local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
- unsigned int start;
- u64 aborted_gstate;
-
- do {
- start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
- aborted_gstate = rq->aborted_gstate;
- } while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
-
- return aborted_gstate;
-}
-
/**
* blk_mq_complete_request - end I/O on a request
* @rq: the request being processed
@@ -618,27 +588,12 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
void blk_mq_complete_request(struct request *rq)
{
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
- int srcu_idx;
if (unlikely(blk_should_fake_timeout(q)))
return;
- /*
- * If @rq->aborted_gstate equals the current instance, timeout is
- * claiming @rq and we lost. This is synchronized through
- * hctx_lock(). See blk_mq_timeout_work() for details.
- *
- * Completion path never blocks and we can directly use RCU here
- * instead of hctx_lock() which can be either RCU or SRCU.
- * However, that would complicate paths which want to synchronize
- * against us. Let stay in sync with the issue path so that
- * hctx_lock() covers both issue and completion paths.
- */
- hctx_lock(hctx, &srcu_idx);
- if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
+ if (blk_mq_change_rq_state(rq, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE))
__blk_mq_complete_request(rq);
- hctx_unlock(hctx, srcu_idx);
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -662,27 +617,7 @@ void blk_mq_start_request(struct request *rq)
wbt_issue(q->rq_wb, &rq->issue_stat);
}
- WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
-
- /*
- * Mark @rq in-flight which also advances the generation number,
- * and register for timeout. Protect with a seqcount to allow the
- * timeout path to read both @rq->gstate and @rq->deadline
- * coherently.
- *
- * This is the only place where a request is marked in-flight. If
- * the timeout path reads an in-flight @rq->gstate, the
- * @rq->deadline it reads together under @rq->gstate_seq is
- * guaranteed to be the matching one.
- */
- preempt_disable();
- write_seqcount_begin(&rq->gstate_seq);
-
- blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
- blk_add_timer(rq);
-
- write_seqcount_end(&rq->gstate_seq);
- preempt_enable();
+ blk_mq_add_timer(rq, MQ_RQ_IDLE, MQ_RQ_IN_FLIGHT);
if (q->dma_drain_size && blk_rq_bytes(rq)) {
/*
@@ -695,22 +630,19 @@ void blk_mq_start_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_start_request);
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
static void __blk_mq_requeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
+ enum mq_rq_state old_state = blk_mq_rq_state(rq);
blk_mq_put_driver_tag(rq);
trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->issue_stat);
- if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
- blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
+ if (old_state != MQ_RQ_IDLE) {
+ if (!blk_mq_change_rq_state(rq, old_state, MQ_RQ_IDLE))
+ WARN_ON_ONCE(true);
if (q->dma_drain_size && blk_rq_bytes(rq))
rq->nr_phys_segments--;
}
@@ -819,8 +751,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
const struct blk_mq_ops *ops = req->q->mq_ops;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
- req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
-
if (ops->timeout)
ret = ops->timeout(req, reserved);
@@ -829,13 +759,7 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
__blk_mq_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
- /*
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored
- * completions and further spurious timeouts.
- */
- blk_mq_rq_update_aborted_gstate(req, 0);
- blk_add_timer(req);
+ blk_mq_add_timer(req, MQ_RQ_COMPLETE, MQ_RQ_IN_FLIGHT);
break;
case BLK_EH_NOT_HANDLED:
break;
@@ -849,48 +773,35 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
struct blk_mq_timeout_data *data = priv;
- unsigned long gstate, deadline;
- int start;
-
- might_sleep();
+ unsigned long __deadline = READ_ONCE(rq->__deadline);
+ unsigned long deadline = __deadline & ~RQ_STATE_MASK;
+ enum mq_rq_state rq_state = __deadline & RQ_STATE_MASK;
- if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
- return;
-
- /* read coherent snapshots of @rq->state_gen and @rq->deadline */
- while (true) {
- start = read_seqcount_begin(&rq->gstate_seq);
- gstate = READ_ONCE(rq->gstate);
- deadline = blk_rq_deadline(rq);
- if (!read_seqcount_retry(&rq->gstate_seq, start))
- break;
- cond_resched();
- }
-
- /* if in-flight && overdue, mark for abortion */
- if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
- time_after_eq(jiffies, deadline)) {
- blk_mq_rq_update_aborted_gstate(rq, gstate);
+ rq->aborted_gstate = __deadline ^ (1ULL << 63);
+ if (time_after_eq(jiffies, deadline) && rq_state == MQ_RQ_IN_FLIGHT) {
+ rq->aborted_gstate = __deadline;
data->nr_expired++;
hctx->nr_expired++;
} else if (!data->next_set || time_after(data->next, deadline)) {
data->next = deadline;
data->next_set = 1;
}
+
}
static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
+ unsigned long old_val = rq->aborted_gstate;
+ unsigned long new_val = (rq->aborted_gstate & ~RQ_STATE_MASK) |
+ MQ_RQ_COMPLETE;
+
/*
- * We marked @rq->aborted_gstate and waited for RCU. If there were
- * completions that we lost to, they would have finished and
- * updated @rq->gstate by now; otherwise, the completion path is
- * now guaranteed to see @rq->aborted_gstate and yield. If
- * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
+ * We marked @rq->aborted_gstate and waited for ongoing .queue_rq()
+ * calls. If rq->__deadline has not changed that means that it is
+ * now safe to change the request state and to handle the timeout.
*/
- if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
- READ_ONCE(rq->gstate) == rq->aborted_gstate)
+ if (cmpxchg(&rq->__deadline, old_val, new_val) == old_val)
blk_mq_rq_timed_out(rq, reserved);
}
@@ -929,10 +840,10 @@ static void blk_mq_timeout_work(struct work_struct *work)
bool has_rcu = false;
/*
- * Wait till everyone sees ->aborted_gstate. The
- * sequential waits for SRCUs aren't ideal. If this ever
- * becomes a problem, we can add per-hw_ctx rcu_head and
- * wait in parallel.
+ * For very short timeouts it can happen that
+ * blk_mq_check_expired() modifies the state of a request
+ * while .queue_rq() is still in progress. Hence wait until
+ * these .queue_rq() calls have finished.
*/
queue_for_each_hw_ctx(q, hctx, i) {
if (!hctx->nr_expired)
@@ -948,7 +859,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
if (has_rcu)
synchronize_rcu();
- /* terminate the ones we won */
+ /* Terminate the requests marked by blk_mq_check_expired(). */
blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
}
@@ -2060,15 +1971,6 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
return ret;
}
- seqcount_init(&rq->gstate_seq);
- u64_stats_init(&rq->aborted_gstate_sync);
- /*
- * start gstate with gen 1 instead of 0, otherwise it will be equal
- * to aborted_gstate, and be identified timed out by
- * blk_mq_terminate_expired.
- */
- WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
-
return 0;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 88c558f71819..66efc8a3988b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,18 +27,11 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;
-/*
- * Bits for request->gstate. The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
+/* Lowest two bits of request->__deadline. */
enum mq_rq_state {
MQ_RQ_IDLE = 0,
MQ_RQ_IN_FLIGHT = 1,
MQ_RQ_COMPLETE = 2,
-
- MQ_RQ_STATE_BITS = 2,
- MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1,
- MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS,
};
void blk_mq_freeze_queue(struct request_queue *q);
@@ -100,37 +93,73 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_release(struct request_queue *q);
+/*
+ * If the state of request @rq equals @old_state, update deadline and request
+ * state atomically to @time and @new_state. blk-mq only. cmpxchg() is only
+ * used if there could be a concurrent update attempt from another context.
+ */
+static inline bool blk_mq_rq_set_deadline(struct request *rq,
+ unsigned long new_time,
+ enum mq_rq_state old_state,
+ enum mq_rq_state new_state)
+{
+ unsigned long old_val, new_val;
+
+ if (old_state != MQ_RQ_IN_FLIGHT) {
+ old_val = READ_ONCE(rq->__deadline);
+ if ((old_val & RQ_STATE_MASK) != old_state)
+ return false;
+ new_val = (new_time & ~RQ_STATE_MASK) |
+ (new_state & RQ_STATE_MASK);
+ WRITE_ONCE(rq->__deadline, new_val);
+ return true;
+ }
+
+ do {
+ old_val = READ_ONCE(rq->__deadline);
+ if ((old_val & RQ_STATE_MASK) != old_state)
+ return false;
+ new_val = (new_time & ~RQ_STATE_MASK) |
+ (new_state & RQ_STATE_MASK);
+ } while (cmpxchg(&rq->__deadline, old_val, new_val) != old_val);
+
+ return true;
+}
+
/**
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
* @rq: target request.
*/
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
- return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
+ return READ_ONCE(rq->__deadline) & RQ_STATE_MASK;
}
/**
- * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
- * @rq: target request.
- * @state: new state to set.
+ * blk_mq_change_rq_state - atomically test and set request state
+ * @rq: Request pointer.
+ * @old_state: Old request state.
+ * @new_state: New request state.
*
- * Set @rq's state to @state. The caller is responsible for ensuring that
- * there are no other updaters. A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
+ * Returns %true if and only if the old state was @old and if the state has
+ * been changed into @new.
*/
-static inline void blk_mq_rq_update_state(struct request *rq,
- enum mq_rq_state state)
+static inline bool blk_mq_change_rq_state(struct request *rq,
+ enum mq_rq_state old_state,
+ enum mq_rq_state new_state)
{
- u64 old_val = READ_ONCE(rq->gstate);
- u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
- if (state == MQ_RQ_IN_FLIGHT) {
- WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
- new_val += MQ_RQ_GEN_INC;
- }
-
- /* avoid exposing interim values */
- WRITE_ONCE(rq->gstate, new_val);
+ unsigned long old_val = (READ_ONCE(rq->__deadline) & ~RQ_STATE_MASK) |
+ old_state;
+ unsigned long new_val = (old_val & ~RQ_STATE_MASK) | new_state;
+
+ /*
+ * For transitions from state in-flight to another state cmpxchg() must
+ * be used. For other state transitions it is safe to use WRITE_ONCE().
+ */
+ if (old_state == MQ_RQ_IN_FLIGHT)
+ return cmpxchg(&rq->__deadline, old_val, new_val) == old_val;
+ WRITE_ONCE(rq->__deadline, new_val);
+ return true;
}
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 50a191720055..e98da6db7d4b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -165,8 +165,9 @@ void blk_abort_request(struct request *req)
* immediately and that scan sees the new timeout value.
* No need for fancy synchronizations.
*/
- blk_rq_set_deadline(req, jiffies);
- kblockd_schedule_work(&req->q->timeout_work);
+ if (blk_mq_rq_set_deadline(req, jiffies, MQ_RQ_IN_FLIGHT,
+ MQ_RQ_IN_FLIGHT))
+ kblockd_schedule_work(&req->q->timeout_work);
} else {
if (blk_mark_rq_complete(req))
return;
@@ -187,52 +188,17 @@ unsigned long blk_rq_timeout(unsigned long timeout)
return timeout;
}
-/**
- * blk_add_timer - Start timeout timer for a single request
- * @req: request that is about to start running.
- *
- * Notes:
- * Each request has its own timer, and as it is added to the queue, we
- * set up the timer. When the request completes, we cancel the timer.
- */
-void blk_add_timer(struct request *req)
+static void __blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
- if (!q->mq_ops)
- lockdep_assert_held(q->queue_lock);
-
- /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
- if (!q->mq_ops && !q->rq_timed_out_fn)
- return;
-
- BUG_ON(!list_empty(&req->timeout_list));
-
- /*
- * Some LLDs, like scsi, peek at the timeout to prevent a
- * command from being retried forever.
- */
- if (!req->timeout)
- req->timeout = q->rq_timeout;
-
- blk_rq_set_deadline(req, jiffies + req->timeout);
- req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
-
- /*
- * Only the non-mq case needs to add the request to a protected list.
- * For the mq case we simply scan the tag map.
- */
- if (!q->mq_ops)
- list_add_tail(&req->timeout_list, &req->q->timeout_list);
-
/*
* If the timer isn't already pending or this timeout is earlier
* than an existing one, modify the timer. Round up to next nearest
* second.
*/
expiry = blk_rq_timeout(round_jiffies_up(blk_rq_deadline(req)));
-
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires)) {
unsigned long diff = q->timeout.expires - expiry;
@@ -247,5 +213,52 @@ void blk_add_timer(struct request *req)
if (!timer_pending(&q->timeout) || (diff >= HZ / 2))
mod_timer(&q->timeout, expiry);
}
+}
+
+/**
+ * blk_add_timer - Start timeout timer for a single request
+ * @req: request that is about to start running.
+ *
+ * Notes:
+ * Each request has its own timer, and as it is added to the queue, we
+ * set up the timer. When the request completes, we cancel the timer.
+ */
+void blk_add_timer(struct request *req)
+{
+ struct request_queue *q = req->q;
+
+ lockdep_assert_held(q->queue_lock);
+ if (!q->rq_timed_out_fn)
+ return;
+ if (!req->timeout)
+ req->timeout = q->rq_timeout;
+
+ blk_rq_set_deadline(req, jiffies + req->timeout);
+ list_add_tail(&req->timeout_list, &req->q->timeout_list);
+
+ return __blk_add_timer(req);
+}
+
+/**
+ * blk_mq_add_timer - set the deadline for a single request
+ * @req: request for which to set the deadline.
+ * @old: current request state.
+ * @new: new request state.
+ *
+ * Sets the deadline of a request if and only if it has state @old and
+ * at the same time changes the request state from @old into @new. The caller
+ * must guarantee that the request state won't be modified while this function
+ * is in progress.
+ */
+void blk_mq_add_timer(struct request *req, enum mq_rq_state old,
+ enum mq_rq_state new)
+{
+ struct request_queue *q = req->q;
+
+ if (!req->timeout)
+ req->timeout = q->rq_timeout;
+ if (!blk_mq_rq_set_deadline(req, jiffies + req->timeout, old, new))
+ WARN_ON_ONCE(true);
+ return __blk_add_timer(req);
}
diff --git a/block/blk.h b/block/blk.h
index b034fd2460c4..7cd64f533a46 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -170,6 +170,8 @@ static inline bool bio_integrity_endio(struct bio *bio)
void blk_timeout_work(struct work_struct *work);
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);
+void blk_mq_add_timer(struct request *req, enum mq_rq_state old,
+ enum mq_rq_state new);
void blk_delete_timer(struct request *);
@@ -308,18 +310,19 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
}
/*
- * Steal a bit from this field for legacy IO path atomic IO marking. Note that
- * setting the deadline clears the bottom bit, potentially clearing the
- * completed bit. The user has to be OK with this (current ones are fine).
+ * Steal two bits from this field. The legacy IO path uses the lowest bit for
+ * atomic IO marking. Note that setting the deadline clears the bottom bit,
+ * potentially clearing the completed bit. The current legacy block layer is
+ * fine with that. Must be called with the request queue lock held.
*/
static inline void blk_rq_set_deadline(struct request *rq, unsigned long time)
{
- rq->__deadline = time & ~0x1UL;
+ rq->__deadline = time & ~RQ_STATE_MASK;
}
static inline unsigned long blk_rq_deadline(struct request *rq)
{
- return rq->__deadline & ~0x1UL;
+ return rq->__deadline & ~RQ_STATE_MASK;
}
/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b7681f3ee793..51cd69f14537 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,8 +27,6 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
-#include <linux/seqlock.h>
-#include <linux/u64_stats_sync.h>
struct module;
struct scsi_ioctl_command;
@@ -125,10 +123,8 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
-/* timeout is expired */
-#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
/* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))
+#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
@@ -225,28 +221,19 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
- /*
- * On blk-mq, the lower bits of ->gstate (generation number and
- * state) carry the MQ_RQ_* state value and the upper bits the
- * generation number which is monotonically incremented and used to
- * distinguish the reuse instances.
- *
- * ->gstate_seq allows updates to ->gstate and other fields
- * (currently ->deadline) during request start to be read
- * atomically from the timeout path, so that it can operate on a
- * coherent set of information.
- */
- seqcount_t gstate_seq;
- u64 gstate;
-
/*
* ->aborted_gstate is used by the timeout to claim a specific
* recycle instance of this request. See blk_mq_timeout_work().
*/
- struct u64_stats_sync aborted_gstate_sync;
u64 aborted_gstate;
- /* access through blk_rq_set_deadline, blk_rq_deadline */
+ /*
+ * Access through blk_rq_deadline() and blk_rq_set_deadline(),
+ * blk_mark_rq_complete(), blk_clear_rq_complete() and
+ * blk_rq_is_complete() for legacy queues or blk_mq_rq_state() for
+ * blk-mq queues.
+ */
+#define RQ_STATE_MASK 0x3UL
unsigned long __deadline;
struct list_head timeout_list;
--
2.16.3
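The central trick in the series above is packing the MQ_RQ_* state into the
two low bits of the __deadline word so that the completion path and the
timeout path can claim a request with a single compare-and-swap. The sketch
below shows that encoding as a standalone userspace C program; it uses C11
atomics instead of the kernel's cmpxchg() and READ_ONCE()/WRITE_ONCE(), and
it is an illustration of the idea rather than the kernel implementation.

#include <stdatomic.h>
#include <stdio.h>

#define RQ_STATE_MASK 0x3UL	/* two low bits carry the state */

enum mq_rq_state { MQ_RQ_IDLE = 0, MQ_RQ_IN_FLIGHT = 1, MQ_RQ_COMPLETE = 2 };

struct request { _Atomic unsigned long __deadline; };

static enum mq_rq_state rq_state(struct request *rq)
{
	return atomic_load(&rq->__deadline) & RQ_STATE_MASK;
}

static unsigned long rq_deadline(struct request *rq)
{
	return atomic_load(&rq->__deadline) & ~RQ_STATE_MASK;
}

/* Store deadline and state in one word; caller excludes concurrent updates. */
static void rq_set_deadline(struct request *rq, unsigned long time,
			    enum mq_rq_state state)
{
	atomic_store(&rq->__deadline, (time & ~RQ_STATE_MASK) | state);
}

/* Claim a state transition; only one of several racers can succeed. */
static int rq_change_state(struct request *rq, enum mq_rq_state old,
			   enum mq_rq_state new)
{
	unsigned long cur = atomic_load(&rq->__deadline);
	unsigned long want = (cur & ~RQ_STATE_MASK) | new;

	if ((cur & RQ_STATE_MASK) != old)
		return 0;
	return atomic_compare_exchange_strong(&rq->__deadline, &cur, want);
}

int main(void)
{
	struct request rq = { 0 };

	rq_set_deadline(&rq, 1000, MQ_RQ_IN_FLIGHT);
	printf("deadline=%lu state=%d\n", rq_deadline(&rq), (int)rq_state(&rq));
	/* The completion path claims the request first ... */
	printf("completion claims rq: %d\n",
	       rq_change_state(&rq, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE));
	/* ... so a racing timeout sees the state already changed and backs off. */
	printf("timeout claims rq:    %d\n",
	       rq_change_state(&rq, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE));
	return 0;
}

Because deadlines are jiffies values, clearing the two low bits rounds the
deadline down by at most three jiffies, which is the precision the patch
trades for a single word that can be updated atomically.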