June 2023 - Linux-stable-mirror

[PATCH] HID: logitech-hidpp: rework one more time the retries attempts

by Benjamin Tissoires

Make the code look less like Pascal. Extract the internal code inside a helper function, fix the initialization of the parameters used in the helper function (`hidpp->answer_available` was not reset and `*response` wasn't either), and use a `do {...} while();` loop. Fixes: 586e8fede795 ("HID: logitech-hidpp: Retry commands when device is busy") Cc: stable(a)vger.kernel.org Signed-off-by: Benjamin Tissoires <benjamin.tissoires(a)redhat.com> --- as requested by https://lore.kernel.org/all/CAHk-=wiMbF38KCNhPFiargenpSBoecSXTLQACKS2UMyo_V… This is a rewrite of that particular piece of code. --- drivers/hid/hid-logitech-hidpp.c | 102 +++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 41 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index dfe8e09a18de..3d1ffe199f08 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -275,21 +275,20 @@ static int __hidpp_send_report(struct hid_device *hdev, } /* - * hidpp_send_message_sync() returns 0 in case of success, and something else - * in case of a failure. - * - If ' something else' is positive, that means that an error has been raised - * by the protocol itself. - * - If ' something else' is negative, that means that we had a classic error - * (-ENOMEM, -EPIPE, etc...) + * Effectively send the message to the device, waiting for its answer. 
+ * + * Must be called with hidpp->send_mutex locked + * + * Same return protocol than hidpp_send_message_sync(): + * - success on 0 + * - negative error means transport error + * - positive value means protocol error */ -static int hidpp_send_message_sync(struct hidpp_device *hidpp, +static int __do_hidpp_send_message_sync(struct hidpp_device *hidpp, struct hidpp_report *message, struct hidpp_report *response) { - int ret = -1; - int max_retries = 3; - - mutex_lock(&hidpp->send_mutex); + int ret; hidpp->send_receive_buf = response; hidpp->answer_available = false; @@ -300,41 +299,62 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp, */ *response = *message; - for (; max_retries != 0 && ret; max_retries--) { - ret = __hidpp_send_report(hidpp->hid_dev, message); + ret = __hidpp_send_report(hidpp->hid_dev, message); + if (ret) { + dbg_hid("__hidpp_send_report returned err: %d\n", ret); + memset(response, 0, sizeof(struct hidpp_report)); + return ret; + } - if (ret) { - dbg_hid("__hidpp_send_report returned err: %d\n", ret); - memset(response, 0, sizeof(struct hidpp_report)); - break; - } + if (!wait_event_timeout(hidpp->wait, hidpp->answer_available, + 5*HZ)) { + dbg_hid("%s:timeout waiting for response\n", __func__); + memset(response, 0, sizeof(struct hidpp_report)); + return -ETIMEDOUT; + } - if (!wait_event_timeout(hidpp->wait, hidpp->answer_available, - 5*HZ)) { - dbg_hid("%s:timeout waiting for response\n", __func__); - memset(response, 0, sizeof(struct hidpp_report)); - ret = -ETIMEDOUT; - break; - } + if (response->report_id == REPORT_ID_HIDPP_SHORT && + response->rap.sub_id == HIDPP_ERROR) { + ret = response->rap.params[1]; + dbg_hid("%s:got hidpp error %02X\n", __func__, ret); + return ret; + } - if (response->report_id == REPORT_ID_HIDPP_SHORT && - response->rap.sub_id == HIDPP_ERROR) { - ret = response->rap.params[1]; - dbg_hid("%s:got hidpp error %02X\n", __func__, ret); + if ((response->report_id == REPORT_ID_HIDPP_LONG || + 
response->report_id == REPORT_ID_HIDPP_VERY_LONG) && + response->fap.feature_index == HIDPP20_ERROR) { + ret = response->fap.params[1]; + dbg_hid("%s:got hidpp 2.0 error %02X\n", __func__, ret); + return ret; + } + + return 0; +} + +/* + * hidpp_send_message_sync() returns 0 in case of success, and something else + * in case of a failure. + * - If ' something else' is positive, that means that an error has been raised + * by the protocol itself. + * - If ' something else' is negative, that means that we had a classic error + * (-ENOMEM, -EPIPE, etc...) + */ +static int hidpp_send_message_sync(struct hidpp_device *hidpp, + struct hidpp_report *message, + struct hidpp_report *response) +{ + int ret; + int max_retries = 3; + + mutex_lock(&hidpp->send_mutex); + + do { + ret = __do_hidpp_send_message_sync(hidpp, message, response); + if (ret != HIDPP20_ERROR_BUSY) break; - } - if ((response->report_id == REPORT_ID_HIDPP_LONG || - response->report_id == REPORT_ID_HIDPP_VERY_LONG) && - response->fap.feature_index == HIDPP20_ERROR) { - ret = response->fap.params[1]; - if (ret != HIDPP20_ERROR_BUSY) { - dbg_hid("%s:got hidpp 2.0 error %02X\n", __func__, ret); - break; - } - dbg_hid("%s:got busy hidpp 2.0 error %02X, retrying\n", __func__, ret); - } - } + dbg_hid("%s:got busy hidpp 2.0 error %02X, retrying\n", __func__, ret); + } while (--max_retries); mutex_unlock(&hidpp->send_mutex); return ret; --- base-commit: b98ec211af5508457e2b1c4cc99373630a83fa81 change-id: 20230621-logitech-fixes-a4c0e66ea2ad Best regards, -- Benjamin Tissoires <benjamin.tissoires(a)redhat.com>

2 years, 4 months

4
9
0 0

[PATCH v4 00/17] Venus QoL / maintainability fixes

by Konrad Dybcio

v3 -> v4: - Rebase on Stanimir's venus-for-next-v6.5 - Collapse 2 identical if-statements in "Sanitize venus_boot_core() per-VPU-version" - Reword "Assign registers based on VPU version" - Check for IS_IRIS2_1() instead of wrongly checking for core->use_tz, update commit msg in "media: venus: firmware: Correct IS_V6() checks" - Access correct struct fields in "Use newly-introduced hfi_buffer_requirements accessors", drop Bryan's r-b v3: https://lore.kernel.org/r/20230228-topic-venus-v3-0-6092ae43b58f@linaro.org v2 -> v3: - Rephrase "Write to VIDC_CTRL_INIT after unmasking interrupts" commit msg - Drop "Remap bufreq fields on HFI6XX" - Rephrase "Introduce VPU version distinction" commit msg - Better explain "Leave a clue for homegrown porters" - Drop incorrect fixes tags/rephrase version check alternations - Drop AR50L/IRIS1 from if-conditions, they'll be introduced separately - pick up tags - rebase on next-20230517 (no effective changes) v2: https://lore.kernel.org/r/20230228-topic-venus-v2-0-d95d14949c79@linaro.org v1 -> v2: - Move "Write to VIDC_CTRL_INIT after unmasking interrupts" up and add a Fixes tag & Cc stable - Reword the comment in "Correct IS_V6() checks" - Move up "media: venus: Remap bufreq fields on HFI6XX", add Fixes and Cc stable - Use better English in "Use newly-introduced hfi_buffer_requirements accessors" commit message - Mention "Restrict writing SCIACMDARG3 to Venus V1/V2" doesn't seem to regress SM8250 in the commit message - Pick up tags (note: I capitalized the R in Dikshita's 'reviewed-by' and removed one occurrence of random '**' to make sure review tools like b4 don't go crazy) - Handle AR50_LITE in "Assign registers based on VPU version" - Drop /* VPUn */ comments, they're invalid as explained by Vikash - Take a different approach to the sys_idle problem in patch 1 v1: https://lore.kernel.org/r/20230228-topic-venus-v1-0-58c2c88384e9@linaro.org Currently upstream assumes all (well, almost all - see 7280 or CrOS specific checks) Venus 
implementations using the same version of the Hardware Firmware Interface can be treated the same way. This is however not the case. This series tries to introduce the groundwork to start differentiating them based on the VPU (Video Processing Unit) hardware type, fixes a couple of issues that were an effect of that generalized assumption and lays the foundation for supporting 8150 (IRIS1) and SM6115/QCM2290 (AR50 Lite), which will hopefully come soon. Tested on 8250, but pretty please test it on your boards too! Signed-off-by: Konrad Dybcio <konrad.dybcio(a)linaro.org> --- Konrad Dybcio (17): media: venus: hfi_venus: Only consider sys_idle_indicator on V1 media: venus: hfi_venus: Write to VIDC_CTRL_INIT after unmasking interrupts media: venus: Introduce VPU version distinction media: venus: Add vpu_version to most SoCs media: venus: firmware: Leave a clue about obtaining CP VARs media: venus: hfi_venus: Sanitize venus_boot_core() per-VPU-version media: venus: core: Assign registers based on VPU version media: venus: hfi_venus: Sanitize venus_halt_axi() per-VPU-version media: venus: hfi_venus: Sanitize venus_isr() per-VPU-version media: venus: hfi_venus: Sanitize venus_cpu_and_video_core_idle() per-VPU-version media: venus: hfi_venus: Sanitize venus_cpu_idle_and_pc_ready() per-VPU-version media: venus: firmware: Sanitize per-VPU-version media: venus: hfi_platform: Check vpu_version instead of device compatible media: venus: vdec: Sanitize vdec_set_work_route() per-VPU-version media: venus: Introduce accessors for remapped hfi_buffer_reqs members media: venus: Use newly-introduced hfi_buffer_requirements accessors media: venus: hfi_venus: Restrict writing SCIACMDARG3 to Venus V1/V2 drivers/media/platform/qcom/venus/core.c | 7 ++- drivers/media/platform/qcom/venus/core.h | 15 ++++++ drivers/media/platform/qcom/venus/firmware.c | 18 +++++-- drivers/media/platform/qcom/venus/helpers.c | 7 +-- drivers/media/platform/qcom/venus/hfi_helper.h | 61 +++++++++++++++++++--- 
drivers/media/platform/qcom/venus/hfi_msgs.c | 2 +- .../media/platform/qcom/venus/hfi_plat_bufs_v6.c | 22 ++++---- drivers/media/platform/qcom/venus/hfi_platform.c | 2 +- drivers/media/platform/qcom/venus/hfi_venus.c | 42 +++++++-------- drivers/media/platform/qcom/venus/vdec.c | 10 ++-- drivers/media/platform/qcom/venus/vdec_ctrls.c | 2 +- drivers/media/platform/qcom/venus/venc.c | 4 +- drivers/media/platform/qcom/venus/venc_ctrls.c | 2 +- 13 files changed, 133 insertions(+), 61 deletions(-) --- base-commit: 9f9f8ca6f012d25428f8605cb36369a449db8508 change-id: 20230228-topic-venus-70ea3bc76688 Best regards, -- Konrad Dybcio <konrad.dybcio(a)linaro.org>

2 years, 4 months

2
3
0 0

[PATCH v3 1/4] tpm_tis: Explicitly check for error code

by Alexander Steffen

recv_data either returns the number of received bytes, or a negative value representing an error code. Adding the return value directly to the total number of received bytes therefore looks a little weird, since it might add a negative error code to a sum of bytes. The following check for size < expected usually makes the function return ETIME in that case, so it does not cause too many problems in practice. But to make the code look cleaner and because the caller might still be interested in the original error code, explicitly check for the presence of an error code and pass that through. Cc: stable(a)vger.kernel.org Fixes: cb5354253af2 ("[PATCH] tpm: spacing cleanups 2") Signed-off-by: Alexander Steffen <Alexander.Steffen(a)infineon.com> --- drivers/char/tpm/tpm_tis_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 558144fa707a..aaaa136044ae 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -363,8 +363,13 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) goto out; } - size += recv_data(chip, &buf[TPM_HEADER_SIZE], - expected - TPM_HEADER_SIZE); + rc = recv_data(chip, &buf[TPM_HEADER_SIZE], + expected - TPM_HEADER_SIZE); + if (rc < 0) { + size = rc; + goto out; + } + size += rc; if (size < expected) { dev_err(&chip->dev, "Unable to read remainder of result\n"); size = -ETIME; -- 2.25.1

2 years, 4 months

2
1
0 0

[PATCH 2/2] drm/ofdrm: Update expected device name

by Cyril Brulebois

Since commit 241d2fb56a18 ("of: Make OF framebuffer device names unique"), as spotted by Frédéric Bonnard, the historical "of-display" device is gone: the updated logic creates "of-display.0" instead, then as many "of-display.N" as required. This means that offb no longer finds the expected device, which prevents the Debian Installer from setting up its interface, at least on ppc64el. Given the code similarity it is likely to affect ofdrm in the same way. It might be better to iterate on all possible nodes, but updating the hardcoded device from "of-display" to "of-display.0" is likely to help as a first step. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217328 Link: https://bugs.debian.org/1033058 Fixes: 241d2fb56a18 ("of: Make OF framebuffer device names unique") Cc: stable(a)vger.kernel.org # v6.2+ Signed-off-by: Cyril Brulebois <cyril(a)debamax.com> --- drivers/gpu/drm/tiny/ofdrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c index 6e349ca42485..92df021d71df 100644 --- a/drivers/gpu/drm/tiny/ofdrm.c +++ b/drivers/gpu/drm/tiny/ofdrm.c @@ -1390,7 +1390,7 @@ MODULE_DEVICE_TABLE(of, ofdrm_of_match_display); static struct platform_driver ofdrm_platform_driver = { .driver = { - .name = "of-display", + .name = "of-display.0", .of_match_table = ofdrm_of_match_display, }, .probe = ofdrm_probe, -- 2.30.2

2 years, 4 months

6
6
0 0

[PATCH 1/2] fbdev/offb: Update expected device name

by Cyril Brulebois

Since commit 241d2fb56a18 ("of: Make OF framebuffer device names unique"), as spotted by Frédéric Bonnard, the historical "of-display" device is gone: the updated logic creates "of-display.0" instead, then as many "of-display.N" as required. This means that offb no longer finds the expected device, which prevents the Debian Installer from setting up its interface, at least on ppc64el. It might be better to iterate on all possible nodes, but updating the hardcoded device from "of-display" to "of-display.0" is confirmed to fix the Debian Installer at the very least. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217328 Link: https://bugs.debian.org/1033058 Fixes: 241d2fb56a18 ("of: Make OF framebuffer device names unique") Cc: stable(a)vger.kernel.org Signed-off-by: Cyril Brulebois <cyril(a)debamax.com> --- drivers/video/fbdev/offb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index b97d251d894b..6264c7184457 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -698,7 +698,7 @@ MODULE_DEVICE_TABLE(of, offb_of_match_display); static struct platform_driver offb_driver_display = { .driver = { - .name = "of-display", + .name = "of-display.0", .of_match_table = offb_of_match_display, }, .probe = offb_probe_display, -- 2.30.2

2 years, 4 months

8
12
0 0

[PATCH] nvme: mark ctrl as DEAD if removing from error recovery

by Ming Lei

The namespace's request queue is frozen and quiesced during error recovery, so writeback IO is blocked in bio_queue_enter(); fsync_bdev() <- del_gendisk() can't move on, which causes an IO hang. Removal could be from sysfs, hard unplug or error handling. Fix this kind of issue by marking controller as DEAD if removal breaks error recovery. This way is reasonable too, because controller can't be recovered any more after being removed. Cc: stable(a)vger.kernel.org Reported-by: Chunguang Xu <brookxu.cn(a)gmail.com> Closes: https://lore.kernel.org/linux-nvme/cover.1685350577.git.chunguang.xu@shopee… Reported-by: Yi Zhang <yi.zhang(a)redhat.com> Signed-off-by: Ming Lei <ming.lei(a)redhat.com> --- drivers/nvme/host/core.c | 4 +++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fdfcf2781c85..b4cebc01cc00 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -567,6 +567,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } if (changed) { + ctrl->old_state = ctrl->state; ctrl->state = new_state; wake_up_all(&ctrl->state_wq); } @@ -4055,7 +4056,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) * removing the namespaces' disks; fail all the queues now to avoid * potentially having to clean up the failed sync later. */ - if (ctrl->state == NVME_CTRL_DEAD) { + if (ctrl->state == NVME_CTRL_DEAD || + ctrl->old_state != NVME_CTRL_LIVE) { nvme_mark_namespaces_dead(ctrl); nvme_unquiesce_io_queues(ctrl); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9a98c14c552a..ce67856d4d4f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -254,6 +254,7 @@ struct nvme_ctrl { bool comp_seen; bool identified; enum nvme_ctrl_state state; + enum nvme_ctrl_state old_state; spinlock_t lock; struct mutex scan_lock; const struct nvme_ctrl_ops *ops; -- 2.40.1

2 years, 4 months

3
14
0 0

[PATCH] tpm: return false from tpm_amd_is_rng_defective on non-x86 platforms

by Jerry Snitselaar

tpm_amd_is_rng_defective is for dealing with an issue related to the AMD firmware TPM, so on non-x86 architectures just have it inline and return false. Cc: Jarkko Sakkinen <jarkko(a)kernel.org> Cc: "Jason A. Donenfeld" <Jason(a)zx2c4.com> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Peter Huewe <peterhuewe(a)gmx.de> Cc: stable(a)vger.kernel.org Cc: Linux regressions mailing list <regressions(a)lists.linux.dev> Cc: Mario Limonciello <mario.limonciello(a)amd.com> Reported-by: Aneesh Kumar K. V <aneesh.kumar(a)linux.ibm.com> Reported-by: Sachin Sant <sachinp(a)linux.ibm.com> Closes: https://lore.kernel.org/lkml/99B81401-DB46-49B9-B321-CF832B50CAC3@linux.ibm… Fixes: f1324bbc4011 ("tpm: disable hwrng for fTPM on some AMD designs") Signed-off-by: Jerry Snitselaar <jsnitsel(a)redhat.com> --- drivers/char/tpm/tpm-chip.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index cd48033b804a..cf5499e51999 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -518,6 +518,7 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip) * 6.x.y.z series: 6.0.18.6 + * 3.x.y.z series: 3.57.y.5 + */ +#ifdef CONFIG_X86 static bool tpm_amd_is_rng_defective(struct tpm_chip *chip) { u32 val1, val2; @@ -566,6 +567,12 @@ static bool tpm_amd_is_rng_defective(struct tpm_chip *chip) return true; } +#else +static inline bool tpm_amd_is_rng_defective(struct tpm_chip *chip) +{ + return false; +} +#endif /* CONFIG_X86 */ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) { -- 2.38.1

2 years, 4 months

4
6
0 0

amd_sfh driver causes kernel oops during boot

by Haochen Tong

Hi, Since kernel 6.3.0 (and also 6.4rc3), on a ThinkPad Z13 system with Arch Linux, I've noticed that the amd_sfh driver spews a lot of stack traces during boot. Sometimes it is an oops: BUG: unable to handle page fault for address: 000000000001000f #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 8 PID: 457 Comm: (udev-worker) Not tainted 6.3.3-arch1-1 #1 fa7b7e0107004b3021a57a74b951e0a25e7e8584 Hardware name: LENOVO 21D2CTO1WW/21D2CTO1WW, BIOS N3GET47W (1.27 ) 12/08/2022 RIP: 0010:amd_sfh_get_report+0x1e/0x110 [amd_sfh] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f 44 00 00 41 57 41 56 41 55 41 54 55 53 48 8b 87 60 1d 00 00 48 8b 68 08 <8b> 45 10 85 c0 0f 84 a9 00 00 00 49 89 fc 41 89 f7 41 89 d6 31 db RSP: 0018:ffffb164426f3a20 EFLAGS: 00010246 RAX: ffff9b0ae6b7bd00 RBX: ffff9b0ac0f46000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000002 RDI: ffff9b0ac0f46000 RBP: 000000000000ffff R08: ffffb164426f3ab8 R09: ffffb164426f3ab8 R10: 000000000020031b R11: ffff9b0ace40ac00 R12: ffff9b0ace40ac00 R13: 0000000000000002 R14: 0000000000000002 R15: ffff9b0acd213010 FS: 00007fe9ceb82200(0000) GS:ffff9b1122000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000001000f CR3: 000000010940c000 CR4: 0000000000750ee0 PKRU: 55555554 Call Trace: <TASK> amdtp_hid_request+0x36/0x50 [amd_sfh 2e3095779aada9fdb1764f08ca578ccb14e41fe4] sensor_hub_get_feature+0xad/0x170 [hid_sensor_hub d6157999c9d260a1bfa6f27d4a0dc2c3e2c5654e] hid_sensor_parse_common_attributes+0x217/0x310 [hid_sensor_iio_common 07a7935272aa9c7a28193b574580b3e953a64ec4] hid_gyro_3d_probe+0x7f/0x2e0 [hid_sensor_gyro_3d 9f2eb51294a1f0c0315b365f335617cbaef01eab] platform_probe+0x44/0xa0 really_probe+0x19e/0x3e0 ? 
__pfx___driver_attach+0x10/0x10 __driver_probe_device+0x78/0x160 driver_probe_device+0x1f/0x90 __driver_attach+0xd2/0x1c0 bus_for_each_dev+0x88/0xd0 bus_add_driver+0x116/0x220 driver_register+0x59/0x100 ? __pfx_init_module+0x10/0x10 [hid_sensor_gyro_3d 9f2eb51294a1f0c0315b365f335617cbaef01eab] do_one_initcall+0x5d/0x240 do_init_module+0x4a/0x200 __do_sys_init_module+0x17f/0x1b0 do_syscall_64+0x60/0x90 ? ksys_read+0x6f/0xf0 ? syscall_exit_to_user_mode+0x1b/0x40 ? do_syscall_64+0x6c/0x90 ? exc_page_fault+0x7c/0x180 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fe9ce721f9e Code: 48 8b 0d bd ed 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8a ed 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffd280dd828 EFLAGS: 00000246 ORIG_RAX: 00000000000000af RAX: ffffffffffffffda RBX: 000055b72a37f630 RCX: 00007fe9ce721f9e RDX: 00007fe9cec7a343 RSI: 00000000000077f8 RDI: 000055b72a56c7f0 RBP: 00007fe9cec7a343 R08: 00000000000077f8 R09: 0000000000000000 R10: 000000000001a0a1 R11: 0000000000000246 R12: 0000000000020000 R13: 000055b72a363b90 R14: 000055b72a37f630 R15: 000055b72a36a070 </TASK> Modules linked in: hid_sensor_accel_3d(+) hid_sensor_gyro_3d(+) qrtr hid_sensor_trigger snd_sof industrialio_triggered_buffer ath11k_pci(+) kfifo_buf snd_sof_utils hid_sensor_iio_common joydev ath11k industrialio snd_soc_core mousedev qmi_helpers snd_compress hid_sensor_hub snd_hda_scodec_cs35l41_spi ac97_bus snd_hda_codec_realtek(+) snd_pcm_dmaengine intel_rapl_msr snd_hda_codec_hdmi snd_hda_codec_generic intel_rapl_common mac80211 snd_pci_ps btusb snd_rpl_pci_acp6x btrtl snd_hda_intel edac_mce_amd uvcvideo btbcm snd_acp_pci snd_intel_dspcfg snd_pci_acp6x videobuf2_vmalloc snd_intel_sdw_acpi libarc4 uvc btintel snd_usb_audio(+) snd_pci_acp5x videobuf2_memops btmtk snd_hda_codec kvm_amd videobuf2_v4l2 snd_hda_scodec_cs35l41_i2c snd_usbmidi_lib snd_hda_scodec_cs35l41 snd_rn_pci_acp3x ucsi_acpi 
bluetooth videodev snd_hda_core typec_ucsi snd_acp_config snd_hda_cs_dsp_ctls wacom(+) hid_multitouch cfg80211 snd_rawmidi sp5100_tco kvm snd_seq_device cs_dsp videobuf2_common typec ecdh_generic snd_soc_acpi think_lmi snd_hwdep snd_pcm irqbypass crc16 snd_soc_cs35l41_lib mhi thunderbolt firmware_attributes_class snd_pci_acp3x amd_sfh(+) k10temp psmouse roles rapl i2c_piix4 mc snd_timer wmi_bmof serial_multi_instantiate i2c_hid_acpi acpi_tad i2c_hid amd_pmf amd_pmc mac_hid sch_fq tcp_bbr dm_multipath i2c_dev crypto_user fuse loop zram ip_tables x_tables xfs libcrc32c crc32c_generic dm_crypt cbc encrypted_keys trusted asn1_encoder tee usbhid dm_mod amdgpu i2c_algo_bit serio_raw thinkpad_acpi drm_ttm_helper atkbd libps2 crct10dif_pclmul vivaldi_fmap crc32_pclmul ledtrig_audio crc32c_intel polyval_clmulni ttm polyval_generic drm_buddy nvme gf128mul platform_profile gpu_sched ghash_clmulni_intel sha512_ssse3 snd aesni_intel soundcore drm_display_helper crypto_simd rfkill nvme_core xhci_pci cryptd cec ccp xhci_pci_renesas i8042 video nvme_common serio wmi CR2: 000000000001000f ---[ end trace 0000000000000000 ]--- RIP: 0010:amd_sfh_get_report+0x1e/0x110 [amd_sfh] Code: 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f 44 00 00 41 57 41 56 41 55 41 54 55 53 48 8b 87 60 1d 00 00 48 8b 68 08 <8b> 45 10 85 c0 0f 84 a9 00 00 00 49 89 fc 41 89 f7 41 89 d6 31 db RSP: 0018:ffffb164426f3a20 EFLAGS: 00010246 RAX: ffff9b0ae6b7bd00 RBX: ffff9b0ac0f46000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000002 RDI: ffff9b0ac0f46000 RBP: 000000000000ffff R08: ffffb164426f3ab8 R09: ffffb164426f3ab8 R10: 000000000020031b R11: ffff9b0ace40ac00 R12: ffff9b0ace40ac00 R13: 0000000000000002 R14: 0000000000000002 R15: ffff9b0acd213010 FS: 00007fe9ceb82200(0000) GS:ffff9b1122000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000001000f CR3: 000000010940c000 CR4: 0000000000750ee0 PKRU: 55555554 Sometimes it is a list corruption in the 
same function with a similar stack: ------------[ cut here ]------------ list_add corruption. next is NULL. WARNING: CPU: 5 PID: 433 at lib/list_debug.c:25 __list_add_valid+0x57/0xa0 ... CPU: 5 PID: 433 Comm: (udev-worker) Not tainted 6.4.0-rc3-1-mainline #1 b60166e85cb97a6631db26f9dcda0196ed7a0c93 Hardware name: LENOVO 21D2CTO1WW/21D2CTO1WW, BIOS N3GET47W (1.27 ) 12/08/2022 RIP: 0010:__list_add_valid+0x57/0xa0 Code: 01 00 00 00 c3 cc cc cc cc 48 c7 c7 58 91 e6 9a e8 1e b9 a8 ff 0f 0b 31 c0 c3 cc cc cc cc 48 c7 c7 80 91 e6 9a e8 09 b9 a8 ff <0f> 0b eb e9 48 89 c1 48 c7 c7 a8 91 e6 9a e8 f6 b8 a8 ff 0f 0b eb RSP: 0018:ffffad9dc0c7bb10 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff92d5a8099448 RCX: 0000000000000027 RDX: ffff92dbe1f61688 RSI: 0000000000000001 RDI: ffff92dbe1f61680 RBP: ffff92d59ea93508 R08: 0000000000000000 R09: ffffad9dc0c7b9a0 R10: 0000000000000003 R11: ffffffff9b6ca808 R12: 0000000000000000 R13: ffff92d5a8099440 R14: ffff92d59ea93760 R15: 0000000000000002 FS: 00007fbaf0262200(0000) GS:ffff92dbe1f40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005651de666000 CR3: 000000011cfee000 CR4: 0000000000750ee0 PKRU: 55555554 Call Trace: <TASK> amd_sfh_get_report+0xba/0x110 [amd_sfh 78bf82e66cdb2ccf24cbe871a0835ef4eedddb17] amdtp_hid_request+0x36/0x50 [amd_sfh 78bf82e66cdb2ccf24cbe871a0835ef4eedddb17] sensor_hub_get_feature+0xad/0x170 [hid_sensor_hub 30e53e2c49ea1702e2482c0b3860e22265679e39] hid_sensor_parse_common_attributes+0x217/0x310 [hid_sensor_iio_common ed7fba7a4d4147d48156e6a4b2a034ad3fc94350] hid_gyro_3d_probe+0x7f/0x2e0 [hid_sensor_gyro_3d 10978a2cdfc8979f2a7366fcd005e0ea826088eb] platform_probe+0x44/0xa0 really_probe+0x19e/0x3e0 ? __pfx___driver_attach+0x10/0x10 __driver_probe_device+0x78/0x160 driver_probe_device+0x1f/0x90 __driver_attach+0xd2/0x1c0 bus_for_each_dev+0x88/0xd0 bus_add_driver+0x116/0x220 driver_register+0x59/0x100 ? 
__pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 10978a2cdfc8979f2a7366fcd005e0ea826088eb] do_one_initcall+0x5d/0x240 do_init_module+0x60/0x240 __do_sys_init_module+0x17f/0x1b0 do_syscall_64+0x60/0x90 ? exc_page_fault+0x7f/0x180 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fbaf06c0f9e Code: 48 8b 0d bd ed 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8a ed 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc5ce88528 EFLAGS: 00000246 ORIG_RAX: 00000000000000af RAX: ffffffffffffffda RBX: 00005651de36dff0 RCX: 00007fbaf06c0f9e RDX: 00007fbaf0ba9343 RSI: 00000000000079f0 RDI: 00005651de646fe0 RBP: 00007fbaf0ba9343 R08: 00000000000079f0 R09: 0000000000000000 R10: 0000000000019fb1 R11: 0000000000000246 R12: 0000000000020000 R13: 00005651de45fb10 R14: 00005651de36dff0 R15: 00005651de44d5f0 </TASK> ---[ end trace 0000000000000000 ]--- This occurs during almost every boot. When it happens there is usually a (udev-worker) process lingering forever, which is unkillable and even prevents shutdown. Looking at past journals it never happened before 6.3 so I believe it is a regression. Relevant device: 63:00.7 Signal processing controller [1180]: Advanced Micro Devices, Inc. [AMD] Sensor Fusion Hub [1022:15e4] Subsystem: Lenovo Sensor Fusion Hub [17aa:22f1] Kernel driver in use: pcie_mp2_amd Kernel modules: amd_sfh I would appreciate it if someone could take a look at this. Best regards, Haochen Tong

2 years, 4 months

7
20
0 0

[Patch v3] net: mana: Batch ringing RX queue doorbell on receiving packets

by longli＠linuxonhyperv.com

From: Long Li <longli(a)microsoft.com> It's inefficient to ring the doorbell page every time a WQE is posted to the receive queue. Excessive MMIO writes result in CPU spending more time waiting on LOCK instructions (atomic operations), resulting in poor scaling performance. Move the code for ringing the doorbell page so that it runs after we have posted all WQEs to the receive queue during a callback from napi_poll(). With this change, tests showed an improvement from 120G/s to 160G/s on a 200G physical link, with 16 or 32 hardware queues. Tests showed no regression in network latency benchmarks on single connection. While we are making changes in this code path, change the code for ringing doorbell to set the WQE_COUNT to 0 for Receive Queue. The hardware specification specifies that it should be set to 0. Although currently the hardware doesn't enforce the check, it may do so in future releases. Cc: stable(a)vger.kernel.org Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Reviewed-by: Haiyang Zhang <haiyangz(a)microsoft.com> Reviewed-by: Dexuan Cui <decui(a)microsoft.com> Signed-off-by: Long Li <longli(a)microsoft.com> --- Change log: v2: Check for comp_read > 0 as it might be negative on completion error. Set rq.wqe_cnt to 0 according to BNIC spec. v3: Add details in the commit on the reason of performance increase and test numbers. Add details in the commit on why rq.wqe_cnt should be set to 0 according to hardware spec. Add "Reviewed-by" from Haiyang and Dexuan. 
drivers/net/ethernet/microsoft/mana/gdma_main.c | 5 ++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 8f3f78b68592..3765d3389a9a 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -300,8 +300,11 @@ static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index, void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) { + /* Hardware Spec specifies that software client should set 0 for + * wqe_cnt for Receive Queues. This value is not used in Send Queues. + */ mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, - queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); + queue->id, queue->head * GDMA_WQE_BU_SIZE, 0); } void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index cd4d5ceb9f2d..1d8abe63fcb8 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1383,8 +1383,8 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq) recv_buf_oob = &rxq->rx_oobs[curr_index]; - err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req, - &recv_buf_oob->wqe_inf); + err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req, + &recv_buf_oob->wqe_inf); if (WARN_ON_ONCE(err)) return; @@ -1654,6 +1654,12 @@ static void mana_poll_rx_cq(struct mana_cq *cq) mana_process_rx_cqe(rxq, cq, &comp[i]); } + if (comp_read > 0) { + struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; + + mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq); + } + if (rxq->xdp_flush) xdp_do_flush(); } -- 2.34.1

2 years, 5 months

6
14
0 0

[PATCH] HID: amd_sfh: Check that sensors are enabled before set/get report

by Mario Limonciello

A crash was reported in amd-sfh related to hid core initialization before SFH initialization has run. ``` amdtp_hid_request+0x36/0x50 [amd_sfh 2e3095779aada9fdb1764f08ca578ccb14e41fe4] sensor_hub_get_feature+0xad/0x170 [hid_sensor_hub d6157999c9d260a1bfa6f27d4a0dc2c3e2c5654e] hid_sensor_parse_common_attributes+0x217/0x310 [hid_sensor_iio_common 07a7935272aa9c7a28193b574580b3e953a64ec4] hid_gyro_3d_probe+0x7f/0x2e0 [hid_sensor_gyro_3d 9f2eb51294a1f0c0315b365f335617cbaef01eab] platform_probe+0x44/0xa0 really_probe+0x19e/0x3e0 ``` Ensure that sensors have been set up before calling into amd_sfh_get_report() or amd_sfh_set_report(). Cc: stable(a)vger.kernel.org Cc: Linux regression tracking (Thorsten Leemhuis) <regressions(a)leemhuis.info> Fixes: 7bcfdab3f0c6 ("HID: amd_sfh: if no sensors are enabled, clean up") Reported-by: Haochen Tong <linux(a)hexchain.org> Link: https://lore.kernel.org/all/3250319.ancTxkQ2z5@zen/T/ Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com> --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index d9b7b01900b5..88f3d913eaa1 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -25,6 +25,9 @@ void amd_sfh_set_report(struct hid_device *hid, int report_id, struct amdtp_cl_data *cli_data = hid_data->cli_data; int i; + if (!cli_data->is_any_sensor_enabled) + return; + for (i = 0; i < cli_data->num_hid_devices; i++) { if (cli_data->hid_sensor_hubs[i] == hid) { cli_data->cur_hid_dev = i; @@ -41,6 +44,9 @@ int amd_sfh_get_report(struct hid_device *hid, int report_id, int report_type) struct request_list *req_list = &cli_data->req_list; int i; + if (!cli_data->is_any_sensor_enabled) + return -ENODEV; + for (i = 0; i < cli_data->num_hid_devices; i++) { if (cli_data->hid_sensor_hubs[i] == hid) { struct request_list *new = kzalloc(sizeof(*new), GFP_KERNEL); -- 
2.34.1

2 years, 5 months

5
4
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror June 2023