From: Vitor Soares <vitor.soares(a)toradex.com>
When the mcp251xfd_start_xmit() function fails, the driver stops
processing messages, and the interrupt routine does not return,
running indefinitely even after killing the running application.
Error messages:
[ 441.298819] mcp251xfd spi2.0 can0: ERROR in mcp251xfd_start_xmit: -16
[ 441.306498] mcp251xfd spi2.0 can0: Transmit Event FIFO buffer not empty. (seq=0x000017c7, tef_tail=0x000017cf, tef_head=0x000017d0, tx_head=0x000017d3).
... and repeat forever.
The issue can be triggered when multiple devices share the same
SPI interface. And there is concurrent access to the bus.
The problem occurs because tx_ring->head increments even if
mcp251xfd_start_xmit() fails. Consequently, the driver skips one
TX package while still expecting a response in
mcp251xfd_handle_tefif_one().
This patch resolves the issue by decreasing tx_ring->head if
mcp251xfd_start_xmit() fails. With the fix, if we attempt to trigger
the issue again, the driver prints an error and discard the message.
Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Cc: stable(a)vger.kernel.org
Signed-off-by: Vitor Soares <vitor.soares(a)toradex.com>
---
drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c | 27 ++++++++++----------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
index 160528d3cc26..a8eb941c1b95 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
@@ -181,25 +181,26 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
mcp251xfd_tx_obj_from_skb(priv, tx_obj, skb, tx_ring->head);
- /* Stop queue if we occupy the complete TX FIFO */
tx_head = mcp251xfd_get_tx_head(tx_ring);
- tx_ring->head++;
- if (mcp251xfd_get_tx_free(tx_ring) == 0)
- netif_stop_queue(ndev);
-
frame_len = can_skb_get_frame_len(skb);
- err = can_put_echo_skb(skb, ndev, tx_head, frame_len);
- if (!err)
- netdev_sent_queue(priv->ndev, frame_len);
+ can_put_echo_skb(skb, ndev, tx_head, frame_len);
+
+ tx_ring->head++;
err = mcp251xfd_tx_obj_write(priv, tx_obj);
- if (err)
- goto out_err;
+ if (err) {
+ can_free_echo_skb(ndev, tx_head, NULL);
- return NETDEV_TX_OK;
+ tx_ring->head--;
+
+ netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+ } else {
+ /* Stop queue if we occupy the complete TX FIFO */
+ if (mcp251xfd_get_tx_free(tx_ring) == 0)
+ netif_stop_queue(ndev);
- out_err:
- netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+ netdev_sent_queue(priv->ndev, frame_len);
+ }
return NETDEV_TX_OK;
}
--
2.34.1
From: Muhammad Ahmed <ahmed.ahmed(a)amd.com>
[WHY]
Blackscreen hang @ PC EF000025 when trying to wake up from S0i3. DCN
gets powered off due to dc_power_down_on_boot() being called after
timeout.
[HOW]
Setting the power_down_on_boot function pointer to null since we don't
expect the function to be called for APU.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Muhammad Ahmed <ahmed.ahmed(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index dce620d359a6..d4e0abbef28e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -39,7 +39,7 @@
static const struct hw_sequencer_funcs dcn35_funcs = {
.program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn35_init_hw,
- .power_down_on_boot = dcn35_power_down_on_boot,
+ .power_down_on_boot = NULL,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
--
2.34.1
We need to avoid the first page if we don't read it entirely.
We need to avoid the last page if we don't read it entirely.
While rather simple, this logic has been failed in the previous
fix. This time I wrote about 30 unit tests locally to check each
possible condition, hopefully I covered them all.
Reported-by: Christophe Kerello <christophe.kerello(a)foss.st.com>
Closes: https://lore.kernel.org/linux-mtd/20240221175327.42f7076d@xps-13/T/#m399bac…
Suggested-by: Christophe Kerello <christophe.kerello(a)foss.st.com>
Fixes: 828f6df1bcba ("mtd: rawnand: Clarify conditions to enable continuous reads")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/nand_base.c | 38 ++++++++++++++++++--------------
1 file changed, 22 insertions(+), 16 deletions(-)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 3b3ce2926f5d..bcfd99a1699f 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3466,30 +3466,36 @@ static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page,
u32 readlen, int col)
{
struct mtd_info *mtd = nand_to_mtd(chip);
- unsigned int end_page, end_col;
+ unsigned int first_page, last_page;
chip->cont_read.ongoing = false;
if (!chip->controller->supported_op.cont_read)
return;
- end_page = DIV_ROUND_UP(col + readlen, mtd->writesize);
- end_col = (col + readlen) % mtd->writesize;
+ /*
+ * Don't bother making any calculations if the length is too small.
+ * Side effect: avoids possible integer underflows below.
+ */
+ if (readlen < (2 * mtd->writesize))
+ return;
+ /* Derive the page where continuous read should start (the first full page read) */
+ first_page = page;
if (col)
- page++;
-
- if (end_col && end_page)
- end_page--;
-
- if (page + 1 > end_page)
- return;
-
- chip->cont_read.first_page = page;
- chip->cont_read.last_page = end_page;
- chip->cont_read.ongoing = true;
-
- rawnand_cap_cont_reads(chip);
+ first_page++;
+
+ /* Derive the page where continuous read should stop (the last full page read) */
+ last_page = page + ((col + readlen) / mtd->writesize) - 1;
+
+ /* Configure and enable continuous read when suitable */
+ if (first_page < last_page) {
+ chip->cont_read.first_page = first_page;
+ chip->cont_read.last_page = last_page;
+ chip->cont_read.ongoing = true;
+ /* May reset the ongoing flag */
+ rawnand_cap_cont_reads(chip);
+ }
}
static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned int page)
--
2.34.1
While crossing a LUN boundary, it is probably safer (and clearer) to
keep all members of the continuous read structure aligned, including the
pause page (which is the last page of the lun or the last page of the
continuous read). Once these members properly in sync, we can use the
rawnand_cap_cont_reads() helper everywhere to "prepare" the next
continuous read if there is one.
Fixes: bbcd80f53a5e ("mtd: rawnand: Prevent crossing LUN boundaries during sequential reads")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
This is not 100% a fix but I believe it is worth backporting as there
may be corner cases which were not identified with the initial
implementation.
---
drivers/mtd/nand/raw/nand_base.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d6a27e08b112..4d5a663e4e05 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1232,6 +1232,15 @@ static void rawnand_cap_cont_reads(struct nand_chip *chip)
chip->cont_read.pause_page = rawnand_last_page_of_lun(ppl, first_lun);
else
chip->cont_read.pause_page = chip->cont_read.last_page;
+
+ if (chip->cont_read.first_page == chip->cont_read.pause_page) {
+ chip->cont_read.first_page++;
+ chip->cont_read.pause_page = min(chip->cont_read.last_page,
+ rawnand_last_page_of_lun(ppl, first_lun + 1));
+ }
+
+ if (chip->cont_read.first_page >= chip->cont_read.last_page)
+ chip->cont_read.ongoing = false;
}
static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int page,
@@ -1298,12 +1307,11 @@ static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int p
if (!chip->cont_read.ongoing)
return 0;
- if (page == chip->cont_read.pause_page &&
- page != chip->cont_read.last_page) {
- chip->cont_read.first_page = chip->cont_read.pause_page + 1;
- rawnand_cap_cont_reads(chip);
- } else if (page == chip->cont_read.last_page) {
+ if (page == chip->cont_read.last_page) {
chip->cont_read.ongoing = false;
+ } else if (page == chip->cont_read.pause_page) {
+ chip->cont_read.first_page++;
+ rawnand_cap_cont_reads(chip);
}
return 0;
@@ -3510,10 +3518,7 @@ static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned i
return;
chip->cont_read.first_page++;
- if (chip->cont_read.first_page == chip->cont_read.pause_page)
- chip->cont_read.first_page++;
- if (chip->cont_read.first_page >= chip->cont_read.last_page)
- chip->cont_read.ongoing = false;
+ rawnand_cap_cont_reads(chip);
}
/**
--
2.34.1
To add another datapoint to this - I've seen the same problem on Dell
PowerEdge R6615 servers... but no others.
The problem also crept into the 6.1.79 kernel with the commit
mentioned earlier, and is fixed by reverting that commit. Adding
nogbpages to the kernel command line can cause the failure to
reproduce on that hardware as well.
From: Jakub Kicinski <kuba(a)kernel.org>
[ Upstream commit e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb ]
Similarly to previous commit, the submitting thread (recvmsg/sendmsg)
may exit as soon as the async crypto handler calls complete().
Reorder scheduling the work before calling complete().
This seems more logical in the first place, as it's
the inverse order of what the submitting thread will do.
Reported-by: valis <sec(a)valis.email>
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Sabrina Dubroca <sd(a)queasysnail.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
(cherry picked from commit 6db22d6c7a6dc914b12c0469b94eb639b6a8a146)
[Lee: Fixed merge-conflict in Stable branches linux-6.1.y and older]
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
net/tls/tls_sw.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 910da98d6bfb3..58d9b5c06cf89 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -440,7 +440,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sge;
struct sk_msg *msg_en;
struct tls_rec *rec;
- bool ready = false;
int pending;
rec = container_of(aead_req, struct tls_rec, aead_req);
@@ -472,8 +471,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
spin_lock_bh(&ctx->encrypt_compl_lock);
@@ -482,13 +485,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
if (!pending && ctx->async_notify)
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (!ready)
- return;
-
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
}
static int tls_do_encryption(struct sock *sk,
--
2.44.0.278.ge034bb2e1d-goog