From: Ajay Agarwal <ajayagarwal@google.com>
[ Upstream commit 7447990137bf06b2aeecad9c6081e01a9f47f2aa ]
PCIe r6.2, sec 5.5.4, requires that:
If setting either or both of the enable bits for ASPM L1 PM Substates,
both ports must be configured as described in this section while ASPM L1
is disabled.
Previously, pcie_config_aspm_l1ss() assumed that "setting enable bits"
meant "setting them to 1", and it configured L1SS as follows:
- Clear L1SS enable bits
- Disable L1
- Configure L1SS enable bits as required
- Enable L1 if required
With this sequence, when disabling L1SS on an ARM A-core with a Synopsys
DesignWare PCIe core, the CPU occasionally hangs when reading
PCI_L1SS_CTL1, leading to a reboot when the CPU watchdog expires.
Move the L1 disable to the caller (pcie_config_aspm_link(), where L1 was
already enabled) so L1 is always disabled while updating the L1SS bits:
- Disable L1
- Clear L1SS enable bits
- Configure L1SS enable bits as required
- Enable L1 if required
Change pcie_aspm_cap_init() similarly.
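As a sketch, the resulting order in pcie_config_aspm_link() (condensed
from the hunk below; iteration and capability checks as in the driver,
error paths omitted):

  /* Disable ASPM L1: downstream component first, then upstream */
  list_for_each_entry(child, &linkbus->devices, bus_list)
          pcie_config_aspm_dev(child, 0);
  pcie_config_aspm_dev(parent, 0);

  /* Update the L1SS enable bits while ASPM L1 is guaranteed off */
  if (link->aspm_capable & PCIE_LINK_STATE_L1SS)
          pcie_config_aspm_l1ss(link, state);

  /* Re-enable ASPM: upstream component first, then downstream */
  pcie_config_aspm_dev(parent, upstream);
  list_for_each_entry(child, &linkbus->devices, bus_list)
          pcie_config_aspm_dev(child, dwstream);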
Link: https://lore.kernel.org/r/20241007032917.872262-1-ajayagarwal@google.com
Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
[bhelgaas: comments, commit log, compute L1SS setting before config access]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Johnny-CC Chang <Johnny-CC.Chang@mediatek.com>
Signed-off-by: Macpaul Lin <macpaul.lin@mediatek.com>
---
drivers/pci/pcie/aspm.c | 92 ++++++++++++++++++++++-------------------
1 file changed, 50 insertions(+), 42 deletions(-)
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index cee2365e54b8..e943691bc931 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -805,6 +805,15 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl);
pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl);
+ /* Disable L0s/L1 before updating L1SS config */
+ if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) ||
+ FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) {
+ pcie_capability_write_word(child, PCI_EXP_LNKCTL,
+ child_lnkctl & ~PCI_EXP_LNKCTL_ASPMC);
+ pcie_capability_write_word(parent, PCI_EXP_LNKCTL,
+ parent_lnkctl & ~PCI_EXP_LNKCTL_ASPMC);
+ }
+
/*
* Setup L0s state
*
@@ -829,6 +838,13 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
aspm_l1ss_init(link);
+ /* Restore L0s/L1 if they were enabled */
+ if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) ||
+ FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) {
+ pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_lnkctl);
+ pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl);
+ }
+
/* Save default state */
link->aspm_default = link->aspm_enabled;
@@ -845,25 +861,28 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
}
}
-/* Configure the ASPM L1 substates */
+/* Configure the ASPM L1 substates. Caller must disable L1 first. */
static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
{
- u32 val, enable_req;
+ u32 val;
struct pci_dev *child = link->downstream, *parent = link->pdev;
- enable_req = (link->aspm_enabled ^ state) & state;
+ val = 0;
+ if (state & PCIE_LINK_STATE_L1_1)
+ val |= PCI_L1SS_CTL1_ASPM_L1_1;
+ if (state & PCIE_LINK_STATE_L1_2)
+ val |= PCI_L1SS_CTL1_ASPM_L1_2;
+ if (state & PCIE_LINK_STATE_L1_1_PCIPM)
+ val |= PCI_L1SS_CTL1_PCIPM_L1_1;
+ if (state & PCIE_LINK_STATE_L1_2_PCIPM)
+ val |= PCI_L1SS_CTL1_PCIPM_L1_2;
/*
- * Here are the rules specified in the PCIe spec for enabling L1SS:
- * - When enabling L1.x, enable bit at parent first, then at child
- * - When disabling L1.x, disable bit at child first, then at parent
- * - When enabling ASPM L1.x, need to disable L1
- * (at child followed by parent).
- * - The ASPM/PCIPM L1.2 must be disabled while programming timing
+ * PCIe r6.2, sec 5.5.4, rules for enabling L1 PM Substates:
+ * - Clear L1.x enable bits at child first, then at parent
+ * - Set L1.x enable bits at parent first, then at child
+ * - ASPM/PCIPM L1.2 must be disabled while programming timing
* parameters
- *
- * To keep it simple, disable all L1SS bits first, and later enable
- * what is needed.
*/
/* Disable all L1 substates */
@@ -871,26 +890,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
PCI_L1SS_CTL1_L1SS_MASK, 0);
pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
PCI_L1SS_CTL1_L1SS_MASK, 0);
- /*
- * If needed, disable L1, and it gets enabled later
- * in pcie_config_aspm_link().
- */
- if (enable_req & (PCIE_LINK_STATE_L1_1 | PCIE_LINK_STATE_L1_2)) {
- pcie_capability_clear_word(child, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPM_L1);
- pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPM_L1);
- }
-
- val = 0;
- if (state & PCIE_LINK_STATE_L1_1)
- val |= PCI_L1SS_CTL1_ASPM_L1_1;
- if (state & PCIE_LINK_STATE_L1_2)
- val |= PCI_L1SS_CTL1_ASPM_L1_2;
- if (state & PCIE_LINK_STATE_L1_1_PCIPM)
- val |= PCI_L1SS_CTL1_PCIPM_L1_1;
- if (state & PCIE_LINK_STATE_L1_2_PCIPM)
- val |= PCI_L1SS_CTL1_PCIPM_L1_2;
/* Enable what we need to enable */
pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
@@ -937,21 +936,30 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state)
dwstream |= PCI_EXP_LNKCTL_ASPM_L1;
}
+ /*
+ * Per PCIe r6.2, sec 5.5.4, setting either or both of the enable
+ * bits for ASPM L1 PM Substates must be done while ASPM L1 is
+ * disabled. Disable L1 here and apply new configuration after L1SS
+ * configuration has been completed.
+ *
+ * Per sec 7.5.3.7, when disabling ASPM L1, software must disable
+ * it in the Downstream component prior to disabling it in the
+ * Upstream component, and ASPM L1 must be enabled in the Upstream
+ * component prior to enabling it in the Downstream component.
+ *
+ * Sec 7.5.3.7 also recommends programming the same ASPM Control
+ * value for all functions of a multi-function device.
+ */
+ list_for_each_entry(child, &linkbus->devices, bus_list)
+ pcie_config_aspm_dev(child, 0);
+ pcie_config_aspm_dev(parent, 0);
+
if (link->aspm_capable & PCIE_LINK_STATE_L1SS)
pcie_config_aspm_l1ss(link, state);
- /*
- * Spec 2.0 suggests all functions should be configured the
- * same setting for ASPM. Enabling ASPM L1 should be done in
- * upstream component first and then downstream, and vice
- * versa for disabling ASPM L1. Spec doesn't mention L0S.
- */
- if (state & PCIE_LINK_STATE_L1)
- pcie_config_aspm_dev(parent, upstream);
+ pcie_config_aspm_dev(parent, upstream);
list_for_each_entry(child, &linkbus->devices, bus_list)
pcie_config_aspm_dev(child, dwstream);
- if (!(state & PCIE_LINK_STATE_L1))
- pcie_config_aspm_dev(parent, upstream);
link->aspm_enabled = state;
--
2.45.2
From: Ming Lei <ming.lei@redhat.com>
[ Upstream commit 26064d3e2b4d9a14df1072980e558c636fb023ea ]
A >4GB folio is possible on some ARCHs; aarch64, for example, supports
16GB hugepages. There, a folio 'offset' can't be held in an 'unsigned
int', which causes a warning in bio_add_folio_nofail() and I/O failure.
Fix it by advancing 'page' and trimming 'offset' so that `->bi_offset`
won't overflow and the folio can be added to the bio successfully.
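As a sketch of the fix (folio_page() and PAGE_SIZE are the standard
kernel helpers; the 'rem' temporary is illustrative only):

  unsigned long nr = off / PAGE_SIZE;  /* whole pages to advance */
  size_t rem = off % PAGE_SIZE;        /* < PAGE_SIZE, fits unsigned int */

  __bio_add_page(bio, folio_page(folio, nr), len, rem);

With, say, a 16GB folio and off just above 4GB, the full byte offset no
longer reaches the 'unsigned int' ->bi_offset; only the sub-page
remainder does.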
Fixes: ed9832bc08db ("block: introduce folio awareness and add a bigger size from folio")
Cc: Kundan Kumar <kundan.kumar@samsung.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Gavin Shan <gshan@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/r/20250312145136.2891229-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Alva Lan <alvalan9@foxmail.com>
---
The follow-up fix fbecd731de05 ("xfs: fix zoned GC data corruption due
to wrong bv_offset") addresses issues in the file fs/xfs/xfs_zone_gc.c.
This file was first introduced in version v6.15-rc1. So don't backport
the follow-up fix to 6.12.y.
---
block/bio.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 20c74696bf23..094a5adf79d2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1156,9 +1156,10 @@ EXPORT_SYMBOL(bio_add_page);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
size_t off)
{
+ unsigned long nr = off / PAGE_SIZE;
+
WARN_ON_ONCE(len > UINT_MAX);
- WARN_ON_ONCE(off > UINT_MAX);
- __bio_add_page(bio, &folio->page, len, off);
+ __bio_add_page(bio, folio_page(folio, nr), len, off % PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
@@ -1179,9 +1180,11 @@ EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
size_t off)
{
- if (len > UINT_MAX || off > UINT_MAX)
+ unsigned long nr = off / PAGE_SIZE;
+
+ if (len > UINT_MAX)
return false;
- return bio_add_page(bio, &folio->page, len, off) > 0;
+ return bio_add_page(bio, folio_page(folio, nr), len, off % PAGE_SIZE) > 0;
}
EXPORT_SYMBOL(bio_add_folio);
--
2.34.1
Fix possible overflow in the address expression used as the second
argument to iommu_map() and iommu_unmap(). Without an explicit cast,
this expression is evaluated in 32 bits and may overflow when
'r->offset' or 'i' is large. Cast 'r->offset' to unsigned long before
the addition and shift so the IOVA is computed in the wider type,
preventing unintended wraparound.
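For illustration (values invented): with imem->iommu_pgshift == 12 and
a 32-bit evaluation, the shift wraps once 'r->offset + i' reaches
0x100000:

  u32 bad = (0x00100000u + 0) << 12;  /* wraps to 0 in 32-bit arithmetic */
  unsigned long iova = ((unsigned long)0x00100000u + 0) << 12;
                                      /* 0x100000000 where long is 64-bit */

The cast widens the whole expression before the shift, matching the
'unsigned long' IOVA parameter of iommu_map()/iommu_unmap().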
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Cc: stable@vger.kernel.org # v4.4+
Signed-off-by: Alexey Nepomnyashih <sdl@nppct.ru>
---
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 201022ae9214..17a0e1a46211 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -334,7 +334,7 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
/* Unmap pages from GPU address space and free them */
for (i = 0; i < node->base.mn->length; i++) {
iommu_unmap(imem->domain,
- (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
+ ((unsigned long)r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
DMA_BIDIRECTIONAL);
__free_page(node->pages[i]);
@@ -472,7 +472,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
/* Map into GPU address space */
for (i = 0; i < npages; i++) {
- u32 offset = (r->offset + i) << imem->iommu_pgshift;
+ unsigned long offset = ((unsigned long)r->offset + i) << imem->iommu_pgshift;
ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
PAGE_SIZE, IOMMU_READ | IOMMU_WRITE,
--
2.43.0
From: Praveen Kaligineedi <pkaligineedi@google.com>
gve_tx_timeout was calculating missed completions in a way that is only
relevant in the GQ queue format. Additionally, it was attempting to
disable device interrupts, which is not needed in either GQ or DQ queue
formats.
As a result, TX timeouts with the DQ queue format likely would have
triggered early resets without kicking the queue at all.
This patch drops the check for pending work altogether and always kicks
the queue after validating the queue has not seen a TX timeout too
recently.
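A sketch of the resulting timeout path (condensed from the hunk below;
labels as in the driver):

  if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
          goto reset;            /* kicked too recently; reset instead */

  netdev_info(dev, "Kicking queue %d", txqueue);
  napi_schedule(&block->napi);   /* the kick applies to both GQ and DQ */
  tx->last_kick_msec = current_time;
  goto out;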
Fixes: 87a7f321bb6a ("gve: Recover from queue stall due to missed IRQ")
Co-developed-by: Tim Hostetler <thostet@google.com>
Signed-off-by: Tim Hostetler <thostet@google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
---
drivers/net/ethernet/google/gve/gve_main.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index c3791cf..0c6328b 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1921,7 +1921,6 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
struct gve_notify_block *block;
struct gve_tx_ring *tx = NULL;
struct gve_priv *priv;
- u32 last_nic_done;
u32 current_time;
u32 ntfy_idx;
@@ -1941,17 +1940,10 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
goto reset;
- /* Check to see if there are missed completions, which will allow us to
- * kick the queue.
- */
- last_nic_done = gve_tx_load_event_counter(priv, tx);
- if (last_nic_done - tx->done) {
- netdev_info(dev, "Kicking queue %d", txqueue);
- iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
- napi_schedule(&block->napi);
- tx->last_kick_msec = current_time;
- goto out;
- } // Else reset.
+ netdev_info(dev, "Kicking queue %d", txqueue);
+ napi_schedule(&block->napi);
+ tx->last_kick_msec = current_time;
+ goto out;
reset:
gve_schedule_reset(priv);
--
2.49.0.805.g082f7c87e0-goog