From: Nigel Kirkland nkirkland2304@gmail.com
[ Upstream commit 8ae5b3a685dc59a8cf7ccfe0e850999ba9727a3c ]
The nvme_fc_fcp_op structure describing an AEN operation is initialized with a null request structure pointer. An FC LLDD may make a call to nvme_fc_io_getuuid passing a pointer to an nvmefc_fcp_req for an AEN operation.
Add validation of the request structure pointer before dereference.
Signed-off-by: Nigel Kirkland nkirkland2304@gmail.com Reviewed-by: James Smart jsmart2021@gmail.com Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1cd2bf82319a9..a15b37750d6e9 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1924,7 +1924,7 @@ char *nvme_fc_io_getuuid(struct nvmefc_fcp_req *req) struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); struct request *rq = op->rq;
- if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq->bio) + if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !rq || !rq->bio) return NULL; return blkcg_get_fc_appid(rq->bio); }
From: Helge Deller deller@gmx.de
[ Upstream commit eb3255ee8f6f4691471a28fbf22db5e8901116cd ]
Fix this makecheck warning: drivers/parisc/sba_iommu.c:98:19: warning: symbol 'sba_list' was not declared. Should it be static?
Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/parisc/include/asm/ropes.h | 3 +++ drivers/char/agp/parisc-agp.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h index 8e51c775c80a6..62399c7ea94a1 100644 --- a/arch/parisc/include/asm/ropes.h +++ b/arch/parisc/include/asm/ropes.h @@ -86,6 +86,9 @@ struct sba_device { struct ioc ioc[MAX_IOC]; };
+/* list of SBA's in system, see drivers/parisc/sba_iommu.c */ +extern struct sba_device *sba_list; + #define ASTRO_RUNWAY_PORT 0x582 #define IKE_MERCED_PORT 0x803 #define REO_MERCED_PORT 0x804 diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index 514f9f287a781..c6f181702b9a7 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -394,8 +394,6 @@ find_quicksilver(struct device *dev, void *data) static int __init parisc_agp_init(void) { - extern struct sba_device *sba_list; - int err = -1; struct parisc_device *sba = NULL, *lba = NULL; struct lba_device *lbadev = NULL;
From: Helge Deller deller@gmx.de
[ Upstream commit c1ebb94071cb4455177bafa619423acb3494d15d ]
Fix sparse warnings, as pdir is __le64 *.
Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/parisc/include/asm/ropes.h | 4 ++-- drivers/parisc/iommu-helpers.h | 4 ++-- drivers/parisc/sba_iommu.c | 28 ++++++++++++++-------------- 3 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h index 62399c7ea94a1..c46ad399a74f2 100644 --- a/arch/parisc/include/asm/ropes.h +++ b/arch/parisc/include/asm/ropes.h @@ -29,7 +29,7 @@ struct ioc { void __iomem *ioc_hpa; /* I/O MMU base address */ char *res_map; /* resource map, bit == pdir entry */ - u64 *pdir_base; /* physical base address */ + __le64 *pdir_base; /* physical base address */ unsigned long ibase; /* pdir IOV Space base - shared w/lba_pci */ unsigned long imask; /* pdir IOV Space mask - shared w/lba_pci */ #ifdef ZX1_SUPPORT @@ -113,7 +113,7 @@ static inline int IS_PLUTO(struct parisc_device *d) {
#define SBA_PDIR_VALID_BIT 0x8000000000000000ULL
-#define SBA_AGPGART_COOKIE 0x0000badbadc0ffeeULL +#define SBA_AGPGART_COOKIE (__force __le64) 0x0000badbadc0ffeeULL
#define SBA_FUNC_ID 0x0000 /* function id */ #define SBA_FCLASS 0x0008 /* function class, bist, header, rev... */ diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h index 0905be256de08..a00c38b6224ab 100644 --- a/drivers/parisc/iommu-helpers.h +++ b/drivers/parisc/iommu-helpers.h @@ -14,13 +14,13 @@ static inline unsigned int iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, unsigned long hint, - void (*iommu_io_pdir_entry)(u64 *, space_t, unsigned long, + void (*iommu_io_pdir_entry)(__le64 *, space_t, unsigned long, unsigned long)) { struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ unsigned int n_mappings = 0; unsigned long dma_offset = 0, dma_len = 0; - u64 *pdirp = NULL; + __le64 *pdirp = NULL;
/* Horrible hack. For efficiency's sake, dma_sg starts one * entry below the true start (it is immediately incremented diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index b8e91cbb60567..780ea219cd8d4 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -202,7 +202,7 @@ static void sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) { /* start printing from lowest pde in rval */ - u64 *ptr = &(ioc->pdir_base[pide & (~0U * BITS_PER_LONG)]); + __le64 *ptr = &(ioc->pdir_base[pide & (~0U * BITS_PER_LONG)]); unsigned long *rptr = (unsigned long *) &(ioc->res_map[(pide >>3) & ~(sizeof(unsigned long) - 1)]); uint rcnt;
@@ -569,7 +569,7 @@ typedef unsigned long space_t; */
static void -sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, +sba_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba, unsigned long hint) { u64 pa; /* physical address */ @@ -613,7 +613,7 @@ static void sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) { u32 iovp = (u32) SBA_IOVP(ioc,iova); - u64 *pdir_ptr = &ioc->pdir_base[PDIR_INDEX(iovp)]; + __le64 *pdir_ptr = &ioc->pdir_base[PDIR_INDEX(iovp)];
#ifdef ASSERT_PDIR_SANITY /* Assert first pdir entry is set. @@ -714,7 +714,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, unsigned long flags; dma_addr_t iovp; dma_addr_t offset; - u64 *pdir_start; + __le64 *pdir_start; int pide;
ioc = GET_IOC(dev); @@ -1432,7 +1432,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
ioc->pdir_size = pdir_size = (iova_space_size/IOVP_SIZE) * sizeof(u64);
- DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n", + DBG_INIT("%s() hpa %px mem %ldMB IOV %dMB (%d bits)\n", __func__, ioc->ioc_hpa, (unsigned long) totalram_pages() >> (20 - PAGE_SHIFT), @@ -1469,7 +1469,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num) ioc->iovp_mask = ~(iova_space_mask + PAGE_SIZE - 1); #endif
- DBG_INIT("%s() IOV base 0x%lx mask 0x%0lx\n", + DBG_INIT("%s() IOV base %#lx mask %#0lx\n", __func__, ioc->ibase, ioc->imask);
/* @@ -1581,7 +1581,7 @@ printk("sba_hw_init(): mem_boot 0x%x 0x%x 0x%x 0x%x\n", PAGE0->mem_boot.hpa,
if (!IS_PLUTO(sba_dev->dev)) { ioc_ctl = READ_REG(sba_dev->sba_hpa+IOC_CTRL); - DBG_INIT("%s() hpa 0x%lx ioc_ctl 0x%Lx ->", + DBG_INIT("%s() hpa %px ioc_ctl 0x%Lx ->", __func__, sba_dev->sba_hpa, ioc_ctl); ioc_ctl &= ~(IOC_CTRL_RM | IOC_CTRL_NC | IOC_CTRL_CE); ioc_ctl |= IOC_CTRL_DD | IOC_CTRL_D4 | IOC_CTRL_TC; @@ -1666,14 +1666,14 @@ printk("sba_hw_init(): mem_boot 0x%x 0x%x 0x%x 0x%x\n", PAGE0->mem_boot.hpa, /* flush out the last writes */ READ_REG(sba_dev->ioc[i].ioc_hpa + ROPE7_CTL);
- DBG_INIT(" ioc[%d] ROPE_CFG 0x%Lx ROPE_DBG 0x%Lx\n", + DBG_INIT(" ioc[%d] ROPE_CFG %#lx ROPE_DBG %lx\n", i, - READ_REG(sba_dev->ioc[i].ioc_hpa + 0x40), - READ_REG(sba_dev->ioc[i].ioc_hpa + 0x50) + (unsigned long) READ_REG(sba_dev->ioc[i].ioc_hpa + 0x40), + (unsigned long) READ_REG(sba_dev->ioc[i].ioc_hpa + 0x50) ); - DBG_INIT(" STATUS_CONTROL 0x%Lx FLUSH_CTRL 0x%Lx\n", - READ_REG(sba_dev->ioc[i].ioc_hpa + 0x108), - READ_REG(sba_dev->ioc[i].ioc_hpa + 0x400) + DBG_INIT(" STATUS_CONTROL %#lx FLUSH_CTRL %#lx\n", + (unsigned long) READ_REG(sba_dev->ioc[i].ioc_hpa + 0x108), + (unsigned long) READ_REG(sba_dev->ioc[i].ioc_hpa + 0x400) );
if (IS_PLUTO(sba_dev->dev)) { @@ -1737,7 +1737,7 @@ sba_common_init(struct sba_device *sba_dev) #ifdef ASSERT_PDIR_SANITY /* Mark first bit busy - ie no IOVA 0 */ sba_dev->ioc[i].res_map[0] = 0x80; - sba_dev->ioc[i].pdir_base[0] = 0xeeffc0addbba0080ULL; + sba_dev->ioc[i].pdir_base[0] = (__force __le64) 0xeeffc0addbba0080ULL; #endif
/* Third (and last) part of PIRANHA BUG */
From: Helge Deller deller@gmx.de
[ Upstream commit 9a47a710cf517801a8b4fff9949c4cecb5fd019a ]
Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/parisc/ccio-dma.c | 18 +++++++++--------- drivers/parisc/iommu-helpers.h | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 10e846286f4ef..623707fc6ff1c 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -222,7 +222,7 @@ struct ioa_registers { struct ioc { struct ioa_registers __iomem *ioc_regs; /* I/O MMU base address */ u8 *res_map; /* resource map, bit == pdir entry */ - u64 *pdir_base; /* physical base address */ + __le64 *pdir_base; /* physical base address */ u32 pdir_size; /* bytes, function of IOV Space size */ u32 res_hint; /* next available IOVP - circular search */ @@ -347,7 +347,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size) BUG_ON(pages_needed == 0); BUG_ON((pages_needed * IOVP_SIZE) > DMA_CHUNK_SIZE);
- DBG_RES("%s() size: %d pages_needed %d\n", + DBG_RES("%s() size: %zu pages_needed %d\n", __func__, size, pages_needed);
/* @@ -435,7 +435,7 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped) BUG_ON((pages_mapped * IOVP_SIZE) > DMA_CHUNK_SIZE); BUG_ON(pages_mapped > BITS_PER_LONG);
- DBG_RES("%s(): res_idx: %d pages_mapped %d\n", + DBG_RES("%s(): res_idx: %d pages_mapped %lu\n", __func__, res_idx, pages_mapped);
#ifdef CCIO_COLLECT_STATS @@ -551,7 +551,7 @@ static u32 hint_lookup[] = { * index are bits 12:19 of the value returned by LCI. */ static void -ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, +ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba, unsigned long hints) { register unsigned long pa; @@ -727,7 +727,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size, unsigned long flags; dma_addr_t iovp; dma_addr_t offset; - u64 *pdir_start; + __le64 *pdir_start; unsigned long hint = hint_lookup[(int)direction];
BUG_ON(!dev); @@ -754,8 +754,8 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
pdir_start = &(ioc->pdir_base[idx]);
- DBG_RUN("%s() 0x%p -> 0x%lx size: %0x%x\n", - __func__, addr, (long)iovp | offset, size); + DBG_RUN("%s() %px -> %#lx size: %zu\n", + __func__, addr, (long)(iovp | offset), size);
/* If not cacheline aligned, force SAFE_DMA on the whole mess */ if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES)) @@ -813,7 +813,7 @@ ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size, return; }
- DBG_RUN("%s() iovp 0x%lx/%x\n", + DBG_RUN("%s() iovp %#lx/%zx\n", __func__, (long)iova, size);
iova ^= offset; /* clear offset bits */ @@ -1291,7 +1291,7 @@ ccio_ioc_init(struct ioc *ioc) iova_space_size>>20, iov_order + PAGE_SHIFT);
- ioc->pdir_base = (u64 *)__get_free_pages(GFP_KERNEL, + ioc->pdir_base = (__le64 *)__get_free_pages(GFP_KERNEL, get_order(ioc->pdir_size)); if(NULL == ioc->pdir_base) { panic("%s() could not allocate I/O Page Table\n", __func__); diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h index a00c38b6224ab..c43f1a212a5c8 100644 --- a/drivers/parisc/iommu-helpers.h +++ b/drivers/parisc/iommu-helpers.h @@ -31,8 +31,8 @@ iommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, unsigned long vaddr; long size;
- DBG_RUN_SG(" %d : %08lx/%05x %p/%05x\n", nents, - (unsigned long)sg_dma_address(startsg), cnt, + DBG_RUN_SG(" %d : %08lx %p/%05x\n", nents, + (unsigned long)sg_dma_address(startsg), sg_virt(startsg), startsg->length );
From: Helge Deller deller@gmx.de
[ Upstream commit 927c6c8aa27c284a799b8c18784e37d3373af908 ]
Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/parisc/iosapic.c | 4 ++-- drivers/parisc/iosapic_private.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index bcc1dae007803..890c3c0f3d140 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -202,9 +202,9 @@ static inline void iosapic_write(void __iomem *iosapic, unsigned int reg, u32 va
static DEFINE_SPINLOCK(iosapic_lock);
-static inline void iosapic_eoi(void __iomem *addr, unsigned int data) +static inline void iosapic_eoi(__le32 __iomem *addr, __le32 data) { - __raw_writel(data, addr); + __raw_writel((__force u32)data, addr); }
/* diff --git a/drivers/parisc/iosapic_private.h b/drivers/parisc/iosapic_private.h index 73ecc657ad954..bd8ff40162b4b 100644 --- a/drivers/parisc/iosapic_private.h +++ b/drivers/parisc/iosapic_private.h @@ -118,8 +118,8 @@ struct iosapic_irt { struct vector_info { struct iosapic_info *iosapic; /* I/O SAPIC this vector is on */ struct irt_entry *irte; /* IRT entry */ - u32 __iomem *eoi_addr; /* precalculate EOI reg address */ - u32 eoi_data; /* IA64: ? PA: swapped txn_data */ + __le32 __iomem *eoi_addr; /* precalculate EOI reg address */ + __le32 eoi_data; /* IA64: ? PA: swapped txn_data */ int txn_irq; /* virtual IRQ number for processor */ ulong txn_addr; /* IA64: id_eid PA: partial HPA */ u32 txn_data; /* CPU interrupt bit */
From: Helge Deller deller@gmx.de
[ Upstream commit b137b9d60b8add5620a06c687a71ce18776730b0 ]
Fix "warning: directive in macro's argument list" warning.
Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/parisc/kernel/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 8f4b77648491a..ed8b759480614 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -925,9 +925,9 @@ static __init void qemu_header(void) pr_info("#define PARISC_MODEL "%s"\n\n", boot_cpu_data.pdc.sys_model_name);
+ #define p ((unsigned long *)&boot_cpu_data.pdc.model) pr_info("#define PARISC_PDC_MODEL 0x%lx, 0x%lx, 0x%lx, " "0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx\n\n", - #define p ((unsigned long *)&boot_cpu_data.pdc.model) p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8]); #undef p
From: Helge Deller deller@gmx.de
[ Upstream commit b1bef1388c427cdad7331a9c8eb4ebbbe5b954b0 ]
Signed-off-by: Helge Deller deller@gmx.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/parisc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 12c4d4104ade4..2f81bfd4f15e1 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -365,7 +365,7 @@ union irq_stack_union { volatile unsigned int lock[1]; };
-DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { +static DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { .slock = { 1,1,1,1 }, }; #endif
From: Javed Hasan jhasan@marvell.com
[ Upstream commit 7df0b2605489bef3f4223ad66f1f9bb8d50d4cd2 ]
Avoid race condition between I/O completion and abort processing by protecting the cmd_type with the rport lock.
Signed-off-by: Javed Hasan jhasan@marvell.com Signed-off-by: Saurav Kashyap skashyap@marvell.com Link: https://lore.kernel.org/r/20230901060646.27885-1-skashyap@marvell.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/qedf/qedf_io.c | 10 ++++++++-- drivers/scsi/qedf/qedf_main.c | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 4750ec5789a80..10fe3383855c0 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1904,6 +1904,7 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) goto drop_rdata_kref; }
+ spin_lock_irqsave(&fcport->rport_lock, flags); if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { @@ -1911,17 +1912,20 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) "io_req xid=0x%x sc_cmd=%p already in cleanup or abort processing or already completed.\n", io_req->xid, io_req->sc_cmd); rc = 1; + spin_unlock_irqrestore(&fcport->rport_lock, flags); goto drop_rdata_kref; }
+ /* Set the command type to abort */ + io_req->cmd_type = QEDF_ABTS; + spin_unlock_irqrestore(&fcport->rport_lock, flags); + kref_get(&io_req->refcount);
xid = io_req->xid; qedf->control_requests++; qedf->packet_aborts++;
- /* Set the command type to abort */ - io_req->cmd_type = QEDF_ABTS; io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts;
set_bit(QEDF_CMD_IN_ABORT, &io_req->flags); @@ -2210,7 +2214,9 @@ int qedf_initiate_cleanup(struct qedf_ioreq *io_req, refcount, fcport, fcport->rdata->ids.port_id);
/* Cleanup cmds re-use the same TID as the original I/O */ + spin_lock_irqsave(&fcport->rport_lock, flags); io_req->cmd_type = QEDF_CLEANUP; + spin_unlock_irqrestore(&fcport->rport_lock, flags); io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts;
init_completion(&io_req->cleanup_done); diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 7825765c936cd..91f3f1d7098eb 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2805,6 +2805,8 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) struct qedf_ioreq *io_req; struct qedf_rport *fcport; u32 comp_type; + u8 io_comp_type; + unsigned long flags;
comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) & FCOE_CQE_CQE_TYPE_MASK; @@ -2838,11 +2840,14 @@ void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) return; }
+ spin_lock_irqsave(&fcport->rport_lock, flags); + io_comp_type = io_req->cmd_type; + spin_unlock_irqrestore(&fcport->rport_lock, flags);
switch (comp_type) { case FCOE_GOOD_COMPLETION_CQE_TYPE: atomic_inc(&fcport->free_sqes); - switch (io_req->cmd_type) { + switch (io_comp_type) { case QEDF_SCSI_CMD: qedf_scsi_completion(qedf, cqe, io_req); break;
From: Kiwoong Kim kwmad.kim@samsung.com
[ Upstream commit 2d3f59cf868b4a2dd678a96cd49bdd91411bd59f ]
__ufshcd_send_uic_cmd() is wrapped by uic_cmd_mutex and its related contexts are accessed within the section wrapped by uic_cmd_mutex. Thus, wrapping with host_lock is redundant.
Signed-off-by: Kiwoong Kim kwmad.kim@samsung.com Link: https://lore.kernel.org/r/782ba5f26f0a96e58d85dff50751787d2d2a6b2b.169379006... Reviewed-by: Bart Van Assche bvanassche@acm.org Reviewed-by: Chanwoo Lee cw9316.lee@samsung.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ufs/core/ufshcd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 9615a076735bd..75c6628af2c0e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2416,7 +2416,6 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, bool completion) { lockdep_assert_held(&hba->uic_cmd_mutex); - lockdep_assert_held(hba->host->host_lock);
if (!ufshcd_ready_for_uic_cmd(hba)) { dev_err(hba->dev, @@ -2443,7 +2442,6 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { int ret; - unsigned long flags;
if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD) return 0; @@ -2452,9 +2450,7 @@ int ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) mutex_lock(&hba->uic_cmd_mutex); ufshcd_add_delay_before_dme_cmd(hba);
- spin_lock_irqsave(hba->host->host_lock, flags); ret = __ufshcd_send_uic_cmd(hba, uic_cmd, true); - spin_unlock_irqrestore(hba->host->host_lock, flags); if (!ret) ret = ufshcd_wait_for_uic_cmd(hba, uic_cmd);
@@ -4166,8 +4162,8 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) wmb(); reenable_intr = true; } - ret = __ufshcd_send_uic_cmd(hba, cmd, false); spin_unlock_irqrestore(hba->host->host_lock, flags); + ret = __ufshcd_send_uic_cmd(hba, cmd, false); if (ret) { dev_err(hba->dev, "pwr ctrl cmd 0x%x with mode 0x%x uic error %d\n",
From: Kiwoong Kim kwmad.kim@samsung.com
[ Upstream commit d32533d30e2119b0c0aa17596734f1f842f750df ]
With auto hibern8 enabled, UIC could be busy processing a hibern8 operation and the HCI would reports UIC not ready for a short while through HCS.UCRDY. The UFS driver doesn't currently handle this situation. The UFSHCI spec specifies UCRDY like this: whether the host controller is ready to process UIC COMMAND
The 'ready' could be seen as many different meanings. If the meaning includes not processing any request from HCI, processing a hibern8 operation can be 'not ready'. In this situation, the driver needs to wait until the operations is completed.
Signed-off-by: Kiwoong Kim kwmad.kim@samsung.com Link: https://lore.kernel.org/r/550484ffb66300bdcec63d3e304dfd55cb432f1f.169379006... Reviewed-by: Adrian Hunter adrian.hunter@intel.com Reviewed-by: Chanwoo Lee cw9316.lee@samsung.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 75c6628af2c0e..80c48eb6bf85c 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/regulator/consumer.h> #include <linux/sched/clock.h> +#include <linux/iopoll.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_driver.h> @@ -2324,7 +2325,11 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) */ static inline bool ufshcd_ready_for_uic_cmd(struct ufs_hba *hba) { - return ufshcd_readl(hba, REG_CONTROLLER_STATUS) & UIC_COMMAND_READY; + u32 val; + int ret = read_poll_timeout(ufshcd_readl, val, val & UIC_COMMAND_READY, + 500, UIC_CMD_TIMEOUT * 1000, false, hba, + REG_CONTROLLER_STATUS); + return ret == 0 ? true : false; }
/**
From: Zheng Yejian zhengyejian1@huawei.com
[ Upstream commit f4e4ada586995b17f828c6d147d1800eb1471450 ]
Function instance_set() expects to enable event 'sched_switch', so we should set 1 to its 'enable' file.
Testcase passed after this patch: # ./ftracetest test.d/instances/instance-event.tc === Ftrace unit tests === [1] Test creation and deletion of trace instances while setting an event [PASS]
# of passed: 1 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0
Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Acked-by: Masami Hiramatsu (Google) mhiramat@kernel.org Acked-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../testing/selftests/ftrace/test.d/instances/instance-event.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 0eb47fbb3f44d..42422e4251078 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -39,7 +39,7 @@ instance_read() {
instance_set() { while :; do - echo 1 > foo/events/sched/sched_switch + echo 1 > foo/events/sched/sched_switch/enable done 2> /dev/null }
From: Zheng Yejian zhengyejian1@huawei.com
[ Upstream commit f6bd2c92488c30ef53b5bd80c52f0a7eee9d545a ]
When user resize all trace ring buffer through file 'buffer_size_kb', then in ring_buffer_resize(), kernel allocates buffer pages for each cpu in a loop.
If the kernel preemption model is PREEMPT_NONE and there are many cpus and there are many buffer pages to be allocated, it may not give up cpu for a long time and finally cause a softlockup.
To avoid it, call cond_resched() after each cpu buffer allocation.
Link: https://lore.kernel.org/linux-trace-kernel/20230906081930.3939106-1-zhengyej...
Cc: mhiramat@kernel.org Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 52dea5dd5362e..1267e1016ab5c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2206,6 +2206,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, err = -ENOMEM; goto out_err; } + + cond_resched(); }
cpus_read_lock();
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 77d20c685b6baeb942606a93ed861c191381b73e ]
Internally I got a report of very long stalls on normal operations like creating a new file when auto relocation was running. The reporter used the 'bpf offcputime' tracer to show that we would get stuck in start_transaction for 5 to 30 seconds, and were always being woken up by the transaction commit.
Using my timing-everything script, which times how long a function takes and what percentage of that total time is taken up by its children, I saw several traces like this
1083 took 32812902424 ns 29929002926 ns 91.2110% wait_for_commit_duration 25568 ns 7.7920e-05% commit_fs_roots_duration 1007751 ns 0.00307% commit_cowonly_roots_duration 446855602 ns 1.36182% btrfs_run_delayed_refs_duration 271980 ns 0.00082% btrfs_run_delayed_items_duration 2008 ns 6.1195e-06% btrfs_apply_pending_changes_duration 9656 ns 2.9427e-05% switch_commit_roots_duration 1598 ns 4.8700e-06% btrfs_commit_device_sizes_duration 4314 ns 1.3147e-05% btrfs_free_log_root_tree_duration
Here I was only tracing functions that happen where we are between START_COMMIT and UNBLOCKED in order to see what would be keeping us blocked for so long. The wait_for_commit() we do is where we wait for a previous transaction that hasn't completed it's commit. This can include all of the unpin work and other cleanups, which tends to be the longest part of our transaction commit.
There is no reason we should be blocking new things from entering the transaction at this point, it just adds to random latency spikes for no reason.
Fix this by adding a PREP stage. This allows us to properly deal with multiple committers coming in at the same time, we retain the behavior that the winner waits on the previous transaction and the losers all wait for this transaction commit to occur. Nothing else is blocked during the PREP stage, and then once the wait is complete we switch to COMMIT_START and all of the same behavior as before is maintained.
Reviewed-by: Filipe Manana fdmanana@suse.com Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/locking.h | 2 +- fs/btrfs/transaction.c | 39 ++++++++++++++++++++++++--------------- fs/btrfs/transaction.h | 1 + 4 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4494883a19abc..93554f8aef8cf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1552,7 +1552,7 @@ static int transaction_kthread(void *arg)
delta = ktime_get_seconds() - cur->start_time; if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) && - cur->state < TRANS_STATE_COMMIT_START && + cur->state < TRANS_STATE_COMMIT_PREP && delta < fs_info->commit_interval) { spin_unlock(&fs_info->trans_lock); delay -= msecs_to_jiffies((delta - 1) * 1000); @@ -2689,8 +2689,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered); btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent); - btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start, - BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep, + BTRFS_LOCKDEP_TRANS_COMMIT_PREP); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, BTRFS_LOCKDEP_TRANS_UNBLOCKED); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed, @@ -4892,7 +4892,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) while (!list_empty(&fs_info->trans_list)) { t = list_first_entry(&fs_info->trans_list, struct btrfs_transaction, list); - if (t->state >= TRANS_STATE_COMMIT_START) { + if (t->state >= TRANS_STATE_COMMIT_PREP) { refcount_inc(&t->use_count); spin_unlock(&fs_info->trans_lock); btrfs_wait_for_commit(fs_info, t->transid); diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index edb9b4a0dba15..7d6ee1e609bf2 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -79,7 +79,7 @@ enum btrfs_lock_nesting { };
enum btrfs_lockdep_trans_states { - BTRFS_LOCKDEP_TRANS_COMMIT_START, + BTRFS_LOCKDEP_TRANS_COMMIT_PREP, BTRFS_LOCKDEP_TRANS_UNBLOCKED, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED, BTRFS_LOCKDEP_TRANS_COMPLETED, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5bbd288b9cb54..942cfa906ed48 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -56,12 +56,17 @@ static struct kmem_cache *btrfs_trans_handle_cachep; * | Call btrfs_commit_transaction() on any trans handle attached to * | transaction N * V - * Transaction N [[TRANS_STATE_COMMIT_START]] + * Transaction N [[TRANS_STATE_COMMIT_PREP]] + * | + * | If there are simultaneous calls to btrfs_commit_transaction() one will win + * | the race and the rest will wait for the winner to commit the transaction. + * | + * | The winner will wait for previous running transaction to completely finish + * | if there is one. * | - * | Will wait for previous running transaction to completely finish if there - * | is one + * Transaction N [[TRANS_STATE_COMMIT_START]] * | - * | Then one of the following happes: + * | Then one of the following happens: * | - Wait for all other trans handle holders to release. * | The btrfs_commit_transaction() caller will do the commit work. * | - Wait for current transaction to be committed by others. @@ -112,6 +117,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep; */ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_RUNNING] = 0U, + [TRANS_STATE_COMMIT_PREP] = 0U, [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | __TRANS_ATTACH | @@ -1980,7 +1986,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans) * Wait for the current transaction commit to start and block * subsequent transaction joins */ - btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); wait_event(fs_info->transaction_blocked_wait, cur_trans->state >= TRANS_STATE_COMMIT_START || TRANS_ABORTED(cur_trans)); @@ -2127,7 +2133,7 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans) return;
lockdep_assert_held(&trans->fs_info->trans_lock); - ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START); + ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP);
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots); } @@ -2151,7 +2157,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ktime_t interval;
ASSERT(refcount_read(&trans->use_count) == 1); - btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
@@ -2211,7 +2217,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) }
spin_lock(&fs_info->trans_lock); - if (cur_trans->state >= TRANS_STATE_COMMIT_START) { + if (cur_trans->state >= TRANS_STATE_COMMIT_PREP) { enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
add_pending_snapshot(trans); @@ -2223,7 +2229,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) want_state = TRANS_STATE_SUPER_COMMITTED;
btrfs_trans_state_lockdep_release(fs_info, - BTRFS_LOCKDEP_TRANS_COMMIT_START); + BTRFS_LOCKDEP_TRANS_COMMIT_PREP); ret = btrfs_end_transaction(trans); wait_for_commit(cur_trans, want_state);
@@ -2235,9 +2241,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret; }
- cur_trans->state = TRANS_STATE_COMMIT_START; + cur_trans->state = TRANS_STATE_COMMIT_PREP; wake_up(&fs_info->transaction_blocked_wait); - btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
if (cur_trans->list.prev != &fs_info->trans_list) { enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED; @@ -2258,11 +2264,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) btrfs_put_transaction(prev_trans); if (ret) goto lockdep_release; - } else { - spin_unlock(&fs_info->trans_lock); + spin_lock(&fs_info->trans_lock); } } else { - spin_unlock(&fs_info->trans_lock); /* * The previous transaction was aborted and was already removed * from the list of transactions at fs_info->trans_list. So we @@ -2270,11 +2274,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * corrupt state (pointing to trees with unwritten nodes/leafs). */ if (BTRFS_FS_ERROR(fs_info)) { + spin_unlock(&fs_info->trans_lock); ret = -EROFS; goto lockdep_release; } }
+ cur_trans->state = TRANS_STATE_COMMIT_START; + wake_up(&fs_info->transaction_blocked_wait); + spin_unlock(&fs_info->trans_lock); + /* * Get the time spent on the work done by the commit thread and not * the time spent waiting on a previous commit @@ -2584,7 +2593,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) goto cleanup_transaction;
lockdep_trans_commit_start_release: - btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); btrfs_end_transaction(trans); return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 8e9fa23bd7fed..6b309f8a99a86 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -14,6 +14,7 @@
enum btrfs_trans_state { TRANS_STATE_RUNNING, + TRANS_STATE_COMMIT_PREP, TRANS_STATE_COMMIT_START, TRANS_STATE_COMMIT_DOING, TRANS_STATE_UNBLOCKED,
On Sun, Sep 24, 2023 at 09:15:01AM -0400, Sasha Levin wrote:
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 77d20c685b6baeb942606a93ed861c191381b73e ]
Internally I got a report of very long stalls on normal operations like creating a new file when auto relocation was running. The reporter used the 'bpf offcputime' tracer to show that we would get stuck in start_transaction for 5 to 30 seconds, and were always being woken up by the transaction commit.
Using my timing-everything script, which times how long a function takes and what percentage of that total time is taken up by its children, I saw several traces like this
1083 took 32812902424 ns 29929002926 ns 91.2110% wait_for_commit_duration 25568 ns 7.7920e-05% commit_fs_roots_duration 1007751 ns 0.00307% commit_cowonly_roots_duration 446855602 ns 1.36182% btrfs_run_delayed_refs_duration 271980 ns 0.00082% btrfs_run_delayed_items_duration 2008 ns 6.1195e-06% btrfs_apply_pending_changes_duration 9656 ns 2.9427e-05% switch_commit_roots_duration 1598 ns 4.8700e-06% btrfs_commit_device_sizes_duration 4314 ns 1.3147e-05% btrfs_free_log_root_tree_duration
Here I was only tracing functions that happen where we are between START_COMMIT and UNBLOCKED in order to see what would be keeping us blocked for so long. The wait_for_commit() we do is where we wait for a previous transaction that hasn't completed it's commit. This can include all of the unpin work and other cleanups, which tends to be the longest part of our transaction commit.
There is no reason we should be blocking new things from entering the transaction at this point, it just adds to random latency spikes for no reason.
Fix this by adding a PREP stage. This allows us to properly deal with multiple committers coming in at the same time, we retain the behavior that the winner waits on the previous transaction and the losers all wait for this transaction commit to occur. Nothing else is blocked during the PREP stage, and then once the wait is complete we switch to COMMIT_START and all of the same behavior as before is maintained.
Reviewed-by: Filipe Manana fdmanana@suse.com Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
Please postpone adding this patch to stable trees until 6.6 is released. Thanks.
On Mon, Sep 25, 2023 at 03:01:12PM +0200, David Sterba wrote:
On Sun, Sep 24, 2023 at 09:15:01AM -0400, Sasha Levin wrote:
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 77d20c685b6baeb942606a93ed861c191381b73e ]
Internally I got a report of very long stalls on normal operations like creating a new file when auto relocation was running. The reporter used the 'bpf offcputime' tracer to show that we would get stuck in start_transaction for 5 to 30 seconds, and were always being woken up by the transaction commit.
Using my timing-everything script, which times how long a function takes and what percentage of that total time is taken up by its children, I saw several traces like this
1083 took 32812902424 ns 29929002926 ns 91.2110% wait_for_commit_duration 25568 ns 7.7920e-05% commit_fs_roots_duration 1007751 ns 0.00307% commit_cowonly_roots_duration 446855602 ns 1.36182% btrfs_run_delayed_refs_duration 271980 ns 0.00082% btrfs_run_delayed_items_duration 2008 ns 6.1195e-06% btrfs_apply_pending_changes_duration 9656 ns 2.9427e-05% switch_commit_roots_duration 1598 ns 4.8700e-06% btrfs_commit_device_sizes_duration 4314 ns 1.3147e-05% btrfs_free_log_root_tree_duration
Here I was only tracing functions that happen where we are between START_COMMIT and UNBLOCKED in order to see what would be keeping us blocked for so long. The wait_for_commit() we do is where we wait for a previous transaction that hasn't completed it's commit. This can include all of the unpin work and other cleanups, which tends to be the longest part of our transaction commit.
There is no reason we should be blocking new things from entering the transaction at this point, it just adds to random latency spikes for no reason.
Fix this by adding a PREP stage. This allows us to properly deal with multiple committers coming in at the same time, we retain the behavior that the winner waits on the previous transaction and the losers all wait for this transaction commit to occur. Nothing else is blocked during the PREP stage, and then once the wait is complete we switch to COMMIT_START and all of the same behavior as before is maintained.
Reviewed-by: Filipe Manana fdmanana@suse.com Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
Please postpone adding this patch to stable trees until 6.6 is released. Thanks.
Ack.
From: Filipe Manana fdmanana@suse.com
[ Upstream commit 91bfe3104b8db0310f76f2dcb6aacef24c889366 ]
If we fail to add a delayed dir index item because there's already another item with the same index number, we print an error message (and then BUG). However that message isn't very helpful to debug anything because we don't know what's the index number and what are the values of index counters in the inode and its delayed inode (index_cnt fields of struct btrfs_inode and struct btrfs_delayed_node).
So update the error message to include the index number and counters.
We actually had a recent case where this issue was hit by a syzbot report (see the link below).
Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ Reviewed-by: Qu Wenruo wqu@suse.com Signed-off-by: Filipe Manana fdmanana@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/delayed-inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 6d51db066503b..88db451697b0a 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1498,9 +1498,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, ret = __btrfs_add_delayed_item(delayed_node, delayed_item); if (unlikely(ret)) { btrfs_err(trans->fs_info, - "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", - name_len, name, delayed_node->root->root_key.objectid, - delayed_node->inode_id, ret); +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", + name_len, name, index, btrfs_root_id(delayed_node->root), + delayed_node->inode_id, dir->index_cnt, + delayed_node->index_cnt, ret); BUG(); } mutex_unlock(&delayed_node->mutex);
From: Filipe Manana fdmanana@suse.com
[ Upstream commit a57c2d4e46f519b24558ae0752c17eec416ac72a ]
When removing a delayed item, or releasing which will remove it as well, we will modify one of the delayed node's rbtrees and item counter if the delayed item is in one of the rbtrees. This require having the delayed node's mutex locked, otherwise we will race with other tasks modifying the rbtrees and the counter.
This is motivated by a previous version of another patch actually calling btrfs_release_delayed_item() after unlocking the delayed node's mutex and against a delayed item that is in a rbtree.
So assert at __btrfs_remove_delayed_item() that the delayed node's mutex is locked.
Reviewed-by: Qu Wenruo wqu@suse.com Signed-off-by: Filipe Manana fdmanana@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/delayed-inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 88db451697b0a..f7fb42833e9db 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) { + struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node; struct rb_root_cached *root; struct btrfs_delayed_root *delayed_root;
@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) if (RB_EMPTY_NODE(&delayed_item->rb_node)) return;
- delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root; + /* If it's in a rbtree, then we need to have delayed node locked. */ + lockdep_assert_held(&delayed_node->mutex); + + delayed_root = delayed_node->root->fs_info->delayed_root;
BUG_ON(!delayed_root);
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM) - root = &delayed_item->delayed_node->ins_root; + root = &delayed_node->ins_root; else - root = &delayed_item->delayed_node->del_root; + root = &delayed_node->del_root;
rb_erase_cached(&delayed_item->rb_node, root); RB_CLEAR_NODE(&delayed_item->rb_node); - delayed_item->delayed_node->count--; + delayed_node->count--;
finish_one_item(delayed_root); }
From: "Ricardo B. Marliere" rbmarliere@gmail.com
[ Upstream commit 5f9dd2e896a91bfca90f8463eb6808c03d535d8a ]
This patch fixes inconsistencies in the parsing rules of the levels 1 and 2 of the kselftest_deps.sh. It was added the levels 4 and 5 to account for a few edge cases that are present in some tests, also some minor identation styling have been fixed (s/ /\t/g).
Signed-off-by: Ricardo B. Marliere rbmarliere@gmail.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/kselftest_deps.sh | 77 +++++++++++++++++++---- 1 file changed, 65 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index 4bc14d9e8ff1d..de59cc8f03c3f 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -46,11 +46,11 @@ fi print_targets=0
while getopts "p" arg; do - case $arg in - p) + case $arg in + p) print_targets=1 shift;; - esac + esac done
if [ $# -eq 0 ] @@ -92,6 +92,10 @@ pass_cnt=0 # Get all TARGETS from selftests Makefile targets=$(grep -E "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2)
+# Initially, in LDLIBS related lines, the dep checker needs +# to ignore lines containing the following strings: +filter="$(VAR_LDLIBS)|pkg-config|PKG_CONFIG|IOURING_EXTRA_LIBS" + # Single test case if [ $# -eq 2 ] then @@ -100,6 +104,8 @@ then l1_test $test l2_test $test l3_test $test + l4_test $test + l5_test $test
print_results $1 $2 exit $? @@ -113,7 +119,7 @@ fi # Append space at the end of the list to append more tests.
l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \ - grep -v "VAR_LDLIBS" | awk -F: '{print $1}') + grep -v "$filter" | awk -F: '{print $1}' | uniq)
# Level 2: LDLIBS set dynamically. # @@ -126,7 +132,7 @@ l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \ # Append space at the end of the list to append more tests.
l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \ - grep -v "VAR_LDLIBS" | awk -F: '{print $1}') + grep -v "$filter" | awk -F: '{print $1}' | uniq)
# Level 3 # memfd and others use pkg-config to find mount and fuse libs @@ -138,11 +144,32 @@ l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \ # VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
l3_tests=$(grep -r --include=Makefile "^VAR_LDLIBS" | \ - grep -v "pkg-config" | awk -F: '{print $1}') + grep -v "pkg-config|PKG_CONFIG" | awk -F: '{print $1}' | uniq)
-#echo $l1_tests -#echo $l2_1_tests -#echo $l3_tests +# Level 4 +# some tests may fall back to default using `|| echo -l<libname>` +# if pkg-config doesn't find the libs, instead of using VAR_LDLIBS +# as per level 3 checks. +# e.g: +# netfilter/Makefile +# LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) +l4_tests=$(grep -r --include=Makefile "^LDLIBS" | \ + grep "pkg-config|PKG_CONFIG" | awk -F: '{print $1}' | uniq) + +# Level 5 +# some tests may use IOURING_EXTRA_LIBS to add extra libs to LDLIBS, +# which in turn may be defined in a sub-Makefile +# e.g.: +# mm/Makefile +# $(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS) +l5_tests=$(grep -r --include=Makefile "LDLIBS +=.*$(IOURING_EXTRA_LIBS)" | \ + awk -F: '{print $1}' | uniq) + +#echo l1_tests $l1_tests +#echo l2_tests $l2_tests +#echo l3_tests $l3_tests +#echo l4_tests $l4_tests +#echo l5_tests $l5_tests
all_tests print_results $1 $2 @@ -164,24 +191,32 @@ all_tests() for test in $l3_tests; do l3_test $test done + + for test in $l4_tests; do + l4_test $test + done + + for test in $l5_tests; do + l5_test $test + done }
# Use same parsing used for l1_tests and pick libraries this time. l1_test() { test_libs=$(grep --include=Makefile "^LDLIBS" $test | \ - grep -v "VAR_LDLIBS" | \ + grep -v "$filter" | \ sed -e 's/:/ /' | \ sed -e 's/+/ /' | cut -d "=" -f 2)
check_libs $test $test_libs }
-# Use same parsing used for l2__tests and pick libraries this time. +# Use same parsing used for l2_tests and pick libraries this time. l2_test() { test_libs=$(grep --include=Makefile ": LDLIBS" $test | \ - grep -v "VAR_LDLIBS" | \ + grep -v "$filter" | \ sed -e 's/:/ /' | sed -e 's/+/ /' | \ cut -d "=" -f 2)
@@ -197,6 +232,24 @@ l3_test() check_libs $test $test_libs }
+l4_test() +{ + test_libs=$(grep --include=Makefile "^VAR_LDLIBS|^LDLIBS" $test | \ + grep "(pkg-config|PKG_CONFIG).*|| echo " | \ + sed -e 's/.*|| echo //' | sed -e 's/)$//') + + check_libs $test $test_libs +} + +l5_test() +{ + tests=$(find $(dirname "$test") -type f -name "*.mk") + test_libs=$(grep "^IOURING_EXTRA_LIBS +?=" $tests | \ + cut -d "=" -f 2) + + check_libs $test $test_libs +} + check_libs() {
From: "Steven Rostedt (Google)" rostedt@goodmis.org
[ Upstream commit 95a404bd60af6c4d9d8db01ad14fe8957ece31ca ]
When iterating over the ring buffer while the ring buffer is active, the writer can corrupt the reader. There's barriers to help detect this and handle it, but that code missed the case where the last event was at the very end of the page and has only 4 bytes left.
The checks to detect the corruption by the writer to reads needs to see the length of the event. If the length in the first 4 bytes is zero then the length is stored in the second 4 bytes. But if the writer is in the process of updating that code, there's a small window where the length in the first 4 bytes could be zero even though the length is only 4 bytes. That will cause rb_event_length() to read the next 4 bytes which could happen to be off the allocated page.
To protect against this, fail immediately if the next event pointer is less than 8 bytes from the end of the commit (last byte of data), as all events must be a minimum of 8 bytes anyway.
Link: https://lore.kernel.org/all/20230905141245.26470-1-Tze-nan.Wu@mediatek.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230907122820.0899019c@gandalf.l...
Cc: Masami Hiramatsu mhiramat@kernel.org Cc: Mark Rutland mark.rutland@arm.com Reported-by: Tze-nan Wu Tze-nan.Wu@mediatek.com Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/trace/ring_buffer.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1267e1016ab5c..53b73b85cf737 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2398,6 +2398,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ commit = rb_page_commit(iter_head_page); smp_rmb(); + + /* An event needs to be at least 8 bytes in size */ + if (iter->head > commit - 8) + goto reset; + event = __rb_page_index(iter_head_page, iter->head); length = rb_event_length(event);
From: Guangguan Wang guangguan.wang@linux.alibaba.com
[ Upstream commit 6912e724832c47bb381eb1bd1e483ec8df0d0f0f ]
In the macro SMC_STAT_SERV_SUCC_INC, the smcd_version is used to determin whether to increase the v1 statistic or the v2 statistic. It is correct for SMCD. But for SMCR, smcr_version should be used.
Signed-off-by: Guangguan Wang guangguan.wang@linux.alibaba.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/smc/smc_stats.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index b60fe1eb37ab6..aa8928975cc63 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -243,8 +243,9 @@ while (0) #define SMC_STAT_SERV_SUCC_INC(net, _ini) \ do { \ typeof(_ini) i = (_ini); \ - bool is_v2 = (i->smcd_version & SMC_V2); \ bool is_smcd = (i->is_smcd); \ + u8 version = is_smcd ? i->smcd_version : i->smcr_version; \ + bool is_v2 = (version & SMC_V2); \ typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \ if (is_v2 && is_smcd) \ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
From: Christophe JAILLET christophe.jaillet@wanadoo.fr
[ Upstream commit e97eb65dd464e7f118a16a26337322d07eb653e2 ]
snprintf() returns the "number of characters which *would* be generated for the given input", not the size *really* generated.
In order to avoid too large values for 'o' (and potential negative values for "sizeof(linebuf) o") use scnprintf() instead of snprintf().
Note that given the "w < 4" in the for loop, the buffer can NOT overflow, but using the *right* function is always better.
Signed-off-by: Christophe JAILLET christophe.jaillet@wanadoo.fr Signed-off-by: Damien Le Moal dlemoal@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ata/sata_mv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index d404e631d1527..68e660e4cc410 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1255,8 +1255,8 @@ static void mv_dump_mem(struct device *dev, void __iomem *start, unsigned bytes)
for (b = 0; b < bytes; ) { for (w = 0, o = 0; b < bytes && w < 4; w++) { - o += snprintf(linebuf + o, sizeof(linebuf) - o, - "%08x ", readl(start + b)); + o += scnprintf(linebuf + o, sizeof(linebuf) - o, + "%08x ", readl(start + b)); b += sizeof(u32); } dev_dbg(dev, "%s: %p: %s\n",
From: Ard Biesheuvel ardb@kernel.org
[ Upstream commit aba7e066c738d4b349413a271b2a236aa55bacbc ]
CONFIG_EFI_RUNTIME_MAP needs to be enabled in order for kexec to be able to provide the required information about the EFI runtime mappings to the incoming kernel, regardless of whether kexec_load() or kexec_file_load() is being used. Without this information, kexec boot in EFI mode is not possible.
The CONFIG_EFI_RUNTIME_MAP option is currently directly configurable if CONFIG_EXPERT is enabled, so that it can be turned on for debugging purposes even if KEXEC is not enabled. However, the upshot of this is that it can also be disabled even when it shouldn't.
So tweak the Kconfig declarations to avoid this situation.
Reported-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e36261b4ea14f..68ce4f786dcd1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1955,6 +1955,7 @@ config EFI select UCS2_STRING select EFI_RUNTIME_WRAPPERS select ARCH_USE_MEMREMAP_PROT + select EFI_RUNTIME_MAP if KEXEC_CORE help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). @@ -2030,7 +2031,6 @@ config EFI_MAX_FAKE_MEM config EFI_RUNTIME_MAP bool "Export EFI runtime maps to sysfs" if EXPERT depends on EFI - default KEXEC_CORE help Export EFI runtime memory regions to /sys/firmware/efi/runtime-map. That memory map is required by the 2nd kernel to set up EFI virtual
From: David Thompson davthompson@nvidia.com
[ Upstream commit c2dffda1d8f7511505bbbf16ba282f2079b30089 ]
The latest version of the mlxbf_bootctl driver utilizes "sysfs_format_mac", and this API is only available if NET is defined in the kernel configuration. This patch changes the mlxbf_bootctl Kconfig to depend on NET.
Reported-by: kernel test robot lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202309031058.JvwNDBKt-lkp@intel.com/ Reported-by: Randy Dunlap rdunlap@infradead.org Signed-off-by: David Thompson davthompson@nvidia.com Link: https://lore.kernel.org/r/20230905133243.31550-1-davthompson@nvidia.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/mellanox/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig index 30b50920b278c..f7dfa0e785fd6 100644 --- a/drivers/platform/mellanox/Kconfig +++ b/drivers/platform/mellanox/Kconfig @@ -60,6 +60,7 @@ config MLXBF_BOOTCTL tristate "Mellanox BlueField Firmware Boot Control driver" depends on ARM64 depends on ACPI + depends on NET help The Mellanox BlueField firmware implements functionality to request swapping the primary and alternate eMMC boot partition,
From: "Luke D. Jones" luke@ljones.dev
[ Upstream commit 4106a70ddad57ee6d8f98b81d6f036740c72762b ]
Add quirk for ASUS ROG X16 (GV601V, 2023 versions) Flow 2-in-1 to enable tablet mode with lid flip (all screen rotations).
Signed-off-by: Luke D. Jones luke@ljones.dev Link: https://lore.kernel.org/r/20230905082813.13470-1-luke@ljones.dev Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/asus-nb-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index fdf7da06af306..d85d895fee894 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -478,6 +478,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_tablet_mode, }, + { + .callback = dmi_matched, + .ident = "ASUS ROG FLOW X16", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "GV601V"), + }, + .driver_data = &quirk_asus_tablet_mode, + }, { .callback = dmi_matched, .ident = "ASUS VivoBook E410MA",
From: Julia Lawall Julia.Lawall@inria.fr
[ Upstream commit 8a81cf96f5510aaf9a65d103f7405079a7b0fcc5 ]
for_each_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put.
This was done using the Coccinelle semantic patch iterators/for_each_child.cocci
Signed-off-by: Julia Lawall Julia.Lawall@inria.fr Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/thermal/thermal_of.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 22272f9c5934a..e615f735f4c03 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -38,8 +38,10 @@ static int of_find_trip_id(struct device_node *np, struct device_node *trip) */ for_each_child_of_node(trips, t) {
- if (t == trip) + if (t == trip) { + of_node_put(t); goto out; + } i++; }
@@ -402,8 +404,10 @@ static int thermal_of_for_each_cooling_maps(struct thermal_zone_device *tz,
for_each_child_of_node(cm_np, child) { ret = thermal_of_for_each_cooling_device(tz_np, child, tz, cdev, action); - if (ret) + if (ret) { + of_node_put(child); break; + } }
of_node_put(cm_np);
From: Mukul Joshi mukul.joshi@amd.com
[ Upstream commit 97e3c6a853f2af9145daf0c6ca25bcdf55c759d4 ]
Currently, we store CU info only for a single XCC assuming that it is the same for all XCCs. However, that may not be true. As a result, store CU info for all XCCs. This info is later used for CU masking.
Signed-off-by: Mukul Joshi mukul.joshi@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 76 +++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 11 ++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 6 +- 14 files changed, 60 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b4fcad0e62f7e..a7c8beff1647c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -492,7 +492,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c cu_info->cu_active_number = acu_info.number; cu_info->cu_ao_mask = acu_info.ao_cu_mask; memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], - sizeof(acu_info.bitmap)); + sizeof(cu_info->cu_bitmap)); cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a4ff515ce8966..59ba03d387fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -43,6 +43,7 @@ #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
#define AMDGPU_MAX_GC_INSTANCES 8 +#define KGD_MAX_QUEUES 128
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -254,7 +255,7 @@ struct amdgpu_cu_info { uint32_t number; uint32_t ao_cu_mask; uint32_t ao_cu_bitmap[4][4]; - uint32_t bitmap[4][4]; + uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; };
struct amdgpu_gfx_ras { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d4ca19ba5a289..f678bdd5f353d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], - sizeof(adev->gfx.cu_info.bitmap)); + sizeof(dev_info->cu_bitmap)); dev_info->vram_type = adev->gmc.vram_type; dev_info->vram_bit_width = adev->gmc.vram_width; dev_info->vce_harvest_config = adev->vce.harvest_config; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 44af8022b89fa..f743bf2c92877 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9448,7 +9448,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0451533ddde41..a82cba884c48f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6394,7 +6394,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} */ - cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap; + cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index da6caff78c22b..34f9211b26793 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v6_0_get_cu_enabled(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8c174c11eaee0..6feae2548e8ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5122,7 +5122,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v7_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 51c1745c83697..885ebd703260f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v8_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 65577eca58f1c..e511e49d3023f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1497,7 +1497,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { - if (cu_info->bitmap[i][j] & mask) { + if (cu_info->bitmap[0][i][j] & mask) { if (counter == pg_always_on_cu_num) WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); if (counter < always_on_cu_num) @@ -7234,7 +7234,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, * SE6,SH0 --> bitmap[2][1] * SE7,SH0 --> bitmap[3][1] */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; + cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 4f883b94f98ef..84a74a6c6b2de 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4228,7 +4228,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) }
static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, - u32 bitmap) + u32 bitmap, int xcc_id) { u32 data;
@@ -4238,15 +4238,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
- WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data); }
-static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) +static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id) { u32 data, mask;
- data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; @@ -4259,7 +4259,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, active_cu_number = 0; + int i, j, k, counter, xcc_id, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; unsigned disable_masks[4 * 4];
@@ -4278,46 +4278,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, adev->gfx.config.max_sh_per_se);
mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - mask = 1; - ao_bitmap = 0; - counter = 0; - gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0); - gfx_v9_4_3_set_user_cu_inactive_bitmap( - adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); - bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); - - /* - * The bitmap(and ao_cu_bitmap) in cu_info structure is - * 4x4 size array, and it's usually suitable for Vega - * ASICs which has 4*2 SE/SH layout. - * But for Arcturus, SE/SH layout is changed to 8*1. - * To mostly reduce the impact, we make it compatible - * with current bitmap array as below: - * SE4,SH0 --> bitmap[0][1] - * SE5,SH0 --> bitmap[1][1] - * SE6,SH0 --> bitmap[2][1] - * SE7,SH0 --> bitmap[3][1] - */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; - - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { - if (bitmap & mask) { - if (counter < adev->gfx.config.max_cu_per_sh) - ao_bitmap |= mask; - counter++; + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); + gfx_v9_4_3_set_user_cu_inactive_bitmap( + adev, + disable_masks[i * adev->gfx.config.max_sh_per_se + j], + xcc_id); + bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id); + + cu_info->bitmap[xcc_id][i][j] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; } - mask <<= 1; + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } - active_cu_number += counter; - if (i < 2 && j < 2) - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - 0); mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index f5a6f562e2a80..11b9837292536 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2154,7 +2154,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; - cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; + cu->num_simd_cores = cu_info.simd_per_cu * + (cu_info.cu_active_number / kdev->kfd->num_nodes); cu->max_waves_simd = cu_info.max_waves_per_simd;
cu->wave_front_size = cu_info.wave_front_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 863cf060af484..35e05ee89eac5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -104,11 +104,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1; + uint32_t cu_active_per_node;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
- if (cu_mask_count > cu_info.cu_active_number) - cu_mask_count = cu_info.cu_active_number; + cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes; + if (cu_mask_count > cu_active_per_node) + cu_mask_count = cu_active_per_node;
/* Exceeding these bounds corrupts the stack and indicates a coding error. * Returning with no CU's enabled will hang the queue, which should be @@ -141,7 +143,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) cu_per_sh[se][sh] = hweight32( - cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); + cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 4a17bb7c7b27d..ea67a353beb00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count", dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, offs, "simd_count", - dev->gpu ? (dev->node_props.simd_count * - NUM_XCC(dev->gpu->xcc_mask)) : 0); + dev->gpu ? dev->node_props.simd_count : 0); sysfs_show_32bit_prop(buffer, offs, "mem_banks_count", dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, offs, "caches_count", @@ -1658,7 +1657,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, int i, j, k; struct kfd_cache_properties *pcache = NULL;
- cu_sibling_map_mask = cu_info->cu_bitmap[0][0]; + cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1701,7 +1700,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); k += 4;
- cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4]; + cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); } } @@ -1762,8 +1761,8 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[i % 4][j + i / 4], ct, - cu_processor_id, k); + pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct, + cu_processor_id, k);
if (ret < 0) break; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index d0df3381539f0..74cc545085a02 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -31,12 +31,12 @@ #include <linux/types.h> #include <linux/bitmap.h> #include <linux/dma-fence.h> +#include "amdgpu_irq.h" +#include "amdgpu_gfx.h"
struct pci_dev; struct amdgpu_device;
-#define KGD_MAX_QUEUES 128 - struct kfd_dev; struct kgd_mem;
@@ -68,7 +68,7 @@ struct kfd_cu_info { uint32_t wave_front_size; uint32_t max_scratch_slots_per_cu; uint32_t lds_size; - uint32_t cu_bitmap[4][4]; + uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; };
/* For getting GPU local memory information from KGD */
From: Mukul Joshi mukul.joshi@amd.com
[ Upstream commit 0752e66e91fa86fa5481b04b22053363833ffb85 ]
Update cache info reporting in sysfs to report the correct number of CUs and associated cache information based on different spatial partitioning modes.
Signed-off-by: Mukul Joshi mukul.joshi@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 ++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 82 +++++++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +- 3 files changed, 51 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index fc719389b5d65..4684711aa695a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -79,6 +79,10 @@ struct crat_header { #define CRAT_SUBTYPE_IOLINK_AFFINITY 5 #define CRAT_SUBTYPE_MAX 6
+/* + * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32 + * as it breaks the ABI. + */ #define CRAT_SIBLINGMAP_SIZE 32
/* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ea67a353beb00..5582191022106 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1650,14 +1650,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, struct kfd_gpu_cache_info *pcache_info, struct kfd_cu_info *cu_info, - int cache_type, unsigned int cu_processor_id) + int cache_type, unsigned int cu_processor_id, + struct kfd_node *knode) { unsigned int cu_sibling_map_mask; int first_active_cu; - int i, j, k; + int i, j, k, xcc, start, end; struct kfd_cache_properties *pcache = NULL;
- cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0]; + start = ffs(knode->xcc_mask) - 1; + end = start + NUM_XCC(knode->xcc_mask); + cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0]; cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); first_active_cu = ffs(cu_sibling_map_mask); @@ -1692,16 +1695,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1); k = 0;
- for (i = 0; i < cu_info->num_shader_engines; i++) { - for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { - pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); - pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); - pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); - pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); - k += 4; - - cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4]; - cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) { + pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + k += 4; + + cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4]; + cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1); + } } } pcache->sibling_map_size = k; @@ -1719,7 +1724,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev) { struct kfd_gpu_cache_info *pcache_info = NULL; - int i, j, k; + int i, j, k, xcc, start, end; int ct = 0; unsigned int cu_processor_id; int ret; @@ -1753,37 +1758,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct * then it will consider only one CU from * the shared unit */ + start = ffs(kdev->xcc_mask) - 1; + end = start + NUM_XCC(kdev->xcc_mask); + for (ct = 0; ct < num_of_cache_types; ct++) { cu_processor_id = gpu_processor_id; if (pcache_info[ct].cache_level == 1) { - for (i = 0; i < pcu_info->num_shader_engines; i++) { - for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { - for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { - - ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, - pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct, - cu_processor_id, k); - - if (ret < 0) - break; - - if (!ret) { - num_of_entries++; - list_add_tail(&props_ext->list, &dev->cache_props); + for (xcc = start; xcc < end; xcc++) { + for (i = 0; i < pcu_info->num_shader_engines; i++) { + for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) { + for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) { + + ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info, + pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct, + cu_processor_id, k); + + if (ret < 0) + break; + + if (!ret) { + num_of_entries++; + list_add_tail(&props_ext->list, &dev->cache_props); + } + + /* Move to next CU block */ + num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= + pcu_info->num_cu_per_sh) ? + pcache_info[ct].num_cu_shared : + (pcu_info->num_cu_per_sh - k); + cu_processor_id += num_cu_shared; } - - /* Move to next CU block */ - num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <= - pcu_info->num_cu_per_sh) ? - pcache_info[ct].num_cu_shared : - (pcu_info->num_cu_per_sh - k); - cu_processor_id += num_cu_shared; } } } } else { ret = fill_in_l2_l3_pcache(&props_ext, pcache_info, - pcu_info, ct, cu_processor_id); + pcu_info, ct, cu_processor_id, kdev);
if (ret < 0) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index cba2cd5ed9d19..46927263e014d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -86,7 +86,7 @@ struct kfd_mem_properties { struct attribute attr; };
-#define CACHE_SIBLINGMAP_SIZE 64 +#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties { struct list_head list;
From: Mukul Joshi mukul.joshi@amd.com
[ Upstream commit fc6efed2c728c9c10b058512fc9c1613f870a8e8 ]
The CU mask passed from user-space will change based on different spatial partitioning mode. As a result, update CU masking code for GFX9.4.3 to work for all partitioning modes.
Signed-off-by: Mukul Joshi mukul.joshi@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 28 ++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 46 ++++++++++++------- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 +- 7 files changed, 56 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 35e05ee89eac5..254f343f967a3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -97,14 +97,16 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, - uint32_t *se_mask) + uint32_t *se_mask, uint32_t inst) { struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; - int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1; + int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1; uint32_t cu_active_per_node; + int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask); + int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
@@ -143,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) cu_per_sh[se][sh] = hweight32( - cu_info.cu_bitmap[0][se % 4][sh + (se / 4) * cu_bitmap_sh_mul]); + cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) * + cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs: * se_mask programs up to 2 SH in the upper and lower 16 bits. @@ -166,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1) * ... * + * For GFX 9.4.3, the following code only looks at a + * subset of the cu_mask corresponding to the inst parameter. + * If we have n XCCs under one GPU node + * cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0) + * cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0) + * .. + * cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0) + * cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0) + * + * For example, if there are 6 XCCs under 1 KFD node, this code + * running for each inst, will look at the bits as: + * inst, inst + 6, inst + 12... + * * First ensure all CUs are disabled, then enable user specified CUs. */ for (i = 0; i < cu_info.num_shader_engines; i++) se_mask[i] = 0;
- i = 0; - for (cu = 0; cu < 16; cu += inc) { + i = inst; + for (cu = 0; cu < 16; cu += cu_inc) { for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { for (se = 0; se < cu_info.num_shader_engines; se++) { if (cu_per_sh[se][sh] > cu) { if (cu_mask[i / 32] & (en_mask << (i % 32))) se_mask[se] |= en_mask << (cu + sh * 16); i += inc; - if (i == cu_mask_count) + if (i >= cu_mask_count) return; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 23158db7da035..57bf5e513f4d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, - uint32_t *se_mask); + uint32_t *se_mask, uint32_t inst);
int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 65c9f01a1f86c..faa01ee0d1655 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return;
mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 94c0fc2e57b7f..0fcb176601295 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return;
mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 23b30783dce31..97f754949ca92 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, }
mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m->compute_static_thread_mgmt_se0 = se_mask[0]; m->compute_static_thread_mgmt_se1 = se_mask[1]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 601bb9f68048c..a76ae27c8a919 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) }
static void update_cu_mask(struct mqd_manager *mm, void *mqd, - struct mqd_update_info *minfo) + struct mqd_update_info *minfo, uint32_t inst) { struct v9_mqd *m; uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; @@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return;
mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; m->compute_static_thread_mgmt_se1 = se_mask[1]; m->compute_static_thread_mgmt_se2 = se_mask[2]; m->compute_static_thread_mgmt_se3 = se_mask[3]; - m->compute_static_thread_mgmt_se4 = se_mask[4]; - m->compute_static_thread_mgmt_se5 = se_mask[5]; - m->compute_static_thread_mgmt_se6 = se_mask[6]; - m->compute_static_thread_mgmt_se7 = se_mask[7]; - - pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", - m->compute_static_thread_mgmt_se0, - m->compute_static_thread_mgmt_se1, - m->compute_static_thread_mgmt_se2, - m->compute_static_thread_mgmt_se3, - m->compute_static_thread_mgmt_se4, - m->compute_static_thread_mgmt_se5, - m->compute_static_thread_mgmt_se6, - m->compute_static_thread_mgmt_se7); + if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) { + m->compute_static_thread_mgmt_se4 = se_mask[4]; + m->compute_static_thread_mgmt_se5 = se_mask[5]; + m->compute_static_thread_mgmt_se6 = se_mask[6]; + m->compute_static_thread_mgmt_se7 = se_mask[7]; + + pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3, + m->compute_static_thread_mgmt_se4, + m->compute_static_thread_mgmt_se5, + m->compute_static_thread_mgmt_se6, + m->compute_static_thread_mgmt_se7); + } else { + pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n", + inst, m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); + } }
static void set_priority(struct v9_mqd *m, struct queue_properties *q) @@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = 0;
- update_cu_mask(mm, mqd, minfo); + if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) + update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q); @@ -654,6 +664,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd + size * xcc); update_mqd(mm, m, q, minfo);
+ update_cu_mask(mm, mqd, minfo, xcc); + if (q->format == KFD_QUEUE_FORMAT_AQL) { switch (xcc) { case 0: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index d1e962da51dd3..2551a7529b5e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, return;
mqd_symmetrically_map_cu_mask(mm, - minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask); + minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
m = get_mqd(mqd); m->compute_static_thread_mgmt_se0 = se_mask[0];
From: Bhawanpreet Lakha bhawanpreet.lakha@amd.com
[ Upstream commit 679fc891bf11845730b572fc44f8a0eb846aba29 ]
Dirty rect can be used with replay, so enable them to allow for more powersaving.
Reviewed-by: Sun peng Li sunpeng.li@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Signed-off-by: Bhawanpreet Lakha bhawanpreet.lakha@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3123ea2f4f30a..5c63a13f25193 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8038,7 +8038,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].plane_info = &bundle->plane_infos[planes_count];
- if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) { + if (acrtc_state->stream->link->psr_settings.psr_feature_enabled || + acrtc_state->stream->link->replay_settings.replay_feature_enabled) { fill_dc_dirty_rects(plane, old_plane_state, new_plane_state, new_crtc_state, &bundle->flip_addrs[planes_count],
From: Swapnil Patel swapnil.patel@amd.com
[ Upstream commit f5b2c10b57615828b531bb0ae56bd6325a41167e ]
[Why] Currently the driver looks DCN registers to access if BL is on or not. This check is not valid if we are using AUX based brightness control. This causes driver to not send out "backlight off" command during power off sequence as it already thinks it is off.
[How] Only check DCN registers if we aren't using AUX based brightness control.
Reviewed-by: Wenjing Liu wenjing.liu@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Signed-off-by: Swapnil Patel swapnil.patel@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 6966420dfbac3..15fa19ee748cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -964,7 +964,9 @@ void dce110_edp_backlight_control( return; }
- if (link->panel_cntl) { + if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled || + link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || + link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl);
if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) {
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit 1832403cd41ca6b19b24e9d64f79cb08d920ca44 ]
This matches the behavior for soc15 and nv.
Acked-by: Christian König christian.koenig@amd.com Reviewed-by: Timmy Tsai timmtsai@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index e5e5d68a4d702..1a5ffbf884891 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -786,7 +786,7 @@ static int soc21_common_hw_init(void *handle) * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) + if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit ab43213e7afd08ac68d4282060bacf309e70fd14 ]
Needed for HDP flush to work correctly.
Reviewed-by: Timmy Tsai timmtsai@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index d5ed9e0e1a5f1..e5b5b0f4940f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev) data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); } + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; }
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
From: Hawking Zhang Hawking.Zhang@amd.com
[ Upstream commit ffd6bde302061aeee405ab364403af30210f0b99 ]
So driver doesn't generate incorrect message until the new format is settled down for aqua_vanjaram
Signed-off-by: Hawking Zhang Hawking.Zhang@amd.com Reviewed-by: Yang Wang kevinyang.wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8aaa427f8c0f6..7d5019a884024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1061,7 +1061,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) { - if (adev->smuio.funcs && + if (!adev->aid_mask && + adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " @@ -1081,7 +1082,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } if (err_data.ue_count) { - if (adev->smuio.funcs && + if (!adev->aid_mask && + adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d "
From: David Francis David.Francis@amd.com
[ Upstream commit 9296da8c40900b4dae3d973aa22be306e2a77671 ]
The code in kfd_mqd_manager_v11.c to support criu dump and restore of queue state was missing.
Added it; should be equivalent to kfd_mqd_manager_v10.c.
CC: Felix Kuehling felix.kuehling@amd.com Reviewed-by: Harish Kasiviswanathan Harish.Kasiviswanathan@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: David Francis David.Francis@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 97f754949ca92..352757f2d3202 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, return 0; }
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) +{ + struct v11_compute_mqd *m; + + m = get_mqd(mqd); + + memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd)); +} + +static void restore_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *qp, + const void *mqd_src, + const void *ctl_stack_src, const u32 ctl_stack_size) +{ + uint64_t addr; + struct v11_compute_mqd *m; + + m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr; + addr = mqd_mem_obj->gpu_addr; + + memcpy(m, mqd_src, sizeof(*m)); + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + + m->cp_hqd_pq_doorbell_control = + qp->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + qp->is_active = 0; +} + + static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -438,6 +475,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->mqd_size = sizeof(struct v11_compute_mqd); mqd->get_wave_state = get_wave_state; mqd->mqd_stride = kfd_mqd_stride; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif @@ -482,6 +521,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct v11_sdma_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS)
From: David Francis David.Francis@amd.com
[ Upstream commit 5e7e82254270c8cf8b107451c5de01cee2f135ae ]
On some APU systems, there is no atom context and so the atom_context struct is null.
Add a check to the VBIOS_INFO branch of amdgpu_info_ioctl to handle this case, returning all zeroes.
Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: David Francis David.Francis@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f678bdd5f353d..b9fc7e2db5e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct atom_context *atom_context;
atom_context = adev->mode_info.atom_context; - memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name)); - memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn)); - vbios_info.version = atom_context->version; - memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, - sizeof(atom_context->vbios_ver_str)); - memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date)); + if (atom_context) { + memcpy(vbios_info.name, atom_context->name, + sizeof(atom_context->name)); + memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, + sizeof(atom_context->vbios_pn)); + vbios_info.version = atom_context->version; + memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, + sizeof(atom_context->vbios_ver_str)); + memcpy(vbios_info.date, atom_context->date, + sizeof(atom_context->date)); + }
return copy_to_user(out, &vbios_info, min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
From: Josh Poimboeuf jpoimboe@kernel.org
[ Upstream commit 72178d5d1a38dd185d1db15f177f2d122ef10d9b ]
Cold functions and their non-cold counterparts can use _THIS_IP_ to reference each other. Don't warn about !ENDBR in that case.
Note that for GCC this is currently irrelevant in light of the following commit
c27cd083cfb9 ("Compiler attributes: GCC cold function alignment workarounds")
which disabled cold functions in the kernel. However this may still be possible with Clang.
Fixes several warnings like the following:
drivers/scsi/bnx2i/bnx2i.prelink.o: warning: objtool: bnx2i_hw_ep_disconnect+0x19d: relocation to !ENDBR: bnx2i_hw_ep_disconnect.cold+0x0 drivers/net/ipvlan/ipvlan.prelink.o: warning: objtool: ipvlan_addr4_event.cold+0x28: relocation to !ENDBR: ipvlan_addr4_event+0xda drivers/net/ipvlan/ipvlan.prelink.o: warning: objtool: ipvlan_addr6_event.cold+0x26: relocation to !ENDBR: ipvlan_addr6_event+0xb7 drivers/net/ethernet/broadcom/tg3.prelink.o: warning: objtool: tg3_set_ringparam.cold+0x17: relocation to !ENDBR: tg3_set_ringparam+0x115 drivers/net/ethernet/broadcom/tg3.prelink.o: warning: objtool: tg3_self_test.cold+0x17: relocation to !ENDBR: tg3_self_test+0x2e1 drivers/target/iscsi/cxgbit/cxgbit.prelink.o: warning: objtool: __cxgbit_free_conn.cold+0x24: relocation to !ENDBR: __cxgbit_free_conn+0xfb net/can/can.prelink.o: warning: objtool: can_rx_unregister.cold+0x2c: relocation to !ENDBR: can_rx_unregister+0x11b drivers/net/ethernet/qlogic/qed/qed.prelink.o: warning: objtool: qed_spq_post+0xc0: relocation to !ENDBR: qed_spq_post.cold+0x9a drivers/net/ethernet/qlogic/qed/qed.prelink.o: warning: objtool: qed_iwarp_ll2_comp_syn_pkt.cold+0x12f: relocation to !ENDBR: qed_iwarp_ll2_comp_syn_pkt+0x34b net/tipc/tipc.prelink.o: warning: objtool: tipc_nametbl_publish.cold+0x21: relocation to !ENDBR: tipc_nametbl_publish+0xa6
Signed-off-by: Josh Poimboeuf jpoimboe@kernel.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/d8f1ab6a23a6105bc023c132b105f245c7976be6.169447655... Signed-off-by: Sasha Levin sashal@kernel.org --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 1384090530dbe..e308d1ba664ef 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4333,7 +4333,8 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn continue; }
- if (insn_func(dest) && insn_func(dest) == insn_func(insn)) { + if (insn_func(dest) && insn_func(insn) && + insn_func(dest)->pfunc == insn_func(insn)->pfunc) { /* * Anything from->to self is either _THIS_IP_ or * IRET-to-self.
From: Pratyush Yadav ptyadav@amazon.de
[ Upstream commit dad651b2a44eb6b201738f810254279dca29d30d ]
If a device has no NUMA node information associated with it, the driver puts the device in node first_memory_node (say node 0). Not having a NUMA node and being associated with node 0 are completely different things and it makes little sense to mix the two.
Signed-off-by: Pratyush Yadav ptyadav@amazon.de Signed-off-by: Keith Busch kbusch@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/pci.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2f57da12d9836..347cb5daebc3c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2916,9 +2916,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, struct nvme_dev *dev; int ret = -ENOMEM;
- if (node == NUMA_NO_NODE) - set_dev_node(&pdev->dev, first_memory_node); - dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) return ERR_PTR(-ENOMEM);
From: Icenowy Zheng uwu@icenowy.me
[ Upstream commit 8eb8fe67e2c84324398f5983c41b4f831d0705b3 ]
The dcache.cva encoding shown in the comments are wrong, it's for dcache.cval1 (which is restricted to L1) instead.
Fix this in the comment and in the hardcoded instruction.
Signed-off-by: Icenowy Zheng uwu@icenowy.me Tested-by: Sergey Matyukevich sergey.matyukevich@syntacore.com Reviewed-by: Heiko Stuebner heiko@sntech.de Reviewed-by: Guo Ren guoren@kernel.org Tested-by: Drew Fustini dfustini@baylibre.com Link: https://lore.kernel.org/r/20230912072410.2481-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt palmer@rivosinc.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/riscv/include/asm/errata_list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8ce..feab334dd8329 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -100,7 +100,7 @@ asm volatile(ALTERNATIVE( \ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01001 rs1 000 00000 0001011 * dcache.cva rs1 (clean, virtual address) - * 0000001 00100 rs1 000 00000 0001011 + * 0000001 00101 rs1 000 00000 0001011 * * dcache.cipa rs1 (clean then invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | @@ -113,7 +113,7 @@ asm volatile(ALTERNATIVE( \ * 0000000 11001 00000 000 00000 0001011 */ #define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0245000b" +#define THEAD_clean_A0 ".long 0x0255000b" #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b"
From: Michal Grzedzicki mge@meta.com
[ Upstream commit 71996bb835aed58c7ec4967be1d05190a27339ec ]
Some cards have more than one SAS address. Using an incorrect address causes communication issues with some devices like expanders.
Closes: https://lore.kernel.org/linux-kernel/A57AEA84-5CA0-403E-8053-106033C73C70@fb... Signed-off-by: Michal Grzedzicki mge@meta.com Link: https://lore.kernel.org/r/20230913155611.3183612-1-mge@meta.com Acked-by: Jack Wang jinpu.wang@ionos.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/pm8001/pm8001_hwi.c | 2 +- drivers/scsi/pm8001/pm80xx_hwi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 73cd25f30ca58..00f22058ccf4e 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -4180,7 +4180,7 @@ pm8001_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; memcpy(payload.sas_identify.sas_addr, - pm8001_ha->sas_addr, SAS_ADDR_SIZE); + &pm8001_ha->phy[phy_id].dev_sas_addr, SAS_ADDR_SIZE); payload.sas_identify.phy_id = phy_id;
return pm8001_mpi_build_cmd(pm8001_ha, 0, opcode, &payload, diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 39a12ee94a72f..9689fb830a5fb 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -4676,7 +4676,7 @@ pm80xx_chip_phy_start_req(struct pm8001_hba_info *pm8001_ha, u8 phy_id) payload.sas_identify.dev_type = SAS_END_DEVICE; payload.sas_identify.initiator_bits = SAS_PROTOCOL_ALL; memcpy(payload.sas_identify.sas_addr, - &pm8001_ha->sas_addr, SAS_ADDR_SIZE); + &pm8001_ha->phy[phy_id].dev_sas_addr, SAS_ADDR_SIZE); payload.sas_identify.phy_id = phy_id;
return pm8001_mpi_build_cmd(pm8001_ha, 0, opcode, &payload,
From: Michal Grzedzicki mge@meta.com
[ Upstream commit c13e7331745852d0dd7c35eabbe181cbd5b01172 ]
Tags allocated for OPC_INB_SET_CONTROLLER_CONFIG command need to be freed when we receive the response.
Signed-off-by: Michal Grzedzicki mge@meta.com Link: https://lore.kernel.org/r/20230911170340.699533-2-mge@meta.com Acked-by: Jack Wang jinpu.wang@ionos.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/pm8001/pm80xx_hwi.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 9689fb830a5fb..e543bc36c84df 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3671,10 +3671,12 @@ static int mpi_set_controller_config_resp(struct pm8001_hba_info *pm8001_ha, (struct set_ctrl_cfg_resp *)(piomb + 4); u32 status = le32_to_cpu(pPayload->status); u32 err_qlfr_pgcd = le32_to_cpu(pPayload->err_qlfr_pgcd); + u32 tag = le32_to_cpu(pPayload->tag);
pm8001_dbg(pm8001_ha, MSG, "SET CONTROLLER RESP: status 0x%x qlfr_pgcd 0x%x\n", status, err_qlfr_pgcd); + pm8001_tag_free(pm8001_ha, tag);
return 0; }
From: Steve French stfrench@microsoft.com
[ Upstream commit ebc3d4e44a7e05457825e03d0560153687265523 ]
checkpatch flagged a few places with: WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP Also fixed minor typo
Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/smb/client/inode.c | 2 +- fs/smb/client/smb2ops.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index c3eeae07e1390..cb85d7977b1e3 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2610,7 +2610,7 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, }
cifsFileInfo_put(cfile); - return -ENOTSUPP; + return -EOPNOTSUPP; }
int cifs_truncate_page(struct address_space *mapping, loff_t from) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index dd6a423dc6e11..a5cba71c30aed 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -297,7 +297,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)", credits->value, new_val);
- return -ENOTSUPP; + return -EOPNOTSUPP; }
spin_lock(&server->req_lock); @@ -1159,7 +1159,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, /* Use a fudge factor of 256 bytes in case we collide * with a different set_EAs command. */ - if(CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - + if (CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - MAX_SMB2_CLOSE_RESPONSE_SIZE - 256 < used_len + ea_name_len + ea_value_len + 1) { rc = -ENOSPC; @@ -4716,7 +4716,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (shdr->Command != SMB2_READ) { cifs_server_dbg(VFS, "only big read responses are supported\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; }
if (server->ops->is_session_expired &&
From: Niklas Cassel niklas.cassel@wdc.com
[ Upstream commit 80cc944eca4f0baa9c381d0706f3160e491437f2 ]
ata_scsi_port_error_handler() starts off by clearing ATA_PFLAG_EH_PENDING, before calling ap->ops->error_handler() (without holding the ap->lock).
If an error IRQ is received while ap->ops->error_handler() is running, the irq handler will set ATA_PFLAG_EH_PENDING.
Once ap->ops->error_handler() returns, ata_scsi_port_error_handler() checks if ATA_PFLAG_EH_PENDING is set, and if it is, another iteration of ATA EH is performed.
The problem is that ATA_PFLAG_EH_PENDING is not only cleared by ata_scsi_port_error_handler(), it is also cleared by ata_eh_reset().
ata_eh_reset() is called by ap->ops->error_handler(). This additional clearing done by ata_eh_reset() breaks the whole retry logic in ata_scsi_port_error_handler(). Thus, if an error IRQ is received while ap->ops->error_handler() is running, the port will currently remain frozen and will never get re-enabled.
The additional clearing in ata_eh_reset() was introduced in commit 1e641060c4b5 ("libata: clear eh_info on reset completion").
Looking at the original error report: https://marc.info/?l=linux-ide&m=124765325828495&w=2
We can see the following happening: [ 1.074659] ata3: XXX port freeze [ 1.074700] ata3: XXX hardresetting link, stopping engine [ 1.074746] ata3: XXX flipping SControl
[ 1.411471] ata3: XXX irq_stat=400040 CONN|PHY [ 1.411475] ata3: XXX port freeze
[ 1.420049] ata3: XXX starting engine [ 1.420096] ata3: XXX rc=0, class=1 [ 1.420142] ata3: XXX clearing IRQs for thawing [ 1.420188] ata3: XXX port thawed [ 1.420234] ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
We are not supposed to be able to receive an error IRQ while the port is frozen (PxIE is set to 0, i.e. all IRQs for the port are disabled).
AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) states: "Each bit location can be thought of as reporting a '1' if the virtual "interrupt line" for that port is indicating it wishes to generate an interrupt. That is, if a port has one or more interrupt status bit set, and the enables for those status bits are set, then this bit shall be set."
Additionally, AHCI state P:ComInit clearly shows that the state machine will only jump to P:ComInitSetIS (which sets IS.IPS(x) to '1'), if PxIE.PCE is set to '1'. In our case, PxIE is set to 0, so IS.IPS(x) won't get set.
So IS.IPS(x) only gets set if PxIS and PxIE is set.
AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) also states: "The bits in this register are read/write clear. It is set by the level of the virtual interrupt line being a set, and cleared by a write of '1' from the software."
So if IS.IPS(x) is set, you need to explicitly clear it by writing a 1 to IS.IPS(x) for that port.
Since PxIE is cleared, the only way to get an interrupt while the port is frozen, is if IS.IPS(x) is set, and the only way IS.IPS(x) can be set when the port is frozen, is if it was set before the port was frozen.
However, since commit 737dd811a3db ("ata: libahci: clear pending interrupt status"), we clear both PxIS and IS.IPS(x) after freezing the port, but before the COMRESET, so the problem that commit 1e641060c4b5 ("libata: clear eh_info on reset completion") fixed can no longer happen.
Thus, revert commit 1e641060c4b5 ("libata: clear eh_info on reset completion"), so that the retry logic in ata_scsi_port_error_handler() works once again. (The retry logic is still needed, since we can still get an error IRQ _after_ the port has been thawed, but before ata_scsi_port_error_handler() takes the ap->lock in order to check if ATA_PFLAG_EH_PENDING is set.)
Signed-off-by: Niklas Cassel niklas.cassel@wdc.com Signed-off-by: Damien Le Moal dlemoal@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ata/libata-eh.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 35e03679b0bfe..d7914c7d1a0d1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2822,18 +2822,11 @@ int ata_eh_reset(struct ata_link *link, int classify, } }
- /* - * Some controllers can't be frozen very well and may set spurious - * error conditions during reset. Clear accumulated error - * information and re-thaw the port if frozen. As reset is the - * final recovery action and we cross check link onlineness against - * device classification later, no hotplug event is lost by this. - */ + /* clear cached SError */ spin_lock_irqsave(link->ap->lock, flags); - memset(&link->eh_info, 0, sizeof(link->eh_info)); + link->eh_info.serror = 0; if (slave) - memset(&slave->eh_info, 0, sizeof(link->eh_info)); - ap->pflags &= ~ATA_PFLAG_EH_PENDING; + slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags);
if (ata_port_is_frozen(ap))
From: Niklas Cassel niklas.cassel@wdc.com
[ Upstream commit 7a3bc2b3989e05bbaa904a63279049a401491c84 ]
commit 1e641060c4b5 ("libata: clear eh_info on reset completion") added a workaround that broke the retry mechanism in ATA EH.
Tejun himself suggested to remove this workaround when it was identified to cause additional problems: https://lore.kernel.org/linux-ide/20110426135027.GI878@htj.dyndns.org/
He even said: "Hmm... it seems I wasn't thinking straight when I added that work around." https://lore.kernel.org/linux-ide/20110426155229.GM878@htj.dyndns.org/
While removing the workaround solved the issue, however, the workaround was kept to avoid "spurious hotplug events during reset", and instead another workaround was added on top of the existing workaround in commit 8c56cacc724c ("libata: fix unexpectedly frozen port after ata_eh_reset()").
Because these IRQs happened when the port was frozen, we know that they were actually a side effect of PxIS and IS.IPS(x) not being cleared before the COMRESET. This is now done in commit 94152042eaa9 ("ata: libahci: clear pending interrupt status"), so these workarounds can now be removed.
Since commit 1e641060c4b5 ("libata: clear eh_info on reset completion") has now been reverted, the ATA EH retry mechanism is functional again, so there is once again no need to thaw the port more than once in ata_eh_reset().
This reverts "the workaround on top of the workaround" introduced in commit 8c56cacc724c ("libata: fix unexpectedly frozen port after ata_eh_reset()").
Signed-off-by: Niklas Cassel niklas.cassel@wdc.com Signed-off-by: Damien Le Moal dlemoal@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/ata/libata-eh.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index d7914c7d1a0d1..960ef5c6f2c10 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2829,9 +2829,6 @@ int ata_eh_reset(struct ata_link *link, int classify, slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags);
- if (ata_port_is_frozen(ap)) - ata_eh_thaw_port(ap); - /* * Make sure onlineness and classification result correspond. * Hotplug could have happened during reset and some
linux-stable-mirror@lists.linaro.org