When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row).
Reported-by: Imre Deak <imre.deak@intel.com> References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Imre Deak <imre.deak@intel.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index d8944dabed55..ad56059651b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -191,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { + const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; dma_addr_t addr;
/* @@ -202,10 +203,16 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. */
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; for_each_sgt_daddr(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0].encode);
/* * We want to flush the TLBs only after we're certain all the PTE
On Tue, 31 Mar 2020 at 13:42, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row).
> Reported-by: Imre Deak <imre.deak@intel.com> References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Imre Deak <imre.deak@intel.com> Cc: stable@vger.kernel.org
Do we not need similar treatment for gen6? It seems to also play tricks with the nop clear range, or did we disable gen7 ppgtt in the end?
Reviewed-by: Matthew Auld matthew.auld@intel.com
Quoting Matthew Auld (2020-03-31 16:07:21)
> On Tue, 31 Mar 2020 at 13:42, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row).
> > Reported-by: Imre Deak <imre.deak@intel.com> References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Imre Deak <imre.deak@intel.com> Cc: stable@vger.kernel.org
> Do we not need similar treatment for gen6? It seems to also play tricks with the nop clear range, or did we disable gen7 ppgtt in the end?
Currently disabled. But yes, if we use nop_clear_range we will need similar clearing. As this method turned out to be much easier than expected, I guess we should just do it anyway. -Chris
When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row).
v2: Preemptively / conservatively guard gen6 ggtt as well.
Reported-by: Imre Deak <imre.deak@intel.com> References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Matthew Auld <matthew.auld@intel.com> Cc: Imre Deak <imre.deak@intel.com> Cc: stable@vger.kernel.org Reviewed-by: Matthew Auld <matthew.auld@intel.com> --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 37 ++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index d8944dabed55..ae07bcd7c226 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -191,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen8_pte_t __iomem *gtt_entries; const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; + struct sgt_iter iter; dma_addr_t addr;
/* @@ -202,10 +203,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. */
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; - gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_daddr(addr, sgt_iter, vma->pages) - gen8_set_pte(gtt_entries++, pte_encode | addr); + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma->pages) + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0].encode);
/* * We want to flush the TLBs only after we're certain all the PTE @@ -241,13 +249,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; - unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; + gen6_pte_t __iomem *gte; + gen6_pte_t __iomem *end; struct sgt_iter iter; dma_addr_t addr;
+ gte = (gen6_pte_t __iomem *)ggtt->gsm; + gte += vma->node.start / I915_GTT_PAGE_SIZE; + end = gte + vma->node.size / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, iter, vma->pages) - iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + iowrite32(vm->pte_encode(addr, level, flags), gte++); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + iowrite32(vm->scratch[0].encode, gte++);
/* * We want to flush the TLBs only after we're certain all the PTE
On Tue, Mar 31, 2020 at 01:42:02PM +0100, Chris Wilson wrote:
When we allocate space in the GGTT we may have to allocate a larger region than will be populated by the object to accommodate fencing. Make sure that this space beyond the end of the buffer points safely into scratch space, in case the HW tries to access it anyway (e.g. fenced access to the last tile row).
Reported-by: Imre Deak imre.deak@intel.com References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554 Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Matthew Auld matthew.auld@intel.com Cc: Imre Deak imre.deak@intel.com Cc: stable@vger.kernel.org
Thanks, Reviewed-by: Imre Deak imre.deak@intel.com
drivers/gpu/drm/i915/gt/intel_ggtt.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index d8944dabed55..ad56059651b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -191,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) {
+ const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter;
- gen8_pte_t __iomem *gtt_entries;
- const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
+ gen8_pte_t __iomem *gte;
+ gen8_pte_t __iomem *end; dma_addr_t addr;
/* @@ -202,10 +203,16 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, * not to allow the user to override access to a read only page. */
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
- gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
+ gte = (gen8_pte_t __iomem *)ggtt->gsm;
+ gte += vma->node.start / I915_GTT_PAGE_SIZE;
+ end = gte + vma->node.size / I915_GTT_PAGE_SIZE; for_each_sgt_daddr(addr, sgt_iter, vma->pages)
- gen8_set_pte(gtt_entries++, pte_encode | addr);
+ gen8_set_pte(gte++, pte_encode | addr);
+ GEM_BUG_ON(gte > end);
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ gen8_set_pte(gte++, vm->scratch[0].encode);
/* * We want to flush the TLBs only after we're certain all the PTE -- 2.20.1
linux-stable-mirror@lists.linaro.org