The patch below does not apply to the 6.17-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x d30203739be798d3de5c84db3060e96f00c54e82
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to 'stable@vger.kernel.org' --in-reply-to '2025102055-prayer-clock-414f@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d30203739be798d3de5c84db3060e96f00c54e82 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi lucas.demarchi@intel.com Date: Thu, 18 Sep 2025 13:58:57 -0700 Subject: [PATCH] drm/xe: Move rebar to be done earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
There may be cases in which the BAR0 also needs to move to accommodate the bigger BAR2. However if it's not released, the BAR2 resize fails. During the vram probe it can't be released as it's already in use by xe_mmio for early register access.
Add a new function in xe_vram and let xe_pci call it directly before even early device probe. This allows the BAR2 to resize in cases BAR0 also needs to move, assuming there aren't other reasons to hold that move:
[] xe 0000:03:00.0: vgaarb: deactivate vga console [] xe 0000:03:00.0: [drm] Attempting to resize bar from 8192MiB -> 16384MiB [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: releasing [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: assigned [] pcieport 0000:00:01.0: PCI bridge to [bus 01-04] [] pcieport 0000:00:01.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:00:01.0: bridge window [mem 0x4000000000-0x44007fffff 64bit pref] [] pcieport 0000:01:00.0: PCI bridge to [bus 02-04] [] pcieport 0000:01:00.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] pcieport 0000:02:01.0: PCI bridge to [bus 03] [] pcieport 0000:02:01.0: bridge window [mem 0x83000000-0x83ffffff] [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] xe 0000:03:00.0: [drm] BAR2 resized to 16384M [] xe 0000:03:00.0: [drm:xe_pci_probe [xe]] BATTLEMAGE e221:0000 dgfx:1 gfx:Xe2_HPG (20.02) ...
For BMG there are additional fixes needed on the PCI side, but this helps get it to a working resize.
All the rebar logic is more pci-specific than xe-specific and can be done very early in the probe sequence. In the future it would be good to move it out of xe_vram.c, but this refactor is left for later.
Cc: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Cc: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/intel-xe/fafda2a3-fc63-ce97-d22b-803f771a4d19@linux.... Reviewed-by: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/r/20250918-xe-pci-rebar-2-v1-2-6c094702a074@intel.co... Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com (cherry picked from commit 45e33f220fd625492c11e15733d8e9b4f9db82a4) Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index be91343829dd..9a6df79fc5b6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err;
+ xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedb..652df7a5f4f6 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -26,15 +26,35 @@
#define BAR_SIZE_SHIFT 20
-static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e. + * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret;
- if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev);
ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
- _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size);
pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return -ENXIO; }
- resize_vram_bar(xe); - lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); if (!lmem_bar->io_size) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 72860f714fc6..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,6 +11,7 @@ struct xe_device; struct xe_vram_region;
+void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe);
struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);
From: Piotr Piórkowski piotr.piorkowski@intel.com
[ Upstream commit 922ae875230be91c7f05f2aa90d176b6693e2601 ]
Let's replace the manual call to the ioremap_wc function with the devm_ioremap_wc function, ensuring that VRAM mappings are automatically released when the driver is detached. Since devm_ioremap_wc registers the mapping with the device's managed resources, the explicit iounmap call in vram_fini is no longer needed, so let's remove it.
Signed-off-by: Piotr Piórkowski piotr.piorkowski@intel.com Suggested-by: Matthew Auld matthew.auld@intel.com Reviewed-by: Matthew Auld matthew.auld@intel.com Acked-by: Matthew Brost matthew.brost@intel.com Link: https://lore.kernel.org/r/20250714184818.89201-2-piotr.piorkowski@intel.com Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/xe_vram.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index e421a74fb87c6..3a4c84e9efc66 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -156,7 +156,8 @@ static int determine_lmem_bar_size(struct xe_device *xe) xe->mem.vram.dpa_base = 0;
/* set up a map to the total memory area. */ - xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); + xe->mem.vram.mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram.io_start, + xe->mem.vram.io_size);
return 0; } @@ -278,9 +279,6 @@ static void vram_fini(void *arg) struct xe_tile *tile; int id;
- if (xe->mem.vram.mapping) - iounmap(xe->mem.vram.mapping); - xe->mem.vram.mapping = NULL;
for_each_tile(tile, xe, id)
From: Piotr Piórkowski piotr.piorkowski@intel.com
[ Upstream commit f92cfd72d9a650f90260c54accd840c6500c4c3a ]
In future platforms, we will need to represent the device and tile VRAM regions in a more dynamic way, so let's abandon the static allocation of these structures and start using dynamic allocation.
v2: - Add helpers for accessing fields of the xe_vram_region structure
v3: - Add missing EXPORT_SYMBOL_IF_KUNIT for xe_vram_region_actual_physical_size
Signed-off-by: Piotr Piórkowski piotr.piorkowski@intel.com Cc: Stuart Summers stuart.summers@intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Satyanarayana K V P satyanarayana.k.v.p@intel.com Reviewed-by: Satyanarayana K V P satyanarayana.k.v.p@intel.com Acked-by: Matthew Brost matthew.brost@intel.com Link: https://lore.kernel.org/r/20250714184818.89201-3-piotr.piorkowski@intel.com Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 4 +- drivers/gpu/drm/xe/display/xe_plane_initial.c | 2 +- drivers/gpu/drm/xe/xe_assert.h | 4 +- drivers/gpu/drm/xe/xe_device.c | 19 +++ drivers/gpu/drm/xe/xe_device_types.h | 6 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 25 ++-- drivers/gpu/drm/xe/xe_pci.c | 6 + drivers/gpu/drm/xe/xe_query.c | 2 +- drivers/gpu/drm/xe/xe_svm.c | 24 +--- drivers/gpu/drm/xe/xe_tile.c | 34 ++++- drivers/gpu/drm/xe/xe_tile.h | 4 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 10 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 7 +- drivers/gpu/drm/xe/xe_vram.c | 121 +++++++++++++----- drivers/gpu/drm/xe/xe_vram.h | 9 ++ 16 files changed, 202 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index c38fba18effe1..2187b2de64e17 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -289,7 +289,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *vram = xe_device_get_root_tile(xe)->mem.vram;
/* * If we need to able to access the clear-color value stored in @@ -297,7 +297,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * accessible. This is important on small-bar systems where * only some subset of VRAM is CPU accessible. */ - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) { + if (xe_vram_region_io_size(vram) < xe_vram_region_usable_size(vram)) { ret = -EINVAL; goto err; } diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index dcbc4b2d3fd94..b1bf7e8d9f9f2 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -103,7 +103,7 @@ initial_plane_bo(struct xe_device *xe, * We don't currently expect this to ever be placed in the * stolen portion. */ - if (phys_base >= tile0->mem.vram.usable_size) { + if (phys_base >= xe_vram_region_usable_size(tile0->mem.vram)) { drm_err(&xe->drm, "Initial plane programming using invalid range, phys_base=%pa\n", &phys_base); diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 68fe70ce2be3b..a818eaa05b7dc 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -12,6 +12,7 @@
#include "xe_gt_types.h" #include "xe_step.h" +#include "xe_vram.h"
/** * DOC: Xe Asserts @@ -145,7 +146,8 @@ const struct xe_tile *__tile = (tile); \ char __buf[10] __maybe_unused; \ xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ - __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ + __tile->id, ({ string_get_size( \ + xe_vram_region_actual_physical_size(__tile->mem.vram), 1, \ STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ })
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6ece4defa9df0..dab7e657044a6 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -688,6 +688,21 @@ static void sriov_update_device_info(struct xe_device *xe) } }
+static int xe_device_vram_alloc(struct xe_device *xe) +{ + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return -ENOMEM; + + xe->mem.vram = vram; + return 0; +} + /** * xe_device_probe_early: Device early probe * @xe: xe device instance @@ -735,6 +750,10 @@ int xe_device_probe_early(struct xe_device *xe)
xe->wedged.mode = xe_modparam.wedged_mode;
+ err = xe_device_vram_alloc(xe); + if (err) + return err; + return 0; } ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7ceb0c90f3914..d1edd430dc013 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -77,6 +77,8 @@ struct xe_pxp; * device, such as HBM memory or CXL extension memory. */ struct xe_vram_region { + /** @tile: Back pointer to tile */ + struct xe_tile *tile; /** @io_start: IO start address of this VRAM instance */ resource_size_t io_start; /** @@ -216,7 +218,7 @@ struct xe_tile { * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ - struct xe_vram_region vram; + struct xe_vram_region *vram;
/** @mem.ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; @@ -412,7 +414,7 @@ struct xe_device { /** @mem: memory info for device */ struct { /** @mem.vram: VRAM info for device */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; /** @mem.sys_mgr: system memory shrinker. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index d84831a03610d..e7b0ea2090604 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1604,7 +1604,7 @@ static u64 pf_query_free_lmem(struct xe_gt *gt) { struct xe_tile *tile = gt->tile;
- return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager); + return xe_ttm_vram_get_avail(&tile->mem.vram->ttm.manager); }
static u64 pf_query_max_lmem(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 84f412fd3c5d2..13e287e037096 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -34,6 +34,7 @@ #include "xe_sync.h" #include "xe_trace_bo.h" #include "xe_vm.h" +#include "xe_vram.h"
/** * struct xe_migrate - migrate context. @@ -130,34 +131,36 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte) u64 identity_offset = IDENTITY_OFFSET;
if (GRAPHICS_VER(xe) >= 20 && is_comp_pte) - identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + identity_offset += DIV_ROUND_UP_ULL(xe_vram_region_actual_physical_size + (xe->mem.vram), SZ_1G);
- addr -= xe->mem.vram.dpa_base; + addr -= xe_vram_region_dpa_base(xe->mem.vram); return addr + (identity_offset << xe_pt_shift(2)); }
static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo, u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs) { + struct xe_vram_region *vram = xe->mem.vram; + resource_size_t dpa_base = xe_vram_region_dpa_base(vram); u64 pos, ofs, flags; u64 entry; /* XXX: Unclear if this should be usable_size? */ - u64 vram_limit = xe->mem.vram.actual_physical_size + - xe->mem.vram.dpa_base; + u64 vram_limit = xe_vram_region_actual_physical_size(vram) + dpa_base; u32 level = 2;
ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8; flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, true, 0);
- xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); + xe_assert(xe, IS_ALIGNED(xe_vram_region_usable_size(vram), SZ_2M));
/* * Use 1GB pages when possible, last chunk always use 2M * pages as mixing reserved memory (stolen, WOCPM) with a single * mapping is not allowed on certain platforms. */ - for (pos = xe->mem.vram.dpa_base; pos < vram_limit; + for (pos = dpa_base; pos < vram_limit; pos += SZ_1G, ofs += 8) { if (pos + SZ_1G >= vram_limit) { entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, @@ -307,11 +310,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; + resource_size_t actual_phy_size = xe_vram_region_actual_physical_size(xe->mem.vram);
xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); - xe_assert(xe, xe->mem.vram.actual_physical_size <= - (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G);
/* * Identity map the entire vram for compressed pat_index for xe2+ @@ -320,11 +323,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) { u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + - DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G); u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE;
- xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - + IDENTITY_OFFSET / 2) * SZ_1G); xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset, comp_pat_index, pt31_ofs); } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3c40ef426f0cb..f64942737a0b1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -687,6 +687,8 @@ static int xe_info_init(struct xe_device *xe, * All of these together determine the overall GT count. */ for_each_tile(tile, xe, id) { + int err; + gt = tile->primary_gt; gt->info.type = XE_GT_TYPE_MAIN; gt->info.id = tile->id * xe->info.max_gt_per_tile; @@ -694,6 +696,10 @@ static int xe_info_init(struct xe_device *xe, gt->info.engine_mask = graphics_desc->hw_engine_mask; xe->info.gt_count++;
+ err = xe_tile_alloc_vram(tile); + if (err) + return err; + if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask;
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 83fe77ce62f76..d9fcc81b960e6 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -334,7 +334,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->num_params = num_params; config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); - if (xe_device_get_root_tile(xe)->mem.vram.usable_size) + if (xe->mem.vram) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index a7ff5975873f9..e6871734ffa98 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -306,16 +306,11 @@ static struct xe_vram_region *page_to_vr(struct page *page) return container_of(page_pgmap(page), struct xe_vram_region, pagemap); }
-static struct xe_tile *vr_to_tile(struct xe_vram_region *vr) -{ - return container_of(vr, struct xe_tile, mem.vram); -} - static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, struct page *page) { u64 dpa; - struct xe_tile *tile = vr_to_tile(vr); + struct xe_tile *tile = vr->tile; u64 pfn = page_to_pfn(page); u64 offset;
@@ -370,7 +365,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
if (!vr && spage) { vr = page_to_vr(spage); - tile = vr_to_tile(vr); + tile = vr->tile; } XE_WARN_ON(spage && page_to_vr(spage) != vr);
@@ -508,7 +503,7 @@ static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) { - return &tile->mem.vram.ttm.mm; + return &tile->mem.vram->ttm.mm; }
static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, @@ -522,7 +517,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; - struct xe_tile *tile = vr_to_tile(vr); + struct xe_tile *tile = vr->tile; struct drm_buddy *buddy = tile_to_buddy(tile); u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); int i; @@ -683,20 +678,15 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *v }
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) -static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) -{ - return &tile->mem.vram; -} - static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long start, unsigned long end, struct mm_struct *mm, unsigned long timeslice_ms) { - struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); + struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); + struct xe_tile *tile = vr->tile; struct xe_device *xe = tile_to_xe(tile); struct device *dev = xe->drm.dev; - struct xe_vram_region *vr = tile_to_vr(tile); struct drm_buddy_block *block; struct list_head *blocks; struct xe_bo *bo; @@ -722,7 +712,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, &dpagemap_devmem_ops, - &tile->mem.vram.dpagemap, + &tile->mem.vram->dpagemap, end - start);
blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 86e9811e60ba0..858ce0183aaae 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -19,6 +19,7 @@ #include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" +#include "xe_vram.h"
/** * DOC: Multi-tile Design @@ -95,6 +96,33 @@ static int xe_tile_alloc(struct xe_tile *tile) return 0; }
+/** + * xe_tile_alloc_vram - Perform per-tile VRAM structs allocation + * @tile: Tile to perform allocations for + * + * Allocates VRAM per-tile data structures using DRM-managed allocations. + * Does not touch the hardware. + * + * Returns -ENOMEM if allocations fail, otherwise 0. + */ +int xe_tile_alloc_vram(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return -ENOMEM; + + vram->tile = tile; + tile->mem.vram = vram; + + return 0; +} + /** * xe_tile_init_early - Initialize the tile and primary GT * @tile: Tile to initialize @@ -132,8 +160,8 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) struct xe_device *xe = tile_to_xe(tile); int err;
- if (tile->mem.vram.usable_size) { - err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm); + if (tile->mem.vram) { + err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram->ttm); if (err) return err; xe->info.mem_region_mask |= BIT(tile->id) << 1; @@ -168,7 +196,7 @@ int xe_tile_init_noalloc(struct xe_tile *tile) xe_wa_apply_tile_workarounds(tile);
if (xe->info.has_usm && IS_DGFX(xe)) - xe_devm_add(tile, &tile->mem.vram); + xe_devm_add(tile, tile->mem.vram);
return xe_tile_sysfs_init(tile); } diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index cc33e87339830..440bc9e11c8b4 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -14,12 +14,14 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); int xe_tile_init(struct xe_tile *tile);
+int xe_tile_alloc_vram(struct xe_tile *tile); + void xe_tile_migrate_wait(struct xe_tile *tile);
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) { - return &tile->mem.vram.dpagemap; + return &tile->mem.vram->dpagemap; } #else static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d9c9d2547aadf..9a9733447230b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -25,6 +25,7 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" +#include "xe_vram.h"
struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; @@ -82,15 +83,16 @@ static u32 get_wopcm_size(struct xe_device *xe)
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram; + resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram); struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size;
- tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; - tile_size = tile->mem.vram.actual_physical_size; + tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram); + tile_size = xe_vram_region_actual_physical_size(tile_vram);
/* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; @@ -107,7 +109,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
/* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) - mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; + mgr->io_base = tile_io_start + mgr->stolen_base;
/* * There may be few KB of platform dependent reserved memory at the end diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 9e375a40aee90..3de2df47959b7 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -340,10 +340,11 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) { struct xe_device *xe = tile_to_xe(tile); - struct xe_vram_region *vram = &tile->mem.vram; + struct xe_vram_region *vram = tile->mem.vram;
return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, - vram->usable_size, vram->io_size, + xe_vram_region_usable_size(vram), + xe_vram_region_io_size(vram), PAGE_SIZE); }
@@ -392,7 +393,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, */ xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { - phys_addr_t phys = cursor.start + tile->mem.vram.io_start; + phys_addr_t phys = cursor.start + xe_vram_region_io_start(tile->mem.vram); size_t size = min_t(u64, cursor.size, SZ_2G); dma_addr_t addr;
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 3a4c84e9efc66..c93e9b96354bc 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -3,6 +3,7 @@ * Copyright © 2021-2024 Intel Corporation */
+#include <kunit/visibility.h> #include <linux/pci.h>
#include <drm/drm_managed.h> @@ -147,17 +148,17 @@ static int determine_lmem_bar_size(struct xe_device *xe)
resize_vram_bar(xe);
- xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); - xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); - if (!xe->mem.vram.io_size) + xe->mem.vram->io_start = pci_resource_start(pdev, LMEM_BAR); + xe->mem.vram->io_size = pci_resource_len(pdev, LMEM_BAR); + if (!xe->mem.vram->io_size) return -EIO;
/* XXX: Need to change when xe link code is ready */ - xe->mem.vram.dpa_base = 0; + xe->mem.vram->dpa_base = 0;
/* set up a map to the total memory area. */ - xe->mem.vram.mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram.io_start, - xe->mem.vram.io_size); + xe->mem.vram->mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram->io_start, + xe->mem.vram->io_size);
return 0; } @@ -279,10 +280,10 @@ static void vram_fini(void *arg) struct xe_tile *tile; int id;
- xe->mem.vram.mapping = NULL; + xe->mem.vram->mapping = NULL;
for_each_tile(tile, xe, id) - tile->mem.vram.mapping = NULL; + tile->mem.vram->mapping = NULL; }
/** @@ -318,10 +319,10 @@ int xe_vram_probe(struct xe_device *xe) if (err) return err;
- drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.io_size); + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram->io_start, + &xe->mem.vram->io_size);
- io_size = xe->mem.vram.io_size; + io_size = xe->mem.vram->io_size;
/* tile specific ranges */ for_each_tile(tile, xe, id) { @@ -329,45 +330,105 @@ int xe_vram_probe(struct xe_device *xe) if (err) return err;
- tile->mem.vram.actual_physical_size = tile_size; - tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; - tile->mem.vram.io_size = min_t(u64, vram_size, io_size); + tile->mem.vram->actual_physical_size = tile_size; + tile->mem.vram->io_start = xe->mem.vram->io_start + tile_offset; + tile->mem.vram->io_size = min_t(u64, vram_size, io_size);
- if (!tile->mem.vram.io_size) { + if (!tile->mem.vram->io_size) { drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); return -ENODEV; }
- tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; - tile->mem.vram.usable_size = vram_size; - tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; + tile->mem.vram->dpa_base = xe->mem.vram->dpa_base + tile_offset; + tile->mem.vram->usable_size = vram_size; + tile->mem.vram->mapping = xe->mem.vram->mapping + tile_offset;
- if (tile->mem.vram.io_size < tile->mem.vram.usable_size) + if (tile->mem.vram->io_size < tile->mem.vram->usable_size) drm_info(&xe->drm, "Small BAR device\n"); - drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, - tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); - drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); + drm_info(&xe->drm, + "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", + id, tile->id, &tile->mem.vram->actual_physical_size, + &tile->mem.vram->usable_size, &tile->mem.vram->io_size); + drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", + id, tile->id, &tile->mem.vram->dpa_base, + tile->mem.vram->dpa_base + (u64)tile->mem.vram->actual_physical_size, + &tile->mem.vram->io_start, + tile->mem.vram->io_start + (u64)tile->mem.vram->io_size);
/* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; available_size += vram_size;
- if (total_size > xe->mem.vram.io_size) { + if (total_size > xe->mem.vram->io_size) { drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", - &total_size, &xe->mem.vram.io_size); + &total_size, &xe->mem.vram->io_size); }
io_size -= min_t(u64, tile_size, io_size); }
- xe->mem.vram.actual_physical_size = total_size; + xe->mem.vram->actual_physical_size = total_size;
- drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.actual_physical_size); - drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram->io_start, + &xe->mem.vram->actual_physical_size); + drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram->io_start, &available_size);
return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); } + +/** + * xe_vram_region_io_start - Get the IO start of a VRAM region + * @vram: the VRAM region + * + * Return: the IO start of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram) +{ + return vram ? vram->io_start : 0; +} + +/** + * xe_vram_region_io_size - Get the IO size of a VRAM region + * @vram: the VRAM region + * + * Return: the IO size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram) +{ + return vram ? vram->io_size : 0; +} + +/** + * xe_vram_region_dpa_base - Get the DPA base of a VRAM region + * @vram: the VRAM region + * + * Return: the DPA base of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram) +{ + return vram ? vram->dpa_base : 0; +} + +/** + * xe_vram_region_usable_size - Get the usable size of a VRAM region + * @vram: the VRAM region + * + * Return: the usable size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram) +{ + return vram ? vram->usable_size : 0; +} + +/** + * xe_vram_region_actual_physical_size - Get the actual physical size of a VRAM region + * @vram: the VRAM region + * + * Return: the actual physical size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram) +{ + return vram ? vram->actual_physical_size : 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_vram_region_actual_physical_size); diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index e31cc04ec0db2..d4bf1f9c2a72e 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -6,8 +6,17 @@ #ifndef _XE_VRAM_H_ #define _XE_VRAM_H_
+#include <linux/types.h> + struct xe_device; +struct xe_vram_region;
int xe_vram_probe(struct xe_device *xe);
+resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram); + #endif
From: Piotr Piórkowski piotr.piorkowski@intel.com
[ Upstream commit 7a20b4f558f4291161f71a5b7384262db9ccd6b0 ]
Let's move the xe_vram_region structure to a new header dedicated to VRAM to improve modularity and avoid unnecessary dependencies when only VRAM-related structures are needed.
v2: Fix build if CONFIG_DRM_XE_DEVMEM_MIRROR is enabled v3: Fix build if CONFIG_DRM_XE_DISPLAY is enabled v4: Move helper to get tile dpagemap to xe_svm.c
Signed-off-by: Piotr Piórkowski piotr.piorkowski@intel.com Suggested-by: Jani Nikula jani.nikula@intel.com Reviewed-by: Satyanarayana K V P satyanarayana.k.v.p@intel.com # rev3 Acked-by: Matthew Brost matthew.brost@intel.com Link: https://lore.kernel.org/r/20250714184818.89201-4-piotr.piorkowski@intel.com Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 1 + drivers/gpu/drm/xe/display/xe_plane_initial.c | 1 + drivers/gpu/drm/xe/xe_bo.c | 1 + drivers/gpu/drm/xe/xe_bo_types.h | 1 + drivers/gpu/drm/xe/xe_device.c | 1 + drivers/gpu/drm/xe/xe_device_types.h | 60 +-------------- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 1 + drivers/gpu/drm/xe/xe_svm.c | 8 +- drivers/gpu/drm/xe/xe_tile.c | 1 + drivers/gpu/drm/xe/xe_tile.h | 12 --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 1 + drivers/gpu/drm/xe/xe_vram.c | 1 + drivers/gpu/drm/xe/xe_vram_types.h | 74 +++++++++++++++++++ 13 files changed, 91 insertions(+), 72 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_vram_types.h
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 2187b2de64e17..f2cfba6748998 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -16,6 +16,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_pm.h" +#include "xe_vram_types.h"
static void write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index b1bf7e8d9f9f2..b2d27458def52 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -21,6 +21,7 @@ #include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" +#include "xe_vram_types.h" #include "xe_wa.h"
#include <generated/xe_wa_oob.h> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bae7ff2e59276..50c79049ccea0 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -36,6 +36,7 @@ #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +#include "xe_vram_types.h"
const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { [XE_PL_SYSTEM] = "system", diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ff560d82496ff..57d34698139ee 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -9,6 +9,7 @@ #include <linux/iosys-map.h>
#include <drm/drm_gpusvm.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index dab7e657044a6..e53fb5a798c8d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -64,6 +64,7 @@ #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" #include "xe_vram.h" +#include "xe_vram_types.h" #include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d1edd430dc013..ac6419f475733 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -10,7 +10,6 @@
#include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_device.h>
#include "xe_devcoredump_types.h" @@ -26,7 +25,6 @@ #include "xe_sriov_vf_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" -#include "xe_ttm_vram_mgr_types.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR @@ -39,6 +37,7 @@ struct xe_ggtt; struct xe_i2c; struct xe_pat_ops; struct xe_pxp; +struct xe_vram_region;
#define XE_BO_INVALID_OFFSET LONG_MAX
@@ -71,63 +70,6 @@ struct xe_pxp; const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ struct xe_tile * : (tile__)->xe)
-/** - * struct xe_vram_region - memory region structure - * This is used to describe a memory region in xe - * device, such as HBM memory or CXL extension memory. - */ -struct xe_vram_region { - /** @tile: Back pointer to tile */ - struct xe_tile *tile; - /** @io_start: IO start address of this VRAM instance */ - resource_size_t io_start; - /** - * @io_size: IO size of this VRAM instance - * - * This represents how much of this VRAM we can access - * via the CPU through the VRAM BAR. This can be smaller - * than @usable_size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). This - * configuration is known as small-bar. - */ - resource_size_t io_size; - /** @dpa_base: This memory regions's DPA (device physical address) base */ - resource_size_t dpa_base; - /** - * @usable_size: usable size of VRAM - * - * Usable size of VRAM excluding reserved portions - * (e.g stolen mem) - */ - resource_size_t usable_size; - /** - * @actual_physical_size: Actual VRAM size - * - * Actual VRAM size including reserved portions - * (e.g stolen mem) - */ - resource_size_t actual_physical_size; - /** @mapping: pointer to VRAM mappable space */ - void __iomem *mapping; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) - /** @pagemap: Used to remap device memory as ZONE_DEVICE */ - struct dev_pagemap pagemap; - /** - * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory - * pages of this tile. 
- */ - struct drm_pagemap dpagemap; - /** - * @hpa_base: base host physical address - * - * This is generated when remap device memory as ZONE_DEVICE - */ - resource_size_t hpa_base; -#endif -}; - /** * struct xe_mmio - register mmio structure * diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index e7b0ea2090604..61a357946fe1e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -33,6 +33,7 @@ #include "xe_migrate.h" #include "xe_sriov.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wopcm.h"
#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index e6871734ffa98..901f9a0268e64 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -17,6 +17,7 @@ #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" +#include "xe_vram_types.h"
static bool xe_svm_range_in_vram(struct xe_svm_range *range) { @@ -989,6 +990,11 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram->dpagemap; +} + /** * xe_svm_alloc_vram()- Allocate device memory pages for range, * migrating existing data. @@ -1006,7 +1012,7 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); range_debug(range, "ALLOCATE VRAM");
- dpagemap = xe_tile_local_pagemap(tile); + dpagemap = tile_local_pagemap(tile); return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), xe_svm_range_end(range), range->base.gpusvm->mm, diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 858ce0183aaae..bd2ff91a7d1c0 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -20,6 +20,7 @@ #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" #include "xe_vram.h" +#include "xe_vram_types.h"
/** * DOC: Multi-tile Design diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 440bc9e11c8b4..dceb6297aa01d 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -18,18 +18,6 @@ int xe_tile_alloc_vram(struct xe_tile *tile);
void xe_tile_migrate_wait(struct xe_tile *tile);
-#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) -static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) -{ - return &tile->mem.vram->dpagemap; -} -#else -static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) -{ - return NULL; -} -#endif - static inline bool xe_tile_is_root(struct xe_tile *tile) { return tile->id == 0; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 3de2df47959b7..8f9b8a1d2c058 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -15,6 +15,7 @@ #include "xe_gt.h" #include "xe_res_cursor.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h"
static inline struct drm_buddy_block * xe_ttm_vram_mgr_first_block(struct list_head *list) diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index c93e9b96354bc..366e5d8a85cac 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -21,6 +21,7 @@ #include "xe_module.h" #include "xe_sriov.h" #include "xe_vram.h" +#include "xe_vram_types.h"
#define BAR_SIZE_SHIFT 20
diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h new file mode 100644 index 0000000000000..a018382360366 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_types.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_VRAM_TYPES_H_ +#define _XE_VRAM_TYPES_H_ + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +#include <drm/drm_pagemap.h> +#endif + +#include "xe_ttm_vram_mgr_types.h" + +struct xe_tile; + +/** + * struct xe_vram_region - memory region structure + * This is used to describe a memory region in xe + * device, such as HBM memory or CXL extension memory. + */ +struct xe_vram_region { + /** @tile: Back pointer to tile */ + struct xe_tile *tile; + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. This can be smaller + * than @usable_size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @dpa_base: This memory regions's DPA (device physical address) base */ + resource_size_t dpa_base; + /** + * @usable_size: usable size of VRAM + * + * Usable size of VRAM excluding reserved portions + * (e.g stolen mem) + */ + resource_size_t usable_size; + /** + * @actual_physical_size: Actual VRAM size + * + * Actual VRAM size including reserved portions + * (e.g stolen mem) + */ + resource_size_t actual_physical_size; + /** @mapping: pointer to VRAM mappable space */ + void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + /** @pagemap: Used to remap device memory as ZONE_DEVICE */ + struct dev_pagemap pagemap; + /** + * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory + * pages of this tile. 
+ */ + struct drm_pagemap dpagemap; + /** + * @hpa_base: base host physical address + * + * This is generated when remap device memory as ZONE_DEVICE + */ + resource_size_t hpa_base; +#endif +}; + +#endif
From: Piotr Piórkowski piotr.piorkowski@intel.com
[ Upstream commit 4b0a5f5ce7849aab7a67ba9f113ed75626f6de36 ]
Currently in the driver we have defined VRAM regions per device and per tile. Initialization of these regions is done in two completely different ways. To simplify the logic of the code and make it easier to add new regions in the future, let's unify the way we initialize VRAM regions.
v2: - fix doc comments in struct xe_vram_region - remove unnecessary includes (Jani) v3: - move code from xe_vram_init_regions_managers to xe_tile_init_noalloc (Matthew) - replace ioremap_wc with devm_ioremap_wc for mapping VRAM BAR (Matthew) - Replace the tile id parameter with vram region in the xe_pf_begin function. v4: - remove tile back pointer from struct xe_vram_region - add new back pointers: xe and migrate to xe_vram_region
Signed-off-by: Piotr Piórkowski piotr.piorkowski@intel.com Cc: Stuart Summers stuart.summers@intel.com Cc: Matthew Auld matthew.auld@intel.com Cc: Jani Nikula jani.nikula@intel.com Reviewed-by: Matthew Auld matthew.auld@intel.com # rev3 Acked-by: Matthew Brost matthew.brost@intel.com Link: https://lore.kernel.org/r/20250714184818.89201-6-piotr.piorkowski@intel.com Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Stable-dep-of: d30203739be7 ("drm/xe: Move rebar to be done earlier") Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/xe_bo.h | 4 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 13 ++- drivers/gpu/drm/xe/xe_query.c | 3 +- drivers/gpu/drm/xe/xe_svm.c | 43 ++++---- drivers/gpu/drm/xe/xe_tile.c | 37 +++---- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 16 ++- drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 3 +- drivers/gpu/drm/xe/xe_vram.c | 151 ++++++++++++++++----------- drivers/gpu/drm/xe/xe_vram.h | 2 + drivers/gpu/drm/xe/xe_vram_types.h | 17 ++- 10 files changed, 164 insertions(+), 125 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9ce94d2520156..cfb1ec266a6da 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -12,6 +12,7 @@ #include "xe_macros.h" #include "xe_vm_types.h" #include "xe_vm.h" +#include "xe_vram_types.h"
#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
@@ -23,8 +24,9 @@ #define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) /* -- */ #define XE_BO_FLAG_STOLEN BIT(4) +#define XE_BO_FLAG_VRAM(vram) (XE_BO_FLAG_VRAM0 << ((vram)->id)) #define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? \ - XE_BO_FLAG_VRAM0 << (tile)->id : \ + XE_BO_FLAG_VRAM((tile)->mem.vram) : \ XE_BO_FLAG_SYSTEM) #define XE_BO_FLAG_GGTT BIT(5) #define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 5a75d56d8558d..ab43dec527768 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -23,6 +23,7 @@ #include "xe_svm.h" #include "xe_trace_bo.h" #include "xe_vm.h" +#include "xe_vram_types.h"
struct pagefault { u64 page_addr; @@ -74,7 +75,7 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) }
static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, - bool atomic, unsigned int id) + bool atomic, struct xe_vram_region *vram) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -84,14 +85,16 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, if (err) return err;
- if (atomic && IS_DGFX(vm->xe)) { + if (atomic && vram) { + xe_assert(vm->xe, IS_DGFX(vm->xe)); + if (xe_vma_is_userptr(vma)) { err = -EACCES; return err; }
/* Migrate to VRAM, move should invalidate the VMA first */ - err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); + err = xe_bo_migrate(bo, vram->placement); if (err) return err; } else if (bo) { @@ -138,7 +141,7 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, /* Lock VM and BOs dma-resv */ drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_pf_begin(&exec, vma, atomic, tile->id); + err = xe_pf_begin(&exec, vma, atomic, tile->mem.vram); drm_exec_retry_on_contention(&exec); if (xe_vm_validate_should_retry(&exec, err, &end)) err = -EAGAIN; @@ -573,7 +576,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) /* Lock VM and BOs dma-resv */ drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, true, tile->id); + ret = xe_pf_begin(&exec, vma, true, tile->mem.vram); drm_exec_retry_on_contention(&exec); if (ret) break; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index d9fcc81b960e6..f2a3d4ced068c 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -27,6 +27,7 @@ #include "xe_oa.h" #include "xe_pxp.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wa.h"
static const u16 xe_to_user_engine_class[] = { @@ -407,7 +408,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query gt_list->gt_list[iter].near_mem_regions = 0x1; else gt_list->gt_list[iter].near_mem_regions = - BIT(gt_to_tile(gt)->id) << 1; + BIT(gt_to_tile(gt)->mem.vram->id) << 1; gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ gt_list->gt_list[iter].near_mem_regions;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 901f9a0268e64..10c8a1bcb86e8 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -311,12 +311,11 @@ static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, struct page *page) { u64 dpa; - struct xe_tile *tile = vr->tile; u64 pfn = page_to_pfn(page); u64 offset;
- xe_tile_assert(tile, is_device_private_page(page)); - xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base); + xe_assert(vr->xe, is_device_private_page(page)); + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base);
offset = (pfn << PAGE_SHIFT) - vr->hpa_base; dpa = vr->dpa_base + offset; @@ -333,7 +332,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, unsigned long npages, const enum xe_svm_copy_dir dir) { struct xe_vram_region *vr = NULL; - struct xe_tile *tile; + struct xe_device *xe; struct dma_fence *fence = NULL; unsigned long i; #define XE_VRAM_ADDR_INVALID ~0x0ull @@ -366,7 +365,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
if (!vr && spage) { vr = page_to_vr(spage); - tile = vr->tile; + xe = vr->xe; } XE_WARN_ON(spage && page_to_vr(spage) != vr);
@@ -398,18 +397,18 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
if (vram_addr != XE_VRAM_ADDR_INVALID) { if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", vram_addr, (u64)dma_addr[pos], i - pos + incr); - __fence = xe_migrate_from_vram(tile->migrate, + __fence = xe_migrate_from_vram(vr->migrate, i - pos + incr, vram_addr, dma_addr + pos); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", (u64)dma_addr[pos], vram_addr, i - pos + incr); - __fence = xe_migrate_to_vram(tile->migrate, + __fence = xe_migrate_to_vram(vr->migrate, i - pos + incr, dma_addr + pos, vram_addr); @@ -434,17 +433,17 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, /* Extra mismatched device page, copy it */ if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", vram_addr, (u64)dma_addr[pos], 1); - __fence = xe_migrate_from_vram(tile->migrate, 1, + __fence = xe_migrate_from_vram(vr->migrate, 1, vram_addr, dma_addr + pos); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", (u64)dma_addr[pos], vram_addr, 1); - __fence = xe_migrate_to_vram(tile->migrate, 1, + __fence = xe_migrate_to_vram(vr->migrate, 1, dma_addr + pos, vram_addr); } @@ -502,9 +501,9 @@ static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) return PHYS_PFN(offset + vr->hpa_base); }
-static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) +static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) { - return &tile->mem.vram->ttm.mm; + return &vram->ttm.mm; }
static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, @@ -518,8 +517,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; - struct xe_tile *tile = vr->tile; - struct drm_buddy *buddy = tile_to_buddy(tile); + struct drm_buddy *buddy = vram_to_buddy(vr); u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); int i;
@@ -685,8 +683,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long timeslice_ms) { struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); - struct xe_tile *tile = vr->tile; - struct xe_device *xe = tile_to_xe(tile); + struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; struct drm_buddy_block *block; struct list_head *blocks; @@ -700,9 +697,9 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, xe_pm_runtime_get(xe);
retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, + bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start, ttm_bo_type_device, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | + (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | XE_BO_FLAG_CPU_ADDR_MIRROR); if (IS_ERR(bo)) { err = PTR_ERR(bo); @@ -712,9 +709,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, }
drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, - &dpagemap_devmem_ops, - &tile->mem.vram->dpagemap, - end - start); + &dpagemap_devmem_ops, dpagemap, end - start);
blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index bd2ff91a7d1c0..68b84111f26b3 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,6 +7,7 @@
#include <drm/drm_managed.h>
+#include "xe_bo.h" #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" @@ -114,11 +115,9 @@ int xe_tile_alloc_vram(struct xe_tile *tile) if (!IS_DGFX(xe)) return 0;
- vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); - if (!vram) - return -ENOMEM; - - vram->tile = tile; + vram = xe_vram_region_alloc(xe, tile->id, XE_PL_VRAM0 + tile->id); + if (IS_ERR(vram)) + return PTR_ERR(vram); tile->mem.vram = vram;
return 0; @@ -156,21 +155,6 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) } ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
-static int tile_ttm_mgr_init(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (tile->mem.vram) { - err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram->ttm); - if (err) - return err; - xe->info.mem_region_mask |= BIT(tile->id) << 1; - } - - return 0; -} - /** * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. * @tile: The tile to initialize. @@ -188,17 +172,20 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) int xe_tile_init_noalloc(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); - int err; - - err = tile_ttm_mgr_init(tile); - if (err) - return err;
xe_wa_apply_tile_workarounds(tile);
if (xe->info.has_usm && IS_DGFX(xe)) xe_devm_add(tile, tile->mem.vram);
+ if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) { + int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); + + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1; + } + return xe_tile_sysfs_init(tile); }
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 8f9b8a1d2c058..9175b4a2214b8 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -338,12 +338,18 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); }
-int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) +/** + * xe_ttm_vram_mgr_init - initialize TTM VRAM region + * @xe: pointer to Xe device + * @vram: pointer to xe_vram_region that contains the memory region attributes + * + * Initialize the Xe TTM for given @vram region using the given parameters. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram) { - struct xe_device *xe = tile_to_xe(tile); - struct xe_vram_region *vram = tile->mem.vram; - - return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, + return __xe_ttm_vram_mgr_init(xe, &vram->ttm, vram->placement, xe_vram_region_usable_size(vram), xe_vram_region_io_size(vram), PAGE_SIZE); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index cc76050e376dd..87b7fae5edba1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -11,11 +11,12 @@ enum dma_data_direction; struct xe_device; struct xe_tile; +struct xe_vram_region;
int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, u32 mem_type, u64 size, u64 io_size, u64 default_page_size); -int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, u64 offset, u64 length, diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 366e5d8a85cac..b44ebf50fedbb 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -20,6 +20,7 @@ #include "xe_mmio.h" #include "xe_module.h" #include "xe_sriov.h" +#include "xe_ttm_vram_mgr.h" #include "xe_vram.h" #include "xe_vram_types.h"
@@ -138,7 +139,7 @@ static bool resource_is_valid(struct pci_dev *pdev, int bar) return true; }
-static int determine_lmem_bar_size(struct xe_device *xe) +static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *lmem_bar) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -149,17 +150,16 @@ static int determine_lmem_bar_size(struct xe_device *xe)
resize_vram_bar(xe);
- xe->mem.vram->io_start = pci_resource_start(pdev, LMEM_BAR); - xe->mem.vram->io_size = pci_resource_len(pdev, LMEM_BAR); - if (!xe->mem.vram->io_size) + lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); + lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); + if (!lmem_bar->io_size) return -EIO;
/* XXX: Need to change when xe link code is ready */ - xe->mem.vram->dpa_base = 0; + lmem_bar->dpa_base = 0;
/* set up a map to the total memory area. */ - xe->mem.vram->mapping = devm_ioremap_wc(&pdev->dev, xe->mem.vram->io_start, - xe->mem.vram->io_size); + lmem_bar->mapping = devm_ioremap_wc(&pdev->dev, lmem_bar->io_start, lmem_bar->io_size);
return 0; } @@ -287,6 +287,67 @@ static void vram_fini(void *arg) tile->mem.vram->mapping = NULL; }
+struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) +{ + struct xe_vram_region *vram; + struct drm_device *drm = &xe->drm; + + xe_assert(xe, id < xe->info.tile_count); + + vram = drmm_kzalloc(drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return NULL; + + vram->xe = xe; + vram->id = id; + vram->placement = placement; +#if defined(CONFIG_DRM_XE_PAGEMAP) + vram->migrate = xe->tiles[id].migrate; +#endif + return vram; +} + +static void print_vram_region_info(struct xe_device *xe, struct xe_vram_region *vram) +{ + struct drm_device *drm = &xe->drm; + + if (vram->io_size < vram->usable_size) + drm_info(drm, "Small BAR device\n"); + + drm_info(drm, + "VRAM[%u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", + vram->id, &vram->actual_physical_size, &vram->usable_size, &vram->io_size); + drm_info(drm, "VRAM[%u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", + vram->id, &vram->dpa_base, vram->dpa_base + (u64)vram->actual_physical_size, + &vram->io_start, vram->io_start + (u64)vram->io_size); +} + +static int vram_region_init(struct xe_device *xe, struct xe_vram_region *vram, + struct xe_vram_region *lmem_bar, u64 offset, u64 usable_size, + u64 region_size, resource_size_t remain_io_size) +{ + /* Check if VRAM region is already initialized */ + if (vram->mapping) + return 0; + + vram->actual_physical_size = region_size; + vram->io_start = lmem_bar->io_start + offset; + vram->io_size = min_t(u64, usable_size, remain_io_size); + + if (!vram->io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } + + vram->dpa_base = lmem_bar->dpa_base + offset; + vram->mapping = lmem_bar->mapping + offset; + vram->usable_size = usable_size; + + print_vram_region_info(xe, vram); + + return 0; +} + /** * xe_vram_probe() - Probe VRAM configuration * @xe: the &xe_device @@ -298,82 +359,52 @@ static void vram_fini(void *arg) int xe_vram_probe(struct xe_device *xe) { struct xe_tile *tile; - resource_size_t io_size; + struct xe_vram_region lmem_bar; + resource_size_t remain_io_size; u64 available_size = 0; u64 total_size = 0; - u64 tile_offset; - u64 tile_size; - u64 vram_size; int err; u8 id;
if (!IS_DGFX(xe)) return 0;
- /* Get the size of the root tile's vram for later accessibility comparison */ - tile = xe_device_get_root_tile(xe); - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + err = determine_lmem_bar_size(xe, &lmem_bar); if (err) return err; + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &lmem_bar.io_start, &lmem_bar.io_size);
- err = determine_lmem_bar_size(xe); - if (err) - return err; - - drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram->io_start, - &xe->mem.vram->io_size); - - io_size = xe->mem.vram->io_size; + remain_io_size = lmem_bar.io_size;
- /* tile specific ranges */ for_each_tile(tile, xe, id) { - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + u64 region_size; + u64 usable_size; + u64 tile_offset; + + err = tile_vram_size(tile, &usable_size, &region_size, &tile_offset); if (err) return err;
- tile->mem.vram->actual_physical_size = tile_size; - tile->mem.vram->io_start = xe->mem.vram->io_start + tile_offset; - tile->mem.vram->io_size = min_t(u64, vram_size, io_size); + total_size += region_size; + available_size += usable_size;
- if (!tile->mem.vram->io_size) { - drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); - return -ENODEV; - } + err = vram_region_init(xe, tile->mem.vram, &lmem_bar, tile_offset, usable_size, + region_size, remain_io_size); + if (err) + return err;
- tile->mem.vram->dpa_base = xe->mem.vram->dpa_base + tile_offset; - tile->mem.vram->usable_size = vram_size; - tile->mem.vram->mapping = xe->mem.vram->mapping + tile_offset; - - if (tile->mem.vram->io_size < tile->mem.vram->usable_size) - drm_info(&xe->drm, "Small BAR device\n"); - drm_info(&xe->drm, - "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", - id, tile->id, &tile->mem.vram->actual_physical_size, - &tile->mem.vram->usable_size, &tile->mem.vram->io_size); - drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", - id, tile->id, &tile->mem.vram->dpa_base, - tile->mem.vram->dpa_base + (u64)tile->mem.vram->actual_physical_size, - &tile->mem.vram->io_start, - tile->mem.vram->io_start + (u64)tile->mem.vram->io_size); - - /* calculate total size using tile size to get the correct HW sizing */ - total_size += tile_size; - available_size += vram_size; - - if (total_size > xe->mem.vram->io_size) { + if (total_size > lmem_bar.io_size) { drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", - &total_size, &xe->mem.vram->io_size); + &total_size, &lmem_bar.io_size); }
- io_size -= min_t(u64, tile_size, io_size); + remain_io_size -= min_t(u64, tile->mem.vram->actual_physical_size, remain_io_size); }
- xe->mem.vram->actual_physical_size = total_size; - - drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram->io_start, - &xe->mem.vram->actual_physical_size); - drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram->io_start, - &available_size); + err = vram_region_init(xe, xe->mem.vram, &lmem_bar, 0, available_size, total_size, + lmem_bar.io_size); + if (err) + return err;
return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index d4bf1f9c2a72e..72860f714fc66 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -13,6 +13,8 @@ struct xe_vram_region;
int xe_vram_probe(struct xe_device *xe);
+struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); + resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram); resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram); resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram); diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h index a018382360366..83772dcbf1aff 100644 --- a/drivers/gpu/drm/xe/xe_vram_types.h +++ b/drivers/gpu/drm/xe/xe_vram_types.h @@ -12,7 +12,8 @@
#include "xe_ttm_vram_mgr_types.h"
-struct xe_tile; +struct xe_device; +struct xe_migrate;
/** * struct xe_vram_region - memory region structure @@ -20,8 +21,14 @@ struct xe_tile; * device, such as HBM memory or CXL extension memory. */ struct xe_vram_region { - /** @tile: Back pointer to tile */ - struct xe_tile *tile; + /** @xe: Back pointer to xe device */ + struct xe_device *xe; + /** + * @id: VRAM region instance id + * + * The value should be unique for VRAM region. + */ + u8 id; /** @io_start: IO start address of this VRAM instance */ resource_size_t io_start; /** @@ -54,7 +61,11 @@ struct xe_vram_region { void __iomem *mapping; /** @ttm: VRAM TTM manager */ struct xe_ttm_vram_mgr ttm; + /** @placement: TTM placement dedicated for this region */ + u32 placement; #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + /** @migrate: Back pointer to migrate */ + struct xe_migrate *migrate; /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /**
From: Lucas De Marchi lucas.demarchi@intel.com
[ Upstream commit d30203739be798d3de5c84db3060e96f00c54e82 ]
There may be cases in which the BAR0 also needs to move to accommodate the bigger BAR2. However if it's not released, the BAR2 resize fails. During the vram probe it can't be released as it's already in use by xe_mmio for early register access.
Add a new function in xe_vram and let xe_pci call it directly, even before the early device probe. This allows BAR2 to be resized in cases where BAR0 also needs to move, assuming there aren't other reasons to hold that move:
[] xe 0000:03:00.0: vgaarb: deactivate vga console [] xe 0000:03:00.0: [drm] Attempting to resize bar from 8192MiB -> 16384MiB [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: releasing [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: assigned [] pcieport 0000:00:01.0: PCI bridge to [bus 01-04] [] pcieport 0000:00:01.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:00:01.0: bridge window [mem 0x4000000000-0x44007fffff 64bit pref] [] pcieport 0000:01:00.0: PCI bridge to [bus 02-04] [] pcieport 0000:01:00.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] pcieport 0000:02:01.0: PCI bridge to [bus 03] [] pcieport 0000:02:01.0: bridge window [mem 0x83000000-0x83ffffff] [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] xe 0000:03:00.0: [drm] BAR2 resized to 16384M [] xe 0000:03:00.0: [drm:xe_pci_probe [xe]] BATTLEMAGE e221:0000 dgfx:1 gfx:Xe2_HPG (20.02) ...
For BMG there are additional fixes needed on the PCI side, but this helps get it to a working resize.
All the rebar logic is more pci-specific than xe-specific and can be done very early in the probe sequence. In future it would be good to move it out of xe_vram.c, but this refactor is left for later.
Cc: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Cc: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/intel-xe/fafda2a3-fc63-ce97-d22b-803f771a4d19@linux.... Reviewed-by: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/r/20250918-xe-pci-rebar-2-v1-2-6c094702a074@intel.co... Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com (cherry picked from commit 45e33f220fd625492c11e15733d8e9b4f9db82a4) Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_vram.c | 34 ++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vram.h | 1 + 3 files changed, 29 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f64942737a0b1..6c2637fc8f1ab 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -805,6 +805,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err;
+ xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedbb..652df7a5f4f65 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -26,15 +26,35 @@
#define BAR_SIZE_SHIFT 20
-static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e. + * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret;
- if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev);
ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
- _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size);
pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return -ENXIO; }
- resize_vram_bar(xe); - lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); if (!lmem_bar->io_size) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 72860f714fc66..13505cfb184dc 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,6 +11,7 @@ struct xe_device; struct xe_vram_region;
+void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe);
struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);
linux-stable-mirror@lists.linaro.org