Skip to content

Commit 5207c39

Browse files
Thomas Hellström authored and lucasdemarchi committed
drm/xe: Use write-back caching mode for system memory on DGFX
The caching mode for buffer objects with VRAM as a possible placement was forced to write-combined, regardless of placement.

However, write-combined system memory is expensive to allocate and even though it is pooled, the pool is expensive to shrink, since it involves global CPU TLB flushes.

Moreover, write-combined system memory from TTM is only reliably available on x86 and DGFX doesn't have an x86 restriction.

So regardless of the cpu caching mode selected for a bo, internally use write-back caching mode for system memory on DGFX.

Coherency is maintained, but user-space clients may perceive a difference in cpu access speeds.

v2:
- Update RB- and Ack tags.
- Rephrase wording in xe_drm.h (Matt Roper)
v3:
- Really rephrase wording.

Signed-off-by: Thomas Hellström <[email protected]>
Fixes: 622f709 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra <[email protected]>
Cc: Matthew Auld <[email protected]>
Cc: [email protected]
Cc: Joonas Lahtinen <[email protected]>
Cc: Effie Yu <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: Maarten Lankhorst <[email protected]>
Cc: Jose Souza <[email protected]>
Cc: Michal Mrozek <[email protected]>
Cc: <[email protected]> # v6.8+
Acked-by: Matthew Auld <[email protected]>
Acked-by: José Roberto de Souza <[email protected]>
Reviewed-by: Rodrigo Vivi <[email protected]>
Fixes: 622f709 ("drm/xe/uapi: Add support for CPU caching mode")
Acked-by: Michal Mrozek <[email protected]>
Acked-by: Effie Yu <[email protected]> #On chat
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit 01e0cfc)
Signed-off-by: Lucas De Marchi <[email protected]>
1 parent 256abd8 commit 5207c39

File tree

3 files changed

+37
-21
lines changed

3 files changed

+37
-21
lines changed

drivers/gpu/drm/xe/xe_bo.c

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
317317
struct xe_device *xe = xe_bo_device(bo);
318318
struct xe_ttm_tt *tt;
319319
unsigned long extra_pages;
320-
enum ttm_caching caching;
320+
enum ttm_caching caching = ttm_cached;
321321
int err;
322322

323323
tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -331,26 +331,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
331331
extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
332332
PAGE_SIZE);
333333

334-
switch (bo->cpu_caching) {
335-
case DRM_XE_GEM_CPU_CACHING_WC:
336-
caching = ttm_write_combined;
337-
break;
338-
default:
339-
caching = ttm_cached;
340-
break;
341-
}
342-
343-
WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
344-
345334
/*
346-
* Display scanout is always non-coherent with the CPU cache.
347-
*
348-
* For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
349-
* require a CPU:WC mapping.
335+
* DGFX system memory is always WB / ttm_cached, since
336+
* other caching modes are only supported on x86. DGFX
337+
* GPU system memory accesses are always coherent with the
338+
* CPU.
350339
*/
351-
if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
352-
(xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
353-
caching = ttm_write_combined;
340+
if (!IS_DGFX(xe)) {
341+
switch (bo->cpu_caching) {
342+
case DRM_XE_GEM_CPU_CACHING_WC:
343+
caching = ttm_write_combined;
344+
break;
345+
default:
346+
caching = ttm_cached;
347+
break;
348+
}
349+
350+
WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
351+
352+
/*
353+
* Display scanout is always non-coherent with the CPU cache.
354+
*
355+
* For Xe_LPG and beyond, PPGTT PTE lookups are also
356+
* non-coherent and require a CPU:WC mapping.
357+
*/
358+
if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
359+
(xe->info.graphics_verx100 >= 1270 &&
360+
bo->flags & XE_BO_FLAG_PAGETABLE))
361+
caching = ttm_write_combined;
362+
}
354363

355364
err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
356365
if (err) {

drivers/gpu/drm/xe/xe_bo_types.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ struct xe_bo {
6666

6767
/**
6868
* @cpu_caching: CPU caching mode. Currently only used for userspace
69-
* objects.
69+
* objects. Exceptions are system memory on DGFX, which is always
70+
* WB.
7071
*/
7172
u16 cpu_caching;
7273

include/uapi/drm/xe_drm.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,13 @@ struct drm_xe_gem_create {
776776
#define DRM_XE_GEM_CPU_CACHING_WC 2
777777
/**
778778
* @cpu_caching: The CPU caching mode to select for this object. If
779-
* mmaping the object the mode selected here will also be used.
779+
* mmaping the object the mode selected here will also be used. The
780+
* exception is when mapping system memory (including data evicted
781+
* to system) on discrete GPUs. The caching mode selected will
782+
* then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency
783+
* between GPU- and CPU is guaranteed. The caching mode of
784+
* existing CPU-mappings will be updated transparently to
785+
* user-space clients.
780786
*/
781787
__u16 cpu_caching;
782788
/** @pad: MBZ */

0 commit comments

Comments
 (0)