Skip to content

Commit 68b89e2

Browse files
committed
Merge tag 'drm-intel-gt-next-2024-04-26' of https://anongit.freedesktop.org/git/drm/drm-intel into drm-next
UAPI Changes: - drm/i915/guc: Use context hints for GT frequency Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param - I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all GuC submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint - drm/i915/gt: Enable only one CCS for compute workload Enable only one CCS engine by default with all the compute slices allocated to it. While generating the list of UABI engines to be exposed to the user, exclude any additional CCS engines beyond the first instance. *** NOTE: This W/A will make all DG2 SKUs appear like single CCS SKUs by default to mitigate a hardware bug. All the EUs will still remain usable, and all the userspace drivers have been confirmed to be able to dynamically detect the change in number of CCS engines and adjust. For the smaller percent of applications that get perf benefit from letting the userspace driver dispatch across all 4 CCS engines we will be introducing a sysfs control as a later patch to choose 4 CCS each with 25% EUs (or 50% if 2 CCS).
NOTE: A regression has been reported at https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10895 However Andi has been triaging the issue and we're closing in on a fix for the gap in the W/A implementation: https://lists.freedesktop.org/archives/intel-gfx/2024-April/348747.html Driver Changes: - Add new workarounds and fix existing ones: Wa_14018575942 (MTL), Wa_16019325821 (Gen12.70), Wa_14019159160 (MTL), Wa_16015675438, Wa_14020495402 (Gen12.70) (Tejas, John, Lucas) - Fix UAF on destroy against retire race and remove two earlier partial fixes (Janusz) - Limit the reserved VM space to only the platforms that need it (Andi) - Reset queue_priority_hint on parking for execlist platforms (Chris) - Fix GT reset when GuC submission is disabled (Nirmoy) - Correct capture of EIR register on hang (John) - Remove usage of the deprecated ida_simple_xx() API - Refactor confusing __intel_gt_reset() (Nirmoy) - Fix the fix for GuC reset lock confusion (John) - Simplify/extend platform check for Wa_14018913170 (John) - Replace dev_priv with i915 (Andi) - Add and use gt_to_guc() wrapper (Andi) - Remove bogus null check (Rodrigo, Dan) - Selftest improvements (Janusz, Nirmoy, Daniele) Signed-off-by: Dave Airlie <[email protected]> From: Joonas Lahtinen <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents b84bc94 + 4d3421e commit 68b89e2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+414
-156
lines changed

drivers/gpu/drm/i915/gem/i915_gem_context.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
879879
struct i915_gem_proto_context *pc,
880880
struct drm_i915_gem_context_param *args)
881881
{
882+
struct drm_i915_private *i915 = fpriv->i915;
882883
int ret = 0;
883884

884885
switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
904905
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
905906
break;
906907

908+
case I915_CONTEXT_PARAM_LOW_LATENCY:
909+
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
910+
pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
911+
else
912+
ret = -EINVAL;
913+
break;
914+
907915
case I915_CONTEXT_PARAM_RECOVERABLE:
908916
if (args->size)
909917
ret = -EINVAL;
@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
9921000
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
9931001
ret = intel_context_reconfigure_sseu(ce, sseu);
9941002

1003+
if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags))
1004+
__set_bit(CONTEXT_LOW_LATENCY, &ce->flags);
1005+
9951006
return ret;
9961007
}
9971008

@@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915,
16301641
if (vm)
16311642
ctx->vm = vm;
16321643

1644+
/* Assign early so intel_context_set_gem can access these flags */
1645+
ctx->user_flags = pc->user_flags;
1646+
16331647
mutex_init(&ctx->engines_mutex);
16341648
if (pc->num_user_engines >= 0) {
16351649
i915_gem_context_set_user_engines(ctx);
@@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
16521666
* is no remap info, it will be a NOP. */
16531667
ctx->remap_slice = ALL_L3_SLICES(i915);
16541668

1655-
ctx->user_flags = pc->user_flags;
1656-
16571669
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
16581670
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
16591671

drivers/gpu/drm/i915/gem/i915_gem_context_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ struct i915_gem_context {
338338
#define UCONTEXT_BANNABLE 2
339339
#define UCONTEXT_RECOVERABLE 3
340340
#define UCONTEXT_PERSISTENCE 4
341+
#define UCONTEXT_LOW_LATENCY 5
341342

342343
/**
343344
* @flags: small set of booleans

drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ struct i915_execbuffer {
255255
struct intel_context *context; /* logical state for the request */
256256
struct i915_gem_context *gem_context; /** caller's context */
257257
intel_wakeref_t wakeref;
258-
intel_wakeref_t wakeref_gt0;
259258

260259
/** our requests to build */
261260
struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -2457,15 +2456,15 @@ static int eb_submit(struct i915_execbuffer *eb)
24572456
* The engine index is returned.
24582457
*/
24592458
static unsigned int
2460-
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
2459+
gen8_dispatch_bsd_engine(struct drm_i915_private *i915,
24612460
struct drm_file *file)
24622461
{
24632462
struct drm_i915_file_private *file_priv = file->driver_priv;
24642463

24652464
/* Check whether the file_priv has already selected one ring. */
24662465
if ((int)file_priv->bsd_engine < 0)
24672466
file_priv->bsd_engine =
2468-
get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
2467+
get_random_u32_below(i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
24692468

24702469
return file_priv->bsd_engine;
24712470
}
@@ -2686,7 +2685,6 @@ static int
26862685
eb_select_engine(struct i915_execbuffer *eb)
26872686
{
26882687
struct intel_context *ce, *child;
2689-
struct intel_gt *gt;
26902688
unsigned int idx;
26912689
int err;
26922690

@@ -2710,17 +2708,10 @@ eb_select_engine(struct i915_execbuffer *eb)
27102708
}
27112709
}
27122710
eb->num_batches = ce->parallel.number_children + 1;
2713-
gt = ce->engine->gt;
27142711

27152712
for_each_child(ce, child)
27162713
intel_context_get(child);
27172714
eb->wakeref = intel_gt_pm_get(ce->engine->gt);
2718-
/*
2719-
* Keep GT0 active on MTL so that i915_vma_parked() doesn't
2720-
* free VMAs while execbuf ioctl is validating VMAs.
2721-
*/
2722-
if (gt->info.id)
2723-
eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
27242715

27252716
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
27262717
err = intel_context_alloc_state(ce);
@@ -2759,9 +2750,6 @@ eb_select_engine(struct i915_execbuffer *eb)
27592750
return err;
27602751

27612752
err:
2762-
if (gt->info.id)
2763-
intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
2764-
27652753
intel_gt_pm_put(ce->engine->gt, eb->wakeref);
27662754
for_each_child(ce, child)
27672755
intel_context_put(child);
@@ -2775,12 +2763,6 @@ eb_put_engine(struct i915_execbuffer *eb)
27752763
struct intel_context *child;
27762764

27772765
i915_vm_put(eb->context->vm);
2778-
/*
2779-
* This works in conjunction with eb_select_engine() to prevent
2780-
* i915_vma_parked() from interfering while execbuf validates vmas.
2781-
*/
2782-
if (eb->gt->info.id)
2783-
intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
27842766
intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
27852767
for_each_child(eb->context, child)
27862768
intel_context_put(child);

drivers/gpu/drm/i915/gem/i915_gem_shmem.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,
654654

655655
/* Allocate a new GEM object and fill it with the supplied data */
656656
struct drm_i915_gem_object *
657-
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
657+
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
658658
const void *data, resource_size_t size)
659659
{
660660
struct drm_i915_gem_object *obj;
@@ -663,8 +663,8 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
663663
resource_size_t offset;
664664
int err;
665665

666-
GEM_WARN_ON(IS_DGFX(dev_priv));
667-
obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
666+
GEM_WARN_ON(IS_DGFX(i915));
667+
obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
668668
if (IS_ERR(obj))
669669
return obj;
670670

drivers/gpu/drm/i915/gem/i915_gem_stolen.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ struct drm_i915_gem_object;
1414

1515
#define i915_stolen_fb drm_mm_node
1616

17-
int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
17+
int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
1818
struct drm_mm_node *node, u64 size,
1919
unsigned alignment);
20-
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
20+
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
2121
struct drm_mm_node *node, u64 size,
2222
unsigned alignment, u64 start,
2323
u64 end);
24-
void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
24+
void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
2525
struct drm_mm_node *node);
2626
struct intel_memory_region *
2727
i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
@@ -31,7 +31,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
3131
u16 instance);
3232

3333
struct drm_i915_gem_object *
34-
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
34+
i915_gem_object_create_stolen(struct drm_i915_private *i915,
3535
resource_size_t size);
3636

3737
bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);

drivers/gpu/drm/i915/gem/i915_gem_tiling.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -343,12 +343,12 @@ int
343343
i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
344344
struct drm_file *file)
345345
{
346-
struct drm_i915_private *dev_priv = to_i915(dev);
346+
struct drm_i915_private *i915 = to_i915(dev);
347347
struct drm_i915_gem_set_tiling *args = data;
348348
struct drm_i915_gem_object *obj;
349349
int err;
350350

351-
if (!to_gt(dev_priv)->ggtt->num_fences)
351+
if (!to_gt(i915)->ggtt->num_fences)
352352
return -EOPNOTSUPP;
353353

354354
obj = i915_gem_object_lookup(file, args->handle);
@@ -374,9 +374,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
374374
args->stride = 0;
375375
} else {
376376
if (args->tiling_mode == I915_TILING_X)
377-
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
377+
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_x;
378378
else
379-
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
379+
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_y;
380380

381381
/* Hide bit 17 swizzling from the user. This prevents old Mesa
382382
* from aborting the application on sw fallbacks to bit 17,
@@ -427,11 +427,11 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
427427
struct drm_file *file)
428428
{
429429
struct drm_i915_gem_get_tiling *args = data;
430-
struct drm_i915_private *dev_priv = to_i915(dev);
430+
struct drm_i915_private *i915 = to_i915(dev);
431431
struct drm_i915_gem_object *obj;
432432
int err = -ENOENT;
433433

434-
if (!to_gt(dev_priv)->ggtt->num_fences)
434+
if (!to_gt(i915)->ggtt->num_fences)
435435
return -EOPNOTSUPP;
436436

437437
rcu_read_lock();
@@ -447,10 +447,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
447447

448448
switch (args->tiling_mode) {
449449
case I915_TILING_X:
450-
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
450+
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_x;
451451
break;
452452
case I915_TILING_Y:
453-
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
453+
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_y;
454454
break;
455455
default:
456456
case I915_TILING_NONE:
@@ -459,7 +459,7 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
459459
}
460460

461461
/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
462-
if (dev_priv->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
462+
if (i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
463463
args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
464464
else
465465
args->phys_swizzle_mode = args->swizzle_mode;

drivers/gpu/drm/i915/gem/i915_gem_userptr.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -463,13 +463,13 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
463463
struct drm_file *file)
464464
{
465465
static struct lock_class_key __maybe_unused lock_class;
466-
struct drm_i915_private *dev_priv = to_i915(dev);
466+
struct drm_i915_private *i915 = to_i915(dev);
467467
struct drm_i915_gem_userptr *args = data;
468468
struct drm_i915_gem_object __maybe_unused *obj;
469469
int __maybe_unused ret;
470470
u32 __maybe_unused handle;
471471

472-
if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
472+
if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) {
473473
/* We cannot support coherent userptr objects on hw without
474474
* LLC and broken snooping.
475475
*/
@@ -501,7 +501,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
501501
* On almost all of the older hw, we cannot tell the GPU that
502502
* a page is readonly.
503503
*/
504-
if (!to_gt(dev_priv)->vm->has_read_only)
504+
if (!to_gt(i915)->vm->has_read_only)
505505
return -ENODEV;
506506
}
507507

drivers/gpu/drm/i915/gem/selftests/huge_pages.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,19 +1969,19 @@ int i915_gem_huge_page_mock_selftests(void)
19691969
SUBTEST(igt_mock_memory_region_huge_pages),
19701970
SUBTEST(igt_mock_ppgtt_misaligned_dma),
19711971
};
1972-
struct drm_i915_private *dev_priv;
1972+
struct drm_i915_private *i915;
19731973
struct i915_ppgtt *ppgtt;
19741974
int err;
19751975

1976-
dev_priv = mock_gem_device();
1977-
if (!dev_priv)
1976+
i915 = mock_gem_device();
1977+
if (!i915)
19781978
return -ENOMEM;
19791979

19801980
/* Pretend to be a device which supports the 48b PPGTT */
1981-
RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
1982-
RUNTIME_INFO(dev_priv)->ppgtt_size = 48;
1981+
RUNTIME_INFO(i915)->ppgtt_type = INTEL_PPGTT_FULL;
1982+
RUNTIME_INFO(i915)->ppgtt_size = 48;
19831983

1984-
ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
1984+
ppgtt = i915_ppgtt_create(to_gt(i915), 0);
19851985
if (IS_ERR(ppgtt)) {
19861986
err = PTR_ERR(ppgtt);
19871987
goto out_unlock;
@@ -2005,7 +2005,7 @@ int i915_gem_huge_page_mock_selftests(void)
20052005
out_put:
20062006
i915_vm_put(&ppgtt->vm);
20072007
out_unlock:
2008-
mock_destroy_device(dev_priv);
2008+
mock_destroy_device(i915);
20092009
return err;
20102010
}
20112011

drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "i915_drv.h"
88
#include "i915_selftest.h"
99
#include "gem/i915_gem_context.h"
10+
#include "gt/intel_gt.h"
1011

1112
#include "mock_context.h"
1213
#include "mock_dmabuf.h"
@@ -155,6 +156,7 @@ static int verify_access(struct drm_i915_private *i915,
155156
struct file *file;
156157
u32 *vaddr;
157158
int err = 0, i;
159+
unsigned int mode;
158160

159161
file = mock_file(i915);
160162
if (IS_ERR(file))
@@ -194,7 +196,8 @@ static int verify_access(struct drm_i915_private *i915,
194196
if (err)
195197
goto out_file;
196198

197-
vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
199+
mode = intel_gt_coherent_map_type(to_gt(i915), native_obj, true);
200+
vaddr = i915_gem_object_pin_map_unlocked(native_obj, mode);
198201
if (IS_ERR(vaddr)) {
199202
err = PTR_ERR(vaddr);
200203
goto out_file;

drivers/gpu/drm/i915/gt/gen8_engine_cs.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -740,21 +740,25 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
740740
}
741741

742742
/* Wa_14014475959:dg2 */
743-
#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
744-
static u32 ccs_semaphore_offset(struct i915_request *rq)
743+
/* Wa_16019325821 */
744+
/* Wa_14019159160 */
745+
#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
746+
static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
745747
{
746748
return i915_ggtt_offset(rq->context->state) +
747-
(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
749+
(LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
748750
}
749751

750752
/* Wa_14014475959:dg2 */
751-
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
753+
/* Wa_16019325821 */
754+
/* Wa_14019159160 */
755+
static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
752756
{
753757
int i;
754758

755759
*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
756760
MI_ATOMIC_MOVE;
757-
*cs++ = ccs_semaphore_offset(rq);
761+
*cs++ = hold_switchout_semaphore_offset(rq);
758762
*cs++ = 0;
759763
*cs++ = 1;
760764

@@ -770,7 +774,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
770774
MI_SEMAPHORE_POLL |
771775
MI_SEMAPHORE_SAD_EQ_SDD;
772776
*cs++ = 0;
773-
*cs++ = ccs_semaphore_offset(rq);
777+
*cs++ = hold_switchout_semaphore_offset(rq);
774778
*cs++ = 0;
775779

776780
return cs;
@@ -787,8 +791,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
787791
cs = gen12_emit_preempt_busywait(rq, cs);
788792

789793
/* Wa_14014475959:dg2 */
790-
if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
791-
cs = ccs_emit_wa_busywait(rq, cs);
794+
/* Wa_16019325821 */
795+
/* Wa_14019159160 */
796+
if (intel_engine_uses_wa_hold_switchout(rq->engine))
797+
cs = hold_switchout_emit_wa_busywait(rq, cs);
792798

793799
rq->tail = intel_ring_offset(rq, cs);
794800
assert_ring_tail_valid(rq->ring, rq->tail);

0 commit comments

Comments
 (0)