Skip to content

Commit ba57b9b

Browse files
committed
Merge tag 'drm-intel-gt-next-2023-06-08' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:

- I915_GEM_CREATE_EXT_SET_PAT for Mesa on Meteorlake.

Driver Changes:

Fixes/improvements/new stuff:

- Use large rings for compute contexts (Chris Wilson)
- Better logging/debug of unexpected GuC communication issues (Michal Wajdeczko)
- Clear out entire reports after reading if not power of 2 size (Ashutosh Dixit)
- Limit lmem allocation size to succeed on SmallBars (Andrzej Hajda)
- perf/OA capture robustness improvements on DG2 (Umesh Nerlige Ramappa)
- Fix error code in intel_gsc_uc_heci_cmd_submit_nonpriv() (Dan Carpenter)

Future platform enablement:

- Add workaround 14016712196 (Tejas Upadhyay)
- HuC loading for MTL (Daniele Ceraolo Spurio)
- Allow user to set cache at BO creation (Fei Yang)

Miscellaneous:

- Use system include style for drm headers (Jani Nikula)
- Drop legacy CTB definitions (Michal Wajdeczko)
- Turn off the timer to sample frequencies when GT is parked (Ashutosh Dixit)
- Make PMU sample array two-dimensional (Ashutosh Dixit)
- Use the correct error value when kernel_context() fails (Andi Shyti)
- Fix second parameter type of pre-gen8 pte_encode callbacks (Nathan Chancellor)
- Fix parameter in gmch_ggtt_insert_{entries, page}() (Nathan Chancellor)
- Fix size_t format specifier in gsccs_send_message() (Nathan Chancellor)
- Use the fdinfo helper (Tvrtko Ursulin)
- Add some missing error propagation (Tvrtko Ursulin)
- Reduce I915_MAX_GT to 2 (Matt Atwood)
- Rename I915_PMU_MAX_GTS to I915_PMU_MAX_GT (Matt Atwood)
- Remove some obsolete definitions (John Harrison)

Merges:

- Merge drm/drm-next into drm-intel-gt-next (Tvrtko Ursulin)

Signed-off-by: Dave Airlie <[email protected]>
From: Tvrtko Ursulin <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/ZIH09fqe5v5yArsu@tursulin-desk
2 parents 959294e + 2433584 commit ba57b9b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1052
-438
lines changed

drivers/gpu/drm/i915/Kconfig.debug

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ config DRM_I915_SW_FENCE_CHECK_DAG
157157
config DRM_I915_DEBUG_GUC
158158
bool "Enable additional driver debugging for GuC"
159159
depends on DRM_I915
160+
select STACKDEPOT
160161
default n
161162
help
162163
Choose this option to turn on extra driver debugging that may affect

drivers/gpu/drm/i915/gem/i915_gem_context.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -964,7 +964,11 @@ static int intel_context_set_gem(struct intel_context *ce,
964964
RCU_INIT_POINTER(ce->gem_context, ctx);
965965

966966
GEM_BUG_ON(intel_context_is_pinned(ce));
967-
ce->ring_size = SZ_16K;
967+
968+
if (ce->engine->class == COMPUTE_CLASS)
969+
ce->ring_size = SZ_512K;
970+
else
971+
ce->ring_size = SZ_16K;
968972

969973
i915_vm_put(ce->vm);
970974
ce->vm = i915_gem_context_get_eb_vm(ctx);

drivers/gpu/drm/i915/gem/i915_gem_create.c

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,7 @@ struct create_ext {
245245
unsigned int n_placements;
246246
unsigned int placement_mask;
247247
unsigned long flags;
248+
unsigned int pat_index;
248249
};
249250

250251
static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,43 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
394395
return 0;
395396
}
396397

398+
static int ext_set_pat(struct i915_user_extension __user *base, void *data)
399+
{
400+
struct create_ext *ext_data = data;
401+
struct drm_i915_private *i915 = ext_data->i915;
402+
struct drm_i915_gem_create_ext_set_pat ext;
403+
unsigned int max_pat_index;
404+
405+
BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
406+
offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
407+
408+
/* Limiting the extension only to Meteor Lake */
409+
if (!IS_METEORLAKE(i915))
410+
return -ENODEV;
411+
412+
if (copy_from_user(&ext, base, sizeof(ext)))
413+
return -EFAULT;
414+
415+
max_pat_index = INTEL_INFO(i915)->max_pat_index;
416+
417+
if (ext.pat_index > max_pat_index) {
418+
drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
419+
ext.pat_index);
420+
return -EINVAL;
421+
}
422+
423+
ext_data->pat_index = ext.pat_index;
424+
425+
return 0;
426+
}
427+
397428
static const i915_user_extension_fn create_extensions[] = {
398429
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
399430
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
431+
[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
400432
};
401433

434+
#define PAT_INDEX_NOT_SET 0xffff
402435
/**
403436
* i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
404437
* @dev: drm device pointer
@@ -418,6 +451,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
418451
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
419452
return -EINVAL;
420453

454+
ext_data.pat_index = PAT_INDEX_NOT_SET;
421455
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
422456
create_extensions,
423457
ARRAY_SIZE(create_extensions),
@@ -454,5 +488,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
454488
if (IS_ERR(obj))
455489
return PTR_ERR(obj);
456490

491+
if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
492+
i915_gem_object_set_pat_index(obj, ext_data.pat_index);
493+
/* Mark pat_index is set by UMD */
494+
obj->pat_set_by_user = true;
495+
}
496+
457497
return i915_gem_publish(obj, file, &args->size, &args->handle);
458498
}

drivers/gpu/drm/i915/gem/i915_gem_object.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
208208
if (!(obj->flags & I915_BO_ALLOC_USER))
209209
return false;
210210

211+
/*
212+
* Always flush cache for UMD objects at creation time.
213+
*/
214+
if (obj->pat_set_by_user)
215+
return true;
216+
211217
/*
212218
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
213219
* possible for userspace to bypass the GTT caching bits set by the

drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -348,8 +348,10 @@ static int live_parallel_switch(void *arg)
348348
continue;
349349

350350
ce = intel_context_create(data[m].ce[0]->engine);
351-
if (IS_ERR(ce))
351+
if (IS_ERR(ce)) {
352+
err = PTR_ERR(ce);
352353
goto out;
354+
}
353355

354356
err = intel_context_pin(ce);
355357
if (err) {
@@ -369,8 +371,10 @@ static int live_parallel_switch(void *arg)
369371

370372
worker = kthread_create_worker(0, "igt/parallel:%s",
371373
data[n].ce[0]->engine->name);
372-
if (IS_ERR(worker))
374+
if (IS_ERR(worker)) {
375+
err = PTR_ERR(worker);
373376
goto out;
377+
}
374378

375379
data[n].worker = worker;
376380
}
@@ -399,8 +403,10 @@ static int live_parallel_switch(void *arg)
399403
}
400404
}
401405

402-
if (igt_live_test_end(&t))
403-
err = -EIO;
406+
if (igt_live_test_end(&t)) {
407+
err = err ?: -EIO;
408+
break;
409+
}
404410
}
405411

406412
out:

drivers/gpu/drm/i915/gt/gen8_engine_cs.c

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,14 +177,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
177177
return cs;
178178
}
179179

180+
static int mtl_dummy_pipe_control(struct i915_request *rq)
181+
{
182+
/* Wa_14016712196 */
183+
if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
184+
IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
185+
u32 *cs;
186+
187+
/* dummy PIPE_CONTROL + depth flush */
188+
cs = intel_ring_begin(rq, 6);
189+
if (IS_ERR(cs))
190+
return PTR_ERR(cs);
191+
cs = gen12_emit_pipe_control(cs,
192+
0,
193+
PIPE_CONTROL_DEPTH_CACHE_FLUSH,
194+
LRC_PPHWSP_SCRATCH_ADDR);
195+
intel_ring_advance(rq, cs);
196+
}
197+
198+
return 0;
199+
}
200+
180201
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
181202
{
182203
struct intel_engine_cs *engine = rq->engine;
183204

184205
if (mode & EMIT_FLUSH) {
185206
u32 flags = 0;
207+
int err;
186208
u32 *cs;
187209

210+
err = mtl_dummy_pipe_control(rq);
211+
if (err)
212+
return err;
213+
188214
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
189215
flags |= PIPE_CONTROL_FLUSH_L3;
190216
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -217,6 +243,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
217243
if (mode & EMIT_INVALIDATE) {
218244
u32 flags = 0;
219245
u32 *cs, count;
246+
int err;
247+
248+
err = mtl_dummy_pipe_control(rq);
249+
if (err)
250+
return err;
220251

221252
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
222253
flags |= PIPE_CONTROL_TLB_INVALIDATE;
@@ -733,6 +764,13 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
733764
PIPE_CONTROL_DC_FLUSH_ENABLE |
734765
PIPE_CONTROL_FLUSH_ENABLE);
735766

767+
/* Wa_14016712196 */
768+
if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
769+
IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
770+
/* dummy PIPE_CONTROL + depth flush */
771+
cs = gen12_emit_pipe_control(cs, 0,
772+
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
773+
736774
if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
737775
/* Wa_1409600907 */
738776
flags |= PIPE_CONTROL_DEPTH_STALL;

drivers/gpu/drm/i915/gt/intel_ggtt.c

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1015,16 +1015,16 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
10151015

10161016
/*
10171017
* For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
1018-
* so these PTE encode functions are left with using cache_level.
1018+
* so the switch-case statements in these PTE encode functions are still valid.
10191019
* See translation table LEGACY_CACHELEVEL.
10201020
*/
10211021
static u64 snb_pte_encode(dma_addr_t addr,
1022-
enum i915_cache_level level,
1022+
unsigned int pat_index,
10231023
u32 flags)
10241024
{
10251025
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
10261026

1027-
switch (level) {
1027+
switch (pat_index) {
10281028
case I915_CACHE_L3_LLC:
10291029
case I915_CACHE_LLC:
10301030
pte |= GEN6_PTE_CACHE_LLC;
@@ -1033,19 +1033,19 @@ static u64 snb_pte_encode(dma_addr_t addr,
10331033
pte |= GEN6_PTE_UNCACHED;
10341034
break;
10351035
default:
1036-
MISSING_CASE(level);
1036+
MISSING_CASE(pat_index);
10371037
}
10381038

10391039
return pte;
10401040
}
10411041

10421042
static u64 ivb_pte_encode(dma_addr_t addr,
1043-
enum i915_cache_level level,
1043+
unsigned int pat_index,
10441044
u32 flags)
10451045
{
10461046
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
10471047

1048-
switch (level) {
1048+
switch (pat_index) {
10491049
case I915_CACHE_L3_LLC:
10501050
pte |= GEN7_PTE_CACHE_L3_LLC;
10511051
break;
@@ -1056,46 +1056,46 @@ static u64 ivb_pte_encode(dma_addr_t addr,
10561056
pte |= GEN6_PTE_UNCACHED;
10571057
break;
10581058
default:
1059-
MISSING_CASE(level);
1059+
MISSING_CASE(pat_index);
10601060
}
10611061

10621062
return pte;
10631063
}
10641064

10651065
static u64 byt_pte_encode(dma_addr_t addr,
1066-
enum i915_cache_level level,
1066+
unsigned int pat_index,
10671067
u32 flags)
10681068
{
10691069
gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
10701070

10711071
if (!(flags & PTE_READ_ONLY))
10721072
pte |= BYT_PTE_WRITEABLE;
10731073

1074-
if (level != I915_CACHE_NONE)
1074+
if (pat_index != I915_CACHE_NONE)
10751075
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
10761076

10771077
return pte;
10781078
}
10791079

10801080
static u64 hsw_pte_encode(dma_addr_t addr,
1081-
enum i915_cache_level level,
1081+
unsigned int pat_index,
10821082
u32 flags)
10831083
{
10841084
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
10851085

1086-
if (level != I915_CACHE_NONE)
1086+
if (pat_index != I915_CACHE_NONE)
10871087
pte |= HSW_WB_LLC_AGE3;
10881088

10891089
return pte;
10901090
}
10911091

10921092
static u64 iris_pte_encode(dma_addr_t addr,
1093-
enum i915_cache_level level,
1093+
unsigned int pat_index,
10941094
u32 flags)
10951095
{
10961096
gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
10971097

1098-
switch (level) {
1098+
switch (pat_index) {
10991099
case I915_CACHE_NONE:
11001100
break;
11011101
case I915_CACHE_WT:
@@ -1326,6 +1326,9 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
13261326
ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
13271327
ggtt->error_capture.size);
13281328

1329+
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1330+
intel_uc_resume_mappings(&gt->uc);
1331+
13291332
ggtt->invalidate(ggtt);
13301333

13311334
if (flush)

drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,21 +18,21 @@
1818
static void gmch_ggtt_insert_page(struct i915_address_space *vm,
1919
dma_addr_t addr,
2020
u64 offset,
21-
enum i915_cache_level cache_level,
21+
unsigned int pat_index,
2222
u32 unused)
2323
{
24-
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
24+
unsigned int flags = (pat_index == I915_CACHE_NONE) ?
2525
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2626

2727
intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2828
}
2929

3030
static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
3131
struct i915_vma_resource *vma_res,
32-
enum i915_cache_level cache_level,
32+
unsigned int pat_index,
3333
u32 unused)
3434
{
35-
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
35+
unsigned int flags = (pat_index == I915_CACHE_NONE) ?
3636
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
3737

3838
intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,

drivers/gpu/drm/i915/gt/selftest_execlists.c

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1530,22 +1530,26 @@ static int live_busywait_preempt(void *arg)
15301530
struct drm_i915_gem_object *obj;
15311531
struct i915_vma *vma;
15321532
enum intel_engine_id id;
1533-
int err = -ENOMEM;
15341533
u32 *map;
1534+
int err;
15351535

15361536
/*
15371537
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
15381538
* preempt the busywaits used to synchronise between rings.
15391539
*/
15401540

15411541
ctx_hi = kernel_context(gt->i915, NULL);
1542-
if (!ctx_hi)
1543-
return -ENOMEM;
1542+
if (IS_ERR(ctx_hi))
1543+
return PTR_ERR(ctx_hi);
1544+
15441545
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
15451546

15461547
ctx_lo = kernel_context(gt->i915, NULL);
1547-
if (!ctx_lo)
1548+
if (IS_ERR(ctx_lo)) {
1549+
err = PTR_ERR(ctx_lo);
15481550
goto err_ctx_hi;
1551+
}
1552+
15491553
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
15501554

15511555
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);

drivers/gpu/drm/i915/gt/selftest_tlb.c

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -190,11 +190,18 @@ pte_tlbinv(struct intel_context *ce,
190190

191191
static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
192192
{
193+
struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
194+
resource_size_t size = SZ_1G;
195+
193196
/*
194197
* Allocation of largest possible page size allows to test all types
195-
* of pages.
198+
* of pages. To succeed with both allocations, especially in case of Small
199+
* BAR, try to allocate no more than quarter of mappable memory.
196200
*/
197-
return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
201+
if (mr && size > mr->io_size / 4)
202+
size = mr->io_size / 4;
203+
204+
return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
198205
}
199206

200207
static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)

0 commit comments

Comments
 (0)