Skip to content

Commit 8ae4be5

Browse files
committed
Merge tag 'drm-intel-next-fixes-2022-08-11' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
- disable pci resize on 32-bit systems (Nirmoy)
- don't leak the ccs state (Matt)
- TLB invalidation fixes (Chris)

[now with all fixes of fixes]

Signed-off-by: Dave Airlie <[email protected]>
From: Rodrigo Vivi <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents 568035b + 9d50bff commit 8ae4be5

15 files changed

+183
-55
lines changed

drivers/gpu/drm/i915/gem/i915_gem_object.c

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
268268
*/
269269
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
270270
{
271-
assert_object_held(obj);
271+
assert_object_held_shared(obj);
272272

273273
if (!list_empty(&obj->vma.list)) {
274274
struct i915_vma *vma;
@@ -331,15 +331,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
331331
continue;
332332
}
333333

334-
if (!i915_gem_object_trylock(obj, NULL)) {
335-
/* busy, toss it back to the pile */
336-
if (llist_add(&obj->freed, &i915->mm.free_list))
337-
queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
338-
continue;
339-
}
340-
341334
__i915_gem_object_pages_fini(obj);
342-
i915_gem_object_unlock(obj);
343335
__i915_gem_free_object(obj);
344336

345337
/* But keep the pointer alive for RCU-protected lookups */
@@ -359,7 +351,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
359351
static void __i915_gem_free_work(struct work_struct *work)
360352
{
361353
struct drm_i915_private *i915 =
362-
container_of(work, struct drm_i915_private, mm.free_work.work);
354+
container_of(work, struct drm_i915_private, mm.free_work);
363355

364356
i915_gem_flush_free_objects(i915);
365357
}
@@ -391,7 +383,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
391383
*/
392384

393385
if (llist_add(&obj->freed, &i915->mm.free_list))
394-
queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
386+
queue_work(i915->wq, &i915->mm.free_work);
395387
}
396388

397389
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -745,7 +737,7 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
745737

746738
void i915_gem_init__objects(struct drm_i915_private *i915)
747739
{
748-
INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
740+
INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
749741
}
750742

751743
void i915_objects_module_exit(void)

drivers/gpu/drm/i915/gem/i915_gem_object_types.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,6 @@ struct drm_i915_gem_object {
335335
#define I915_BO_READONLY BIT(7)
336336
#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
337337
#define I915_BO_PROTECTED BIT(9)
338-
#define I915_BO_WAS_BOUND_BIT 10
339338
/**
340339
* @mem_flags - Mutable placement-related flags
341340
*
@@ -616,6 +615,8 @@ struct drm_i915_gem_object {
616615
* pages were last acquired.
617616
*/
618617
bool dirty:1;
618+
619+
u32 tlb;
619620
} mm;
620621

621622
struct {

drivers/gpu/drm/i915/gem/i915_gem_pages.c

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,15 @@
66

77
#include <drm/drm_cache.h>
88

9+
#include "gt/intel_gt.h"
10+
#include "gt/intel_gt_pm.h"
11+
912
#include "i915_drv.h"
1013
#include "i915_gem_object.h"
1114
#include "i915_scatterlist.h"
1215
#include "i915_gem_lmem.h"
1316
#include "i915_gem_mman.h"
1417

15-
#include "gt/intel_gt.h"
16-
1718
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
1819
struct sg_table *pages,
1920
unsigned int sg_page_sizes)
@@ -190,6 +191,18 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
190191
vunmap(ptr);
191192
}
192193

194+
static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
195+
{
196+
struct drm_i915_private *i915 = to_i915(obj->base.dev);
197+
struct intel_gt *gt = to_gt(i915);
198+
199+
if (!obj->mm.tlb)
200+
return;
201+
202+
intel_gt_invalidate_tlb(gt, obj->mm.tlb);
203+
obj->mm.tlb = 0;
204+
}
205+
193206
struct sg_table *
194207
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
195208
{
@@ -215,13 +228,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
215228
__i915_gem_object_reset_page_iter(obj);
216229
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
217230

218-
if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
219-
struct drm_i915_private *i915 = to_i915(obj->base.dev);
220-
intel_wakeref_t wakeref;
221-
222-
with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
223-
intel_gt_invalidate_tlbs(to_gt(i915));
224-
}
231+
flush_tlb_invalidate(obj);
225232

226233
return pages;
227234
}

drivers/gpu/drm/i915/gt/intel_gt.c

Lines changed: 60 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,9 @@
1111
#include "pxp/intel_pxp.h"
1212

1313
#include "i915_drv.h"
14+
#include "i915_perf_oa_regs.h"
1415
#include "intel_context.h"
16+
#include "intel_engine_pm.h"
1517
#include "intel_engine_regs.h"
1618
#include "intel_ggtt_gmch.h"
1719
#include "intel_gt.h"
@@ -36,8 +38,6 @@ static void __intel_gt_init_early(struct intel_gt *gt)
3638
{
3739
spin_lock_init(&gt->irq_lock);
3840

39-
mutex_init(&gt->tlb_invalidate_lock);
40-
4141
INIT_LIST_HEAD(&gt->closed_vma);
4242
spin_lock_init(&gt->closed_lock);
4343

@@ -48,6 +48,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
4848
intel_gt_init_reset(gt);
4949
intel_gt_init_requests(gt);
5050
intel_gt_init_timelines(gt);
51+
mutex_init(&gt->tlb.invalidate_lock);
52+
seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
5153
intel_gt_pm_init_early(gt);
5254

5355
intel_uc_init_early(&gt->uc);
@@ -768,6 +770,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
768770
intel_gt_fini_requests(gt);
769771
intel_gt_fini_reset(gt);
770772
intel_gt_fini_timelines(gt);
773+
mutex_destroy(&gt->tlb.invalidate_lock);
771774
intel_engines_free(gt);
772775
}
773776
}
@@ -906,7 +909,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
906909
return rb;
907910
}
908911

909-
void intel_gt_invalidate_tlbs(struct intel_gt *gt)
912+
static void mmio_invalidate_full(struct intel_gt *gt)
910913
{
911914
static const i915_reg_t gen8_regs[] = {
912915
[RENDER_CLASS] = GEN8_RTCR,
@@ -924,13 +927,11 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
924927
struct drm_i915_private *i915 = gt->i915;
925928
struct intel_uncore *uncore = gt->uncore;
926929
struct intel_engine_cs *engine;
930+
intel_engine_mask_t awake, tmp;
927931
enum intel_engine_id id;
928932
const i915_reg_t *regs;
929933
unsigned int num = 0;
930934

931-
if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
932-
return;
933-
934935
if (GRAPHICS_VER(i915) == 12) {
935936
regs = gen12_regs;
936937
num = ARRAY_SIZE(gen12_regs);
@@ -945,41 +946,50 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
945946
"Platform does not implement TLB invalidation!"))
946947
return;
947948

948-
GEM_TRACE("\n");
949-
950-
assert_rpm_wakelock_held(&i915->runtime_pm);
951-
952-
mutex_lock(&gt->tlb_invalidate_lock);
953949
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
954950

955951
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
956952

953+
awake = 0;
957954
for_each_engine(engine, gt, id) {
958955
struct reg_and_bit rb;
959956

957+
if (!intel_engine_pm_is_awake(engine))
958+
continue;
959+
960960
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
961961
if (!i915_mmio_reg_offset(rb.reg))
962962
continue;
963963

964964
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
965+
awake |= engine->mask;
965966
}
966967

968+
GT_TRACE(gt, "invalidated engines %08x\n", awake);
969+
970+
/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
971+
if (awake &&
972+
(IS_TIGERLAKE(i915) ||
973+
IS_DG1(i915) ||
974+
IS_ROCKETLAKE(i915) ||
975+
IS_ALDERLAKE_S(i915) ||
976+
IS_ALDERLAKE_P(i915)))
977+
intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
978+
967979
spin_unlock_irq(&uncore->lock);
968980

969-
for_each_engine(engine, gt, id) {
981+
for_each_engine_masked(engine, gt, awake, tmp) {
982+
struct reg_and_bit rb;
983+
970984
/*
971985
* HW architecture suggests typical invalidation time at 40us,
972986
* with pessimistic cases up to 100us and a recommendation to
973987
* cap at 1ms. We go a bit higher just in case.
974988
*/
975989
const unsigned int timeout_us = 100;
976990
const unsigned int timeout_ms = 4;
977-
struct reg_and_bit rb;
978991

979992
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
980-
if (!i915_mmio_reg_offset(rb.reg))
981-
continue;
982-
983993
if (__intel_wait_for_register_fw(uncore,
984994
rb.reg, rb.bit, 0,
985995
timeout_us, timeout_ms,
@@ -996,5 +1006,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
9961006
* transitions.
9971007
*/
9981008
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
999-
mutex_unlock(&gt->tlb_invalidate_lock);
1009+
}
1010+
1011+
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
1012+
{
1013+
u32 cur = intel_gt_tlb_seqno(gt);
1014+
1015+
/* Only skip if a *full* TLB invalidate barrier has passed */
1016+
return (s32)(cur - ALIGN(seqno, 2)) > 0;
1017+
}
1018+
1019+
void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
1020+
{
1021+
intel_wakeref_t wakeref;
1022+
1023+
if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
1024+
return;
1025+
1026+
if (intel_gt_is_wedged(gt))
1027+
return;
1028+
1029+
if (tlb_seqno_passed(gt, seqno))
1030+
return;
1031+
1032+
with_intel_gt_pm_if_awake(gt, wakeref) {
1033+
mutex_lock(&gt->tlb.invalidate_lock);
1034+
if (tlb_seqno_passed(gt, seqno))
1035+
goto unlock;
1036+
1037+
mmio_invalidate_full(gt);
1038+
1039+
write_seqcount_invalidate(&gt->tlb.seqno);
1040+
unlock:
1041+
mutex_unlock(&gt->tlb.invalidate_lock);
1042+
}
10001043
}

drivers/gpu/drm/i915/gt/intel_gt.h

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info,
101101

102102
void intel_gt_watchdog_work(struct work_struct *work);
103103

104-
void intel_gt_invalidate_tlbs(struct intel_gt *gt);
104+
static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
105+
{
106+
return seqprop_sequence(&gt->tlb.seqno);
107+
}
108+
109+
static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
110+
{
111+
return intel_gt_tlb_seqno(gt) | 1;
112+
}
113+
114+
void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
105115

106116
#endif /* __INTEL_GT_H__ */

drivers/gpu/drm/i915/gt/intel_gt_pm.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
5555
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
5656
intel_gt_pm_put(gt), tmp = 0)
5757

58+
#define with_intel_gt_pm_if_awake(gt, wf) \
59+
for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
60+
5861
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
5962
{
6063
return intel_wakeref_wait_for_idle(&gt->wakeref);

drivers/gpu/drm/i915/gt/intel_gt_types.h

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
#include <linux/llist.h>
1212
#include <linux/mutex.h>
1313
#include <linux/notifier.h>
14+
#include <linux/seqlock.h>
1415
#include <linux/spinlock.h>
1516
#include <linux/types.h>
1617
#include <linux/workqueue.h>
@@ -83,7 +84,22 @@ struct intel_gt {
8384
struct intel_uc uc;
8485
struct intel_gsc gsc;
8586

86-
struct mutex tlb_invalidate_lock;
87+
struct {
88+
/* Serialize global tlb invalidations */
89+
struct mutex invalidate_lock;
90+
91+
/*
92+
* Batch TLB invalidations
93+
*
94+
* After unbinding the PTE, we need to ensure the TLBs
95+
* are invalidated prior to releasing the physical pages.
96+
* But we only need one such invalidation for all unbinds,
97+
* so we track how many TLB invalidations have been
98+
* performed since unbinding the PTE and only emit an extra
99+
* invalidate if no full barrier has been passed.
100+
*/
101+
seqcount_mutex_t seqno;
102+
} tlb;
87103

88104
struct i915_wa_list wa_list;
89105

drivers/gpu/drm/i915/gt/intel_migrate.c

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -708,7 +708,7 @@ intel_context_migrate_copy(struct intel_context *ce,
708708
u8 src_access, dst_access;
709709
struct i915_request *rq;
710710
int src_sz, dst_sz;
711-
bool ccs_is_src;
711+
bool ccs_is_src, overwrite_ccs;
712712
int err;
713713

714714
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -749,6 +749,8 @@ intel_context_migrate_copy(struct intel_context *ce,
749749
get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
750750
}
751751

752+
overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;
753+
752754
src_offset = 0;
753755
dst_offset = CHUNK_SZ;
754756
if (HAS_64K_PAGES(ce->engine->i915)) {
@@ -852,6 +854,25 @@ intel_context_migrate_copy(struct intel_context *ce,
852854
if (err)
853855
goto out_rq;
854856
ccs_bytes_to_cpy -= ccs_sz;
857+
} else if (overwrite_ccs) {
858+
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
859+
if (err)
860+
goto out_rq;
861+
862+
/*
863+
* While we can't always restore/manage the CCS state,
864+
* we still need to ensure we don't leak the CCS state
865+
* from the previous user, so make sure we overwrite it
866+
* with something.
867+
*/
868+
err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS,
869+
dst_offset, DIRECT_ACCESS, len);
870+
if (err)
871+
goto out_rq;
872+
873+
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
874+
if (err)
875+
goto out_rq;
855876
}
856877

857878
/* Arbitration is re-enabled between requests. */

0 commit comments

Comments
 (0)