Skip to content

Commit 568a2e6

Browse files
Chris Wilson authored and Andi Shyti committed
drm/i915/gt: Move TLB invalidation to its own file
Prepare for supporting more TLB invalidation scenarios by moving the current MMIO invalidation to its own file. Signed-off-by: Chris Wilson <[email protected]> Signed-off-by: Mauro Carvalho Chehab <[email protected]> Reviewed-by: Andi Shyti <[email protected]> Signed-off-by: Andi Shyti <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent acf228c commit 568a2e6

File tree

8 files changed

+197
-152
lines changed

8 files changed

+197
-152
lines changed

drivers/gpu/drm/i915/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ gt-y += \
131131
gt/intel_sseu.o \
132132
gt/intel_sseu_debugfs.o \
133133
gt/intel_timeline.o \
134+
gt/intel_tlb.o \
134135
gt/intel_wopcm.o \
135136
gt/intel_workarounds.o \
136137
gt/shmem_utils.o \

drivers/gpu/drm/i915/gem/i915_gem_pages.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <drm/drm_cache.h>
88

99
#include "gt/intel_gt.h"
10-
#include "gt/intel_gt_pm.h"
10+
#include "gt/intel_tlb.h"
1111

1212
#include "i915_drv.h"
1313
#include "i915_gem_object.h"
@@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
198198
if (!obj->mm.tlb)
199199
return;
200200

201-
intel_gt_invalidate_tlb(gt, obj->mm.tlb);
201+
intel_gt_invalidate_tlb_full(gt, obj->mm.tlb);
202202
obj->mm.tlb = 0;
203203
}
204204

drivers/gpu/drm/i915/gt/intel_gt.c

Lines changed: 3 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "intel_rps.h"
3434
#include "intel_sa_media.h"
3535
#include "intel_gt_sysfs.h"
36+
#include "intel_tlb.h"
3637
#include "intel_uncore.h"
3738
#include "shmem_utils.h"
3839

@@ -50,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
5051
intel_gt_init_reset(gt);
5152
intel_gt_init_requests(gt);
5253
intel_gt_init_timelines(gt);
53-
mutex_init(&gt->tlb.invalidate_lock);
54-
seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
54+
intel_gt_init_tlb(gt);
5555
intel_gt_pm_init_early(gt);
5656

5757
intel_wopcm_init_early(&gt->wopcm);
@@ -846,7 +846,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
846846
intel_gt_fini_requests(gt);
847847
intel_gt_fini_reset(gt);
848848
intel_gt_fini_timelines(gt);
849-
mutex_destroy(&gt->tlb.invalidate_lock);
849+
intel_gt_fini_tlb(gt);
850850
intel_engines_free(gt);
851851
}
852852
}
@@ -1003,137 +1003,3 @@ void intel_gt_info_print(const struct intel_gt_info *info,
10031003

10041004
intel_sseu_dump(&info->sseu, p);
10051005
}
1006-
1007-
/*
1008-
* HW architecture suggest typical invalidation time at 40us,
1009-
* with pessimistic cases up to 100us and a recommendation to
1010-
* cap at 1ms. We go a bit higher just in case.
1011-
*/
1012-
#define TLB_INVAL_TIMEOUT_US 100
1013-
#define TLB_INVAL_TIMEOUT_MS 4
1014-
1015-
/*
1016-
* On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
1017-
* but are now considered MCR registers. Since they exist within a GAM range,
1018-
* the primary instance of the register rolls up the status from each unit.
1019-
*/
1020-
static int wait_for_invalidate(struct intel_engine_cs *engine)
1021-
{
1022-
if (engine->tlb_inv.mcr)
1023-
return intel_gt_mcr_wait_for_reg(engine->gt,
1024-
engine->tlb_inv.reg.mcr_reg,
1025-
engine->tlb_inv.done,
1026-
0,
1027-
TLB_INVAL_TIMEOUT_US,
1028-
TLB_INVAL_TIMEOUT_MS);
1029-
else
1030-
return __intel_wait_for_register_fw(engine->gt->uncore,
1031-
engine->tlb_inv.reg.reg,
1032-
engine->tlb_inv.done,
1033-
0,
1034-
TLB_INVAL_TIMEOUT_US,
1035-
TLB_INVAL_TIMEOUT_MS,
1036-
NULL);
1037-
}
1038-
1039-
static void mmio_invalidate_full(struct intel_gt *gt)
1040-
{
1041-
struct drm_i915_private *i915 = gt->i915;
1042-
struct intel_uncore *uncore = gt->uncore;
1043-
struct intel_engine_cs *engine;
1044-
intel_engine_mask_t awake, tmp;
1045-
enum intel_engine_id id;
1046-
unsigned long flags;
1047-
1048-
if (GRAPHICS_VER(i915) < 8)
1049-
return;
1050-
1051-
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1052-
1053-
intel_gt_mcr_lock(gt, &flags);
1054-
spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
1055-
1056-
awake = 0;
1057-
for_each_engine(engine, gt, id) {
1058-
if (!intel_engine_pm_is_awake(engine))
1059-
continue;
1060-
1061-
if (engine->tlb_inv.mcr)
1062-
intel_gt_mcr_multicast_write_fw(gt,
1063-
engine->tlb_inv.reg.mcr_reg,
1064-
engine->tlb_inv.request);
1065-
else
1066-
intel_uncore_write_fw(uncore,
1067-
engine->tlb_inv.reg.reg,
1068-
engine->tlb_inv.request);
1069-
1070-
awake |= engine->mask;
1071-
}
1072-
1073-
GT_TRACE(gt, "invalidated engines %08x\n", awake);
1074-
1075-
/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
1076-
if (awake &&
1077-
(IS_TIGERLAKE(i915) ||
1078-
IS_DG1(i915) ||
1079-
IS_ROCKETLAKE(i915) ||
1080-
IS_ALDERLAKE_S(i915) ||
1081-
IS_ALDERLAKE_P(i915)))
1082-
intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
1083-
1084-
spin_unlock(&uncore->lock);
1085-
intel_gt_mcr_unlock(gt, flags);
1086-
1087-
for_each_engine_masked(engine, gt, awake, tmp) {
1088-
if (wait_for_invalidate(engine))
1089-
gt_err_ratelimited(gt,
1090-
"%s TLB invalidation did not complete in %ums!\n",
1091-
engine->name, TLB_INVAL_TIMEOUT_MS);
1092-
}
1093-
1094-
/*
1095-
* Use delayed put since a) we mostly expect a flurry of TLB
1096-
* invalidations so it is good to avoid paying the forcewake cost and
1097-
* b) it works around a bug in Icelake which cannot cope with too rapid
1098-
* transitions.
1099-
*/
1100-
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
1101-
}
1102-
1103-
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
1104-
{
1105-
u32 cur = intel_gt_tlb_seqno(gt);
1106-
1107-
/* Only skip if a *full* TLB invalidate barrier has passed */
1108-
return (s32)(cur - ALIGN(seqno, 2)) > 0;
1109-
}
1110-
1111-
void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
1112-
{
1113-
intel_wakeref_t wakeref;
1114-
1115-
if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
1116-
return;
1117-
1118-
if (intel_gt_is_wedged(gt))
1119-
return;
1120-
1121-
if (tlb_seqno_passed(gt, seqno))
1122-
return;
1123-
1124-
with_intel_gt_pm_if_awake(gt, wakeref) {
1125-
mutex_lock(&gt->tlb.invalidate_lock);
1126-
if (tlb_seqno_passed(gt, seqno))
1127-
goto unlock;
1128-
1129-
mmio_invalidate_full(gt);
1130-
1131-
write_seqcount_invalidate(&gt->tlb.seqno);
1132-
unlock:
1133-
mutex_unlock(&gt->tlb.invalidate_lock);
1134-
}
1135-
}
1136-
1137-
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1138-
#include "selftest_tlb.c"
1139-
#endif

drivers/gpu/drm/i915/gt/intel_gt.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,4 @@ void intel_gt_info_print(const struct intel_gt_info *info,
107107

108108
void intel_gt_watchdog_work(struct work_struct *work);
109109

110-
static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
111-
{
112-
return seqprop_sequence(&gt->tlb.seqno);
113-
}
114-
115-
static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
116-
{
117-
return intel_gt_tlb_seqno(gt) | 1;
118-
}
119-
120-
void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
121-
122110
#endif /* __INTEL_GT_H__ */

drivers/gpu/drm/i915/gt/intel_tlb.c

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// SPDX-License-Identifier: MIT
2+
/*
3+
* Copyright © 2023 Intel Corporation
4+
*/
5+
6+
#include "i915_drv.h"
7+
#include "i915_perf_oa_regs.h"
8+
#include "intel_engine_pm.h"
9+
#include "intel_gt.h"
10+
#include "intel_gt_mcr.h"
11+
#include "intel_gt_pm.h"
12+
#include "intel_gt_print.h"
13+
#include "intel_gt_regs.h"
14+
#include "intel_tlb.h"
15+
16+
/*
17+
* HW architecture suggest typical invalidation time at 40us,
18+
* with pessimistic cases up to 100us and a recommendation to
19+
* cap at 1ms. We go a bit higher just in case.
20+
*/
21+
#define TLB_INVAL_TIMEOUT_US 100
22+
#define TLB_INVAL_TIMEOUT_MS 4
23+
24+
/*
25+
* On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
26+
* but are now considered MCR registers. Since they exist within a GAM range,
27+
* the primary instance of the register rolls up the status from each unit.
28+
*/
29+
static int wait_for_invalidate(struct intel_engine_cs *engine)
30+
{
31+
if (engine->tlb_inv.mcr)
32+
return intel_gt_mcr_wait_for_reg(engine->gt,
33+
engine->tlb_inv.reg.mcr_reg,
34+
engine->tlb_inv.done,
35+
0,
36+
TLB_INVAL_TIMEOUT_US,
37+
TLB_INVAL_TIMEOUT_MS);
38+
else
39+
return __intel_wait_for_register_fw(engine->gt->uncore,
40+
engine->tlb_inv.reg.reg,
41+
engine->tlb_inv.done,
42+
0,
43+
TLB_INVAL_TIMEOUT_US,
44+
TLB_INVAL_TIMEOUT_MS,
45+
NULL);
46+
}
47+
48+
/*
 * Issue a full TLB invalidation over MMIO to every awake engine of @gt
 * and wait for each to acknowledge completion.
 *
 * Requests are written to all engines first, under the MCR and uncore
 * locks (so the invalidation cannot race a GT reset), and only then are
 * the acknowledgements polled — this parallelises the invalidations.
 * Caller holds gt->tlb.invalidate_lock (see intel_gt_invalidate_tlb_full()).
 */
static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	/* MMIO TLB invalidation is only attempted on graphics version 8+ */
	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	/* Kick off the invalidation on every awake engine, waits come later */
	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	/* Now poll each engine we kicked for its acknowledgement */
	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}
111+
112+
/*
 * Has a *full* TLB invalidation barrier already passed @seqno?
 * @seqno is rounded up to the next even barrier value before the
 * (wrap-safe) signed comparison against the current sequence.
 */
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 barrier = ALIGN(seqno, 2);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(intel_gt_tlb_seqno(gt) - barrier) > 0;
}
119+
120+
/**
 * intel_gt_invalidate_tlb_full - perform a full TLB invalidation on @gt
 * @gt: the GT whose engine TLBs are to be invalidated
 * @seqno: sequence number, typically obtained from
 *	intel_gt_next_invalidate_tlb_full()
 *
 * A no-op if the GT is wedged, not awake, or if a full invalidation
 * barrier has already passed @seqno (someone else flushed for us).
 * Otherwise, while holding a GT wakeref, performs the MMIO invalidation
 * under gt->tlb.invalidate_lock and then advances gt->tlb.seqno so that
 * concurrent and later callers may skip their own flush.
 */
void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	/* Selftest-only escape hatch: gt->awake may be set to -ENODEV */
	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
		/* Re-check under the lock: another thread may have flushed */
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}
145+
146+
/*
 * Initialise the TLB invalidation state of @gt: the mutex serialising
 * full invalidations, and the seqcount (tied to that mutex) tracking
 * completed invalidation barriers.  Mutex must be initialised first.
 */
void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}
151+
152+
/* Tear down the state set up by intel_gt_init_tlb(). */
void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}
156+
157+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
158+
#include "selftest_tlb.c"
159+
#endif

drivers/gpu/drm/i915/gt/intel_tlb.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright © 2023 Intel Corporation
4+
*/
5+
6+
#ifndef INTEL_TLB_H
7+
#define INTEL_TLB_H
8+
9+
#include <linux/seqlock.h>
10+
#include <linux/types.h>
11+
12+
#include "intel_gt_types.h"
13+
14+
void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
15+
16+
void intel_gt_init_tlb(struct intel_gt *gt);
17+
void intel_gt_fini_tlb(struct intel_gt *gt);
18+
19+
/* Read the current TLB invalidation sequence number of @gt. */
static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
{
	return seqprop_sequence(&gt->tlb.seqno);
}
23+
24+
/*
 * Sequence number to pass to a future intel_gt_invalidate_tlb_full();
 * forced odd so the barrier comparison only succeeds once a *full*
 * invalidation has completed after this point.
 */
static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
{
	return intel_gt_tlb_seqno(gt) | 1;
}
28+
29+
#endif /* INTEL_TLB_H */

0 commit comments

Comments
 (0)