Skip to content

Commit 3ffe82d

Browse files
dceraolo authored and mattrope committed
drm/i915/xehp: handle new steering options
Xe_HP is more modular than its predecessors and as a consequence it has more types of replicated registers. As with l3bank regions on previous platforms, we may need to explicitly re-steer accesses to these new types of ranges at runtime if we can't find a single default steering value that satisfies the fusing of all types.

v2:
- Add a local 'i915' variable to reduce gt->i915 usage. (Caz)
- Drop unused 'intel_gt_read_register' prototype. (Caz)

v3:
- Drop unnecessary comment text. (Lucas)
- Drop unused register bit definition. (Lucas)

Bspec: 66534
Cc: Tvrtko Ursulin <[email protected]>
Cc: Caz Yokoyama <[email protected]>
Signed-off-by: Daniele Ceraolo Spurio <[email protected]>
Signed-off-by: Matt Roper <[email protected]>
Reviewed-by: Lucas De Marchi <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent b65a948 commit 3ffe82d

File tree

10 files changed

+163
-6
lines changed

10 files changed

+163
-6
lines changed

drivers/gpu/drm/i915/gt/intel_gt.c

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,40 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
8989
{},
9090
};
9191

92+
static u16 slicemask(struct intel_gt *gt, int count)
93+
{
94+
u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);
95+
96+
return intel_slicemask_from_dssmask(dss_mask, count);
97+
}
98+
9299
int intel_gt_init_mmio(struct intel_gt *gt)
93100
{
101+
struct drm_i915_private *i915 = gt->i915;
102+
94103
intel_gt_init_clock_frequency(gt);
95104

96105
intel_uc_init_mmio(&gt->uc);
97106
intel_sseu_info_init(gt);
98107

99-
if (GRAPHICS_VER(gt->i915) >= 11) {
108+
/*
109+
* An mslice is unavailable only if both the meml3 for the slice is
110+
* disabled *and* all of the DSS in the slice (quadrant) are disabled.
111+
*/
112+
if (HAS_MSLICES(i915))
113+
gt->info.mslice_mask =
114+
slicemask(gt, GEN_DSS_PER_MSLICE) |
115+
(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
116+
GEN12_MEML3_EN_MASK);
117+
118+
if (GRAPHICS_VER(i915) >= 11 &&
119+
GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
100120
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
101121
gt->info.l3bank_mask =
102122
~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
103123
GEN10_L3BANK_MASK;
124+
} else if (HAS_MSLICES(i915)) {
125+
MISSING_CASE(INTEL_INFO(i915)->platform);
104126
}
105127

106128
return intel_engines_init_mmio(gt);
@@ -787,6 +809,22 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt,
787809
*sliceid = 0; /* unused */
788810
*subsliceid = __ffs(gt->info.l3bank_mask);
789811
break;
812+
case MSLICE:
813+
GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
814+
815+
*sliceid = __ffs(gt->info.mslice_mask);
816+
*subsliceid = 0; /* unused */
817+
break;
818+
case LNCF:
819+
GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
820+
821+
/*
822+
* An LNCF is always present if its mslice is present, so we
823+
* can safely just steer to LNCF 0 in all cases.
824+
*/
825+
*sliceid = __ffs(gt->info.mslice_mask) << 1;
826+
*subsliceid = 0; /* unused */
827+
break;
790828
default:
791829
MISSING_CASE(type);
792830
*sliceid = 0;

drivers/gpu/drm/i915/gt/intel_gt_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,14 @@ struct intel_mmio_range {
4747
* of multicast registers. If another type of steering does not have any
4848
* overlap in valid steering targets with 'subslice' style registers, we will
4949
* need to explicitly re-steer reads of registers of the other type.
50+
*
51+
* Only the replication types that may need additional non-default steering
52+
* are listed here.
5053
*/
5154
enum intel_steering_type {
5255
L3BANK, /* steering target is picked from gt->info.l3bank_mask */
56+
MSLICE, /* steering target is picked from gt->info.mslice_mask */
57+
LNCF, /* sub-unit within an mslice; target is derived from gt->info.mslice_mask */
5358

5459
NUM_STEERING_TYPES /* number of steering types listed above; must remain the last entry */
5560
};
@@ -184,6 +189,8 @@ struct intel_gt {
184189

185190
/* Slice/subslice/EU info */
186191
struct sseu_dev_info sseu;
192+
193+
unsigned long mslice_mask;
187194
} info;
188195
};
189196

drivers/gpu/drm/i915/gt/intel_region_lmem.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "gem/i915_gem_lmem.h"
1111
#include "gem/i915_gem_region.h"
1212
#include "gem/i915_gem_ttm.h"
13+
#include "gt/intel_gt.h"
1314

1415
static int init_fake_lmem_bar(struct intel_memory_region *mem)
1516
{

drivers/gpu/drm/i915/gt/intel_sseu.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,3 +699,21 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
699699
}
700700
}
701701
}
702+
703+
/**
 * intel_slicemask_from_dssmask - derive a slice mask from a DSS mask
 * @dss_mask: bitmask of DSS; each slice owns @dss_per_slice consecutive bits,
 *            slice 0 starting at bit 0
 * @dss_per_slice: number of DSS bits that belong to one slice
 *
 * Bit i of the result is set when at least one of the @dss_per_slice bits
 * belonging to slice i is set in @dss_mask.
 *
 * Returns: the slice mask, or 0 (after a warning) if @dss_per_slice is not in
 * the range 1 .. (bits in @dss_mask) - 1.
 */
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	/*
	 * A group size of zero (or less) would divide by zero in the check
	 * below and never advance the loop; a group as wide as dss_mask would
	 * shift by the full width of the type, which is undefined.
	 */
	if (WARN_ON(dss_per_slice <= 0 ||
		    dss_per_slice >= (int)(8 * sizeof(dss_mask))))
		return 0;

	/* Warn if there could be more slices than slice_mask has bits for. */
	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	/* Consume one slice worth of DSS bits per iteration. */
	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}
719+

drivers/gpu/drm/i915/gt/intel_sseu.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ struct drm_printer;
2222
#define GEN_MAX_EUS (16) /* TGL upper bound */
2323
#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
2424

25+
#define GEN_DSS_PER_GSLICE 4
26+
#define GEN_DSS_PER_CSLICE 8
27+
#define GEN_DSS_PER_MSLICE 8
28+
2529
struct sseu_dev_info {
2630
u8 slice_mask;
2731
u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
@@ -104,4 +108,6 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p);
104108
void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
105109
struct drm_printer *p);
106110

111+
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
112+
107113
#endif /* __INTEL_SSEU_H__ */

drivers/gpu/drm/i915/gt/intel_workarounds.c

Lines changed: 84 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -889,12 +889,24 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
889889
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
890890
}
891891

892+
static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal,
893+
unsigned slice, unsigned subslice)
894+
{
895+
u32 mcr, mcr_mask;
896+
897+
mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
898+
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
899+
900+
drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
901+
902+
wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
903+
}
904+
892905
static void
893906
icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
894907
{
895908
const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
896909
unsigned int slice, subslice;
897-
u32 mcr, mcr_mask;
898910

899911
GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
900912
GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
@@ -919,12 +931,79 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
919931
if (i915->gt.info.l3bank_mask & BIT(subslice))
920932
i915->gt.steering_table[L3BANK] = NULL;
921933

922-
mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
923-
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
934+
__add_mcr_wa(i915, wal, slice, subslice);
935+
}
924936

925-
drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
937+
__maybe_unused
938+
static void
939+
xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
940+
{
941+
struct drm_i915_private *i915 = gt->i915;
942+
const struct sseu_dev_info *sseu = &gt->info.sseu;
943+
unsigned long slice, subslice = 0, slice_mask = 0;
944+
u64 dss_mask = 0;
945+
u32 lncf_mask = 0;
946+
int i;
926947

927-
wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
948+
/*
949+
* On Xe_HP the steering increases in complexity. There are now several
950+
* more units that require steering and we're not guaranteed to be able
951+
* to find a common setting for all of them. These are:
952+
* - GSLICE (fusable)
953+
* - DSS (sub-unit within gslice; fusable)
954+
* - L3 Bank (fusable)
955+
* - MSLICE (fusable)
956+
* - LNCF (sub-unit within mslice; always present if mslice is present)
957+
* - SQIDI (always on)
958+
*
959+
* We'll do our default/implicit steering based on GSLICE (in the
960+
* sliceid field) and DSS (in the subsliceid field). If we can
961+
* find overlap between the valid MSLICE and/or LNCF values with
962+
* a suitable GSLICE, then we can just re-use the default value and
963+
* skip and explicit steering at runtime.
964+
*
965+
* We only need to look for overlap between GSLICE/MSLICE/LNCF to find
966+
* a valid sliceid value. DSS steering is the only type of steering
967+
* that utilizes the 'subsliceid' bits.
968+
*
969+
* Also note that, even though the steering domain is called "GSlice"
970+
* and it is encoded in the register using the gslice format, the spec
971+
* says that the combined (geometry | compute) fuse should be used to
972+
* select the steering.
973+
*/
974+
975+
/* Find the potential gslice candidates */
976+
dss_mask = intel_sseu_get_subslices(sseu, 0);
977+
slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE);
978+
979+
/*
980+
* Find the potential LNCF candidates. Either LNCF within a valid
981+
* mslice is fine.
982+
*/
983+
for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
984+
lncf_mask |= (0x3 << (i * 2));
985+
986+
/*
987+
* Are there any sliceid values that work for both GSLICE and LNCF
988+
* steering?
989+
*/
990+
if (slice_mask & lncf_mask) {
991+
slice_mask &= lncf_mask;
992+
gt->steering_table[LNCF] = NULL;
993+
}
994+
995+
/* How about sliceid values that also work for MSLICE steering? */
996+
if (slice_mask & gt->info.mslice_mask) {
997+
slice_mask &= gt->info.mslice_mask;
998+
gt->steering_table[MSLICE] = NULL;
999+
}
1000+
1001+
slice = __ffs(slice_mask);
1002+
subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE));
1003+
WARN_ON(subslice > GEN_DSS_PER_GSLICE);
1004+
WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0);
1005+
1006+
__add_mcr_wa(i915, wal, slice, subslice);
9281007
}
9291008

9301009
static void

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,6 +1695,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
16951695
#define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
16961696
#define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
16971697

1698+
#define HAS_MSLICES(dev_priv) \
1699+
(INTEL_INFO(dev_priv)->has_mslices)
1700+
16981701
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
16991702

17001703
#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))

drivers/gpu/drm/i915/i915_pci.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,7 @@ static const struct intel_device_info adl_p_info = {
975975
.has_llc = 1, \
976976
.has_logical_ring_contexts = 1, \
977977
.has_logical_ring_elsq = 1, \
978+
.has_mslices = 1, \
978979
.has_rc6 = 1, \
979980
.has_reset_engine = 1, \
980981
.has_rps = 1, \

drivers/gpu/drm/i915/i915_reg.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3122,6 +3122,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
31223122
#define GEN10_MIRROR_FUSE3 _MMIO(0x9118)
31233123
#define GEN10_L3BANK_PAIR_COUNT 4
31243124
#define GEN10_L3BANK_MASK 0x0F
3125+
/* on Xe_HP the same fuses indicate mslices instead of L3 banks */
3126+
#define GEN12_MAX_MSLICES 4
3127+
#define GEN12_MEML3_EN_MASK 0x0F
31253128

31263129
#define GEN8_EU_DISABLE0 _MMIO(0x9134)
31273130
#define GEN8_EU_DIS0_S0_MASK 0xffffff

drivers/gpu/drm/i915/intel_device_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ enum intel_ppgtt_type {
134134
func(has_logical_ring_contexts); \
135135
func(has_logical_ring_elsq); \
136136
func(has_master_unit_irq); \
137+
func(has_mslices); \
137138
func(has_pooled_eu); \
138139
func(has_rc6); \
139140
func(has_rc6p); \

0 commit comments

Comments
 (0)