Skip to content

Commit ef50fa9

Browse files
ickle authored and jlahtine-intel committed
drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds
Rescue the GT workarounds from being buried inside init_clock_gating so that we remember to apply them after a GT reset, and so that they are included in our verification that the workarounds are applied.

v2: Leave HSW_SCRATCH to set an explicit value, not OR in our disable bit.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2011
Signed-off-by: Chris Wilson <[email protected]>
Cc: Mika Kuoppala <[email protected]>
Reviewed-by: Mika Kuoppala <[email protected]>
Cc: [email protected]
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit f93ec5f)
Signed-off-by: Joonas Lahtinen <[email protected]>
1 parent 898e4e5 commit ef50fa9

File tree

2 files changed

+50
-37
lines changed

2 files changed

+50
-37
lines changed

drivers/gpu/drm/i915/gt/intel_workarounds.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
178178
wa_write_masked_or(wal, reg, set, set);
179179
}
180180

/*
 * wa_write_clr - add a workaround to @wal that clears the @clr bits of @reg.
 *
 * Thin wrapper that forwards to wa_write_masked_or() with @clr as the third
 * argument and 0 as the fourth. In this commit it replaces the open-coded
 * "I915_READ(reg) & ~bit" write-back for GEN7_FF_THREAD_MODE.
 * NOTE(review): wa_write_masked_or() is not shown in this hunk; presumably
 * its last two arguments are (bits to clear, bits to set) -- confirm.
 */
181+
static void
182+
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
183+
{
184+
wa_write_masked_or(wal, reg, clr, 0);
185+
}
186+
181187
static void
182188
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
183189
{
@@ -686,6 +692,46 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
686692
return 0;
687693
}
688694

/*
 * hsw_gt_workarounds_init - add the Haswell GT workarounds to @wal.
 *
 * Each register setting is registered with @wal through the wa_*() helpers.
 * Per the commit message, these settings were moved here from
 * hsw_init_clock_gating() so that they are reapplied after a GT reset and
 * are included in the verification that workarounds are applied.
 *
 * @i915 is not referenced in this body.
 */
695+
static void
696+
hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
697+
{
698+
/* L3 caching of data atomics doesn't work -- disable it. */
699+
wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
700+
/*
 * NOTE(review): the final wa_add() argument is 0 here; presumably that is
 * the read-back mask, so this entry is written but not verified -- confirm
 * against wa_add(), which is not shown in this hunk.
 */
701+
wa_add(wal,
702+
HSW_ROW_CHICKEN3, 0,
703+
_MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
704+
0 /* XXX does this reg exist? */);
705+
706+
/* WaVSRefCountFullforceMissDisable:hsw */
707+
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
708+
709+
wa_masked_dis(wal,
710+
CACHE_MODE_0_GEN7,
711+
/* WaDisable_RenderCache_OperationalFlush:hsw */
712+
RC_OP_FLUSH_ENABLE |
713+
/* enable HiZ Raw Stall Optimization (by clearing its disable bit) */
714+
HIZ_RAW_STALL_OPT_DISABLE);
715+
716+
/* WaDisable4x2SubspanOptimization:hsw */
717+
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
718+
719+
/*
720+
* BSpec recommends 8x4 when MSAA is used,
721+
* however in practice 16x4 seems fastest.
722+
*
723+
* Note that PS/WM thread counts depend on the WIZ hashing
724+
* disable bit, which we don't touch here, but it's good
725+
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
726+
*/
727+
wa_add(wal, GEN7_GT_MODE, 0,
728+
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
729+
GEN6_WIZ_HASHING_16x4);
730+
731+
/* WaSampleCChickenBitEnable:hsw */
732+
wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
733+
}
734+
689735
static void
690736
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
691737
{
@@ -963,6 +1009,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
9631009
bxt_gt_workarounds_init(i915, wal);
9641010
else if (IS_SKYLAKE(i915))
9651011
skl_gt_workarounds_init(i915, wal);
1012+
else if (IS_HASWELL(i915))
1013+
hsw_gt_workarounds_init(i915, wal);
9661014
else if (INTEL_GEN(i915) <= 8)
9671015
return;
9681016
else

drivers/gpu/drm/i915/intel_pm.c

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7230,45 +7230,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
72307230

72317231
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
72327232
{
7233-
/* L3 caching of data atomics doesn't work -- disable it. */
7234-
I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
7235-
I915_WRITE(HSW_ROW_CHICKEN3,
7236-
_MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
7237-
72387233
/* This is required by WaCatErrorRejectionIssue:hsw */
72397234
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
7240-
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7241-
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
7242-
7243-
/* WaVSRefCountFullforceMissDisable:hsw */
7244-
I915_WRITE(GEN7_FF_THREAD_MODE,
7245-
I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
7246-
7247-
/* WaDisable_RenderCache_OperationalFlush:hsw */
7248-
I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7249-
7250-
/* enable HiZ Raw Stall Optimization */
7251-
I915_WRITE(CACHE_MODE_0_GEN7,
7252-
_MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
7253-
7254-
/* WaDisable4x2SubspanOptimization:hsw */
7255-
I915_WRITE(CACHE_MODE_1,
7256-
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
7257-
7258-
/*
7259-
* BSpec recommends 8x4 when MSAA is used,
7260-
* however in practice 16x4 seems fastest.
7261-
*
7262-
* Note that PS/WM thread counts depend on the WIZ hashing
7263-
* disable bit, which we don't touch here, but it's good
7264-
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
7265-
*/
7266-
I915_WRITE(GEN7_GT_MODE,
7267-
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
7268-
7269-
/* WaSampleCChickenBitEnable:hsw */
7270-
I915_WRITE(HALF_SLICE_CHICKEN3,
7271-
_MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
7235+
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
7236+
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
72727237

72737238
/* WaSwitchSolVfFArbitrationPriority:hsw */
72747239
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);

0 commit comments

Comments
 (0)