Skip to content

Commit 70ec2e8

Browse files
vsyrjalatursulin
authored andcommitted
drm/i915/dsb: Don't use indexed register writes needlessly
Turns out the DSB indexed register write command has rather significant initial overhead compared to the normal MMIO write command. Based on some quick experiments on TGL you have to write the register at least ~5 times for the indexed write command to come out ahead. If you write the register less times than that the MMIO write is faster. So it seems my automagic indexed write logic was a bit misguided. Go back to the original approach only use indexed writes for the cases we know will benefit from it (indexed LUT register updates). Currently we shouldn't have any cases where this truly matters (just some rare double writes to the precision LUT index registers), but we will need to switch the legacy LUT updates to write each LUT register twice (to avoid some palette anti-collision logic troubles). This would be close to the worst case for using indexed writes (two writes per register, and 256 separate registers). Using the MMIO write command should shave off around 30% of the execution time compared to using the indexed write command. Cc: [email protected] Fixes: 34d8311 ("drm/i915/dsb: Re-instate DSB for LUT updates") Fixes: 25ea341 ("drm/i915/dsb: Use non-posted register writes for legacy LUT") Signed-off-by: Ville Syrjälä <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] Reviewed-by: Uma Shankar <[email protected]> (cherry picked from commit ecba559) Signed-off-by: Tvrtko Ursulin <[email protected]>
1 parent fac04ef commit 70ec2e8

File tree

3 files changed

+49
-23
lines changed

3 files changed

+49
-23
lines changed

drivers/gpu/drm/i915/display/intel_color.c

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,17 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state,
13431343
intel_de_write_fw(display, reg, val);
13441344
}
13451345

1346+
static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state,
1347+
i915_reg_t reg, u32 val)
1348+
{
1349+
struct intel_display *display = to_intel_display(crtc_state);
1350+
1351+
if (crtc_state->dsb_color_vblank)
1352+
intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val);
1353+
else
1354+
intel_de_write_fw(display, reg, val);
1355+
}
1356+
13461357
static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state,
13471358
const struct drm_property_blob *blob)
13481359
{
@@ -1458,8 +1469,8 @@ static void bdw_load_lut_10(const struct intel_crtc_state *crtc_state,
14581469
prec_index);
14591470

14601471
for (i = 0; i < lut_size; i++)
1461-
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
1462-
ilk_lut_10(&lut[i]));
1472+
ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
1473+
ilk_lut_10(&lut[i]));
14631474

14641475
/*
14651476
* Reset the index, otherwise it prevents the legacy palette to be
@@ -1612,16 +1623,16 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
16121623
* ToDo: Extend to max 7.0. Enable 32 bit input value
16131624
* as compared to just 16 to achieve this.
16141625
*/
1615-
ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
1616-
DISPLAY_VER(display) >= 14 ?
1617-
mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
1626+
ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
1627+
DISPLAY_VER(display) >= 14 ?
1628+
mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
16181629
}
16191630

16201631
/* Clamp values > 1.0. */
16211632
while (i++ < glk_degamma_lut_size(display))
1622-
ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
1623-
DISPLAY_VER(display) >= 14 ?
1624-
1 << 24 : 1 << 16);
1633+
ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
1634+
DISPLAY_VER(display) >= 14 ?
1635+
1 << 24 : 1 << 16);
16251636

16261637
ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0);
16271638
}
@@ -1687,10 +1698,10 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
16871698
for (i = 0; i < 9; i++) {
16881699
const struct drm_color_lut *entry = &lut[i];
16891700

1690-
ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
1691-
ilk_lut_12p4_ldw(entry));
1692-
ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
1693-
ilk_lut_12p4_udw(entry));
1701+
ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
1702+
ilk_lut_12p4_ldw(entry));
1703+
ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
1704+
ilk_lut_12p4_udw(entry));
16941705
}
16951706

16961707
ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_INDEX(pipe),
@@ -1726,10 +1737,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
17261737
for (i = 1; i < 257; i++) {
17271738
entry = &lut[i * 8];
17281739

1729-
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
1730-
ilk_lut_12p4_ldw(entry));
1731-
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
1732-
ilk_lut_12p4_udw(entry));
1740+
ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
1741+
ilk_lut_12p4_ldw(entry));
1742+
ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
1743+
ilk_lut_12p4_udw(entry));
17331744
}
17341745

17351746
/*
@@ -1747,10 +1758,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
17471758
for (i = 0; i < 256; i++) {
17481759
entry = &lut[i * 8 * 128];
17491760

1750-
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
1751-
ilk_lut_12p4_ldw(entry));
1752-
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
1753-
ilk_lut_12p4_udw(entry));
1761+
ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
1762+
ilk_lut_12p4_ldw(entry));
1763+
ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
1764+
ilk_lut_12p4_udw(entry));
17541765
}
17551766

17561767
ilk_lut_write(crtc_state, PREC_PAL_INDEX(pipe),

drivers/gpu/drm/i915/display/intel_dsb.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,16 +273,20 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_
273273
}
274274

275275
/**
276-
* intel_dsb_reg_write() - Emit register wriite to the DSB context
276+
* intel_dsb_reg_write_indexed() - Emit register wriite to the DSB context
277277
* @dsb: DSB context
278278
* @reg: register address.
279279
* @val: value.
280280
*
281281
* This function is used for writing register-value pair in command
282282
* buffer of DSB.
283+
*
284+
* Note that indexed writes are slower than normal MMIO writes
285+
* for a small number (less than 5 or so) of writes to the same
286+
* register.
283287
*/
284-
void intel_dsb_reg_write(struct intel_dsb *dsb,
285-
i915_reg_t reg, u32 val)
288+
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
289+
i915_reg_t reg, u32 val)
286290
{
287291
/*
288292
* For example the buffer will look like below for 3 dwords for auto
@@ -340,6 +344,15 @@ void intel_dsb_reg_write(struct intel_dsb *dsb,
340344
}
341345
}
342346

347+
void intel_dsb_reg_write(struct intel_dsb *dsb,
348+
i915_reg_t reg, u32 val)
349+
{
350+
intel_dsb_emit(dsb, val,
351+
(DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
352+
(DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
353+
i915_mmio_reg_offset(reg));
354+
}
355+
343356
static u32 intel_dsb_mask_to_byte_en(u32 mask)
344357
{
345358
return (!!(mask & 0xff000000) << 3 |

drivers/gpu/drm/i915/display/intel_dsb.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ void intel_dsb_finish(struct intel_dsb *dsb);
3434
void intel_dsb_cleanup(struct intel_dsb *dsb);
3535
void intel_dsb_reg_write(struct intel_dsb *dsb,
3636
i915_reg_t reg, u32 val);
37+
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
38+
i915_reg_t reg, u32 val);
3739
void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
3840
i915_reg_t reg, u32 mask, u32 val);
3941
void intel_dsb_noop(struct intel_dsb *dsb, int count);

0 commit comments

Comments
 (0)