Skip to content

Commit 177e876

Browse files
unerlige authored and jlahtine-intel committed
drm/i915/perf: Configure OAR for specific context
Gen12 supports saving/restoring render counters per context. Apply OAR configuration only for the context that is passed in to perf.

v2:
- Fix OACTXCONTROL value to only stop/resume counters.
- Remove gen12_update_reg_state_unlocked as power state is already applied by the caller.

v3: (Lionel)
- Move register initialization into the array
- Assume a valid oa_config in enable_metric_set

Signed-off-by: Umesh Nerlige Ramappa <[email protected]>
Fixes: 00a7f0d ("drm/i915/tgl: Add perf support on TGL")
Reviewed-by: Lionel Landwerlin <[email protected]>
Signed-off-by: Lionel Landwerlin <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit ccdeed4)
Signed-off-by: Joonas Lahtinen <[email protected]>
1 parent 2a264a0 commit 177e876

File tree

1 file changed

+112
-87
lines changed

1 file changed

+112
-87
lines changed

drivers/gpu/drm/i915/i915_perf.c

Lines changed: 112 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
20782078
u32 *reg_state = ce->lrc_reg_state;
20792079
int i;
20802080

2081-
if (IS_GEN(stream->perf->i915, 12)) {
2082-
u32 format = stream->oa_buffer.format;
2081+
reg_state[ctx_oactxctrl + 1] =
2082+
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2083+
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2084+
GEN8_OA_COUNTER_RESUME;
20832085

2084-
reg_state[ctx_oactxctrl + 1] =
2085-
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
2086-
(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
2087-
} else {
2088-
reg_state[ctx_oactxctrl + 1] =
2089-
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2090-
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2091-
GEN8_OA_COUNTER_RESUME;
2092-
}
2093-
2094-
for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
2086+
for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
20952087
reg_state[ctx_flexeu0 + i * 2 + 1] =
20962088
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
20972089

@@ -2224,34 +2216,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
22242216
return err;
22252217
}
22262218

2227-
static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
2219+
static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
22282220
{
2229-
struct i915_request *rq;
2230-
u32 *cs;
2231-
int err = 0;
2232-
2233-
rq = i915_request_create(ce);
2234-
if (IS_ERR(rq))
2235-
return PTR_ERR(rq);
2236-
2237-
cs = intel_ring_begin(rq, 4);
2238-
if (IS_ERR(cs)) {
2239-
err = PTR_ERR(cs);
2240-
goto out;
2241-
}
2242-
2243-
*cs++ = MI_LOAD_REGISTER_IMM(1);
2244-
*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
2245-
*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
2246-
enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
2247-
*cs++ = MI_NOOP;
2221+
int err;
2222+
struct intel_context *ce = stream->pinned_ctx;
2223+
u32 format = stream->oa_buffer.format;
2224+
struct flex regs_context[] = {
2225+
{
2226+
GEN8_OACTXCONTROL,
2227+
stream->perf->ctx_oactxctrl_offset + 1,
2228+
enable ? GEN8_OA_COUNTER_RESUME : 0,
2229+
},
2230+
};
2231+
/* Offsets in regs_lri are not used since this configuration is only
2232+
* applied using LRI. Initialize the correct offsets for posterity.
2233+
*/
2234+
#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
2235+
struct flex regs_lri[] = {
2236+
{
2237+
GEN12_OAR_OACONTROL,
2238+
GEN12_OAR_OACONTROL_OFFSET + 1,
2239+
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
2240+
(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
2241+
},
2242+
{
2243+
RING_CONTEXT_CONTROL(ce->engine->mmio_base),
2244+
CTX_CONTEXT_CONTROL,
2245+
_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
2246+
enable ?
2247+
GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
2248+
0)
2249+
},
2250+
};
22482251

2249-
intel_ring_advance(rq, cs);
2252+
/* Modify the context image of pinned context with regs_context*/
2253+
err = intel_context_lock_pinned(ce);
2254+
if (err)
2255+
return err;
22502256

2251-
out:
2252-
i915_request_add(rq);
2257+
err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
2258+
intel_context_unlock_pinned(ce);
2259+
if (err)
2260+
return err;
22532261

2254-
return err;
2262+
/* Apply regs_lri using LRI with pinned context */
2263+
return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
22552264
}
22562265

22572266
/*
@@ -2277,53 +2286,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
22772286
* per-context OA state.
22782287
*
22792288
* Note: it's only the RCS/Render context that has any OA state.
2289+
* Note: the first flex register passed must always be R_PWR_CLK_STATE
22802290
*/
2281-
static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
2282-
const struct i915_oa_config *oa_config)
2291+
static int oa_configure_all_contexts(struct i915_perf_stream *stream,
2292+
struct flex *regs,
2293+
size_t num_regs)
22832294
{
22842295
struct drm_i915_private *i915 = stream->perf->i915;
2285-
/* The MMIO offsets for Flex EU registers aren't contiguous */
2286-
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2287-
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
2288-
struct flex regs[] = {
2289-
{
2290-
GEN8_R_PWR_CLK_STATE,
2291-
CTX_R_PWR_CLK_STATE,
2292-
},
2293-
{
2294-
IS_GEN(i915, 12) ?
2295-
GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
2296-
stream->perf->ctx_oactxctrl_offset + 1,
2297-
},
2298-
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
2299-
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
2300-
{ EU_PERF_CNTL2, ctx_flexeuN(2) },
2301-
{ EU_PERF_CNTL3, ctx_flexeuN(3) },
2302-
{ EU_PERF_CNTL4, ctx_flexeuN(4) },
2303-
{ EU_PERF_CNTL5, ctx_flexeuN(5) },
2304-
{ EU_PERF_CNTL6, ctx_flexeuN(6) },
2305-
};
2306-
#undef ctx_flexeuN
23072296
struct intel_engine_cs *engine;
23082297
struct i915_gem_context *ctx, *cn;
2309-
size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
2310-
int i, err;
2311-
2312-
if (IS_GEN(i915, 12)) {
2313-
u32 format = stream->oa_buffer.format;
2314-
2315-
regs[1].value =
2316-
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
2317-
(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
2318-
} else {
2319-
regs[1].value =
2320-
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2321-
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2322-
GEN8_OA_COUNTER_RESUME;
2323-
}
2324-
2325-
for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
2326-
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
2298+
int err;
23272299

23282300
lockdep_assert_held(&stream->perf->lock);
23292301

@@ -2353,7 +2325,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
23532325

23542326
spin_unlock(&i915->gem.contexts.lock);
23552327

2356-
err = gen8_configure_context(ctx, regs, array_size);
2328+
err = gen8_configure_context(ctx, regs, num_regs);
23572329
if (err) {
23582330
i915_gem_context_put(ctx);
23592331
return err;
@@ -2378,14 +2350,64 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
23782350

23792351
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
23802352

2381-
err = gen8_modify_self(ce, regs, array_size);
2353+
err = gen8_modify_self(ce, regs, num_regs);
23822354
if (err)
23832355
return err;
23842356
}
23852357

23862358
return 0;
23872359
}
23882360

2361+
/*
 * Gen12 variant of the "configure all contexts" step.
 *
 * Builds a single-entry register list containing only
 * GEN8_R_PWR_CLK_STATE (context image slot CTX_R_PWR_CLK_STATE) and
 * passes it to oa_configure_all_contexts(), returning that call's result.
 * No OA control or flex EU registers are included in the list.
 *
 * The entry's value is not set here; the visible part of
 * oa_configure_all_contexts() assigns regs[0].value itself before calling
 * gen8_modify_self().
 *
 * NOTE(review): oa_config is accepted but never read in this function --
 * presumably kept so the signature parallels lrc_configure_all_contexts();
 * confirm against the callers.
 */
static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
2362+
const struct i915_oa_config *oa_config)
2363+
{
2364+
struct flex regs[] = {
2365+
{
2366+
GEN8_R_PWR_CLK_STATE,
2367+
CTX_R_PWR_CLK_STATE,
2368+
},
2369+
};
2370+
2371+
return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
2372+
}
2373+
2374+
/*
 * Build the full per-context OA register list and apply it through
 * oa_configure_all_contexts(), returning that call's result.
 *
 * The list holds, in order:
 *   [0]    GEN8_R_PWR_CLK_STATE -- no value is set here
 *   [1]    GEN8_OACTXCONTROL    -- timer period exponent, timer enable
 *                                  (only when stream->periodic) and
 *                                  GEN8_OA_COUNTER_RESUME
 *   [2..8] EU_PERF_CNTL0..6     -- values from oa_config_flex_reg()
 *
 * Context-image offsets come from stream->perf->ctx_oactxctrl_offset and
 * stream->perf->ctx_flexeu0_offset.
 */
static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
2375+
const struct i915_oa_config *oa_config)
2376+
{
2377+
/* The MMIO offsets for Flex EU registers aren't contiguous */
2378+
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2379+
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
2380+
struct flex regs[] = {
2381+
{
2382+
GEN8_R_PWR_CLK_STATE,
2383+
CTX_R_PWR_CLK_STATE,
2384+
},
2385+
{
2386+
GEN8_OACTXCONTROL,
2387+
stream->perf->ctx_oactxctrl_offset + 1,
2388+
},
2389+
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
2390+
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
2391+
{ EU_PERF_CNTL2, ctx_flexeuN(2) },
2392+
{ EU_PERF_CNTL3, ctx_flexeuN(3) },
2393+
{ EU_PERF_CNTL4, ctx_flexeuN(4) },
2394+
{ EU_PERF_CNTL5, ctx_flexeuN(5) },
2395+
{ EU_PERF_CNTL6, ctx_flexeuN(6) },
2396+
};
2397+
#undef ctx_flexeuN
2398+
int i;
2399+
2400+
regs[1].value =
2401+
(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2402+
(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2403+
GEN8_OA_COUNTER_RESUME;
2404+
2405+
/* Start at 2: entries 0 and 1 are R_PWR_CLK_STATE and OACTXCONTROL. */
for (i = 2; i < ARRAY_SIZE(regs); i++)
2406+
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
2407+
2408+
return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
2409+
}
2410+
23892411
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
23902412
{
23912413
struct intel_uncore *uncore = stream->uncore;
@@ -2464,7 +2486,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
24642486
* to make sure all slices/subslices are ON before writing to NOA
24652487
* registers.
24662488
*/
2467-
ret = lrc_configure_all_contexts(stream, oa_config);
2489+
ret = gen12_configure_all_contexts(stream, oa_config);
24682490
if (ret)
24692491
return ret;
24702492

@@ -2474,8 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
24742496
* requested this.
24752497
*/
24762498
if (stream->ctx) {
2477-
ret = gen12_emit_oar_config(stream->pinned_ctx,
2478-
oa_config != NULL);
2499+
ret = gen12_configure_oar_context(stream, true);
24792500
if (ret)
24802501
return ret;
24812502
}
@@ -2509,11 +2530,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
25092530
struct intel_uncore *uncore = stream->uncore;
25102531

25112532
/* Reset all contexts' slices/subslices configurations. */
2512-
lrc_configure_all_contexts(stream, NULL);
2533+
gen12_configure_all_contexts(stream, NULL);
25132534

25142535
/* disable the context save/restore or OAR counters */
25152536
if (stream->ctx)
2516-
gen12_emit_oar_config(stream->pinned_ctx, false);
2537+
gen12_configure_oar_context(stream, false);
25172538

25182539
/* Make sure we disable noa to save power. */
25192540
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2855,7 +2876,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
28552876
return;
28562877

28572878
stream = engine->i915->perf.exclusive_stream;
2858-
if (stream)
2879+
/*
2880+
* For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
2881+
* is already doing that, so nothing to be done for gen12 here.
2882+
*/
2883+
if (stream && INTEL_GEN(stream->perf->i915) < 12)
28592884
gen8_update_reg_state_unlocked(ce, stream);
28602885
}
28612886

0 commit comments

Comments
 (0)