@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
2078
2078
u32 * reg_state = ce -> lrc_reg_state ;
2079
2079
int i ;
2080
2080
2081
- if (IS_GEN (stream -> perf -> i915 , 12 )) {
2082
- u32 format = stream -> oa_buffer .format ;
2081
+ reg_state [ctx_oactxctrl + 1 ] =
2082
+ (stream -> period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT ) |
2083
+ (stream -> periodic ? GEN8_OA_TIMER_ENABLE : 0 ) |
2084
+ GEN8_OA_COUNTER_RESUME ;
2083
2085
2084
- reg_state [ctx_oactxctrl + 1 ] =
2085
- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT ) |
2086
- (stream -> oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0 );
2087
- } else {
2088
- reg_state [ctx_oactxctrl + 1 ] =
2089
- (stream -> period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT ) |
2090
- (stream -> periodic ? GEN8_OA_TIMER_ENABLE : 0 ) |
2091
- GEN8_OA_COUNTER_RESUME ;
2092
- }
2093
-
2094
- for (i = 0 ; !!ctx_flexeu0 && i < ARRAY_SIZE (flex_regs ); i ++ )
2086
+ for (i = 0 ; i < ARRAY_SIZE (flex_regs ); i ++ )
2095
2087
reg_state [ctx_flexeu0 + i * 2 + 1 ] =
2096
2088
oa_config_flex_reg (stream -> oa_config , flex_regs [i ]);
2097
2089
@@ -2224,34 +2216,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
2224
2216
return err ;
2225
2217
}
2226
2218
2227
- static int gen12_emit_oar_config (struct intel_context * ce , bool enable )
/*
 * gen12_configure_oar_context() - added by this change in place of the
 * removed gen12_emit_oar_config() (whose body appears below as '-' lines).
 *
 * Programs the OAR-related registers for the stream's pinned context in
 * two steps:
 *  1. with the pinned context locked via intel_context_lock_pinned(),
 *     gen8_modify_context() is called with regs_context (one entry,
 *     GEN8_OACTXCONTROL);
 *  2. gen8_modify_self() is called with regs_lri (GEN12_OAR_OACONTROL and
 *     RING_CONTEXT_CONTROL).
 *
 * @stream: perf stream; stream->pinned_ctx, stream->oa_buffer.format and
 *          stream->perf->ctx_oactxctrl_offset are read here.
 * @enable: true to program GEN8_OA_COUNTER_RESUME,
 *          GEN12_OAR_OACONTROL_COUNTER_ENABLE and
 *          GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE; false to program 0 in their
 *          place.
 *
 * Returns the first non-zero result of intel_context_lock_pinned() or
 * gen8_modify_context(); otherwise the result of gen8_modify_self()
 * (presumably 0 or a negative errno - TODO confirm against those helpers).
 *
 * NOTE(review): the callers visible in this diff only invoke this when
 * stream->ctx is set; stream->pinned_ctx is dereferenced unconditionally.
 */
2219
+ static int gen12_configure_oar_context (struct i915_perf_stream * stream , bool enable )
2228
2220
{
2229
- struct i915_request * rq ;
2230
- u32 * cs ;
2231
- int err = 0 ;
2232
-
2233
- rq = i915_request_create (ce );
2234
- if (IS_ERR (rq ))
2235
- return PTR_ERR (rq );
2236
-
2237
- cs = intel_ring_begin (rq , 4 );
2238
- if (IS_ERR (cs )) {
2239
- err = PTR_ERR (cs );
2240
- goto out ;
2241
- }
2242
-
2243
- * cs ++ = MI_LOAD_REGISTER_IMM (1 );
2244
- * cs ++ = i915_mmio_reg_offset (RING_CONTEXT_CONTROL (ce -> engine -> mmio_base ));
2245
- * cs ++ = _MASKED_FIELD (GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE ,
2246
- enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0 );
2247
- * cs ++ = MI_NOOP ;
2221
+ int err ;
2222
+ struct intel_context * ce = stream -> pinned_ctx ;
2223
+ u32 format = stream -> oa_buffer .format ;
/*
 * Entry handed to gen8_modify_context(): GEN8_OACTXCONTROL at
 * ctx_oactxctrl_offset + 1. The positional initializers presumably map to
 * struct flex { reg, offset, value } - TODO confirm against the struct
 * definition, which is not visible in this diff.
 */
2224
+ struct flex regs_context [] = {
2225
+ {
2226
+ GEN8_OACTXCONTROL ,
2227
+ stream -> perf -> ctx_oactxctrl_offset + 1 ,
2228
+ enable ? GEN8_OA_COUNTER_RESUME : 0 ,
2229
+ },
2230
+ };
2231
+ /* Offsets in regs_lri are not used since this configuration is only
2232
+ * applied using LRI. Initialize the correct offsets for posterity.
2233
+ */
/*
 * NOTE(review): this #define sits inside the function body and no matching
 * #undef is visible in this hunk.
 */
2234
+ #define GEN12_OAR_OACONTROL_OFFSET 0x5B0
2235
+ struct flex regs_lri [] = {
2236
+ {
2237
+ GEN12_OAR_OACONTROL ,
2238
+ GEN12_OAR_OACONTROL_OFFSET + 1 ,
2239
+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT ) |
2240
+ (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0 )
2241
+ },
2242
+ {
2243
+ RING_CONTEXT_CONTROL (ce -> engine -> mmio_base ),
2244
+ CTX_CONTEXT_CONTROL ,
2245
+ _MASKED_FIELD (GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE ,
2246
+ enable ?
2247
+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
2248
+ 0 )
2249
+ },
2250
+ };
2248
2251
2249
- intel_ring_advance (rq , cs );
2252
+ /* Modify the context image of pinned context with regs_context */
2253
+ err = intel_context_lock_pinned (ce );
2254
+ if (err )
2255
+ return err ;
2250
2256
2251
- out :
2252
- i915_request_add (rq );
/*
 * The unlock is issued before err is tested, so the pinned-context lock is
 * dropped on both the success and the failure path of gen8_modify_context().
 */
2257
+ err = gen8_modify_context (ce , regs_context , ARRAY_SIZE (regs_context ));
2258
+ intel_context_unlock_pinned (ce );
2259
+ if (err )
2260
+ return err ;
2253
2261
2254
- return err ;
2262
+ /* Apply regs_lri using LRI with pinned context */
2263
+ return gen8_modify_self (ce , regs_lri , ARRAY_SIZE (regs_lri ));
2255
2264
}
2256
2265
2257
2266
/*
@@ -2277,53 +2286,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
2277
2286
* per-context OA state.
2278
2287
*
2279
2288
* Note: it's only the RCS/Render context that has any OA state.
2289
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
2280
2290
*/
2281
- static int lrc_configure_all_contexts (struct i915_perf_stream * stream ,
2282
- const struct i915_oa_config * oa_config )
2291
+ static int oa_configure_all_contexts (struct i915_perf_stream * stream ,
2292
+ struct flex * regs ,
2293
+ size_t num_regs )
2283
2294
{
2284
2295
struct drm_i915_private * i915 = stream -> perf -> i915 ;
2285
- /* The MMIO offsets for Flex EU registers aren't contiguous */
2286
- const u32 ctx_flexeu0 = stream -> perf -> ctx_flexeu0_offset ;
2287
- #define ctx_flexeuN (N ) (ctx_flexeu0 + 2 * (N) + 1)
2288
- struct flex regs [] = {
2289
- {
2290
- GEN8_R_PWR_CLK_STATE ,
2291
- CTX_R_PWR_CLK_STATE ,
2292
- },
2293
- {
2294
- IS_GEN (i915 , 12 ) ?
2295
- GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL ,
2296
- stream -> perf -> ctx_oactxctrl_offset + 1 ,
2297
- },
2298
- { EU_PERF_CNTL0 , ctx_flexeuN (0 ) },
2299
- { EU_PERF_CNTL1 , ctx_flexeuN (1 ) },
2300
- { EU_PERF_CNTL2 , ctx_flexeuN (2 ) },
2301
- { EU_PERF_CNTL3 , ctx_flexeuN (3 ) },
2302
- { EU_PERF_CNTL4 , ctx_flexeuN (4 ) },
2303
- { EU_PERF_CNTL5 , ctx_flexeuN (5 ) },
2304
- { EU_PERF_CNTL6 , ctx_flexeuN (6 ) },
2305
- };
2306
- #undef ctx_flexeuN
2307
2296
struct intel_engine_cs * engine ;
2308
2297
struct i915_gem_context * ctx , * cn ;
2309
- size_t array_size = IS_GEN (i915 , 12 ) ? 2 : ARRAY_SIZE (regs );
2310
- int i , err ;
2311
-
2312
- if (IS_GEN (i915 , 12 )) {
2313
- u32 format = stream -> oa_buffer .format ;
2314
-
2315
- regs [1 ].value =
2316
- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT ) |
2317
- (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0 );
2318
- } else {
2319
- regs [1 ].value =
2320
- (stream -> period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT ) |
2321
- (stream -> periodic ? GEN8_OA_TIMER_ENABLE : 0 ) |
2322
- GEN8_OA_COUNTER_RESUME ;
2323
- }
2324
-
2325
- for (i = 2 ; !!ctx_flexeu0 && i < array_size ; i ++ )
2326
- regs [i ].value = oa_config_flex_reg (oa_config , regs [i ].reg );
2298
+ int err ;
2327
2299
2328
2300
lockdep_assert_held (& stream -> perf -> lock );
2329
2301
@@ -2353,7 +2325,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
2353
2325
2354
2326
spin_unlock (& i915 -> gem .contexts .lock );
2355
2327
2356
- err = gen8_configure_context (ctx , regs , array_size );
2328
+ err = gen8_configure_context (ctx , regs , num_regs );
2357
2329
if (err ) {
2358
2330
i915_gem_context_put (ctx );
2359
2331
return err ;
@@ -2378,14 +2350,64 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
2378
2350
2379
2351
regs [0 ].value = intel_sseu_make_rpcs (i915 , & ce -> sseu );
2380
2352
2381
- err = gen8_modify_self (ce , regs , array_size );
2353
+ err = gen8_modify_self (ce , regs , num_regs );
2382
2354
if (err )
2383
2355
return err ;
2384
2356
}
2385
2357
2386
2358
return 0 ;
2387
2359
}
2388
2360
/*
 * gen12_configure_all_contexts() - Gen12 wrapper around
 * oa_configure_all_contexts().
 *
 * Builds a one-entry flex register list holding only GEN8_R_PWR_CLK_STATE
 * (at CTX_R_PWR_CLK_STATE) and forwards it with its element count. No value
 * is set for that entry here; the visible part of
 * oa_configure_all_contexts() assigns regs[0].value from
 * intel_sseu_make_rpcs() before calling gen8_modify_self().
 *
 * @stream:    perf stream, passed through unchanged.
 * @oa_config: not referenced in this body; callers in this diff pass either
 *             the active config or NULL.
 *
 * Returns whatever oa_configure_all_contexts() returns.
 */
2361
+ static int gen12_configure_all_contexts (struct i915_perf_stream * stream ,
2362
+ const struct i915_oa_config * oa_config )
2363
+ {
2364
+ struct flex regs [] = {
2365
+ {
2366
+ GEN8_R_PWR_CLK_STATE ,
2367
+ CTX_R_PWR_CLK_STATE ,
2368
+ },
2369
+ };
2370
+
2371
+ return oa_configure_all_contexts (stream , regs , ARRAY_SIZE (regs ));
2372
+ }
2373
+
/*
 * lrc_configure_all_contexts() - build the full flex register list and hand
 * it to oa_configure_all_contexts().
 *
 * Layout of regs[] as initialized below:
 *   [0]    GEN8_R_PWR_CLK_STATE at CTX_R_PWR_CLK_STATE (no value set here);
 *   [1]    GEN8_OACTXCONTROL at ctx_oactxctrl_offset + 1, value built from
 *          stream->period_exponent, stream->periodic and
 *          GEN8_OA_COUNTER_RESUME;
 *   [2..8] EU_PERF_CNTL0..6, values taken from
 *          oa_config_flex_reg(oa_config, reg).
 *
 * @stream:    perf stream supplying the context offsets and timer settings.
 * @oa_config: forwarded to oa_config_flex_reg() for each flex EU register
 *             (NOTE(review): whether NULL is accepted depends on
 *             oa_config_flex_reg(), which is not visible here).
 *
 * Returns whatever oa_configure_all_contexts() returns.
 */
2374
+ static int lrc_configure_all_contexts (struct i915_perf_stream * stream ,
2375
+ const struct i915_oa_config * oa_config )
2376
+ {
2377
+ /* The MMIO offsets for Flex EU registers aren't contiguous */
2378
+ const u32 ctx_flexeu0 = stream -> perf -> ctx_flexeu0_offset ;
/* Offset used for flex EU register N: ctx_flexeu0 + 2 * N + 1. */
2379
+ #define ctx_flexeuN (N ) (ctx_flexeu0 + 2 * (N) + 1)
2380
+ struct flex regs [] = {
2381
+ {
2382
+ GEN8_R_PWR_CLK_STATE ,
2383
+ CTX_R_PWR_CLK_STATE ,
2384
+ },
2385
+ {
2386
+ GEN8_OACTXCONTROL ,
2387
+ stream -> perf -> ctx_oactxctrl_offset + 1 ,
2388
+ },
2389
+ { EU_PERF_CNTL0 , ctx_flexeuN (0 ) },
2390
+ { EU_PERF_CNTL1 , ctx_flexeuN (1 ) },
2391
+ { EU_PERF_CNTL2 , ctx_flexeuN (2 ) },
2392
+ { EU_PERF_CNTL3 , ctx_flexeuN (3 ) },
2393
+ { EU_PERF_CNTL4 , ctx_flexeuN (4 ) },
2394
+ { EU_PERF_CNTL5 , ctx_flexeuN (5 ) },
2395
+ { EU_PERF_CNTL6 , ctx_flexeuN (6 ) },
2396
+ };
2397
+ #undef ctx_flexeuN
2398
+ int i ;
2399
+
2400
+ regs [1 ].value =
2401
+ (stream -> period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT ) |
2402
+ (stream -> periodic ? GEN8_OA_TIMER_ENABLE : 0 ) |
2403
+ GEN8_OA_COUNTER_RESUME ;
2404
+
/*
 * Starts at index 2 to skip the R_PWR_CLK_STATE and OACTXCONTROL entries.
 * NOTE(review): the removed version of this loop was additionally guarded
 * by !!ctx_flexeu0; this version runs unconditionally.
 */
2405
+ for (i = 2 ; i < ARRAY_SIZE (regs ); i ++ )
2406
+ regs [i ].value = oa_config_flex_reg (oa_config , regs [i ].reg );
2407
+
2408
+ return oa_configure_all_contexts (stream , regs , ARRAY_SIZE (regs ));
2409
+ }
2410
+
2389
2411
static int gen8_enable_metric_set (struct i915_perf_stream * stream )
2390
2412
{
2391
2413
struct intel_uncore * uncore = stream -> uncore ;
@@ -2464,7 +2486,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
2464
2486
* to make sure all slices/subslices are ON before writing to NOA
2465
2487
* registers.
2466
2488
*/
2467
- ret = lrc_configure_all_contexts (stream , oa_config );
2489
+ ret = gen12_configure_all_contexts (stream , oa_config );
2468
2490
if (ret )
2469
2491
return ret ;
2470
2492
@@ -2474,8 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
2474
2496
* requested this.
2475
2497
*/
2476
2498
if (stream -> ctx ) {
2477
- ret = gen12_emit_oar_config (stream -> pinned_ctx ,
2478
- oa_config != NULL );
2499
+ ret = gen12_configure_oar_context (stream , true);
2479
2500
if (ret )
2480
2501
return ret ;
2481
2502
}
@@ -2509,11 +2530,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
2509
2530
struct intel_uncore * uncore = stream -> uncore ;
2510
2531
2511
2532
/* Reset all contexts' slices/subslices configurations. */
2512
- lrc_configure_all_contexts (stream , NULL );
2533
+ gen12_configure_all_contexts (stream , NULL );
2513
2534
2514
2535
/* disable the context save/restore or OAR counters */
2515
2536
if (stream -> ctx )
2516
- gen12_emit_oar_config (stream -> pinned_ctx , false);
2537
+ gen12_configure_oar_context (stream , false);
2517
2538
2518
2539
/* Make sure we disable noa to save power. */
2519
2540
intel_uncore_rmw (uncore , RPM_CONFIG1 , GEN10_GT_NOA_ENABLE , 0 );
@@ -2855,7 +2876,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
2855
2876
return ;
2856
2877
2857
2878
stream = engine -> i915 -> perf .exclusive_stream ;
2858
- if (stream )
2879
+ /*
2880
+ * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
2881
+ * is already doing that, so nothing to be done for gen12 here.
2882
+ */
2883
+ if (stream && INTEL_GEN (stream -> perf -> i915 ) < 12 )
2859
2884
gen8_update_reg_state_unlocked (ce , stream );
2860
2885
}
2861
2886
0 commit comments