@@ -56,11 +56,21 @@ static bool is_engine_config(const u64 config)
56
56
return config < __I915_PMU_OTHER (0 );
57
57
}
58
58
59
+ static unsigned int config_gt_id (const u64 config )
60
+ {
61
+ return config >> __I915_PMU_GT_SHIFT ;
62
+ }
63
+
64
+ static u64 config_counter (const u64 config )
65
+ {
66
+ return config & ~(~0ULL << __I915_PMU_GT_SHIFT );
67
+ }
68
+
59
69
static unsigned int other_bit (const u64 config )
60
70
{
61
71
unsigned int val ;
62
72
63
- switch (config ) {
73
+ switch (config_counter ( config ) ) {
64
74
case I915_PMU_ACTUAL_FREQUENCY :
65
75
val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED ;
66
76
break ;
@@ -78,7 +88,9 @@ static unsigned int other_bit(const u64 config)
78
88
return -1 ;
79
89
}
80
90
81
- return I915_ENGINE_SAMPLE_COUNT + val ;
91
+ return I915_ENGINE_SAMPLE_COUNT +
92
+ config_gt_id (config ) * __I915_PMU_TRACKED_EVENT_COUNT +
93
+ val ;
82
94
}
83
95
84
96
static unsigned int config_bit (const u64 config )
@@ -115,6 +127,18 @@ static unsigned int event_bit(struct perf_event *event)
115
127
return config_bit (event -> attr .config );
116
128
}
117
129
130
+ static u32 frequency_enabled_mask (void )
131
+ {
132
+ unsigned int i ;
133
+ u32 mask = 0 ;
134
+
135
+ for (i = 0 ; i < I915_PMU_MAX_GTS ; i ++ )
136
+ mask |= config_mask (__I915_PMU_ACTUAL_FREQUENCY (i )) |
137
+ config_mask (__I915_PMU_REQUESTED_FREQUENCY (i ));
138
+
139
+ return mask ;
140
+ }
141
+
118
142
static bool pmu_needs_timer (struct i915_pmu * pmu , bool gpu_active )
119
143
{
120
144
struct drm_i915_private * i915 = container_of (pmu , typeof (* i915 ), pmu );
@@ -131,9 +155,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
131
155
* Mask out all the ones which do not need the timer, or in
132
156
* other words keep all the ones that could need the timer.
133
157
*/
134
- enable &= config_mask (I915_PMU_ACTUAL_FREQUENCY ) |
135
- config_mask (I915_PMU_REQUESTED_FREQUENCY ) |
136
- ENGINE_SAMPLE_MASK ;
158
+ enable &= frequency_enabled_mask () | ENGINE_SAMPLE_MASK ;
137
159
138
160
/*
139
161
* When the GPU is idle per-engine counters do not need to be
@@ -175,9 +197,37 @@ static inline s64 ktime_since_raw(const ktime_t kt)
175
197
return ktime_to_ns (ktime_sub (ktime_get_raw (), kt ));
176
198
}
177
199
200
+ static unsigned int
201
+ __sample_idx (struct i915_pmu * pmu , unsigned int gt_id , int sample )
202
+ {
203
+ unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample ;
204
+
205
+ GEM_BUG_ON (idx >= ARRAY_SIZE (pmu -> sample ));
206
+
207
+ return idx ;
208
+ }
209
+
210
+ static u64 read_sample (struct i915_pmu * pmu , unsigned int gt_id , int sample )
211
+ {
212
+ return pmu -> sample [__sample_idx (pmu , gt_id , sample )].cur ;
213
+ }
214
+
215
+ static void
216
+ store_sample (struct i915_pmu * pmu , unsigned int gt_id , int sample , u64 val )
217
+ {
218
+ pmu -> sample [__sample_idx (pmu , gt_id , sample )].cur = val ;
219
+ }
220
+
221
+ static void
222
+ add_sample_mult (struct i915_pmu * pmu , unsigned int gt_id , int sample , u32 val , u32 mul )
223
+ {
224
+ pmu -> sample [__sample_idx (pmu , gt_id , sample )].cur += mul_u32_u32 (val , mul );
225
+ }
226
+
178
227
static u64 get_rc6 (struct intel_gt * gt )
179
228
{
180
229
struct drm_i915_private * i915 = gt -> i915 ;
230
+ const unsigned int gt_id = gt -> info .id ;
181
231
struct i915_pmu * pmu = & i915 -> pmu ;
182
232
unsigned long flags ;
183
233
bool awake = false;
@@ -192,7 +242,7 @@ static u64 get_rc6(struct intel_gt *gt)
192
242
spin_lock_irqsave (& pmu -> lock , flags );
193
243
194
244
if (awake ) {
195
- pmu -> sample [ __I915_SAMPLE_RC6 ]. cur = val ;
245
+ store_sample ( pmu , gt_id , __I915_SAMPLE_RC6 , val ) ;
196
246
} else {
197
247
/*
198
248
* We think we are runtime suspended.
@@ -201,14 +251,14 @@ static u64 get_rc6(struct intel_gt *gt)
201
251
* on top of the last known real value, as the approximated RC6
202
252
* counter value.
203
253
*/
204
- val = ktime_since_raw (pmu -> sleep_last );
205
- val += pmu -> sample [ __I915_SAMPLE_RC6 ]. cur ;
254
+ val = ktime_since_raw (pmu -> sleep_last [ gt_id ] );
255
+ val += read_sample ( pmu , gt_id , __I915_SAMPLE_RC6 ) ;
206
256
}
207
257
208
- if (val < pmu -> sample [ __I915_SAMPLE_RC6_LAST_REPORTED ]. cur )
209
- val = pmu -> sample [ __I915_SAMPLE_RC6_LAST_REPORTED ]. cur ;
258
+ if (val < read_sample ( pmu , gt_id , __I915_SAMPLE_RC6_LAST_REPORTED ) )
259
+ val = read_sample ( pmu , gt_id , __I915_SAMPLE_RC6_LAST_REPORTED ) ;
210
260
else
211
- pmu -> sample [ __I915_SAMPLE_RC6_LAST_REPORTED ]. cur = val ;
261
+ store_sample ( pmu , gt_id , __I915_SAMPLE_RC6_LAST_REPORTED , val ) ;
212
262
213
263
spin_unlock_irqrestore (& pmu -> lock , flags );
214
264
@@ -218,22 +268,29 @@ static u64 get_rc6(struct intel_gt *gt)
218
268
/*
 * init_rc6 - seed the per-GT RC6 samples.
 *
 * Post-change ('+') form: for every GT visited by for_each_gt(), and while
 * holding a runtime-PM wakeref on that GT's uncore, read __get_rc6() once,
 * store the value as both __I915_SAMPLE_RC6 and
 * __I915_SAMPLE_RC6_LAST_REPORTED for that GT, and record ktime_get_raw()
 * in pmu->sleep_last[] for the same GT.
 *
 * Lines marked '-' are the pre-change form, which did the same for
 * to_gt(i915) only, using the un-indexed sample slots and a single
 * pmu->sleep_last.
 *
 * NOTE(review): the samples are indexed by the loop counter 'i' here, while
 * park_rc6() indexes by gt->info.id - presumably the two are equal; confirm.
 */
static void init_rc6 (struct i915_pmu * pmu )
219
269
{
220
270
struct drm_i915_private * i915 = container_of (pmu , typeof (* i915 ), pmu );
221
- intel_wakeref_t wakeref ;
271
+ struct intel_gt * gt ;
272
+ unsigned int i ;
273
+
274
+ for_each_gt (gt , i915 , i ) {
275
+ intel_wakeref_t wakeref ;
276
+
277
+ with_intel_runtime_pm (gt -> uncore -> rpm , wakeref ) {
278
+ u64 val = __get_rc6 (gt );
222
279
223
- with_intel_runtime_pm ( to_gt ( i915 ) -> uncore -> rpm , wakeref ) {
224
- pmu -> sample [ __I915_SAMPLE_RC6 ]. cur = __get_rc6 ( to_gt ( i915 ));
225
- pmu -> sample [ __I915_SAMPLE_RC6_LAST_REPORTED ]. cur =
226
- pmu -> sample [ __I915_SAMPLE_RC6 ]. cur ;
227
- pmu -> sleep_last = ktime_get_raw ();
280
+ store_sample ( pmu , i , __I915_SAMPLE_RC6 , val );
281
+ store_sample ( pmu , i , __I915_SAMPLE_RC6_LAST_REPORTED ,
282
+ val );
283
+ pmu -> sleep_last [ i ] = ktime_get_raw () ;
284
+ }
228
285
}
229
286
}
230
287
231
288
/*
 * park_rc6 - snapshot RC6 state for one GT.
 *
 * Post-change ('+') form: stores __get_rc6(gt) into the __I915_SAMPLE_RC6
 * slot of the GT identified by gt->info.id and records ktime_get_raw() in
 * pmu->sleep_last[gt->info.id].
 *
 * Lines marked '-' are the pre-change form, which wrote the un-indexed
 * sample slot and a single pmu->sleep_last.
 */
static void park_rc6 (struct intel_gt * gt )
232
289
{
233
290
struct i915_pmu * pmu = & gt -> i915 -> pmu ;
234
291
235
- pmu -> sample [ __I915_SAMPLE_RC6 ]. cur = __get_rc6 (gt );
236
- pmu -> sleep_last = ktime_get_raw ();
292
+ store_sample ( pmu , gt -> info . id , __I915_SAMPLE_RC6 , __get_rc6 (gt ) );
293
+ pmu -> sleep_last [ gt -> info . id ] = ktime_get_raw ();
237
294
}
238
295
239
296
static void __i915_pmu_maybe_start_timer (struct i915_pmu * pmu )
@@ -373,34 +430,30 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
373
430
}
374
431
}
375
432
376
- static void
377
- add_sample_mult (struct i915_pmu_sample * sample , u32 val , u32 mul )
378
- {
379
- sample -> cur += mul_u32_u32 (val , mul );
380
- }
381
-
382
/*
 * frequency_sampling_enabled - is any frequency event enabled?
 *
 * Post-change ('+') form: takes a GT index in @gt and tests pmu->enable
 * against the config_mask() of that GT's actual- and requested-frequency
 * events; the result is non-zero (true) if either bit is set.
 *
 * Lines marked '-' are the pre-change form, which took no GT argument and
 * tested the I915_PMU_ACTUAL_FREQUENCY / I915_PMU_REQUESTED_FREQUENCY masks.
 *
 * NOTE(review): @gt is an unsigned index here, whereas 'gt' elsewhere in
 * this file names a struct intel_gt pointer.
 */
- static bool frequency_sampling_enabled (struct i915_pmu * pmu )
433
+ static bool
434
+ frequency_sampling_enabled (struct i915_pmu * pmu , unsigned int gt )
383
435
{
384
436
return pmu -> enable &
385
- (config_mask (I915_PMU_ACTUAL_FREQUENCY ) |
386
- config_mask (I915_PMU_REQUESTED_FREQUENCY ));
437
+ (config_mask (__I915_PMU_ACTUAL_FREQUENCY ( gt ) ) |
438
+ config_mask (__I915_PMU_REQUESTED_FREQUENCY ( gt ) ));
387
439
}
388
440
389
441
static void
390
442
frequency_sample (struct intel_gt * gt , unsigned int period_ns )
391
443
{
392
444
struct drm_i915_private * i915 = gt -> i915 ;
445
+ const unsigned int gt_id = gt -> info .id ;
393
446
struct i915_pmu * pmu = & i915 -> pmu ;
394
447
struct intel_rps * rps = & gt -> rps ;
395
448
396
- if (!frequency_sampling_enabled (pmu ))
449
+ if (!frequency_sampling_enabled (pmu , gt_id ))
397
450
return ;
398
451
399
452
/* Report 0/0 (actual/requested) frequency while parked. */
400
453
if (!intel_gt_pm_get_if_awake (gt ))
401
454
return ;
402
455
403
- if (pmu -> enable & config_mask (I915_PMU_ACTUAL_FREQUENCY )) {
456
+ if (pmu -> enable & config_mask (__I915_PMU_ACTUAL_FREQUENCY ( gt_id ) )) {
404
457
u32 val ;
405
458
406
459
/*
@@ -416,12 +469,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
416
469
if (!val )
417
470
val = intel_gpu_freq (rps , rps -> cur_freq );
418
471
419
- add_sample_mult (& pmu -> sample [ __I915_SAMPLE_FREQ_ACT ] ,
472
+ add_sample_mult (pmu , gt_id , __I915_SAMPLE_FREQ_ACT ,
420
473
val , period_ns / 1000 );
421
474
}
422
475
423
- if (pmu -> enable & config_mask (I915_PMU_REQUESTED_FREQUENCY )) {
424
- add_sample_mult (& pmu -> sample [ __I915_SAMPLE_FREQ_REQ ] ,
476
+ if (pmu -> enable & config_mask (__I915_PMU_REQUESTED_FREQUENCY ( gt_id ) )) {
477
+ add_sample_mult (pmu , gt_id , __I915_SAMPLE_FREQ_REQ ,
425
478
intel_rps_get_requested_frequency (rps ),
426
479
period_ns / 1000 );
427
480
}
@@ -458,9 +511,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
458
511
continue ;
459
512
460
513
engines_sample (gt , period_ns );
461
-
462
- if (i == 0 ) /* FIXME */
463
- frequency_sample (gt , period_ns );
514
+ frequency_sample (gt , period_ns );
464
515
}
465
516
466
517
hrtimer_forward (hrtimer , now , ns_to_ktime (PERIOD ));
@@ -502,7 +553,13 @@ config_status(struct drm_i915_private *i915, u64 config)
502
553
{
503
554
struct intel_gt * gt = to_gt (i915 );
504
555
505
- switch (config ) {
556
+ unsigned int gt_id = config_gt_id (config );
557
+ unsigned int max_gt_id = HAS_EXTRA_GT_LIST (i915 ) ? 1 : 0 ;
558
+
559
+ if (gt_id > max_gt_id )
560
+ return - ENOENT ;
561
+
562
+ switch (config_counter (config )) {
506
563
case I915_PMU_ACTUAL_FREQUENCY :
507
564
if (IS_VALLEYVIEW (i915 ) || IS_CHERRYVIEW (i915 ))
508
565
/* Requires a mutex for sampling! */
@@ -513,6 +570,8 @@ config_status(struct drm_i915_private *i915, u64 config)
513
570
return - ENODEV ;
514
571
break ;
515
572
case I915_PMU_INTERRUPTS :
573
+ if (gt_id )
574
+ return - ENOENT ;
516
575
break ;
517
576
case I915_PMU_RC6_RESIDENCY :
518
577
if (!gt -> rc6 .supported )
@@ -610,22 +669,27 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
610
669
val = engine -> pmu .sample [sample ].cur ;
611
670
}
612
671
} else {
613
- switch (event -> attr .config ) {
672
+ const unsigned int gt_id = config_gt_id (event -> attr .config );
673
+ const u64 config = config_counter (event -> attr .config );
674
+
675
+ switch (config ) {
614
676
case I915_PMU_ACTUAL_FREQUENCY :
615
677
val =
616
- div_u64 (pmu -> sample [__I915_SAMPLE_FREQ_ACT ].cur ,
678
+ div_u64 (read_sample (pmu , gt_id ,
679
+ __I915_SAMPLE_FREQ_ACT ),
617
680
USEC_PER_SEC /* to MHz */ );
618
681
break ;
619
682
case I915_PMU_REQUESTED_FREQUENCY :
620
683
val =
621
- div_u64 (pmu -> sample [__I915_SAMPLE_FREQ_REQ ].cur ,
684
+ div_u64 (read_sample (pmu , gt_id ,
685
+ __I915_SAMPLE_FREQ_REQ ),
622
686
USEC_PER_SEC /* to MHz */ );
623
687
break ;
624
688
case I915_PMU_INTERRUPTS :
625
689
val = READ_ONCE (pmu -> irq_count );
626
690
break ;
627
691
case I915_PMU_RC6_RESIDENCY :
628
- val = get_rc6 (to_gt ( i915 ) );
692
+ val = get_rc6 (i915 -> gt [ gt_id ] );
629
693
break ;
630
694
case I915_PMU_SOFTWARE_GT_AWAKE_TIME :
631
695
val = ktime_to_ns (intel_gt_get_awake_time (to_gt (i915 )));
0 commit comments