Skip to content

Commit bc4be0a

Browse files
tursulin authored and unerlige committed
drm/i915/pmu: Prepare for multi-tile non-engine counters
Reserve some bits in the counter config namespace which will carry the tile id and prepare the code to handle this.

No per tile counters have been added yet.

v2:
- Fix checkpatch issues
- Use 4 bits for gt id in non-engine counters. Drop FIXME.
- Set MAX GTs to 4. Drop FIXME.

v3: (Ashutosh, Tvrtko)
- Drop BUG_ON that would never fire
- Make enable u64
- Pull in some code from next patch

v4: Set I915_PMU_MAX_GTS to 2 (Tvrtko)

v5: s/u64/u32 where needed (Ashutosh)

Signed-off-by: Tvrtko Ursulin <[email protected]>
Signed-off-by: Umesh Nerlige Ramappa <[email protected]>
Reviewed-by: Ashutosh Dixit <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent b319cc5 commit bc4be0a

File tree

3 files changed

+127
-45
lines changed

3 files changed

+127
-45
lines changed

drivers/gpu/drm/i915/i915_pmu.c

Lines changed: 105 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,21 @@ static bool is_engine_config(const u64 config)
5656
return config < __I915_PMU_OTHER(0);
5757
}
5858

59+
static unsigned int config_gt_id(const u64 config)
60+
{
61+
return config >> __I915_PMU_GT_SHIFT;
62+
}
63+
64+
static u64 config_counter(const u64 config)
65+
{
66+
return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
67+
}
68+
5969
static unsigned int other_bit(const u64 config)
6070
{
6171
unsigned int val;
6272

63-
switch (config) {
73+
switch (config_counter(config)) {
6474
case I915_PMU_ACTUAL_FREQUENCY:
6575
val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
6676
break;
@@ -78,7 +88,9 @@ static unsigned int other_bit(const u64 config)
7888
return -1;
7989
}
8090

81-
return I915_ENGINE_SAMPLE_COUNT + val;
91+
return I915_ENGINE_SAMPLE_COUNT +
92+
config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
93+
val;
8294
}
8395

8496
static unsigned int config_bit(const u64 config)
@@ -115,6 +127,18 @@ static unsigned int event_bit(struct perf_event *event)
115127
return config_bit(event->attr.config);
116128
}
117129

130+
static u32 frequency_enabled_mask(void)
131+
{
132+
unsigned int i;
133+
u32 mask = 0;
134+
135+
for (i = 0; i < I915_PMU_MAX_GTS; i++)
136+
mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
137+
config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
138+
139+
return mask;
140+
}
141+
118142
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
119143
{
120144
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
@@ -131,9 +155,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
131155
* Mask out all the ones which do not need the timer, or in
132156
* other words keep all the ones that could need the timer.
133157
*/
134-
enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
135-
config_mask(I915_PMU_REQUESTED_FREQUENCY) |
136-
ENGINE_SAMPLE_MASK;
158+
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
137159

138160
/*
139161
* When the GPU is idle per-engine counters do not need to be
@@ -175,9 +197,37 @@ static inline s64 ktime_since_raw(const ktime_t kt)
175197
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
176198
}
177199

200+
static unsigned int
201+
__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
202+
{
203+
unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
204+
205+
GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
206+
207+
return idx;
208+
}
209+
210+
static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
211+
{
212+
return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
213+
}
214+
215+
static void
216+
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
217+
{
218+
pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
219+
}
220+
221+
static void
222+
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
223+
{
224+
pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
225+
}
226+
178227
static u64 get_rc6(struct intel_gt *gt)
179228
{
180229
struct drm_i915_private *i915 = gt->i915;
230+
const unsigned int gt_id = gt->info.id;
181231
struct i915_pmu *pmu = &i915->pmu;
182232
unsigned long flags;
183233
bool awake = false;
@@ -192,7 +242,7 @@ static u64 get_rc6(struct intel_gt *gt)
192242
spin_lock_irqsave(&pmu->lock, flags);
193243

194244
if (awake) {
195-
pmu->sample[__I915_SAMPLE_RC6].cur = val;
245+
store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
196246
} else {
197247
/*
198248
* We think we are runtime suspended.
@@ -201,14 +251,14 @@ static u64 get_rc6(struct intel_gt *gt)
201251
* on top of the last known real value, as the approximated RC6
202252
* counter value.
203253
*/
204-
val = ktime_since_raw(pmu->sleep_last);
205-
val += pmu->sample[__I915_SAMPLE_RC6].cur;
254+
val = ktime_since_raw(pmu->sleep_last[gt_id]);
255+
val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
206256
}
207257

208-
if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
209-
val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
258+
if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
259+
val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
210260
else
211-
pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
261+
store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
212262

213263
spin_unlock_irqrestore(&pmu->lock, flags);
214264

@@ -218,22 +268,29 @@ static u64 get_rc6(struct intel_gt *gt)
218268
static void init_rc6(struct i915_pmu *pmu)
219269
{
220270
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
221-
intel_wakeref_t wakeref;
271+
struct intel_gt *gt;
272+
unsigned int i;
273+
274+
for_each_gt(gt, i915, i) {
275+
intel_wakeref_t wakeref;
276+
277+
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
278+
u64 val = __get_rc6(gt);
222279

223-
with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
224-
pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
225-
pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
226-
pmu->sample[__I915_SAMPLE_RC6].cur;
227-
pmu->sleep_last = ktime_get_raw();
280+
store_sample(pmu, i, __I915_SAMPLE_RC6, val);
281+
store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
282+
val);
283+
pmu->sleep_last[i] = ktime_get_raw();
284+
}
228285
}
229286
}
230287

231288
static void park_rc6(struct intel_gt *gt)
232289
{
233290
struct i915_pmu *pmu = &gt->i915->pmu;
234291

235-
pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(gt);
236-
pmu->sleep_last = ktime_get_raw();
292+
store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
293+
pmu->sleep_last[gt->info.id] = ktime_get_raw();
237294
}
238295

239296
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
@@ -373,34 +430,30 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
373430
}
374431
}
375432

376-
static void
377-
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
378-
{
379-
sample->cur += mul_u32_u32(val, mul);
380-
}
381-
382-
static bool frequency_sampling_enabled(struct i915_pmu *pmu)
433+
static bool
434+
frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
383435
{
384436
return pmu->enable &
385-
(config_mask(I915_PMU_ACTUAL_FREQUENCY) |
386-
config_mask(I915_PMU_REQUESTED_FREQUENCY));
437+
(config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
438+
config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
387439
}
388440

389441
static void
390442
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
391443
{
392444
struct drm_i915_private *i915 = gt->i915;
445+
const unsigned int gt_id = gt->info.id;
393446
struct i915_pmu *pmu = &i915->pmu;
394447
struct intel_rps *rps = &gt->rps;
395448

396-
if (!frequency_sampling_enabled(pmu))
449+
if (!frequency_sampling_enabled(pmu, gt_id))
397450
return;
398451

399452
/* Report 0/0 (actual/requested) frequency while parked. */
400453
if (!intel_gt_pm_get_if_awake(gt))
401454
return;
402455

403-
if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
456+
if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
404457
u32 val;
405458

406459
/*
@@ -416,12 +469,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
416469
if (!val)
417470
val = intel_gpu_freq(rps, rps->cur_freq);
418471

419-
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
472+
add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
420473
val, period_ns / 1000);
421474
}
422475

423-
if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
424-
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
476+
if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
477+
add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
425478
intel_rps_get_requested_frequency(rps),
426479
period_ns / 1000);
427480
}
@@ -458,9 +511,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
458511
continue;
459512

460513
engines_sample(gt, period_ns);
461-
462-
if (i == 0) /* FIXME */
463-
frequency_sample(gt, period_ns);
514+
frequency_sample(gt, period_ns);
464515
}
465516

466517
hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
@@ -502,7 +553,13 @@ config_status(struct drm_i915_private *i915, u64 config)
502553
{
503554
struct intel_gt *gt = to_gt(i915);
504555

505-
switch (config) {
556+
unsigned int gt_id = config_gt_id(config);
557+
unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;
558+
559+
if (gt_id > max_gt_id)
560+
return -ENOENT;
561+
562+
switch (config_counter(config)) {
506563
case I915_PMU_ACTUAL_FREQUENCY:
507564
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
508565
/* Requires a mutex for sampling! */
@@ -513,6 +570,8 @@ config_status(struct drm_i915_private *i915, u64 config)
513570
return -ENODEV;
514571
break;
515572
case I915_PMU_INTERRUPTS:
573+
if (gt_id)
574+
return -ENOENT;
516575
break;
517576
case I915_PMU_RC6_RESIDENCY:
518577
if (!gt->rc6.supported)
@@ -610,22 +669,27 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
610669
val = engine->pmu.sample[sample].cur;
611670
}
612671
} else {
613-
switch (event->attr.config) {
672+
const unsigned int gt_id = config_gt_id(event->attr.config);
673+
const u64 config = config_counter(event->attr.config);
674+
675+
switch (config) {
614676
case I915_PMU_ACTUAL_FREQUENCY:
615677
val =
616-
div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
678+
div_u64(read_sample(pmu, gt_id,
679+
__I915_SAMPLE_FREQ_ACT),
617680
USEC_PER_SEC /* to MHz */);
618681
break;
619682
case I915_PMU_REQUESTED_FREQUENCY:
620683
val =
621-
div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
684+
div_u64(read_sample(pmu, gt_id,
685+
__I915_SAMPLE_FREQ_REQ),
622686
USEC_PER_SEC /* to MHz */);
623687
break;
624688
case I915_PMU_INTERRUPTS:
625689
val = READ_ONCE(pmu->irq_count);
626690
break;
627691
case I915_PMU_RC6_RESIDENCY:
628-
val = get_rc6(to_gt(i915));
692+
val = get_rc6(i915->gt[gt_id]);
629693
break;
630694
case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
631695
val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));

drivers/gpu/drm/i915/i915_pmu.h

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,16 @@ enum {
3838
__I915_NUM_PMU_SAMPLERS
3939
};
4040

41+
#define I915_PMU_MAX_GTS 2
42+
4143
/*
4244
* How many different events we track in the global PMU mask.
4345
*
4446
* It is also used to know to needed number of event reference counters.
4547
*/
4648
#define I915_PMU_MASK_BITS \
47-
(I915_ENGINE_SAMPLE_COUNT + __I915_PMU_TRACKED_EVENT_COUNT)
49+
(I915_ENGINE_SAMPLE_COUNT + \
50+
I915_PMU_MAX_GTS * __I915_PMU_TRACKED_EVENT_COUNT)
4851

4952
#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)
5053

@@ -124,11 +127,11 @@ struct i915_pmu {
124127
* Only global counters are held here, while the per-engine ones are in
125128
* struct intel_engine_cs.
126129
*/
127-
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
130+
struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
128131
/**
129132
* @sleep_last: Last time GT parked for RC6 estimation.
130133
*/
131-
ktime_t sleep_last;
134+
ktime_t sleep_last[I915_PMU_MAX_GTS];
132135
/**
133136
* @irq_count: Number of interrupts
134137
*

include/uapi/drm/i915_drm.h

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
280280
#define I915_PMU_ENGINE_SEMA(class, instance) \
281281
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
282282

283-
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
283+
/*
284+
* Top 4 bits of every non-engine counter are GT id.
285+
*/
286+
#define __I915_PMU_GT_SHIFT (60)
287+
288+
#define ___I915_PMU_OTHER(gt, x) \
289+
(((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
290+
((__u64)(gt) << __I915_PMU_GT_SHIFT))
291+
292+
#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
284293

285294
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
286295
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
290299

291300
#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
292301

302+
#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
303+
#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
304+
#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
305+
#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
306+
#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4)
307+
293308
/* Each region is a minimum of 16k, and there are at most 255 of them.
294309
*/
295310
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use

0 commit comments

Comments
 (0)