Skip to content

Commit 7d73c60

Browse files
unerligetursulin
authored and committed
drm/i915/pmu: Fix KMD and GuC race on accessing busyness
GuC updates shared memory and KMD reads it. Since this is not synchronized, we run into a race where the value read is inconsistent. Sometimes the inconsistency is in reading the upper MSB bytes of the last_switch_in value. 2 types of cases are seen - upper 8 bits are zero and upper 24 bits are zero. Since these are non-zero values, it is not trivial to determine validity of these values. Instead we read the values multiple times until they are consistent. In test runs, 3 attempts results in consistent values. The upper bound is set to 6 attempts and may need to be tuned as per any new occurrences. Since the duration that gt is parked can vary, the patch also updates the gt timestamp on unpark before starting the worker. v2: - Initialize i - Use READ_ONCE to access engine record Fixes: 77cdd05 ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Signed-off-by: Umesh Nerlige Ramappa <[email protected]> Reviewed-by: Alan Previn <[email protected]> Signed-off-by: John Harrison <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] (cherry picked from commit 512712a) Signed-off-by: Tvrtko Ursulin <[email protected]>
1 parent 3c6f13a commit 7d73c60

File tree

1 file changed

+54
-4
lines changed

1 file changed

+54
-4
lines changed

drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
11131113
if (new_start == lower_32_bits(*prev_start))
11141114
return;
11151115

1116+
/*
1117+
* When gt is unparked, we update the gt timestamp and start the ping
1118+
* worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1119+
* is unparked, all switched in contexts will have a start time that is
1120+
* within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1121+
*
1122+
* If neither gt_stamp nor new_start has rolled over, then the
1123+
* gt_stamp_hi does not need to be adjusted, however if one of them has
1124+
* rolled over, we need to adjust gt_stamp_hi accordingly.
1125+
*
1126+
* The below conditions address the cases of new_start rollover and
1127+
* gt_stamp_last rollover respectively.
1128+
*/
11161129
if (new_start < gt_stamp_last &&
11171130
(new_start - gt_stamp_last) <= POLL_TIME_CLKS)
11181131
gt_stamp_hi++;
@@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
11241137
*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
11251138
}
11261139

1127-
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1140+
/*
1141+
* GuC updates shared memory and KMD reads it. Since this is not synchronized,
1142+
* we run into a race where the value read is inconsistent. Sometimes the
1143+
* inconsistency is in reading the upper MSB bytes of the last_in value when
1144+
* this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1145+
* 24 bits are zero. Since these are non-zero values, it is non-trivial to
1146+
* determine validity of these values. Instead we read the values multiple times
1147+
* until they are consistent. In test runs, 3 attempts results in consistent
1148+
* values. The upper bound is set to 6 attempts and may need to be tuned as per
1149+
* any new occurences.
1150+
*/
1151+
static void __get_engine_usage_record(struct intel_engine_cs *engine,
1152+
u32 *last_in, u32 *id, u32 *total)
11281153
{
11291154
struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
1155+
int i = 0;
1156+
1157+
do {
1158+
*last_in = READ_ONCE(rec->last_switch_in_stamp);
1159+
*id = READ_ONCE(rec->current_context_index);
1160+
*total = READ_ONCE(rec->total_runtime);
1161+
1162+
if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
1163+
READ_ONCE(rec->current_context_index) == *id &&
1164+
READ_ONCE(rec->total_runtime) == *total)
1165+
break;
1166+
} while (++i < 6);
1167+
}
1168+
1169+
static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1170+
{
11301171
struct intel_engine_guc_stats *stats = &engine->stats.guc;
11311172
struct intel_guc *guc = &engine->gt->uc.guc;
1132-
u32 last_switch = rec->last_switch_in_stamp;
1133-
u32 ctx_id = rec->current_context_index;
1134-
u32 total = rec->total_runtime;
1173+
u32 last_switch, ctx_id, total;
11351174

11361175
lockdep_assert_held(&guc->timestamp.lock);
11371176

1177+
__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1178+
11381179
stats->running = ctx_id != ~0U && last_switch;
11391180
if (stats->running)
11401181
__extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1236,6 +1277,10 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
12361277
if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
12371278
stats_saved = *stats;
12381279
gt_stamp_saved = guc->timestamp.gt_stamp;
1280+
/*
1281+
* Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1282+
* start_gt_clk' calculation below for active engines.
1283+
*/
12391284
guc_update_engine_gt_clks(engine);
12401285
guc_update_pm_timestamp(guc, now);
12411286
intel_gt_pm_put_async(gt);
@@ -1364,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
13641409
void intel_guc_busyness_unpark(struct intel_gt *gt)
13651410
{
13661411
struct intel_guc *guc = &gt->uc.guc;
1412+
unsigned long flags;
1413+
ktime_t unused;
13671414

13681415
if (!guc_submission_initialized(guc))
13691416
return;
13701417

1418+
spin_lock_irqsave(&guc->timestamp.lock, flags);
1419+
guc_update_pm_timestamp(guc, &unused);
1420+
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
13711421
mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
13721422
guc->timestamp.ping_delay);
13731423
}

0 commit comments

Comments
 (0)