@@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1113
1113
if (new_start == lower_32_bits (* prev_start ))
1114
1114
return ;
1115
1115
1116
+ /*
1117
+ * When gt is unparked, we update the gt timestamp and start the ping
1118
+ * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1119
+ * is unparked, all switched in contexts will have a start time that is
1120
+ * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1121
+ *
1122
+ * If neither gt_stamp nor new_start has rolled over, then the
1123
+ * gt_stamp_hi does not need to be adjusted, however if one of them has
1124
+ * rolled over, we need to adjust gt_stamp_hi accordingly.
1125
+ *
1126
+ * The below conditions address the cases of new_start rollover and
1127
+ * gt_stamp_last rollover respectively.
1128
+ */
1116
1129
if (new_start < gt_stamp_last &&
1117
1130
(new_start - gt_stamp_last ) <= POLL_TIME_CLKS )
1118
1131
gt_stamp_hi ++ ;
@@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1124
1137
* prev_start = ((u64 )gt_stamp_hi << 32 ) | new_start ;
1125
1138
}
1126
1139
1127
- static void guc_update_engine_gt_clks (struct intel_engine_cs * engine )
1140
+ /*
1141
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1142
+ * we run into a race where the value read is inconsistent. Sometimes the
1143
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
1144
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1145
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1146
+ * determine validity of these values. Instead we read the values multiple times
1147
+ * until they are consistent. In test runs, 3 attempts result in consistent
1148
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
1149
+ * any new occurrences.
1150
+ */
1151
+ static void __get_engine_usage_record (struct intel_engine_cs * engine ,
1152
+ u32 * last_in , u32 * id , u32 * total )
1128
1153
{
1129
1154
struct guc_engine_usage_record * rec = intel_guc_engine_usage (engine );
1155
+ int i = 0 ;
1156
+
1157
+ do {
1158
+ * last_in = READ_ONCE (rec -> last_switch_in_stamp );
1159
+ * id = READ_ONCE (rec -> current_context_index );
1160
+ * total = READ_ONCE (rec -> total_runtime );
1161
+
1162
+ if (READ_ONCE (rec -> last_switch_in_stamp ) == * last_in &&
1163
+ READ_ONCE (rec -> current_context_index ) == * id &&
1164
+ READ_ONCE (rec -> total_runtime ) == * total )
1165
+ break ;
1166
+ } while (++ i < 6 );
1167
+ }
1168
+
1169
+ static void guc_update_engine_gt_clks (struct intel_engine_cs * engine )
1170
+ {
1130
1171
struct intel_engine_guc_stats * stats = & engine -> stats .guc ;
1131
1172
struct intel_guc * guc = & engine -> gt -> uc .guc ;
1132
- u32 last_switch = rec -> last_switch_in_stamp ;
1133
- u32 ctx_id = rec -> current_context_index ;
1134
- u32 total = rec -> total_runtime ;
1173
+ u32 last_switch , ctx_id , total ;
1135
1174
1136
1175
lockdep_assert_held (& guc -> timestamp .lock );
1137
1176
1177
+ __get_engine_usage_record (engine , & last_switch , & ctx_id , & total );
1178
+
1138
1179
stats -> running = ctx_id != ~0U && last_switch ;
1139
1180
if (stats -> running )
1140
1181
__extend_last_switch (guc , & stats -> start_gt_clk , last_switch );
@@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1149
1190
}
1150
1191
}
1151
1192
1152
- static void guc_update_pm_timestamp (struct intel_guc * guc ,
1153
- struct intel_engine_cs * engine ,
1154
- ktime_t * now )
1193
+ static u32 gpm_timestamp_shift (struct intel_gt * gt )
1194
+ {
1195
+ intel_wakeref_t wakeref ;
1196
+ u32 reg , shift ;
1197
+
1198
+ with_intel_runtime_pm (gt -> uncore -> rpm , wakeref )
1199
+ reg = intel_uncore_read (gt -> uncore , RPM_CONFIG0 );
1200
+
1201
+ shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK ) >>
1202
+ GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT ;
1203
+
1204
+ return 3 - shift ;
1205
+ }
1206
+
1207
+ static u64 gpm_timestamp (struct intel_gt * gt )
1208
+ {
1209
+ u32 lo , hi , old_hi , loop = 0 ;
1210
+
1211
+ hi = intel_uncore_read (gt -> uncore , MISC_STATUS1 );
1212
+ do {
1213
+ lo = intel_uncore_read (gt -> uncore , MISC_STATUS0 );
1214
+ old_hi = hi ;
1215
+ hi = intel_uncore_read (gt -> uncore , MISC_STATUS1 );
1216
+ } while (old_hi != hi && loop ++ < 2 );
1217
+
1218
+ return ((u64 )hi << 32 ) | lo ;
1219
+ }
1220
+
1221
+ static void guc_update_pm_timestamp (struct intel_guc * guc , ktime_t * now )
1155
1222
{
1156
- u32 gt_stamp_now , gt_stamp_hi ;
1223
+ struct intel_gt * gt = guc_to_gt (guc );
1224
+ u32 gt_stamp_lo , gt_stamp_hi ;
1225
+ u64 gpm_ts ;
1157
1226
1158
1227
lockdep_assert_held (& guc -> timestamp .lock );
1159
1228
1160
1229
gt_stamp_hi = upper_32_bits (guc -> timestamp .gt_stamp );
1161
- gt_stamp_now = intel_uncore_read ( engine -> uncore ,
1162
- RING_TIMESTAMP ( engine -> mmio_base ) );
1230
+ gpm_ts = gpm_timestamp ( gt ) >> guc -> timestamp . shift ;
1231
+ gt_stamp_lo = lower_32_bits ( gpm_ts );
1163
1232
* now = ktime_get ();
1164
1233
1165
- if (gt_stamp_now < lower_32_bits (guc -> timestamp .gt_stamp ))
1234
+ if (gt_stamp_lo < lower_32_bits (guc -> timestamp .gt_stamp ))
1166
1235
gt_stamp_hi ++ ;
1167
1236
1168
- guc -> timestamp .gt_stamp = ((u64 )gt_stamp_hi << 32 ) | gt_stamp_now ;
1237
+ guc -> timestamp .gt_stamp = ((u64 )gt_stamp_hi << 32 ) | gt_stamp_lo ;
1169
1238
}
1170
1239
1171
1240
/*
@@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1208
1277
if (!in_reset && intel_gt_pm_get_if_awake (gt )) {
1209
1278
stats_saved = * stats ;
1210
1279
gt_stamp_saved = guc -> timestamp .gt_stamp ;
1280
+ /*
1281
+ * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1282
+ * start_gt_clk' calculation below for active engines.
1283
+ */
1211
1284
guc_update_engine_gt_clks (engine );
1212
- guc_update_pm_timestamp (guc , engine , now );
1285
+ guc_update_pm_timestamp (guc , now );
1213
1286
intel_gt_pm_put_async (gt );
1214
1287
if (i915_reset_count (gpu_error ) != reset_count ) {
1215
1288
* stats = stats_saved ;
@@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
1241
1314
1242
1315
spin_lock_irqsave (& guc -> timestamp .lock , flags );
1243
1316
1317
+ guc_update_pm_timestamp (guc , & unused );
1244
1318
for_each_engine (engine , gt , id ) {
1245
- guc_update_pm_timestamp (guc , engine , & unused );
1246
1319
guc_update_engine_gt_clks (engine );
1247
1320
engine -> stats .guc .prev_total = 0 ;
1248
1321
}
@@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
1259
1332
ktime_t unused ;
1260
1333
1261
1334
spin_lock_irqsave (& guc -> timestamp .lock , flags );
1262
- for_each_engine (engine , gt , id ) {
1263
- guc_update_pm_timestamp (guc , engine , & unused );
1335
+
1336
+ guc_update_pm_timestamp (guc , & unused );
1337
+ for_each_engine (engine , gt , id )
1264
1338
guc_update_engine_gt_clks (engine );
1265
- }
1339
+
1266
1340
spin_unlock_irqrestore (& guc -> timestamp .lock , flags );
1267
1341
}
1268
1342
@@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
1335
1409
void intel_guc_busyness_unpark (struct intel_gt * gt )
1336
1410
{
1337
1411
struct intel_guc * guc = & gt -> uc .guc ;
1412
+ unsigned long flags ;
1413
+ ktime_t unused ;
1338
1414
1339
1415
if (!guc_submission_initialized (guc ))
1340
1416
return ;
1341
1417
1418
+ spin_lock_irqsave (& guc -> timestamp .lock , flags );
1419
+ guc_update_pm_timestamp (guc , & unused );
1420
+ spin_unlock_irqrestore (& guc -> timestamp .lock , flags );
1342
1421
mod_delayed_work (system_highpri_wq , & guc -> timestamp .work ,
1343
1422
guc -> timestamp .ping_delay );
1344
1423
}
@@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
1783
1862
spin_lock_init (& guc -> timestamp .lock );
1784
1863
INIT_DELAYED_WORK (& guc -> timestamp .work , guc_timestamp_ping );
1785
1864
guc -> timestamp .ping_delay = (POLL_TIME_CLKS / gt -> clock_frequency + 1 ) * HZ ;
1865
+ guc -> timestamp .shift = gpm_timestamp_shift (gt );
1786
1866
1787
1867
return 0 ;
1788
1868
}
0 commit comments