47
47
* length). In turn, the "intercepts" metric reflects the relative frequency of
48
48
* situations in which the measured idle duration is so much shorter than the
49
49
* sleep length that the bin it falls into corresponds to an idle state
50
- * shallower than the one whose bin is fallen into by the sleep length.
50
+ * shallower than the one whose bin is fallen into by the sleep length (these
51
+ * situations are referred to as "intercepts" below).
52
+ *
53
+ * In addition to the metrics described above, the governor counts recent
54
+ * intercepts (that is, intercepts that have occurred during the last NR_RECENT
55
+ * invocations of it for the given CPU) for each bin.
51
56
*
52
57
* In order to select an idle state for a CPU, the governor takes the following
53
58
* steps (modulo the possible latency constraint that must be taken into account
54
59
* too):
55
60
*
56
61
* 1. Find the deepest CPU idle state whose target residency does not exceed
57
- * the current sleep length (the candidate idle state) and compute two sums
58
- * as follows:
62
+ * the current sleep length (the candidate idle state) and compute 3 sums as
63
+ * follows:
59
64
*
60
65
* - The sum of the "hits" and "intercepts" metrics for the candidate state
61
66
* and all of the deeper idle states (it represents the cases in which the
67
72
* idle long enough to avoid being intercepted if the sleep length had been
68
73
* equal to the current one).
69
74
*
70
- * 2. If the second sum is greater than the first one, look for an alternative
71
- * idle state to select.
75
+ * - The sum of the numbers of recent intercepts for all of the idle states
76
+ * shallower than the candidate one.
77
+ *
78
+ * 2. If the second sum is greater than the first one or the third sum is
79
+ * greater than NR_RECENT / 2, the CPU is likely to wake up early, so look
80
+ * for an alternative idle state to select.
72
81
*
73
82
* - Traverse the idle states shallower than the candidate one in the
74
83
* descending order.
75
84
*
76
- * - For each of them compute the sum of the "intercepts" metrics over all of
77
- * the idle states between it and the candidate one (including the former
78
- * and excluding the latter).
85
+ * - For each of them compute the sum of the "intercepts" metrics and the sum
86
+ * of the numbers of recent intercepts over all of the idle states between
87
+ * it and the candidate one (including the former and excluding the
88
+ * latter).
79
89
*
80
- * - If that sum is greater than a half of the second sum computed in step 1
81
- * (which means that the target residency of the state in question had not
82
- * exceeded the idle duration in over a half of the relevant cases), select
83
- * the given idle state instead of the candidate one.
90
+ * - If each of these sums that needs to be taken into account (because the
91
+ * check related to it has indicated that the CPU is likely to wake up
92
+ * early) is greater than a half of the corresponding sum computed in step
93
+ * 1 (which means that the target residency of the state in question had
94
+ * not exceeded the idle duration in over a half of the relevant cases),
95
+ * select the given idle state instead of the candidate one.
84
96
*
85
- * 3. If the majority of the most recent idle duration values are below the
86
- * current anticipated idle duration, use those values to compute the new
87
- * expected idle duration and find an idle state matching it (which has to
88
- * be shallower than the current candidate one).
97
+ * 3. By default, select the candidate state.
89
98
*/
90
99
91
100
#include <linux/cpuidle.h>
103
112
104
113
/*
105
114
* Number of the most recent idle duration values to take into consideration for
106
- * the detection of wakeup patterns.
115
+ * the detection of recent early wakeup patterns.
107
116
*/
108
- #define INTERVALS 8
117
+ #define NR_RECENT 9
109
118
110
119
/**
111
120
* struct teo_bin - Metrics used by the TEO cpuidle governor.
112
121
* @intercepts: The "intercepts" metric.
113
122
* @hits: The "hits" metric.
123
+ * @recent: The number of recent "intercepts".
114
124
*/
115
125
struct teo_bin {
116
126
unsigned int intercepts ;
117
127
unsigned int hits ;
128
+ unsigned int recent ;
118
129
};
119
130
120
131
/**
@@ -123,16 +134,16 @@ struct teo_bin {
123
134
* @sleep_length_ns: Time till the closest timer event (at the selection time).
124
135
* @state_bins: Idle state data bins for this CPU.
125
136
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
126
- * @interval_idx : Index of the most recent saved idle interval .
127
- * @intervals: Saved idle duration values .
137
+ * @next_recent_idx : Index of the next @recent_idx entry to update .
138
+ * @recent_idx: Indices of bins corresponding to recent "intercepts" .
128
139
*/
129
140
struct teo_cpu {
130
141
s64 time_span_ns ;
131
142
s64 sleep_length_ns ;
132
143
struct teo_bin state_bins [CPUIDLE_STATE_MAX ];
133
144
unsigned int total ;
134
- int interval_idx ;
135
- u64 intervals [ INTERVALS ];
145
+ int next_recent_idx ;
146
+ int recent_idx [ NR_RECENT ];
136
147
};
137
148
138
149
static DEFINE_PER_CPU (struct teo_cpu , teo_cpus ) ;
@@ -201,26 +212,29 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
201
212
}
202
213
}
203
214
215
+ i = cpu_data -> next_recent_idx ++ ;
216
+ if (cpu_data -> next_recent_idx >= NR_RECENT )
217
+ cpu_data -> next_recent_idx = 0 ;
218
+
219
+ if (cpu_data -> recent_idx [i ] >= 0 )
220
+ cpu_data -> state_bins [cpu_data -> recent_idx [i ]].recent -- ;
221
+
204
222
/*
205
223
* If the measured idle duration falls into the same bin as the sleep
206
224
* length, this is a "hit", so update the "hits" metric for that bin.
207
225
* Otherwise, update the "intercepts" metric for the bin fallen into by
208
226
* the measured idle duration.
209
227
*/
210
- if (idx_timer == idx_duration )
228
+ if (idx_timer == idx_duration ) {
211
229
cpu_data -> state_bins [idx_timer ].hits += PULSE ;
212
- else
230
+ cpu_data -> recent_idx [i ] = -1 ;
231
+ } else {
213
232
cpu_data -> state_bins [idx_duration ].intercepts += PULSE ;
233
+ cpu_data -> state_bins [idx_duration ].recent ++ ;
234
+ cpu_data -> recent_idx [i ] = idx_duration ;
235
+ }
214
236
215
237
cpu_data -> total += PULSE ;
216
-
217
- /*
218
- * Save idle duration values corresponding to non-timer wakeups for
219
- * pattern detection.
220
- */
221
- cpu_data -> intervals [cpu_data -> interval_idx ++ ] = measured_ns ;
222
- if (cpu_data -> interval_idx >= INTERVALS )
223
- cpu_data -> interval_idx = 0 ;
224
238
}
225
239
226
240
static bool teo_time_ok (u64 interval_ns )
@@ -271,10 +285,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
271
285
s64 latency_req = cpuidle_governor_latency_req (dev -> cpu );
272
286
unsigned int idx_intercept_sum = 0 ;
273
287
unsigned int intercept_sum = 0 ;
288
+ unsigned int idx_recent_sum = 0 ;
289
+ unsigned int recent_sum = 0 ;
274
290
unsigned int idx_hit_sum = 0 ;
275
291
unsigned int hit_sum = 0 ;
276
292
int constraint_idx = 0 ;
277
293
int idx0 = 0 , idx = -1 ;
294
+ bool alt_intercepts , alt_recent ;
278
295
ktime_t delta_tick ;
279
296
s64 duration_ns ;
280
297
int i ;
@@ -317,6 +334,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
317
334
*/
318
335
intercept_sum += prev_bin -> intercepts ;
319
336
hit_sum += prev_bin -> hits ;
337
+ recent_sum += prev_bin -> recent ;
320
338
321
339
if (dev -> states_usage [i ].disable )
322
340
continue ;
@@ -336,6 +354,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
336
354
337
355
idx_intercept_sum = intercept_sum ;
338
356
idx_hit_sum = hit_sum ;
357
+ idx_recent_sum = recent_sum ;
339
358
}
340
359
341
360
/* Avoid unnecessary overhead. */
@@ -350,27 +369,36 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
350
369
* If the sum of the intercepts metric for all of the idle states
351
370
* shallower than the current candidate one (idx) is greater than the
352
371
* sum of the intercepts and hits metrics for the candidate state and
353
- * all of the deeper states, the CPU is likely to wake up early, so find
354
- * an alternative idle state to select.
372
+ * all of the deeper states, or the sum of the numbers of recent
373
+ * intercepts over all of the states shallower than the candidate one
374
+ * is greater than a half of the number of recent events taken into
375
+ * account, the CPU is likely to wake up early, so find an alternative
376
+ * idle state to select.
355
377
*/
356
- if (2 * idx_intercept_sum > cpu_data -> total - idx_hit_sum ) {
378
+ alt_intercepts = 2 * idx_intercept_sum > cpu_data -> total - idx_hit_sum ;
379
+ alt_recent = idx_recent_sum > NR_RECENT / 2 ;
380
+ if (alt_recent || alt_intercepts ) {
357
381
s64 last_enabled_span_ns = duration_ns ;
358
382
int last_enabled_idx = idx ;
359
383
360
384
/*
361
385
* Look for the deepest idle state whose target residency had
362
386
* not exceeded the idle duration in over a half of the relevant
363
- * cases in the past.
387
+ * cases (both with respect to intercepts overall and with
388
+ * respect to the recent intercepts only) in the past.
364
389
*
365
390
* Take the possible latency constraint and duration limitation
366
391
* present if the tick has been stopped already into account.
367
392
*/
368
393
intercept_sum = 0 ;
394
+ recent_sum = 0 ;
369
395
370
396
for (i = idx - 1 ; i >= idx0 ; i -- ) {
397
+ struct teo_bin * bin = & cpu_data -> state_bins [i ];
371
398
s64 span_ns ;
372
399
373
- intercept_sum += cpu_data -> state_bins [i ].intercepts ;
400
+ intercept_sum += bin -> intercepts ;
401
+ recent_sum += bin -> recent ;
374
402
375
403
if (dev -> states_usage [i ].disable )
376
404
continue ;
@@ -386,7 +414,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
386
414
break ;
387
415
}
388
416
389
- if (2 * intercept_sum > idx_intercept_sum ) {
417
+ if ((!alt_recent || 2 * recent_sum > idx_recent_sum ) &&
418
+ (!alt_intercepts ||
419
+ 2 * intercept_sum > idx_intercept_sum )) {
390
420
idx = i ;
391
421
duration_ns = span_ns ;
392
422
break ;
@@ -404,49 +434,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
404
434
if (idx > constraint_idx )
405
435
idx = constraint_idx ;
406
436
407
- if (idx > idx0 ) {
408
- unsigned int count = 0 ;
409
- u64 sum = 0 ;
410
-
411
- /*
412
- * The target residencies of at least two different enabled idle
413
- * states are less than or equal to the current expected idle
414
- * duration. Try to refine the selection using the most recent
415
- * measured idle duration values.
416
- *
417
- * Count and sum the most recent idle duration values less than
418
- * the current expected idle duration value.
419
- */
420
- for (i = 0 ; i < INTERVALS ; i ++ ) {
421
- u64 val = cpu_data -> intervals [i ];
422
-
423
- if (val >= duration_ns )
424
- continue ;
425
-
426
- count ++ ;
427
- sum += val ;
428
- }
429
-
430
- /*
431
- * Give up unless the majority of the most recent idle duration
432
- * values are in the interesting range.
433
- */
434
- if (count > INTERVALS / 2 ) {
435
- u64 avg_ns = div64_u64 (sum , count );
436
-
437
- /*
438
- * Avoid spending too much time in an idle state that
439
- * would be too shallow.
440
- */
441
- if (teo_time_ok (avg_ns )) {
442
- duration_ns = avg_ns ;
443
- if (drv -> states [idx ].target_residency_ns > avg_ns )
444
- idx = teo_find_shallower_state (drv , dev ,
445
- idx , avg_ns );
446
- }
447
- }
448
- }
449
-
450
437
end :
451
438
/*
452
439
* Don't stop the tick if the selected state is a polling one or if the
@@ -507,8 +494,8 @@ static int teo_enable_device(struct cpuidle_driver *drv,
507
494
508
495
memset (cpu_data , 0 , sizeof (* cpu_data ));
509
496
510
- for (i = 0 ; i < INTERVALS ; i ++ )
511
- cpu_data -> intervals [i ] = U64_MAX ;
497
+ for (i = 0 ; i < NR_RECENT ; i ++ )
498
+ cpu_data -> recent_idx [i ] = -1 ;
512
499
513
500
return 0 ;
514
501
}
0 commit comments