Commit 609b07b

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "A fix for KVM's scheduler clock which (erroneously) was always marked
  unstable, a fix for RT/DL load balancing, plus latency fixes"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface
  sched/core: Fix pick_next_task() for RT,DL
  sched/fair: Make select_idle_cpu() more aggressive

2 parents c3abcab + f94c8d1 commit 609b07b

File tree: 10 files changed, +37 −32 lines changed


arch/x86/kernel/cpu/amd.c

Lines changed: 0 additions & 4 deletions

@@ -556,10 +556,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		if (check_tsc_unstable())
-			clear_sched_clock_stable();
-	} else {
-		clear_sched_clock_stable();
 	}
 
 	/* Bit 12 of 8000_0007 edx is accumulated power mechanism. */

arch/x86/kernel/cpu/centaur.c

Lines changed: 0 additions & 2 deletions

@@ -105,8 +105,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
-
-	clear_sched_clock_stable();
 }
 
 static void init_centaur(struct cpuinfo_x86 *c)

arch/x86/kernel/cpu/common.c

Lines changed: 0 additions & 3 deletions

@@ -88,7 +88,6 @@ static void default_init(struct cpuinfo_x86 *c)
 			strcpy(c->x86_model_id, "386");
 	}
 #endif
-	clear_sched_clock_stable();
 }
 
 static const struct cpu_dev default_cpu = {
@@ -1077,8 +1076,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	 */
 	if (this_cpu->c_init)
 		this_cpu->c_init(c);
-	else
-		clear_sched_clock_stable();
 
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);

arch/x86/kernel/cpu/cyrix.c

Lines changed: 0 additions & 1 deletion

@@ -185,7 +185,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
 		break;
 	}
-	clear_sched_clock_stable();
 }
 
 static void init_cyrix(struct cpuinfo_x86 *c)

arch/x86/kernel/cpu/intel.c

Lines changed: 0 additions & 4 deletions

@@ -162,10 +162,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		if (check_tsc_unstable())
-			clear_sched_clock_stable();
-	} else {
-		clear_sched_clock_stable();
 	}
 
 	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */

arch/x86/kernel/cpu/transmeta.c

Lines changed: 0 additions & 2 deletions

@@ -16,8 +16,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
 		if (xlvl >= 0x80860001)
 			c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
 	}
-
-	clear_sched_clock_stable();
 }
 
 static void init_transmeta(struct cpuinfo_x86 *c)

arch/x86/kernel/tsc.c

Lines changed: 23 additions & 12 deletions

@@ -327,9 +327,16 @@ unsigned long long sched_clock(void)
 {
 	return paravirt_sched_clock();
 }
+
+static inline bool using_native_sched_clock(void)
+{
+	return pv_time_ops.sched_clock == native_sched_clock;
+}
 #else
 unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
+
+static inline bool using_native_sched_clock(void) { return true; }
 #endif
 
 int check_tsc_unstable(void)
@@ -1112,8 +1119,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 {
 	if (tsc_unstable)
 		return;
+
 	tsc_unstable = 1;
-	clear_sched_clock_stable();
+	if (using_native_sched_clock())
+		clear_sched_clock_stable();
 	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
@@ -1135,18 +1144,20 @@ static struct clocksource clocksource_tsc = {
 
 void mark_tsc_unstable(char *reason)
 {
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
+	if (tsc_unstable)
+		return;
+
+	tsc_unstable = 1;
+	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-		disable_sched_clock_irqtime();
-		pr_info("Marking TSC unstable due to %s\n", reason);
-		/* Change only the rating, when not registered */
-		if (clocksource_tsc.mult)
-			clocksource_mark_unstable(&clocksource_tsc);
-		else {
-			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
-			clocksource_tsc.rating = 0;
-		}
+	disable_sched_clock_irqtime();
+	pr_info("Marking TSC unstable due to %s\n", reason);
+	/* Change only the rating, when not registered */
+	if (clocksource_tsc.mult) {
+		clocksource_mark_unstable(&clocksource_tsc);
+	} else {
+		clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
+		clocksource_tsc.rating = 0;
+	}
 	}
 }
 
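
Every cpu/*.c hunk above has the same shape: the per-vendor calls to clear_sched_clock_stable() go away, and unstable-TSC handling is centralized here in tsc.c behind the new using_native_sched_clock() helper. The idea is that marking the TSC unstable should only poison sched_clock() stability when sched_clock() is actually backed by the native TSC; a paravirt clock such as KVM's manages its own stability, which is how the guest clock ended up (erroneously) always marked unstable before this fix. A minimal standalone sketch of that gating pattern, using illustrative stand-ins rather than real kernel APIs:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's clock hooks (not kernel API). */
static unsigned long long native_sched_clock(void) { return 0; }
static unsigned long long kvm_sched_clock(void) { return 0; }

/* Plays the role of the pv_time_ops.sched_clock function pointer. */
static unsigned long long (*sched_clock_fn)(void) = kvm_sched_clock;

static bool sched_clock_stable_flag = true;

static bool using_native_sched_clock(void)
{
	return sched_clock_fn == native_sched_clock;
}

static void mark_tsc_unstable_sketch(const char *reason)
{
	/* Only clear stability when the TSC actually backs sched_clock(). */
	if (using_native_sched_clock())
		sched_clock_stable_flag = false;
	printf("Marking TSC unstable due to %s\n", reason);
}

int main(void)
{
	mark_tsc_unstable_sketch("clocksource watchdog");
	/* With the kvm clock installed, stability survives the TSC event. */
	printf("sched_clock stable: %d\n", sched_clock_stable_flag);
	return 0;
}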

kernel/sched/core.c

Lines changed: 8 additions & 3 deletions

@@ -3287,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	struct task_struct *p;
 
 	/*
-	 * Optimization: we know that if all tasks are in
-	 * the fair class we can call that function directly:
+	 * Optimization: we know that if all tasks are in the fair class we can
+	 * call that function directly, but only if the @prev task wasn't of a
+	 * higher scheduling class, because otherwise those loose the
+	 * opportunity to pull in more work from other CPUs.
 	 */
-	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
+	if (likely((prev->sched_class == &idle_sched_class ||
+		    prev->sched_class == &fair_sched_class) &&
+		   rq->nr_running == rq->cfs.h_nr_running)) {
+
 		p = fair_sched_class.pick_next_task(rq, prev, rf);
 		if (unlikely(p == RETRY_TASK))
 			goto again;
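
This hunk is the RT/DL load-balancing fix from the pull summary: the fair-class fast path is now taken only when @prev itself came from the fair or idle class, because (per the new comment) a higher-class @prev would otherwise lose its opportunity to pull in more work from other CPUs during the full class-by-class pick. A toy model of just the guard condition, with made-up types standing in for the kernel's struct sched_class pointers:

#include <stdbool.h>
#include <stdio.h>

/* Made-up class tags; the kernel compares sched_class pointers instead. */
enum sched_class_id { IDLE_CLASS, FAIR_CLASS, RT_CLASS, DL_CLASS };

struct rq_model {
	int nr_running;       /* all runnable tasks on this runqueue */
	int cfs_h_nr_running; /* runnable tasks in the fair class    */
};

/*
 * Mirrors the fixed condition: skip the full class-by-class pick only
 * when the outgoing task cannot have left RT/DL pull work behind.
 */
static bool fair_fast_path_ok(enum sched_class_id prev_class,
			      const struct rq_model *rq)
{
	return (prev_class == IDLE_CLASS || prev_class == FAIR_CLASS) &&
	       rq->nr_running == rq->cfs_h_nr_running;
}

int main(void)
{
	struct rq_model rq = { .nr_running = 2, .cfs_h_nr_running = 2 };

	/* Only fair tasks remain, yet an RT @prev still forces the slow
	 * path so the RT class can try to pull work from other CPUs. */
	printf("prev=RT:   fast path? %d\n", fair_fast_path_ok(RT_CLASS, &rq));
	printf("prev=FAIR: fast path? %d\n", fair_fast_path_ok(FAIR_CLASS, &rq));
	return 0;
}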

kernel/sched/fair.c

Lines changed: 1 addition & 1 deletion

@@ -5799,7 +5799,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 	 * Due to large variance we need a large fuzz factor; hackbench in
 	 * particularly is sensitive here.
 	 */
-	if ((avg_idle / 512) < avg_cost)
+	if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
 		return -1;
 
 	time = local_clock();
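
Worked numbers show why the old cutoff could be too timid, and why gating it behind a default-off feature makes select_idle_cpu() "more aggressive" (the values below are invented for illustration, not measurements):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical averages: this CPU idles ~500us between tasks and
	 * an LLC scan costs ~1us. Units are nanoseconds, as in the kernel. */
	unsigned long long avg_idle = 500000;
	unsigned long long avg_cost = 1000;
	bool sis_avg_cpu = false; /* the new feature's default */

	/* Old cutoff: 500000/512 = 976 < 1000, so the scan was skipped
	 * even though idle time dwarfs the scan cost. */
	bool old_skip = (avg_idle / 512) < avg_cost;

	/* New behaviour: the cutoff applies only when SIS_AVG_CPU is set. */
	bool new_skip = sis_avg_cpu && (avg_idle / 512) < avg_cost;

	printf("old: skip idle-CPU scan? %d\n", old_skip); /* 1 */
	printf("new: skip idle-CPU scan? %d\n", new_skip); /* 0: scan runs */
	return 0;
}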

kernel/sched/features.h

Lines changed: 5 additions & 0 deletions

@@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 
+/*
+ * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+ */
+SCHED_FEAT(SIS_AVG_CPU, false)
+
 #ifdef HAVE_RT_PUSH_IPI
 /*
  * In order to avoid a thundering herd attack of CPUs that are
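
On kernels built with CONFIG_SCHED_DEBUG, SCHED_FEAT() toggles like this one can be flipped at runtime by writing the feature name (or its NO_-prefixed form) to /sys/kernel/debug/sched_features, so the old average-idle cutoff remains reachable as SIS_AVG_CPU without a rebuild.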
