Skip to content

Commit 9998a98

Browse files
ricardon and Ingo Molnar
authored and committed
x86/cpu: Relocate sync_core() to sync_core.h
Having sync_core() in processor.h is problematic since it is not possible to check for hardware capabilities via the *cpu_has() family of macros. The latter needs the definitions in processor.h. It also looks more intuitive to relocate the function to sync_core.h. This changeset does not make changes in functionality. Signed-off-by: Ricardo Neri <[email protected]> Signed-off-by: Ingo Molnar <[email protected]> Reviewed-by: Tony Luck <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 85b23fb commit 9998a98

File tree

7 files changed

+69
-64
lines changed

7 files changed

+69
-64
lines changed

arch/x86/include/asm/processor.h

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -678,70 +678,6 @@ static inline unsigned int cpuid_edx(unsigned int op)
678678
return edx;
679679
}
680680

681-
/*
682-
* This function forces the icache and prefetched instruction stream to
683-
* catch up with reality in two very specific cases:
684-
*
685-
* a) Text was modified using one virtual address and is about to be executed
686-
* from the same physical page at a different virtual address.
687-
*
688-
* b) Text was modified on a different CPU, may subsequently be
689-
* executed on this CPU, and you want to make sure the new version
690-
* gets executed. This generally means you're calling this in a IPI.
691-
*
692-
* If you're calling this for a different reason, you're probably doing
693-
* it wrong.
694-
*/
695-
static inline void sync_core(void)
696-
{
697-
/*
698-
* There are quite a few ways to do this. IRET-to-self is nice
699-
* because it works on every CPU, at any CPL (so it's compatible
700-
* with paravirtualization), and it never exits to a hypervisor.
701-
* The only down sides are that it's a bit slow (it seems to be
702-
* a bit more than 2x slower than the fastest options) and that
703-
* it unmasks NMIs. The "push %cs" is needed because, in
704-
* paravirtual environments, __KERNEL_CS may not be a valid CS
705-
* value when we do IRET directly.
706-
*
707-
* In case NMI unmasking or performance ever becomes a problem,
708-
* the next best option appears to be MOV-to-CR2 and an
709-
* unconditional jump. That sequence also works on all CPUs,
710-
* but it will fault at CPL3 (i.e. Xen PV).
711-
*
712-
* CPUID is the conventional way, but it's nasty: it doesn't
713-
* exist on some 486-like CPUs, and it usually exits to a
714-
* hypervisor.
715-
*
716-
* Like all of Linux's memory ordering operations, this is a
717-
* compiler barrier as well.
718-
*/
719-
#ifdef CONFIG_X86_32
720-
asm volatile (
721-
"pushfl\n\t"
722-
"pushl %%cs\n\t"
723-
"pushl $1f\n\t"
724-
"iret\n\t"
725-
"1:"
726-
: ASM_CALL_CONSTRAINT : : "memory");
727-
#else
728-
unsigned int tmp;
729-
730-
asm volatile (
731-
"mov %%ss, %0\n\t"
732-
"pushq %q0\n\t"
733-
"pushq %%rsp\n\t"
734-
"addq $8, (%%rsp)\n\t"
735-
"pushfq\n\t"
736-
"mov %%cs, %0\n\t"
737-
"pushq %q0\n\t"
738-
"pushq $1f\n\t"
739-
"iretq\n\t"
740-
"1:"
741-
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
742-
#endif
743-
}
744-
745681
extern void select_idle_routine(const struct cpuinfo_x86 *c);
746682
extern void amd_e400_c1e_apic_setup(void);
747683

arch/x86/include/asm/sync_core.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,70 @@
66
#include <asm/processor.h>
77
#include <asm/cpufeature.h>
88

9+
/*
10+
* This function forces the icache and prefetched instruction stream to
11+
* catch up with reality in two very specific cases:
12+
*
13+
* a) Text was modified using one virtual address and is about to be executed
14+
* from the same physical page at a different virtual address.
15+
*
16+
* b) Text was modified on a different CPU, may subsequently be
17+
* executed on this CPU, and you want to make sure the new version
18+
* gets executed. This generally means you're calling this in a IPI.
19+
*
20+
* If you're calling this for a different reason, you're probably doing
21+
* it wrong.
22+
*/
23+
static inline void sync_core(void)
24+
{
25+
/*
26+
* There are quite a few ways to do this. IRET-to-self is nice
27+
* because it works on every CPU, at any CPL (so it's compatible
28+
* with paravirtualization), and it never exits to a hypervisor.
29+
* The only down sides are that it's a bit slow (it seems to be
30+
* a bit more than 2x slower than the fastest options) and that
31+
* it unmasks NMIs. The "push %cs" is needed because, in
32+
* paravirtual environments, __KERNEL_CS may not be a valid CS
33+
* value when we do IRET directly.
34+
*
35+
* In case NMI unmasking or performance ever becomes a problem,
36+
* the next best option appears to be MOV-to-CR2 and an
37+
* unconditional jump. That sequence also works on all CPUs,
38+
* but it will fault at CPL3 (i.e. Xen PV).
39+
*
40+
* CPUID is the conventional way, but it's nasty: it doesn't
41+
* exist on some 486-like CPUs, and it usually exits to a
42+
* hypervisor.
43+
*
44+
* Like all of Linux's memory ordering operations, this is a
45+
* compiler barrier as well.
46+
*/
47+
#ifdef CONFIG_X86_32
48+
asm volatile (
49+
"pushfl\n\t"
50+
"pushl %%cs\n\t"
51+
"pushl $1f\n\t"
52+
"iret\n\t"
53+
"1:"
54+
: ASM_CALL_CONSTRAINT : : "memory");
55+
#else
56+
unsigned int tmp;
57+
58+
asm volatile (
59+
"mov %%ss, %0\n\t"
60+
"pushq %q0\n\t"
61+
"pushq %%rsp\n\t"
62+
"addq $8, (%%rsp)\n\t"
63+
"pushfq\n\t"
64+
"mov %%cs, %0\n\t"
65+
"pushq %q0\n\t"
66+
"pushq $1f\n\t"
67+
"iretq\n\t"
68+
"1:"
69+
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
70+
#endif
71+
}
72+
973
/*
1074
* Ensure that a core serializing instruction is issued before returning
1175
* to user-mode. x86 implements return to user-space through sysexit,

arch/x86/kernel/alternative.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/kprobes.h>
1616
#include <linux/mmu_context.h>
1717
#include <linux/bsearch.h>
18+
#include <linux/sync_core.h>
1819
#include <asm/text-patching.h>
1920
#include <asm/alternative.h>
2021
#include <asm/sections.h>

arch/x86/kernel/cpu/mce/core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <linux/export.h>
4343
#include <linux/jump_label.h>
4444
#include <linux/set_memory.h>
45+
#include <linux/sync_core.h>
4546
#include <linux/task_work.h>
4647
#include <linux/hardirq.h>
4748

drivers/misc/sgi-gru/grufault.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <linux/io.h>
2121
#include <linux/uaccess.h>
2222
#include <linux/security.h>
23+
#include <linux/sync_core.h>
2324
#include <linux/prefetch.h>
2425
#include "gru.h"
2526
#include "grutables.h"

drivers/misc/sgi-gru/gruhandles.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
1717
#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
1818
#else
19+
#include <linux/sync_core.h>
1920
#include <asm/tsc.h>
2021
#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
2122
#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)

drivers/misc/sgi-gru/grukservices.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/miscdevice.h>
1717
#include <linux/proc_fs.h>
1818
#include <linux/interrupt.h>
19+
#include <linux/sync_core.h>
1920
#include <linux/uaccess.h>
2021
#include <linux/delay.h>
2122
#include <linux/export.h>

0 commit comments

Comments (0)