Skip to content

Commit 3adee77

Browse files
Brian Gerst authored and KAGA-KOKO committed
x86/smpboot: Remove initial_stack on 64-bit
In order to facilitate parallel startup, start to eliminate some of the global variables passing information to CPUs in the startup path. However, start by introducing one more: smpboot_control. For now this merely holds the CPU# of the CPU which is coming up. Each CPU can then find its own per-cpu data, and everything else it needs can be found from there, allowing the other global variables to be removed. First to be removed is initial_stack. Each CPU can load %rsp from its current_task->thread.sp instead. That is already set up with the correct idle thread for APs. Set up the .sp field in INIT_THREAD on x86 so that the BSP also finds a suitable stack pointer in the static per-cpu data when coming up on first boot. On resume from S3, the CPU needs a temporary stack because its idle task is already active. Instead of setting initial_stack, the sleep code can simply set its own current->thread.sp to point to the temporary stack. Nobody else cares about ->thread.sp for a thread which is currently on a CPU, because the true value is actually in the %rsp register. Which is restored with the rest of the CPU context in do_suspend_lowlevel(). Signed-off-by: Brian Gerst <[email protected]> Signed-off-by: David Woodhouse <[email protected]> Signed-off-by: Usama Arif <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Usama Arif <[email protected]> Tested-by: Guilherme G. Piccoli <[email protected]> Reviewed-by: David Woodhouse <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent cefad86 commit 3adee77

File tree

7 files changed

+63
-21
lines changed

7 files changed

+63
-21
lines changed

arch/x86/include/asm/processor.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,11 @@ static inline void spin_lock_prefetch(const void *x)
647647
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
648648

649649
#else
650-
#define INIT_THREAD { }
650+
extern unsigned long __end_init_task[];
651+
652+
#define INIT_THREAD { \
653+
.sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
654+
}
651655

652656
extern unsigned long KSTK_ESP(struct task_struct *task);
653657

arch/x86/include/asm/smp.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,5 +199,8 @@ extern void nmi_selftest(void);
199199
#define nmi_selftest() do { } while (0)
200200
#endif
201201

202-
#endif /* __ASSEMBLY__ */
202+
extern unsigned int smpboot_control;
203+
204+
#endif /* !__ASSEMBLY__ */
205+
203206
#endif /* _ASM_X86_SMP_H */

arch/x86/kernel/acpi/sleep.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,29 @@ int x86_acpi_suspend_lowlevel(void)
111111
saved_magic = 0x12345678;
112112
#else /* CONFIG_64BIT */
113113
#ifdef CONFIG_SMP
114-
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
114+
/*
115+
* As each CPU starts up, it will find its own stack pointer
116+
* from its current_task->thread.sp. Typically that will be
117+
* the idle thread for a newly-started AP, or even the boot
118+
* CPU which will find it set to &init_task in the static
119+
* per-cpu data.
120+
*
121+
* Make the resuming CPU use the temporary stack at startup
122+
* by setting current->thread.sp to point to that. The true
123+
* %rsp will be restored with the rest of the CPU context,
124+
* by do_suspend_lowlevel(). And unwinders don't care about
125+
* the abuse of ->thread.sp because it's a dead variable
126+
* while the thread is running on the CPU anyway; the true
127+
* value is in the actual %rsp register.
128+
*/
129+
current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
115130
early_gdt_descr.address =
116131
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
117132
initial_gs = per_cpu_offset(smp_processor_id());
133+
smpboot_control = smp_processor_id();
118134
#endif
119135
initial_code = (unsigned long)wakeup_long64;
120-
saved_magic = 0x123456789abcdef0L;
136+
saved_magic = 0x123456789abcdef0L;
121137
#endif /* CONFIG_64BIT */
122138

123139
/*

arch/x86/kernel/asm-offsets.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ static void __used common(void)
115115
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
116116
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
117117
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
118+
OFFSET(X86_current_task, pcpu_hot, current_task);
118119
#ifdef CONFIG_CALL_DEPTH_TRACKING
119120
OFFSET(X86_call_depth, pcpu_hot, call_depth);
120121
#endif

arch/x86/kernel/head_64.S

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ SYM_CODE_START_NOALIGN(startup_64)
6161
* tables and then reload them.
6262
*/
6363

64-
/* Set up the stack for verify_cpu(), similar to initial_stack below */
65-
leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
64+
/* Set up the stack for verify_cpu() */
65+
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
6666

6767
leaq _text(%rip), %rdi
6868

@@ -241,6 +241,24 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
241241
UNWIND_HINT_EMPTY
242242
ANNOTATE_NOENDBR // above
243243

244+
#ifdef CONFIG_SMP
245+
movl smpboot_control(%rip), %ecx
246+
247+
/* Get the per cpu offset for the given CPU# which is in ECX */
248+
movq __per_cpu_offset(,%rcx,8), %rdx
249+
#else
250+
xorl %edx, %edx /* zero-extended to clear all of RDX */
251+
#endif /* CONFIG_SMP */
252+
253+
/*
254+
* Setup a boot time stack - Any secondary CPU will have lost its stack
255+
* by now because the cr3-switch above unmaps the real-mode stack.
256+
*
257+
* RDX contains the per-cpu offset
258+
*/
259+
movq pcpu_hot + X86_current_task(%rdx), %rax
260+
movq TASK_threadsp(%rax), %rsp
261+
244262
/*
245263
* We must switch to a new descriptor in kernel space for the GDT
246264
* because soon the kernel won't have access anymore to the userspace
@@ -275,12 +293,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
275293
movl initial_gs+4(%rip),%edx
276294
wrmsr
277295

278-
/*
279-
* Setup a boot time stack - Any secondary CPU will have lost its stack
280-
* by now because the cr3-switch above unmaps the real-mode stack
281-
*/
282-
movq initial_stack(%rip), %rsp
283-
284296
/* Setup and Load IDT */
285297
pushq %rsi
286298
call early_setup_idt
@@ -372,7 +384,11 @@ SYM_CODE_END(secondary_startup_64)
372384
SYM_CODE_START(start_cpu0)
373385
ANNOTATE_NOENDBR
374386
UNWIND_HINT_EMPTY
375-
movq initial_stack(%rip), %rsp
387+
388+
/* Find the idle task stack */
389+
movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
390+
movq TASK_threadsp(%rcx), %rsp
391+
376392
jmp .Ljump_to_C_code
377393
SYM_CODE_END(start_cpu0)
378394
#endif
@@ -420,12 +436,6 @@ SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
420436
#ifdef CONFIG_AMD_MEM_ENCRYPT
421437
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
422438
#endif
423-
424-
/*
425-
* The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
426-
* reliably detect the end of the stack.
427-
*/
428-
SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
429439
__FINITDATA
430440

431441
__INIT
@@ -660,6 +670,9 @@ SYM_DATA_END(level1_fixmap_pgt)
660670
SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
661671
SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
662672

673+
.align 16
674+
SYM_DATA(smpboot_control, .long 0)
675+
663676
.align 16
664677
/* This must match the first entry in level2_kernel_pgt */
665678
SYM_DATA(phys_base, .quad 0x0)

arch/x86/kernel/smpboot.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,12 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
10881088
idle->thread.sp = (unsigned long)task_pt_regs(idle);
10891089
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
10901090
initial_code = (unsigned long)start_secondary;
1091-
initial_stack = idle->thread.sp;
1091+
1092+
if (IS_ENABLED(CONFIG_X86_32)) {
1093+
initial_stack = idle->thread.sp;
1094+
} else {
1095+
smpboot_control = cpu;
1096+
}
10921097

10931098
/* Enable the espfix hack for this CPU */
10941099
init_espfix_ap(cpu);

arch/x86/xen/xen-head.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
4949
ANNOTATE_NOENDBR
5050
cld
5151

52-
mov initial_stack(%rip), %rsp
52+
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
5353

5454
/* Set up %gs.
5555
*

0 commit comments

Comments
 (0)