Skip to content

Commit 0b085e6

Browse files
committed
x86/entry: Consolidate 32/64 bit syscall entry
The 64-bit and 32-bit entry code have the same open-coded syscall entry handling after the bit-width-specific bits. Move it to a helper function and share the code.

Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 8d5ea35 commit 0b085e6

File tree

1 file changed

+41
-52
lines changed

1 file changed

+41
-52
lines changed

arch/x86/entry/common.c

Lines changed: 41 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,7 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
366366
exit_to_user_mode();
367367
}
368368

369-
#ifdef CONFIG_X86_64
370-
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
369+
static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
371370
{
372371
struct thread_info *ti;
373372

@@ -379,6 +378,16 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
379378
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
380379
nr = syscall_trace_enter(regs);
381380

381+
instrumentation_end();
382+
return nr;
383+
}
384+
385+
#ifdef CONFIG_X86_64
386+
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
387+
{
388+
nr = syscall_enter(regs, nr);
389+
390+
instrumentation_begin();
382391
if (likely(nr < NR_syscalls)) {
383392
nr = array_index_nospec(nr, NR_syscalls);
384393
regs->ax = sys_call_table[nr](regs);
@@ -390,64 +399,53 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
390399
regs->ax = x32_sys_call_table[nr](regs);
391400
#endif
392401
}
393-
__syscall_return_slowpath(regs);
394-
395402
instrumentation_end();
396-
exit_to_user_mode();
403+
syscall_return_slowpath(regs);
397404
}
398405
#endif
399406

400407
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
408+
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
409+
{
410+
if (IS_ENABLED(CONFIG_IA32_EMULATION))
411+
current_thread_info()->status |= TS_COMPAT;
412+
/*
413+
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
414+
* orig_ax, the unsigned int return value truncates it. This may
415+
* or may not be necessary, but it matches the old asm behavior.
416+
*/
417+
return syscall_enter(regs, (unsigned int)regs->orig_ax);
418+
}
419+
401420
/*
402-
* Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
403-
* all entry and exit work and returns with IRQs off. This function is
404-
* extremely hot in workloads that use it, and it's usually called from
405-
* do_fast_syscall_32, so forcibly inline it to improve performance.
421+
* Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
406422
*/
407-
static void do_syscall_32_irqs_on(struct pt_regs *regs)
423+
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
424+
unsigned int nr)
408425
{
409-
struct thread_info *ti = current_thread_info();
410-
unsigned int nr = (unsigned int)regs->orig_ax;
411-
412-
#ifdef CONFIG_IA32_EMULATION
413-
ti->status |= TS_COMPAT;
414-
#endif
415-
416-
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
417-
/*
418-
* Subtlety here: if ptrace pokes something larger than
419-
* 2^32-1 into orig_ax, this truncates it. This may or
420-
* may not be necessary, but it matches the old asm
421-
* behavior.
422-
*/
423-
nr = syscall_trace_enter(regs);
424-
}
425-
426426
if (likely(nr < IA32_NR_syscalls)) {
427+
instrumentation_begin();
427428
nr = array_index_nospec(nr, IA32_NR_syscalls);
428429
regs->ax = ia32_sys_call_table[nr](regs);
430+
instrumentation_end();
429431
}
430-
431-
__syscall_return_slowpath(regs);
432432
}
433433

434434
/* Handles int $0x80 */
435435
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
436436
{
437-
enter_from_user_mode(regs);
438-
instrumentation_begin();
437+
unsigned int nr = syscall_32_enter(regs);
439438

440-
local_irq_enable();
441-
do_syscall_32_irqs_on(regs);
442-
443-
instrumentation_end();
444-
exit_to_user_mode();
439+
do_syscall_32_irqs_on(regs, nr);
440+
syscall_return_slowpath(regs);
445441
}
446442

447-
static bool __do_fast_syscall_32(struct pt_regs *regs)
443+
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
448444
{
445+
unsigned int nr = syscall_32_enter(regs);
449446
int res;
450447

448+
instrumentation_begin();
451449
/* Fetch EBP from where the vDSO stashed it. */
452450
if (IS_ENABLED(CONFIG_X86_64)) {
453451
/*
@@ -460,17 +458,18 @@ static bool __do_fast_syscall_32(struct pt_regs *regs)
460458
res = get_user(*(u32 *)&regs->bp,
461459
(u32 __user __force *)(unsigned long)(u32)regs->sp);
462460
}
461+
instrumentation_end();
463462

464463
if (res) {
465464
/* User code screwed up. */
466465
regs->ax = -EFAULT;
467-
local_irq_disable();
468-
__prepare_exit_to_usermode(regs);
466+
syscall_return_slowpath(regs);
469467
return false;
470468
}
471469

472470
/* Now this is just like a normal syscall. */
473-
do_syscall_32_irqs_on(regs);
471+
do_syscall_32_irqs_on(regs, nr);
472+
syscall_return_slowpath(regs);
474473
return true;
475474
}
476475

@@ -483,7 +482,6 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
483482
*/
484483
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
485484
vdso_image_32.sym_int80_landing_pad;
486-
bool success;
487485

488486
/*
489487
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -492,17 +490,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
492490
*/
493491
regs->ip = landing_pad;
494492

495-
enter_from_user_mode(regs);
496-
instrumentation_begin();
497-
498-
local_irq_enable();
499-
success = __do_fast_syscall_32(regs);
500-
501-
instrumentation_end();
502-
exit_to_user_mode();
503-
504-
/* If it failed, keep it simple: use IRET. */
505-
if (!success)
493+
/* Invoke the syscall. If it failed, keep it simple: use IRET. */
494+
if (!__do_fast_syscall_32(regs))
506495
return 0;
507496

508497
#ifdef CONFIG_X86_64

0 commit comments

Comments
 (0)