@@ -366,8 +366,7 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
366
366
exit_to_user_mode ();
367
367
}
368
368
369
- #ifdef CONFIG_X86_64
370
- __visible noinstr void do_syscall_64 (unsigned long nr , struct pt_regs * regs )
369
+ static noinstr long syscall_enter (struct pt_regs * regs , unsigned long nr )
371
370
{
372
371
struct thread_info * ti ;
373
372
@@ -379,6 +378,16 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
379
378
if (READ_ONCE (ti -> flags ) & _TIF_WORK_SYSCALL_ENTRY )
380
379
nr = syscall_trace_enter (regs );
381
380
381
+ instrumentation_end ();
382
+ return nr ;
383
+ }
384
+
385
+ #ifdef CONFIG_X86_64
386
+ __visible noinstr void do_syscall_64 (unsigned long nr , struct pt_regs * regs )
387
+ {
388
+ nr = syscall_enter (regs , nr );
389
+
390
+ instrumentation_begin ();
382
391
if (likely (nr < NR_syscalls )) {
383
392
nr = array_index_nospec (nr , NR_syscalls );
384
393
regs -> ax = sys_call_table [nr ](regs );
@@ -390,64 +399,53 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
390
399
regs -> ax = x32_sys_call_table [nr ](regs );
391
400
#endif
392
401
}
393
- __syscall_return_slowpath (regs );
394
-
395
402
instrumentation_end ();
396
- exit_to_user_mode ( );
403
+ syscall_return_slowpath ( regs );
397
404
}
398
405
#endif
399
406
400
407
#if defined(CONFIG_X86_32 ) || defined(CONFIG_IA32_EMULATION )
408
+ static __always_inline unsigned int syscall_32_enter (struct pt_regs * regs )
409
+ {
410
+ if (IS_ENABLED (CONFIG_IA32_EMULATION ))
411
+ current_thread_info ()-> status |= TS_COMPAT ;
412
+ /*
413
+ * Subtlety here: if ptrace pokes something larger than 2^32-1 into
414
+ * orig_ax, the unsigned int return value truncates it. This may
415
+ * or may not be necessary, but it matches the old asm behavior.
416
+ */
417
+ return syscall_enter (regs , (unsigned int )regs -> orig_ax );
418
+ }
419
+
401
420
/*
402
- * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
403
- * all entry and exit work and returns with IRQs off. This function is
404
- * extremely hot in workloads that use it, and it's usually called from
405
- * do_fast_syscall_32, so forcibly inline it to improve performance.
421
+ * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
406
422
*/
407
- static void do_syscall_32_irqs_on (struct pt_regs * regs )
423
+ static __always_inline void do_syscall_32_irqs_on (struct pt_regs * regs ,
424
+ unsigned int nr )
408
425
{
409
- struct thread_info * ti = current_thread_info ();
410
- unsigned int nr = (unsigned int )regs -> orig_ax ;
411
-
412
- #ifdef CONFIG_IA32_EMULATION
413
- ti -> status |= TS_COMPAT ;
414
- #endif
415
-
416
- if (READ_ONCE (ti -> flags ) & _TIF_WORK_SYSCALL_ENTRY ) {
417
- /*
418
- * Subtlety here: if ptrace pokes something larger than
419
- * 2^32-1 into orig_ax, this truncates it. This may or
420
- * may not be necessary, but it matches the old asm
421
- * behavior.
422
- */
423
- nr = syscall_trace_enter (regs );
424
- }
425
-
426
426
if (likely (nr < IA32_NR_syscalls )) {
427
+ instrumentation_begin ();
427
428
nr = array_index_nospec (nr , IA32_NR_syscalls );
428
429
regs -> ax = ia32_sys_call_table [nr ](regs );
430
+ instrumentation_end ();
429
431
}
430
-
431
- __syscall_return_slowpath (regs );
432
432
}
433
433
434
434
/* Handles int $0x80 */
435
435
__visible noinstr void do_int80_syscall_32 (struct pt_regs * regs )
436
436
{
437
- enter_from_user_mode (regs );
438
- instrumentation_begin ();
437
+ unsigned int nr = syscall_32_enter (regs );
439
438
440
- local_irq_enable ();
441
- do_syscall_32_irqs_on (regs );
442
-
443
- instrumentation_end ();
444
- exit_to_user_mode ();
439
+ do_syscall_32_irqs_on (regs , nr );
440
+ syscall_return_slowpath (regs );
445
441
}
446
442
447
- static bool __do_fast_syscall_32 (struct pt_regs * regs )
443
+ static noinstr bool __do_fast_syscall_32 (struct pt_regs * regs )
448
444
{
445
+ unsigned int nr = syscall_32_enter (regs );
449
446
int res ;
450
447
448
+ instrumentation_begin ();
451
449
/* Fetch EBP from where the vDSO stashed it. */
452
450
if (IS_ENABLED (CONFIG_X86_64 )) {
453
451
/*
@@ -460,17 +458,18 @@ static bool __do_fast_syscall_32(struct pt_regs *regs)
460
458
res = get_user (* (u32 * )& regs -> bp ,
461
459
(u32 __user __force * )(unsigned long )(u32 )regs -> sp );
462
460
}
461
+ instrumentation_end ();
463
462
464
463
if (res ) {
465
464
/* User code screwed up. */
466
465
regs -> ax = - EFAULT ;
467
- local_irq_disable ();
468
- __prepare_exit_to_usermode (regs );
466
+ syscall_return_slowpath (regs );
469
467
return false;
470
468
}
471
469
472
470
/* Now this is just like a normal syscall. */
473
- do_syscall_32_irqs_on (regs );
471
+ do_syscall_32_irqs_on (regs , nr );
472
+ syscall_return_slowpath (regs );
474
473
return true;
475
474
}
476
475
@@ -483,7 +482,6 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
483
482
*/
484
483
unsigned long landing_pad = (unsigned long )current -> mm -> context .vdso +
485
484
vdso_image_32 .sym_int80_landing_pad ;
486
- bool success ;
487
485
488
486
/*
489
487
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -492,17 +490,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
492
490
*/
493
491
regs -> ip = landing_pad ;
494
492
495
- enter_from_user_mode (regs );
496
- instrumentation_begin ();
497
-
498
- local_irq_enable ();
499
- success = __do_fast_syscall_32 (regs );
500
-
501
- instrumentation_end ();
502
- exit_to_user_mode ();
503
-
504
- /* If it failed, keep it simple: use IRET. */
505
- if (!success )
493
+ /* Invoke the syscall. If it failed, keep it simple: use IRET. */
494
+ if (!__do_fast_syscall_32 (regs ))
506
495
return 0 ;
507
496
508
497
#ifdef CONFIG_X86_64
0 commit comments