
Commit 0a7c10d

Merge tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov:

 - A couple of SEV-ES fixes and robustifications: verify that the user-mode
   stack pointer in an NMI is not coming from the syscall gap, correctly
   track IRQ states in the #VC handler, and access user instruction bytes
   atomically in the same handler, as the latter cannot sleep.

 - Balance the 32-bit fast syscall exit path to do the proper work on exit
   and thus not confuse the audit and ptrace frameworks.

 - Two fixes for the ORC unwinder going "off the rails" into KASAN redzones
   and when ORC data is missing.

* tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sev-es: Use __copy_from_user_inatomic()
  x86/sev-es: Correctly track IRQ states in runtime #VC handler
  x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack
  x86/sev-es: Introduce ip_within_syscall_gap() helper
  x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls
  x86/unwind/orc: Silence warnings caused by missing ORC data
  x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2
2 parents c3c7579 + bffe30d commit 0a7c10d

9 files changed, +99 -29 lines changed


arch/x86/entry/common.c

Lines changed: 2 additions & 1 deletion
@@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 		regs->ax = -EFAULT;
 
 		instrumentation_end();
-		syscall_exit_to_user_mode(regs);
+		local_irq_disable();
+		irqentry_exit_to_user_mode(regs);
 		return false;
 	}
 

arch/x86/entry/entry_64_compat.S

Lines changed: 2 additions & 0 deletions
@@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
 	/* Switch to the kernel stack */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
 	pushq	%r8			/* pt_regs->sp */

arch/x86/include/asm/insn-eval.h

Lines changed: 2 additions & 0 deletions
@@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
 int insn_fetch_from_user(struct pt_regs *regs,
 			 unsigned char buf[MAX_INSN_SIZE]);
+int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+				  unsigned char buf[MAX_INSN_SIZE]);
 bool insn_decode(struct insn *insn, struct pt_regs *regs,
 		 unsigned char buf[MAX_INSN_SIZE], int buf_size);
 

arch/x86/include/asm/proto.h

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void);
 void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
+void entry_SYSCALL_compat_safe_stack(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);

arch/x86/include/asm/ptrace.h

Lines changed: 15 additions & 0 deletions
@@ -94,6 +94,8 @@ struct pt_regs {
 #include <asm/paravirt_types.h>
 #endif
 
+#include <asm/proto.h>
+
 struct cpuinfo_x86;
 struct task_struct;
 
@@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer() current_pt_regs()->sp
 #define compat_user_stack_pointer() current_pt_regs()->sp
+
+static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+{
+	bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+		    regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
+
+#ifdef CONFIG_IA32_EMULATION
+	ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
+		      regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
+#endif
+
+	return ret;
+}
 #endif
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
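
The new helper centralizes the "is the interrupted IP inside the SYSCALL gap?" check. For context, a minimal hypothetical sketch of how such a check is consumed (the real callers are in traps.c and sev-es.c below): an entry path that must not trust regs->sp falls back to the per-CPU kernel stack.

/* Illustrative only -- not part of this commit. */
static unsigned long choose_trusted_stack(struct pt_regs *regs)
{
	/*
	 * Between the SYSCALL instruction and the switch to the kernel stack,
	 * regs->sp still holds a user-controlled value and must not be used
	 * as an exception stack.
	 */
	if (ip_within_syscall_gap(regs))
		return this_cpu_read(cpu_current_top_of_stack);

	return regs->sp;
}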

arch/x86/kernel/sev-es.c

Lines changed: 17 additions & 5 deletions
@@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu)
 	cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
 }
 
-static __always_inline bool on_vc_stack(unsigned long sp)
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
+	unsigned long sp = regs->sp;
+
+	/* User-mode RSP is not trusted */
+	if (user_mode(regs))
+		return false;
+
+	/* SYSCALL gap still has user-mode RSP */
+	if (ip_within_syscall_gap(regs))
+		return false;
+
 	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
 }
 
@@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
 	old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
 
 	/* Make room on the IST stack */
-	if (on_vc_stack(regs->sp))
+	if (on_vc_stack(regs))
 		new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
 	else
 		new_ist = old_ist - sizeof(old_ist);
@@ -248,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
 	int res;
 
 	if (user_mode(ctxt->regs)) {
-		res = insn_fetch_from_user(ctxt->regs, buffer);
+		res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
 		if (!res) {
 			ctxt->fi.vector = X86_TRAP_PF;
 			ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
@@ -1248,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
 	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+	irqentry_state_t irq_state;
 	struct ghcb_state state;
 	struct es_em_ctxt ctxt;
 	enum es_result result;
 	struct ghcb *ghcb;
 
-	lockdep_assert_irqs_disabled();
-
 	/*
 	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 	 */
@@ -1263,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		return;
 	}
 
+	irq_state = irqentry_nmi_enter(regs);
+	lockdep_assert_irqs_disabled();
 	instrumentation_begin();
 
 	/*
@@ -1325,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 
 out:
 	instrumentation_end();
+	irqentry_nmi_exit(regs, irq_state);
 
 	return;

arch/x86/kernel/traps.c

Lines changed: 1 addition & 2 deletions
@@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
 	 * In the SYSCALL entry path the RSP value comes from user-space - don't
 	 * trust it and switch to the current kernel stack
 	 */
-	if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
-	    regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
+	if (ip_within_syscall_gap(regs)) {
 		sp = this_cpu_read(cpu_current_top_of_stack);
 		goto sync;
 	}

arch/x86/kernel/unwind_orc.c

Lines changed: 7 additions & 7 deletions
@@ -13,7 +13,7 @@
 
 #define orc_warn_current(args...)				\
 ({								\
-	if (state->task == current)				\
+	if (state->task == current && !state->error)		\
 		orc_warn(args);					\
 })
 
@@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
 	if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
 		return false;
 
-	*ip = regs->ip;
-	*sp = regs->sp;
+	*ip = READ_ONCE_NOCHECK(regs->ip);
+	*sp = READ_ONCE_NOCHECK(regs->sp);
 	return true;
 }
 
@@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
 	if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
 		return false;
 
-	*ip = regs->ip;
-	*sp = regs->sp;
+	*ip = READ_ONCE_NOCHECK(regs->ip);
+	*sp = READ_ONCE_NOCHECK(regs->sp);
 	return true;
 }
 
@@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
 		return false;
 
 	if (state->full_regs) {
-		*val = ((unsigned long *)state->regs)[reg];
+		*val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
 		return true;
 	}
 
 	if (state->prev_regs) {
-		*val = ((unsigned long *)state->prev_regs)[reg];
+		*val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]);
 		return true;
 	}
 
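
READ_ONCE_NOCHECK() performs the same volatile read as READ_ONCE() but through an uninstrumented helper, so the unwinder can dereference stack slots that may sit in KASAN redzones without triggering a report. A minimal sketch of the pattern, assuming addr is a hypothetical pointer into another task's stack:

/* Sketch, illustrative only: peek at a possibly KASAN-poisoned stack word. */
static unsigned long peek_stack_word(unsigned long addr)
{
	/* Uninstrumented read: KASAN does not check this access. */
	return READ_ONCE_NOCHECK(*(unsigned long *)addr);
}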

arch/x86/lib/insn-eval.c

Lines changed: 52 additions & 14 deletions
@@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }
 
+static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+{
+	unsigned long seg_base = 0;
+
+	/*
+	 * If not in user-space long mode, a custom code segment could be in
+	 * use. This is true in protected mode (if the process defined a local
+	 * descriptor table), or virtual-8086 mode. In most of the cases
+	 * seg_base will be zero as in USER_CS.
+	 */
+	if (!user_64bit_mode(regs)) {
+		seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+		if (seg_base == -1L)
+			return 0;
+	}
+
+	return seg_base + regs->ip;
+}
+
 /**
  * insn_fetch_from_user() - Copy instruction bytes from user-space memory
  * @regs:	Structure with register values as seen when entering kernel mode
@@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 {
-	unsigned long seg_base = 0;
+	unsigned long ip;
 	int not_copied;
 
-	/*
-	 * If not in user-space long mode, a custom code segment could be in
-	 * use. This is true in protected mode (if the process defined a local
-	 * descriptor table), or virtual-8086 mode. In most of the cases
-	 * seg_base will be zero as in USER_CS.
-	 */
-	if (!user_64bit_mode(regs)) {
-		seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
-		if (seg_base == -1L)
-			return 0;
-	}
+	ip = insn_get_effective_ip(regs);
+	if (!ip)
+		return 0;
+
+	not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
 
+	return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory
+ *                                   while in atomic code
+ * @regs:	Structure with register values as seen when entering kernel mode
+ * @buf:	Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf. This function must be used in atomic context.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
+{
+	unsigned long ip;
+	int not_copied;
+
+	ip = insn_get_effective_ip(regs);
+	if (!ip)
+		return 0;
 
-	not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
-				    MAX_INSN_SIZE);
+	not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
 
 	return MAX_INSN_SIZE - not_copied;
 }
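
A minimal sketch of how the new *_inatomic() fetch is meant to be consumed (hypothetical wrapper, not part of this commit; the real caller is vc_decode_insn() in the sev-es.c hunk above), relying only on the signatures declared in insn-eval.h:

/*
 * Hypothetical wrapper, for illustration only: fetch and decode a user-space
 * instruction from a context that must not sleep (e.g. the #VC handler).
 */
static int decode_user_insn_atomic(struct pt_regs *regs, struct insn *insn)
{
	unsigned char buf[MAX_INSN_SIZE];
	int nbytes;

	/* __copy_from_user_inatomic() never sleeps; it copies less (possibly
	 * nothing) if the user page is not present. */
	nbytes = insn_fetch_from_user_inatomic(regs, buf);
	if (!nbytes)
		return -EFAULT;

	if (!insn_decode(insn, regs, buf, nbytes))
		return -EINVAL;

	return 0;
}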
