Skip to content

Commit 5c4a1c0

Browse files
committed
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar: "These are the fixes left over from the v5.4 cycle: - Various low level 32-bit entry code fixes and improvements by Andy Lutomirski, Peter Zijlstra and Thomas Gleixner. - Fix 32-bit Xen PV breakage, by Jan Beulich" * 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/entry/32: Fix FIXUP_ESPFIX_STACK with user CR3 x86/pti/32: Calculate the various PTI cpu_entry_area sizes correctly, make the CPU_ENTRY_AREA_PAGES assert precise selftests/x86/sigreturn/32: Invalidate DS and ES when abusing the kernel selftests/x86/mov_ss_trap: Fix the SYSENTER test x86/entry/32: Fix NMI vs ESPFIX x86/entry/32: Unwind the ESPFIX stack earlier on exception entry x86/entry/32: Move FIXUP_FRAME after pushing %fs in SAVE_ALL x86/entry/32: Use %ss segment where required x86/entry/32: Fix IRET exception x86/cpu_entry_area: Add guard page for entry stack on 32bit x86/pti/32: Size initial_page_table correctly x86/doublefault/32: Fix stack canaries in the double fault handler x86/xen/32: Simplify ring check in xen_iret_crit_fixup() x86/xen/32: Make xen_iret_crit_fixup() independent of frame layout x86/stackframe/32: Repair 32-bit Xen PV
2 parents 53a07a1 + 4a13b0e commit 5c4a1c0

File tree

10 files changed

+217
-140
lines changed

10 files changed

+217
-140
lines changed

arch/x86/entry/entry_32.S

Lines changed: 133 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@
172172
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
173173
.if \no_user_check == 0
174174
/* coming from usermode? */
175-
testl $SEGMENT_RPL_MASK, PT_CS(%esp)
175+
testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp)
176176
jz .Lend_\@
177177
.endif
178178
/* On user-cr3? */
@@ -205,64 +205,76 @@
205205
#define CS_FROM_ENTRY_STACK (1 << 31)
206206
#define CS_FROM_USER_CR3 (1 << 30)
207207
#define CS_FROM_KERNEL (1 << 29)
208+
#define CS_FROM_ESPFIX (1 << 28)
208209

209210
.macro FIXUP_FRAME
210211
/*
211212
* The high bits of the CS dword (__csh) are used for CS_FROM_*.
212213
* Clear them in case hardware didn't do this for us.
213214
*/
214-
andl $0x0000ffff, 3*4(%esp)
215+
andl $0x0000ffff, 4*4(%esp)
215216

216217
#ifdef CONFIG_VM86
217-
testl $X86_EFLAGS_VM, 4*4(%esp)
218+
testl $X86_EFLAGS_VM, 5*4(%esp)
218219
jnz .Lfrom_usermode_no_fixup_\@
219220
#endif
220-
testl $SEGMENT_RPL_MASK, 3*4(%esp)
221+
testl $USER_SEGMENT_RPL_MASK, 4*4(%esp)
221222
jnz .Lfrom_usermode_no_fixup_\@
222223

223-
orl $CS_FROM_KERNEL, 3*4(%esp)
224+
orl $CS_FROM_KERNEL, 4*4(%esp)
224225

225226
/*
226227
* When we're here from kernel mode; the (exception) stack looks like:
227228
*
228-
* 5*4(%esp) - <previous context>
229-
* 4*4(%esp) - flags
230-
* 3*4(%esp) - cs
231-
* 2*4(%esp) - ip
232-
* 1*4(%esp) - orig_eax
233-
* 0*4(%esp) - gs / function
229+
* 6*4(%esp) - <previous context>
230+
* 5*4(%esp) - flags
231+
* 4*4(%esp) - cs
232+
* 3*4(%esp) - ip
233+
* 2*4(%esp) - orig_eax
234+
* 1*4(%esp) - gs / function
235+
* 0*4(%esp) - fs
234236
*
235237
* Lets build a 5 entry IRET frame after that, such that struct pt_regs
236238
* is complete and in particular regs->sp is correct. This gives us
237-
* the original 5 entries as gap:
239+
* the original 6 entries as gap:
238240
*
239-
* 12*4(%esp) - <previous context>
240-
* 11*4(%esp) - gap / flags
241-
* 10*4(%esp) - gap / cs
242-
* 9*4(%esp) - gap / ip
243-
* 8*4(%esp) - gap / orig_eax
244-
* 7*4(%esp) - gap / gs / function
245-
* 6*4(%esp) - ss
246-
* 5*4(%esp) - sp
247-
* 4*4(%esp) - flags
248-
* 3*4(%esp) - cs
249-
* 2*4(%esp) - ip
250-
* 1*4(%esp) - orig_eax
251-
* 0*4(%esp) - gs / function
241+
* 14*4(%esp) - <previous context>
242+
* 13*4(%esp) - gap / flags
243+
* 12*4(%esp) - gap / cs
244+
* 11*4(%esp) - gap / ip
245+
* 10*4(%esp) - gap / orig_eax
246+
* 9*4(%esp) - gap / gs / function
247+
* 8*4(%esp) - gap / fs
248+
* 7*4(%esp) - ss
249+
* 6*4(%esp) - sp
250+
* 5*4(%esp) - flags
251+
* 4*4(%esp) - cs
252+
* 3*4(%esp) - ip
253+
* 2*4(%esp) - orig_eax
254+
* 1*4(%esp) - gs / function
255+
* 0*4(%esp) - fs
252256
*/
253257

254258
pushl %ss # ss
255259
pushl %esp # sp (points at ss)
256-
addl $6*4, (%esp) # point sp back at the previous context
257-
pushl 6*4(%esp) # flags
258-
pushl 6*4(%esp) # cs
259-
pushl 6*4(%esp) # ip
260-
pushl 6*4(%esp) # orig_eax
261-
pushl 6*4(%esp) # gs / function
260+
addl $7*4, (%esp) # point sp back at the previous context
261+
pushl 7*4(%esp) # flags
262+
pushl 7*4(%esp) # cs
263+
pushl 7*4(%esp) # ip
264+
pushl 7*4(%esp) # orig_eax
265+
pushl 7*4(%esp) # gs / function
266+
pushl 7*4(%esp) # fs
262267
.Lfrom_usermode_no_fixup_\@:
263268
.endm
264269

265270
.macro IRET_FRAME
271+
/*
272+
* We're called with %ds, %es, %fs, and %gs from the interrupted
273+
* frame, so we shouldn't use them. Also, we may be in ESPFIX
274+
* mode and therefore have a nonzero SS base and an offset ESP,
275+
* so any attempt to access the stack needs to use SS. (except for
276+
* accesses through %esp, which automatically use SS.)
277+
*/
266278
testl $CS_FROM_KERNEL, 1*4(%esp)
267279
jz .Lfinished_frame_\@
268280

@@ -276,31 +288,40 @@
276288
movl 5*4(%esp), %eax # (modified) regs->sp
277289

278290
movl 4*4(%esp), %ecx # flags
279-
movl %ecx, -4(%eax)
291+
movl %ecx, %ss:-1*4(%eax)
280292

281293
movl 3*4(%esp), %ecx # cs
282294
andl $0x0000ffff, %ecx
283-
movl %ecx, -8(%eax)
295+
movl %ecx, %ss:-2*4(%eax)
284296

285297
movl 2*4(%esp), %ecx # ip
286-
movl %ecx, -12(%eax)
298+
movl %ecx, %ss:-3*4(%eax)
287299

288300
movl 1*4(%esp), %ecx # eax
289-
movl %ecx, -16(%eax)
301+
movl %ecx, %ss:-4*4(%eax)
290302

291303
popl %ecx
292-
lea -16(%eax), %esp
304+
lea -4*4(%eax), %esp
293305
popl %eax
294306
.Lfinished_frame_\@:
295307
.endm
296308

297-
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
309+
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
298310
cld
299311
.if \skip_gs == 0
300312
PUSH_GS
301313
.endif
302-
FIXUP_FRAME
303314
pushl %fs
315+
316+
pushl %eax
317+
movl $(__KERNEL_PERCPU), %eax
318+
movl %eax, %fs
319+
.if \unwind_espfix > 0
320+
UNWIND_ESPFIX_STACK
321+
.endif
322+
popl %eax
323+
324+
FIXUP_FRAME
304325
pushl %es
305326
pushl %ds
306327
pushl \pt_regs_ax
@@ -313,8 +334,6 @@
313334
movl $(__USER_DS), %edx
314335
movl %edx, %ds
315336
movl %edx, %es
316-
movl $(__KERNEL_PERCPU), %edx
317-
movl %edx, %fs
318337
.if \skip_gs == 0
319338
SET_KERNEL_GS %edx
320339
.endif
@@ -324,8 +343,8 @@
324343
.endif
325344
.endm
326345

327-
.macro SAVE_ALL_NMI cr3_reg:req
328-
SAVE_ALL
346+
.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
347+
SAVE_ALL unwind_espfix=\unwind_espfix
329348

330349
BUG_IF_WRONG_CR3
331350

@@ -357,6 +376,7 @@
357376
2: popl %es
358377
3: popl %fs
359378
POP_GS \pop
379+
IRET_FRAME
360380
.pushsection .fixup, "ax"
361381
4: movl $0, (%esp)
362382
jmp 1b
@@ -395,7 +415,8 @@
395415

396416
.macro CHECK_AND_APPLY_ESPFIX
397417
#ifdef CONFIG_X86_ESPFIX32
398-
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
418+
#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
419+
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
399420

400421
ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
401422

@@ -1075,7 +1096,6 @@ restore_all:
10751096
/* Restore user state */
10761097
RESTORE_REGS pop=4 # skip orig_eax/error_code
10771098
.Lirq_return:
1078-
IRET_FRAME
10791099
/*
10801100
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
10811101
* when returning from IPI handler and when returning from
@@ -1128,30 +1148,43 @@ ENDPROC(entry_INT80_32)
11281148
* We can't call C functions using the ESPFIX stack. This code reads
11291149
the high word of the segment base from the GDT and switches to the
11301150
* normal stack and adjusts ESP with the matching offset.
1151+
*
1152+
* We might be on user CR3 here, so percpu data is not mapped and we can't
1153+
* access the GDT through the percpu segment. Instead, use SGDT to find
1154+
* the cpu_entry_area alias of the GDT.
11311155
*/
11321156
#ifdef CONFIG_X86_ESPFIX32
11331157
/* fixup the stack */
1134-
mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
1135-
mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
1158+
pushl %ecx
1159+
subl $2*4, %esp
1160+
sgdt (%esp)
1161+
movl 2(%esp), %ecx /* GDT address */
1162+
/*
1163+
* Careful: ECX is a linear pointer, so we need to force base
1164+
* zero. %cs is the only known-linear segment we have right now.
1165+
*/
1166+
mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */
1167+
mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */
11361168
shl $16, %eax
1169+
addl $2*4, %esp
1170+
popl %ecx
11371171
addl %esp, %eax /* the adjusted stack pointer */
11381172
pushl $__KERNEL_DS
11391173
pushl %eax
11401174
lss (%esp), %esp /* switch to the normal stack segment */
11411175
#endif
11421176
.endm
1177+
11431178
.macro UNWIND_ESPFIX_STACK
1179+
/* It's safe to clobber %eax, all other regs need to be preserved */
11441180
#ifdef CONFIG_X86_ESPFIX32
11451181
movl %ss, %eax
11461182
/* see if on espfix stack */
11471183
cmpw $__ESPFIX_SS, %ax
1148-
jne 27f
1149-
movl $__KERNEL_DS, %eax
1150-
movl %eax, %ds
1151-
movl %eax, %es
1184+
jne .Lno_fixup_\@
11521185
/* switch to normal stack */
11531186
FIXUP_ESPFIX_STACK
1154-
27:
1187+
.Lno_fixup_\@:
11551188
#endif
11561189
.endm
11571190

@@ -1341,28 +1374,24 @@ END(spurious_interrupt_bug)
13411374

13421375
#ifdef CONFIG_XEN_PV
13431376
ENTRY(xen_hypervisor_callback)
1344-
pushl $-1 /* orig_ax = -1 => not a system call */
1345-
SAVE_ALL
1346-
ENCODE_FRAME_POINTER
1347-
TRACE_IRQS_OFF
1348-
13491377
/*
13501378
* Check to see if we got the event in the critical
13511379
* region in xen_iret_direct, after we've reenabled
13521380
* events and checked for pending events. This simulates
13531381
* iret instruction's behaviour where it delivers a
13541382
* pending interrupt when enabling interrupts:
13551383
*/
1356-
movl PT_EIP(%esp), %eax
1357-
cmpl $xen_iret_start_crit, %eax
1384+
cmpl $xen_iret_start_crit, (%esp)
13581385
jb 1f
1359-
cmpl $xen_iret_end_crit, %eax
1386+
cmpl $xen_iret_end_crit, (%esp)
13601387
jae 1f
1361-
1362-
jmp xen_iret_crit_fixup
1363-
1364-
ENTRY(xen_do_upcall)
1365-
1: mov %esp, %eax
1388+
call xen_iret_crit_fixup
1389+
1:
1390+
pushl $-1 /* orig_ax = -1 => not a system call */
1391+
SAVE_ALL
1392+
ENCODE_FRAME_POINTER
1393+
TRACE_IRQS_OFF
1394+
mov %esp, %eax
13661395
call xen_evtchn_do_upcall
13671396
#ifndef CONFIG_PREEMPTION
13681397
call xen_maybe_preempt_hcall
@@ -1449,10 +1478,9 @@ END(page_fault)
14491478

14501479
common_exception_read_cr2:
14511480
/* the function address is in %gs's slot on the stack */
1452-
SAVE_ALL switch_stacks=1 skip_gs=1
1481+
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
14531482

14541483
ENCODE_FRAME_POINTER
1455-
UNWIND_ESPFIX_STACK
14561484

14571485
/* fixup %gs */
14581486
GS_TO_REG %ecx
@@ -1474,9 +1502,8 @@ END(common_exception_read_cr2)
14741502

14751503
common_exception:
14761504
/* the function address is in %gs's slot on the stack */
1477-
SAVE_ALL switch_stacks=1 skip_gs=1
1505+
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
14781506
ENCODE_FRAME_POINTER
1479-
UNWIND_ESPFIX_STACK
14801507

14811508
/* fixup %gs */
14821509
GS_TO_REG %ecx
@@ -1515,6 +1542,10 @@ ENTRY(nmi)
15151542
ASM_CLAC
15161543

15171544
#ifdef CONFIG_X86_ESPFIX32
1545+
/*
1546+
* ESPFIX_SS is only ever set on the return to user path
1547+
* after we've switched to the entry stack.
1548+
*/
15181549
pushl %eax
15191550
movl %ss, %eax
15201551
cmpw $__ESPFIX_SS, %ax
@@ -1550,30 +1581,54 @@ ENTRY(nmi)
15501581
movl %ebx, %esp
15511582

15521583
.Lnmi_return:
1584+
#ifdef CONFIG_X86_ESPFIX32
1585+
testl $CS_FROM_ESPFIX, PT_CS(%esp)
1586+
jnz .Lnmi_from_espfix
1587+
#endif
1588+
15531589
CHECK_AND_APPLY_ESPFIX
15541590
RESTORE_ALL_NMI cr3_reg=%edi pop=4
15551591
jmp .Lirq_return
15561592

15571593
#ifdef CONFIG_X86_ESPFIX32
15581594
.Lnmi_espfix_stack:
15591595
/*
1560-
* create the pointer to lss back
1596+
* Create the pointer to LSS back
15611597
*/
15621598
pushl %ss
15631599
pushl %esp
15641600
addl $4, (%esp)
1565-
/* copy the iret frame of 12 bytes */
1566-
.rept 3
1567-
pushl 16(%esp)
1568-
.endr
1569-
pushl %eax
1570-
SAVE_ALL_NMI cr3_reg=%edi
1601+
1602+
/* Copy the (short) IRET frame */
1603+
pushl 4*4(%esp) # flags
1604+
pushl 4*4(%esp) # cs
1605+
pushl 4*4(%esp) # ip
1606+
1607+
pushl %eax # orig_ax
1608+
1609+
SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
15711610
ENCODE_FRAME_POINTER
1572-
FIXUP_ESPFIX_STACK # %eax == %esp
1611+
1612+
/* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
1613+
xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
1614+
15731615
xorl %edx, %edx # zero error code
1574-
call do_nmi
1616+
movl %esp, %eax # pt_regs pointer
1617+
jmp .Lnmi_from_sysenter_stack
1618+
1619+
.Lnmi_from_espfix:
15751620
RESTORE_ALL_NMI cr3_reg=%edi
1576-
lss 12+4(%esp), %esp # back to espfix stack
1621+
/*
1622+
* Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
1623+
* fix up the gap and long frame:
1624+
*
1625+
* 3 - original frame (exception)
1626+
* 2 - ESPFIX block (above)
1627+
* 6 - gap (FIXUP_FRAME)
1628+
* 5 - long frame (FIXUP_FRAME)
1629+
* 1 - orig_ax
1630+
*/
1631+
lss (1+5+6)*4(%esp), %esp # back to espfix stack
15771632
jmp .Lirq_return
15781633
#endif
15791634
END(nmi)

0 commit comments

Comments
 (0)