	ALTERNATIVE	"jmp .Lend_\@", "", X86_FEATURE_PTI
	.if \no_user_check == 0
	/* coming from usermode? */
-	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, PT_CS(%esp)
	jz	.Lend_\@
	.endif
	/* On user-cr3? */
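For readers without segment.h at hand: the low two bits of a selector are its Requested Privilege Level, and the test above classifies the saved CS as user or kernel. A rough C sketch of the check (illustration only, not part of the patch; the switch to USER_SEGMENT_RPL_MASK is presumably so that a kernel CS whose RPL is not 0, as under some paravirtualized setups, is not mistaken for user mode):

/* Illustration only -- not kernel code. */
static int cs_is_from_usermode(unsigned long saved_cs)
{
	/* User selectors keep bits under this mask set; kernel-mode
	 * selectors do not. */
	return (saved_cs & USER_SEGMENT_RPL_MASK) != 0;
}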
@@ -205,64 +205,76 @@
#define CS_FROM_ENTRY_STACK	(1 << 31)
#define CS_FROM_USER_CR3	(1 << 30)
#define CS_FROM_KERNEL		(1 << 29)
+#define CS_FROM_ESPFIX		(1 << 28)

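These flags live in the otherwise-unused high half of the 32-bit CS save slot (__csh); the selector itself only needs the low 16 bits, which is why FIXUP_FRAME below masks the slot with $0x0000ffff before setting any of them. A minimal C illustration of the encoding (not part of the patch):

/* Illustration only: flag bits share a 32-bit word with the 16-bit CS. */
static unsigned int mark_cs_from_kernel(unsigned int cs_slot)
{
	cs_slot &= 0x0000ffff;			/* clear stale CS_FROM_* bits     */
	return cs_slot | CS_FROM_KERNEL;	/* selector stays in the low half */
}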
.macro FIXUP_FRAME
	/*
	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
	 * Clear them in case hardware didn't do this for us.
	 */
-	andl	$0x0000ffff, 3*4(%esp)
+	andl	$0x0000ffff, 4*4(%esp)

#ifdef CONFIG_VM86
-	testl	$X86_EFLAGS_VM, 4*4(%esp)
+	testl	$X86_EFLAGS_VM, 5*4(%esp)
	jnz	.Lfrom_usermode_no_fixup_\@
#endif
-	testl	$SEGMENT_RPL_MASK, 3*4(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, 4*4(%esp)
	jnz	.Lfrom_usermode_no_fixup_\@

-	orl	$CS_FROM_KERNEL, 3*4(%esp)
+	orl	$CS_FROM_KERNEL, 4*4(%esp)

	/*
	 * When we're here from kernel mode; the (exception) stack looks like:
	 *
-	 *  5*4(%esp) - <previous context>
-	 *  4*4(%esp) - flags
-	 *  3*4(%esp) - cs
-	 *  2*4(%esp) - ip
-	 *  1*4(%esp) - orig_eax
-	 *  0*4(%esp) - gs / function
+	 *  6*4(%esp) - <previous context>
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
	 *
	 * Let's build a 5 entry IRET frame after that, such that struct pt_regs
	 * is complete and in particular regs->sp is correct. This gives us
-	 * the original 5 entries as gap:
+	 * the original 6 entries as gap:
	 *
-	 * 12*4(%esp) - <previous context>
-	 * 11*4(%esp) - gap / flags
-	 * 10*4(%esp) - gap / cs
-	 *  9*4(%esp) - gap / ip
-	 *  8*4(%esp) - gap / orig_eax
-	 *  7*4(%esp) - gap / gs / function
-	 *  6*4(%esp) - ss
-	 *  5*4(%esp) - sp
-	 *  4*4(%esp) - flags
-	 *  3*4(%esp) - cs
-	 *  2*4(%esp) - ip
-	 *  1*4(%esp) - orig_eax
-	 *  0*4(%esp) - gs / function
+	 * 14*4(%esp) - <previous context>
+	 * 13*4(%esp) - gap / flags
+	 * 12*4(%esp) - gap / cs
+	 * 11*4(%esp) - gap / ip
+	 * 10*4(%esp) - gap / orig_eax
+	 *  9*4(%esp) - gap / gs / function
+	 *  8*4(%esp) - gap / fs
+	 *  7*4(%esp) - ss
+	 *  6*4(%esp) - sp
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
	 */

	pushl	%ss		# ss
	pushl	%esp		# sp (points at ss)
-	addl	$6*4, (%esp)	# point sp back at the previous context
-	pushl	6*4(%esp)	# flags
-	pushl	6*4(%esp)	# cs
-	pushl	6*4(%esp)	# ip
-	pushl	6*4(%esp)	# orig_eax
-	pushl	6*4(%esp)	# gs / function
+	addl	$7*4, (%esp)	# point sp back at the previous context
+	pushl	7*4(%esp)	# flags
+	pushl	7*4(%esp)	# cs
+	pushl	7*4(%esp)	# ip
+	pushl	7*4(%esp)	# orig_eax
+	pushl	7*4(%esp)	# gs / function
+	pushl	7*4(%esp)	# fs
.Lfrom_usermode_no_fixup_\@:
.endm

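Every offset inside FIXUP_FRAME grows by one word because SAVE_ALL now pushes %fs before invoking the macro. A hypothetical C mirror of the kernel-mode frame the macro sees (illustration only; offsets count 4-byte words up from %esp, matching the comment above):

/* Illustration only -- not kernel code. */
struct kmode_entry_frame {
	unsigned long fs;		/* 0*4(%esp), pushed by SAVE_ALL     */
	unsigned long gs;		/* 1*4(%esp), gs / function          */
	unsigned long orig_eax;		/* 2*4(%esp)                         */
	unsigned long ip;		/* 3*4(%esp)                         */
	unsigned long cs;		/* 4*4(%esp), CS_FROM_* in high bits */
	unsigned long flags;		/* 5*4(%esp)                         */
	/* 6*4(%esp) and up: the interrupted kernel context */
};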
.macro IRET_FRAME
+	/*
+	 * We're called with %ds, %es, %fs, and %gs from the interrupted
+	 * frame, so we shouldn't use them. Also, we may be in ESPFIX
+	 * mode and therefore have a nonzero SS base and an offset ESP,
+	 * so any attempt to access the stack needs to use SS. (except for
+	 * accesses through %esp, which automatically use SS.)
+	 */
	testl	$CS_FROM_KERNEL, 1*4(%esp)
	jz	.Lfinished_frame_\@

@@ -276,31 +288,40 @@
	movl	5*4(%esp), %eax		# (modified) regs->sp

	movl	4*4(%esp), %ecx		# flags
-	movl	%ecx, -4(%eax)
+	movl	%ecx, %ss:-1*4(%eax)

	movl	3*4(%esp), %ecx		# cs
	andl	$0x0000ffff, %ecx
-	movl	%ecx, -8(%eax)
+	movl	%ecx, %ss:-2*4(%eax)

	movl	2*4(%esp), %ecx		# ip
-	movl	%ecx, -12(%eax)
+	movl	%ecx, %ss:-3*4(%eax)

	movl	1*4(%esp), %ecx		# eax
-	movl	%ecx, -16(%eax)
+	movl	%ecx, %ss:-4*4(%eax)

	popl	%ecx
-	lea	-16(%eax), %esp
+	lea	-4*4(%eax), %esp
	popl	%eax
.Lfinished_frame_\@:
.endm

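In words: when CS_FROM_KERNEL is set, IRET_FRAME copies the interesting part of the long frame to just below the interrupted context's stack pointer, using an explicit %ss override since the data segments can't be trusted on an ESPFIX return, and then points %esp at the copy. A rough C rendering of that copy (illustration only; the %eax/%ecx save and restore around it is left out):

/* Illustration only -- not kernel code. */
static void build_short_iret_frame(unsigned long *regs_sp, unsigned long flags,
				   unsigned long cs_slot, unsigned long ip)
{
	regs_sp[-1] = flags;
	regs_sp[-2] = cs_slot & 0xffff;	/* drop the CS_FROM_* bits */
	regs_sp[-3] = ip;
	/* %esp then becomes &regs_sp[-3]; iret consumes ip, cs, flags. */
}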
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
	cld
	.if \skip_gs == 0
	PUSH_GS
	.endif
-	FIXUP_FRAME
	pushl	%fs
+
+	pushl	%eax
+	movl	$(__KERNEL_PERCPU), %eax
+	movl	%eax, %fs
+	.if \unwind_espfix > 0
+	UNWIND_ESPFIX_STACK
+	.endif
+	popl	%eax
+
+	FIXUP_FRAME
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
@@ -313,8 +334,6 @@
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
-	movl	$(__KERNEL_PERCPU), %edx
-	movl	%edx, %fs
	.if \skip_gs == 0
	SET_KERNEL_GS %edx
	.endif
@@ -324,8 +343,8 @@
	.endif
.endm

-.macro SAVE_ALL_NMI cr3_reg:req
-	SAVE_ALL
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
+	SAVE_ALL unwind_espfix=\unwind_espfix

	BUG_IF_WRONG_CR3

@@ -357,6 +376,7 @@
2:	popl	%es
3:	popl	%fs
	POP_GS	\pop
+	IRET_FRAME
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
@@ -395,7 +415,8 @@

.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET

	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX

@@ -1075,7 +1096,6 @@ restore_all:
	/* Restore user state */
	RESTORE_REGS pop=4		# skip orig_eax/error_code
.Lirq_return:
-	IRET_FRAME
	/*
	 * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
	 * when returning from IPI handler and when returning from
@@ -1128,30 +1148,43 @@ ENDPROC(entry_INT80_32)
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
+ *
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
+ * access the GDT through the percpu segment. Instead, use SGDT to find
+ * the cpu_entry_area alias of the GDT.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
-	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
-	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
+	pushl	%ecx
+	subl	$2*4, %esp
+	sgdt	(%esp)
+	movl	2(%esp), %ecx			/* GDT address */
+	/*
+	 * Careful: ECX is a linear pointer, so we need to force base
+	 * zero. %cs is the only known-linear segment we have right now.
+	 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al	/* bits 16..23 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah	/* bits 24..31 */
	shl	$16, %eax
+	addl	$2*4, %esp
+	popl	%ecx
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
+
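The two byte loads pull the high half of the segment base out of the 8-byte GDT descriptor; the following shl/addl pair combines it with the current (ESPFIX) %esp to recover a pointer into the normal kernel stack. A small C helper doing the same extraction (illustration only; desc would point at the espfix SS descriptor located via SGDT):

/* Illustration only: descriptor bytes 4 and 7 hold base[16..23] and
 * base[24..31], which is all FIXUP_ESPFIX_STACK needs. */
static unsigned int espfix_base_high(const unsigned char *desc)
{
	return ((unsigned int)desc[4] << 16) |	/* base bits 16..23 */
	       ((unsigned int)desc[7] << 24);	/* base bits 24..31 */
}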
.macro UNWIND_ESPFIX_STACK
+	/* It's safe to clobber %eax, all other regs need to be preserved */
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
-	jne	27f
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %es
+	jne	.Lno_fixup_\@
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
-27:
+.Lno_fixup_\@:
#endif
.endm

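UNWIND_ESPFIX_STACK is now just a guarded call, with a macro-local label instead of the numeric 27f. A loose C paraphrase (illustration only; read_ss() is a made-up stand-in for the movl %ss, %eax):

/* Illustration only -- not kernel code. */
if ((read_ss() & 0xffff) == __ESPFIX_SS)
	fixup_espfix_stack();	/* FIXUP_ESPFIX_STACK: may clobber %eax only */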
@@ -1341,28 +1374,24 @@ END(spurious_interrupt_bug)

#ifdef CONFIG_XEN_PV
ENTRY(xen_hypervisor_callback)
-	pushl	$-1				/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-	TRACE_IRQS_OFF
-
	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events. This simulates
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
-	movl	PT_EIP(%esp), %eax
-	cmpl	$xen_iret_start_crit, %eax
+	cmpl	$xen_iret_start_crit, (%esp)
	jb	1f
-	cmpl	$xen_iret_end_crit, %eax
+	cmpl	$xen_iret_end_crit, (%esp)
	jae	1f
-
-	jmp	xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1:	mov	%esp, %eax
+	call	xen_iret_crit_fixup
+1:
+	pushl	$-1				/* orig_ax = -1 => not a system call */
+	SAVE_ALL
+	ENCODE_FRAME_POINTER
+	TRACE_IRQS_OFF
+	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPTION
	call	xen_maybe_preempt_hcall
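The two cmpl instructions perform a half-open range check on the interrupted EIP, which is the word at the top of the stack at this point. Roughly, in C (illustration only, not part of the patch):

/* Illustration only -- not kernel code. */
if (interrupted_eip >= (unsigned long)xen_iret_start_crit &&
    interrupted_eip <  (unsigned long)xen_iret_end_crit)
	xen_iret_crit_fixup();	/* the event hit the iret critical window */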
@@ -1449,10 +1478,9 @@ END(page_fault)

common_exception_read_cr2:
	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1

	ENCODE_FRAME_POINTER
-	UNWIND_ESPFIX_STACK

	/* fixup %gs */
	GS_TO_REG %ecx
@@ -1474,9 +1502,8 @@ END(common_exception_read_cr2)

common_exception:
	/* the function address is in %gs's slot on the stack */
-	SAVE_ALL switch_stacks=1 skip_gs=1
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
	ENCODE_FRAME_POINTER
-	UNWIND_ESPFIX_STACK

	/* fixup %gs */
	GS_TO_REG %ecx
@@ -1515,6 +1542,10 @@ ENTRY(nmi)
	ASM_CLAC

#ifdef CONFIG_X86_ESPFIX32
+	/*
+	 * ESPFIX_SS is only ever set on the return to user path
+	 * after we've switched to the entry stack.
+	 */
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
@@ -1550,30 +1581,54 @@ ENTRY(nmi)
	movl	%ebx, %esp

.Lnmi_return:
+#ifdef CONFIG_X86_ESPFIX32
+	testl	$CS_FROM_ESPFIX, PT_CS(%esp)
+	jnz	.Lnmi_from_espfix
+#endif
+
	CHECK_AND_APPLY_ESPFIX
	RESTORE_ALL_NMI cr3_reg=%edi pop=4
	jmp	.Lirq_return

#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
	/*
-	 * create the pointer to lss back
+	 * Create the pointer to LSS back
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl	16(%esp)
-	.endr
-	pushl	%eax
-	SAVE_ALL_NMI cr3_reg=%edi
+
+	/* Copy the (short) IRET frame */
+	pushl	4*4(%esp)	# flags
+	pushl	4*4(%esp)	# cs
+	pushl	4*4(%esp)	# ip
+
+	pushl	%eax		# orig_ax
+
+	SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
	ENCODE_FRAME_POINTER
-	FIXUP_ESPFIX_STACK	# %eax == %esp
+
+	/* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
+	xorl	$(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
+
	xorl	%edx, %edx	# zero error code
-	call	do_nmi
+	movl	%esp, %eax	# pt_regs pointer
+	jmp	.Lnmi_from_sysenter_stack
+
+.Lnmi_from_espfix:
	RESTORE_ALL_NMI cr3_reg=%edi
-	lss	12+4(%esp), %esp	# back to espfix stack
+	/*
+	 * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
+	 * fix up the gap and long frame:
+	 *
+	 * 3 - original frame	(exception)
+	 * 2 - ESPFIX block	(above)
+	 * 6 - gap		(FIXUP_FRAME)
+	 * 5 - long frame	(FIXUP_FRAME)
+	 * 1 - orig_ax
+	 */
+	lss	(1+5+6)*4(%esp), %esp	# back to espfix stack
	jmp	.Lirq_return
#endif
END(nmi)
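The operand of the final lss reads as a word count: the sp/ss pair pushed back at .Lnmi_espfix_stack sits above the orig_ax slot, the 5-word long IRET frame and the 6-word gap that IRET_FRAME deliberately skipped (because CS_FROM_KERNEL was cleared), so the return path steps over 1 + 5 + 6 = 12 words, i.e. 48 bytes. The same arithmetic in C, for reference (illustration only):

/* Illustration only -- not kernel code. */
enum {
	NMI_WORDS_ORIG_AX	= 1,	/* orig_ax pushed before SAVE_ALL_NMI     */
	NMI_WORDS_LONG_FRAME	= 5,	/* ip, cs, flags, sp, ss from FIXUP_FRAME */
	NMI_WORDS_GAP		= 6,	/* gap left in place by FIXUP_FRAME       */
};
/* (1 + 5 + 6) * 4 == 48: offset of the two-word sp/ss block that
 * lss loads to return to the espfix stack. */
#define NMI_ESPFIX_LSS_OFFSET \
	((NMI_WORDS_ORIG_AX + NMI_WORDS_LONG_FRAME + NMI_WORDS_GAP) * 4)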