
Commit 8a6bd2f

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "An unfortunately larger set of fixes, but a large portion is
  selftests:

   - Fix the missing clusterid initialization for x2apic cluster
     management which caused boot failures due to IPIs being sent to
     the wrong cluster

   - Drop TS_COMPAT when a 64bit executable is exec()'ed from a compat
     task

   - Wrap access to __supported_pte_mask in __startup_64() where the
     clang compile fails due to a non-PC-relative access being
     generated

   - Two fixes for 5-level paging fallout in the decompressor:

      - Handle the GOT correctly for paging_prepare() and
        cleanup_trampoline()

      - Fix the page table handling in cleanup_trampoline() to avoid
        page table corruption

   - Stop special-casing protection key 0, as this is inconsistent with
     the manpage and also inconsistent with the allocation map handling

   - Override the protection key when moving away from PROT_EXEC to
     prevent inaccessible memory

   - Fix and update the protection key selftests to address breakage
     and to cover the above issue

   - Add a MOV SS self test"

[ Part of the x86 fixes were in the earlier core pull due to dependencies ]

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/mm: Drop TS_COMPAT on 64-bit exec() syscall
  x86/apic/x2apic: Initialize cluster ID properly
  x86/boot/compressed/64: Fix moving page table out of trampoline memory
  x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
  x86/pkeys: Do not special case protection key 0
  x86/pkeys/selftests: Add a test for pkey 0
  x86/pkeys/selftests: Save off 'prot' for allocations
  x86/pkeys/selftests: Fix pointer math
  x86/pkeys: Override pkey when moving away from PROT_EXEC
  x86/pkeys/selftests: Fix pkey exhaustion test off-by-one
  x86/pkeys/selftests: Add PROT_EXEC test
  x86/pkeys/selftests: Factor out "instruction page"
  x86/pkeys/selftests: Allow faults on unknown keys
  x86/pkeys/selftests: Avoid printf-in-signal deadlocks
  x86/pkeys/selftests: Remove dead debugging code, fix dprint_in_signal
  x86/pkeys/selftests: Stop using assert()
  x86/pkeys/selftests: Give better unexpected fault error messages
  x86/selftests: Add mov_to_ss test
  x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI
  x86/pkeys/selftests: Adjust the self-test to fresh distros that export the pkeys ABI
  ...
2 parents: b9aad92 + acf4602 · commit 8a6bd2f

File tree

13 files changed: +585 -129 lines changed


arch/x86/boot/compressed/head_64.S

Lines changed: 66 additions & 13 deletions
@@ -305,6 +305,25 @@ ENTRY(startup_64)
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
 
+	/*
+	 * paging_prepare() and cleanup_trampoline() below can have GOT
+	 * references. Adjust the table with address we are running at.
+	 *
+	 * Zero RAX for adjust_got: the GOT was not adjusted before;
+	 * there's no adjustment to undo.
+	 */
+	xorq	%rax, %rax
+
+	/*
+	 * Calculate the address the binary is loaded at and use it as
+	 * a GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rdi
+	subq	$1b, %rdi
+
+	call	adjust_got
+
 	/*
 	 * At this point we are in long mode with 4-level paging enabled,
 	 * but we might want to enable 5-level paging or vice versa.
@@ -370,17 +389,36 @@ trampoline_return:
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
 	 *
+	 * RDI is address of the page table to use instead of page table
+	 * in trampoline memory (if required).
+	 *
 	 * RSI holds real mode data and needs to be preserved across
 	 * this function call.
 	 */
 	pushq	%rsi
+	leaq	top_pgtable(%rbx), %rdi
 	call	cleanup_trampoline
 	popq	%rsi
 
 	/* Zero EFLAGS */
 	pushq	$0
 	popfq
 
+	/*
+	 * Previously we've adjusted the GOT with address the binary was
+	 * loaded at. Now we need to re-adjust for relocation address.
+	 *
+	 * Calculate the address the binary is loaded at, so that we can
+	 * undo the previous GOT adjustment.
+	 */
+	call	1f
+1:	popq	%rax
+	subq	$1b, %rax
+
+	/* The new adjustment is the relocation address */
+	movq	%rbx, %rdi
+	call	adjust_got
+
 	/*
 	 * Copy the compressed kernel to the end of our buffer
 	 * where decompression in place becomes safe.
@@ -481,19 +519,6 @@ relocated:
 	shrq	$3, %rcx
 	rep	stosq
 
-	/*
-	 * Adjust our own GOT
-	 */
-	leaq	_got(%rip), %rdx
-	leaq	_egot(%rip), %rcx
-1:
-	cmpq	%rcx, %rdx
-	jae	2f
-	addq	%rbx, (%rdx)
-	addq	$8, %rdx
-	jmp	1b
-2:
-
 	/*
 	 * Do the extraction, and jump to the new kernel..
 	 */
@@ -512,6 +537,27 @@ relocated:
 	 */
 	jmp	*%rax
 
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+	/* Walk through the GOT adding the address to the entries */
+	leaq	_got(%rip), %rdx
+	leaq	_egot(%rip), %rcx
+1:
+	cmpq	%rcx, %rdx
+	jae	2f
+	subq	%rax, (%rdx)	/* Undo previous adjustment */
+	addq	%rdi, (%rdx)	/* Apply the new adjustment */
+	addq	$8, %rdx
+	jmp	1b
+2:
+	ret
+
 	.code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +695,10 @@ boot_stack_end:
 	.balign	4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+	.fill PAGE_SIZE, 1, 0
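
For illustration, here is a minimal user-space C model of the two-phase adjustment adjust_got performs above: each entry has the previous bias subtracted and the new bias added, so the routine can run once with the load address and again with the relocation address. This is a sketch, not kernel code; the table contents and bias values are invented.

#include <stdint.h>
#include <stdio.h>

/* Model of adjust_got: undo the old bias, then apply the new one. */
static void adjust_got(uint64_t *got, size_t entries,
		       uint64_t old_bias, uint64_t new_bias)
{
	for (size_t i = 0; i < entries; i++)
		got[i] = got[i] - old_bias + new_bias;
}

int main(void)
{
	uint64_t got[] = { 0x1000, 0x2000, 0x3000 };	/* link-time values */

	/* First call: no prior adjustment to undo (RAX == 0 in the asm) */
	adjust_got(got, 3, 0, 0x400000);		/* load address */

	/* Second call: undo the load address, apply the relocation address */
	adjust_got(got, 3, 0x400000, 0x800000);

	for (int i = 0; i < 3; i++)
		printf("entry %d: %#llx\n", i, (unsigned long long)got[i]);
	return 0;
}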

arch/x86/boot/compressed/pgtable_64.c

Lines changed: 3 additions & 11 deletions
@@ -22,14 +22,6 @@ struct paging_config {
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
 /*
  * Trampoline address will be printed by extract_kernel() for debugging
  * purposes.
@@ -134,7 +126,7 @@ struct paging_config paging_prepare(void)
 	return paging_config;
 }
 
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
 {
 	void *trampoline_pgtable;
 
@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
 	 * if it's there.
 	 */
 	if ((void *)__native_read_cr3() == trampoline_pgtable) {
-		memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
-		native_write_cr3((unsigned long)top_pgtable);
+		memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+		native_write_cr3((unsigned long)pgtable);
 	}
 
 	/* Restore trampoline memory */
arch/x86/include/asm/mmu_context.h

Lines changed: 1 addition & 1 deletion
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
-		/* pkey 0 is the default and always allocated */
+		/* pkey 0 is the default and allocated implicitly */
 		mm->context.pkey_allocation_map = 0x1;
 		/* -1 means unallocated or invalid */
 		mm->context.execute_only_pkey = -1;

arch/x86/include/asm/pkeys.h

Lines changed: 14 additions & 4 deletions
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_PKEYS_H
 #define _ASM_X86_PKEYS_H
 
+#define ARCH_DEFAULT_PKEY	0
+
 #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
 
 extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
 static inline int execute_only_pkey(struct mm_struct *mm)
 {
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
-		return 0;
+		return ARCH_DEFAULT_PKEY;
 
 	return __execute_only_pkey(mm);
 }
@@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 {
 	/*
	 * "Allocated" pkeys are those that have been returned
-	 * from pkey_alloc(). pkey 0 is special, and never
-	 * returned from pkey_alloc().
+	 * from pkey_alloc() or pkey 0 which is allocated
+	 * implicitly when the mm is created.
 	 */
-	if (pkey <= 0)
+	if (pkey < 0)
 		return false;
 	if (pkey >= arch_max_pkey())
 		return false;
+	/*
+	 * The exec-only pkey is set in the allocation map, but
+	 * is not available to any of the user interfaces like
+	 * mprotect_pkey().
+	 */
+	if (pkey == mm->context.execute_only_pkey)
+		return false;
+
 	return mm_pkey_allocation_map(mm) & (1U << pkey);
 }
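
The net effect of the mm_pkey_is_allocated() change: pkey 0 now reads as allocated, while the execute-only pkey stays set in the allocation map but is hidden from the user interfaces. A stand-alone C sketch of the post-fix checks; struct mm_model and the constants below are stand-ins invented for illustration, not kernel types.

#include <stdbool.h>
#include <stdio.h>

#define ARCH_DEFAULT_PKEY	0
#define ARCH_MAX_PKEY		16

/* Stand-in for the per-mm pkey state the kernel keeps. */
struct mm_model {
	unsigned int pkey_allocation_map;	/* bit 0 set at mm creation */
	int execute_only_pkey;			/* -1 if not allocated */
};

/* Mirrors the post-fix mm_pkey_is_allocated() logic. */
static bool pkey_is_allocated(struct mm_model *mm, int pkey)
{
	if (pkey < 0)				/* was "pkey <= 0" pre-fix */
		return false;
	if (pkey >= ARCH_MAX_PKEY)
		return false;
	if (pkey == mm->execute_only_pkey)	/* allocated, but hidden */
		return false;
	return mm->pkey_allocation_map & (1U << pkey);
}

int main(void)
{
	struct mm_model mm = {
		.pkey_allocation_map = 0x1 | (1U << 15),
		.execute_only_pkey = 15,
	};

	printf("pkey 0:  %d\n", pkey_is_allocated(&mm, 0));	/* 1 now */
	printf("pkey 15: %d\n", pkey_is_allocated(&mm, 15));	/* 0: exec-only */
	return 0;
}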

arch/x86/kernel/apic/x2apic_cluster.c

Lines changed: 1 addition & 0 deletions
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
 		goto update;
 	}
 	cmsk = cluster_hotplug_mask;
+	cmsk->clusterid = cluster;
 	cluster_hotplug_mask = NULL;
 update:
 	this_cpu_write(cluster_masks, cmsk);

arch/x86/kernel/head64.c

Lines changed: 9 additions & 1 deletion
@@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr)
 }
 #endif
 
+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be adjusted using fixup_pointer().
+ */
 unsigned long __head __startup_64(unsigned long physaddr,
 				  struct boot_params *bp)
 {
@@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	p4dval_t *p4d;
 	pudval_t *pud;
 	pmdval_t *pmd, pmd_entry;
+	pteval_t *mask_ptr;
 	bool la57;
 	int i;
 	unsigned int *next_pgt_ptr;
@@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
 	/* Filter out unsupported __PAGE_KERNEL_* bits: */
-	pmd_entry &= __supported_pte_mask;
+	mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
+	pmd_entry &= *mask_ptr;
 	pmd_entry += sme_get_me_mask();
 	pmd_entry += physaddr;
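
The idea behind fixup_pointer() is to rebase a global's link-time address onto the address the code is actually running at, so the dereference is safe before relocations are processed. A user-space sketch of the arithmetic only; the base and addresses below are made-up example values, and the real helper in arch/x86/kernel/head64.c takes just the pointer and physaddr.

#include <stdint.h>
#include <stdio.h>

/* Model: rebase a link-time virtual address onto the load address. */
static uint64_t fixup_pointer(uint64_t ptr, uint64_t physaddr,
			      uint64_t text_base)
{
	return ptr - text_base + physaddr;
}

int main(void)
{
	uint64_t text_base = 0xffffffff81000000ULL;	/* link-time base */
	uint64_t physaddr  = 0x0000000001000000ULL;	/* actual load address */
	uint64_t global    = 0xffffffff81234567ULL;	/* some global's address */

	printf("fixed up: %#llx\n",
	       (unsigned long long)fixup_pointer(global, physaddr, text_base));
	return 0;
}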

arch/x86/kernel/process_64.c

Lines changed: 1 addition & 0 deletions
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
 	clear_thread_flag(TIF_X32);
 	/* Pretend that this comes from a 64bit execve */
 	task_pt_regs(current)->orig_ax = __NR_execve;
+	current_thread_info()->status &= ~TS_COMPAT;
 
 	/* Ensure the corresponding mm is not marked. */
 	if (current->mm)

arch/x86/mm/pkeys.c

Lines changed: 11 additions & 10 deletions
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
 	 */
 	if (pkey != -1)
 		return pkey;
-	/*
-	 * Look for a protection-key-drive execute-only mapping
-	 * which is now being given permissions that are not
-	 * execute-only. Move it back to the default pkey.
-	 */
-	if (vma_is_pkey_exec_only(vma) &&
-	    (prot & (PROT_READ|PROT_WRITE))) {
-		return 0;
-	}
+
 	/*
 	 * The mapping is execute-only. Go try to get the
 	 * execute-only protection key. If we fail to do that,
 	 * fall through as if we do not have execute-only
-	 * support.
+	 * support in this mm.
 	 */
 	if (prot == PROT_EXEC) {
 		pkey = execute_only_pkey(vma->vm_mm);
 		if (pkey > 0)
 			return pkey;
+	} else if (vma_is_pkey_exec_only(vma)) {
+		/*
+		 * Protections are *not* PROT_EXEC, but the mapping
+		 * is using the exec-only pkey. This mapping was
+		 * PROT_EXEC and will no longer be. Move back to
+		 * the default pkey.
+		 */
+		return ARCH_DEFAULT_PKEY;
 	}
+
 	/*
 	 * This is a vanilla, non-pkey mprotect (or we failed to
 	 * setup execute-only), inherit the pkey from the VMA we
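
The user-visible contract of this override: a mapping that picked up the execute-only pkey via mprotect(PROT_EXEC) must become accessible again once it is no longer execute-only. A minimal demo sketch of that behavior, assuming a pkeys-capable kernel and CPU; on hardware without pkeys the exec-only path is simply never taken and the access succeeds anyway.

#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	long pg = 4096;
	char *p = mmap(NULL, pg, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 0xc3, pg);		/* fill with 'ret' opcodes */

	/* Plain PROT_EXEC: the kernel may assign the exec-only pkey */
	if (mprotect(p, pg, PROT_EXEC))
		return 1;

	/*
	 * Back to read/write: with the fix the kernel moves the VMA off
	 * the exec-only pkey, so this access must not fault.
	 */
	if (mprotect(p, pg, PROT_READ | PROT_WRITE))
		return 1;
	p[0] = 1;
	printf("accessible again: %d\n", p[0]);
	return 0;
}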

tools/testing/selftests/x86/Makefile

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
-			protection_keys test_vdso test_vsyscall
+			protection_keys test_vdso test_vsyscall mov_ss_trap
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
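
The new mov_ss_trap test targets the one-instruction shadow after a MOV to SS, during which interrupts and debug exceptions are deferred; the real selftest arms a watchpoint on the MOV SS operand so the deferred #DB collides with the trap that follows. The sketch below runs only the benign instruction sequence itself, with the watchpoint setup omitted; it is an illustration, not the selftest.

#include <signal.h>
#include <stdio.h>

static void on_trap(int sig)
{
	/* SIGTRAP from the int3; returning resumes after the trap. */
	(void)sig;
}

int main(void)
{
	unsigned short ss;

	signal(SIGTRAP, on_trap);

	/*
	 * Reloading SS with its current value is architecturally valid
	 * and starts the MOV-SS shadow; the int3 immediately after is
	 * the spot the selftest probes with a watchpoint armed.
	 */
	asm volatile ("mov %%ss, %0" : "=r" (ss));
	asm volatile ("mov %0, %%ss\n\tint3" : : "r" (ss));

	puts("survived MOV SS; INT3");
	return 0;
}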
