Skip to content

Commit 7508500

Browse files
author
Peter Zijlstra
committed
x86/alternative: Implement .retpoline_sites support
Rewrite retpoline thunk call sites to be indirect calls for spectre_v2=off. This ensures spectre_v2=off is as near to a RETPOLINE=n build as possible. This is the replacement for objtool writing alternative entries to ensure the same and achieves feature-parity with the previous approach. One noteworthy feature is that it relies on the thunks to be in machine order to compute the register index. Specifically, this does not yet address the Jcc __x86_indirect_thunk_* calls generated by clang, a future patch will add this. Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Borislav Petkov <[email protected]> Acked-by: Josh Poimboeuf <[email protected]> Tested-by: Alexei Starovoitov <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 1a6f744 commit 7508500

File tree

4 files changed

+150
-5
lines changed

4 files changed

+150
-5
lines changed

arch/um/kernel/um_arch.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,10 @@ void __init check_bugs(void)
421421
os_check_bugs();
422422
}
423423

424+
/*
 * Empty stub: accepts the [start, end) bounds of a retpoline-site table and
 * does nothing with them.
 * NOTE(review): presumably provided so that callers of apply_retpolines()
 * still link on this architecture -- confirm against the callers.
 */
void apply_retpolines(s32 *start, s32 *end)
425+
{
426+
}
427+
424428
/*
 * Empty stub: accepts the [start, end) bounds of an alt_instr table and does
 * nothing with them.
 * NOTE(review): presumably this architecture has no alternatives to patch and
 * only needs the symbol to exist -- confirm against the callers.
 */
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
425429
{
426430
}

arch/x86/include/asm/alternative.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ extern int alternatives_patched;
7575

7676
extern void alternative_instructions(void);
7777
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
78+
extern void apply_retpolines(s32 *start, s32 *end);
7879

7980
struct module;
8081

arch/x86/kernel/alternative.c

Lines changed: 137 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <asm/io.h>
3030
#include <asm/fixmap.h>
3131
#include <asm/paravirt.h>
32+
#include <asm/asm-prototypes.h>
3233

3334
int __read_mostly alternatives_patched;
3435

@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
113114
}
114115
}
115116

117+
extern s32 __retpoline_sites[], __retpoline_sites_end[];
116118
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
117119
extern s32 __smp_locks[], __smp_locks_end[];
118120
void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
221223
* "noinline" to cause control flow change and thus invalidate I$ and
222224
* cause refetch after modification.
223225
*/
224-
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
226+
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
225227
{
226228
struct insn insn;
227229
int i = 0;
@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
239241
* optimized.
240242
*/
241243
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
242-
i += optimize_nops_range(instr, a->instrlen, i);
244+
i += optimize_nops_range(instr, len, i);
243245
else
244246
i += insn.length;
245247

246-
if (i >= a->instrlen)
248+
if (i >= len)
247249
return;
248250
}
249251
}
@@ -331,10 +333,135 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
331333
text_poke_early(instr, insn_buff, insn_buff_sz);
332334

333335
next:
334-
optimize_nops(a, instr);
336+
optimize_nops(instr, a->instrlen);
335337
}
336338
}
337339

340+
#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
341+
342+
/*
343+
* CALL/JMP *%\reg
344+
*/
345+
static int emit_indirect(int op, int reg, u8 *bytes)
346+
{
347+
int i = 0;
348+
u8 modrm;
349+
350+
switch (op) {
351+
case CALL_INSN_OPCODE:
352+
modrm = 0x10; /* Reg = 2; CALL r/m */
353+
break;
354+
355+
case JMP32_INSN_OPCODE:
356+
modrm = 0x20; /* Reg = 4; JMP r/m */
357+
break;
358+
359+
default:
360+
WARN_ON_ONCE(1);
361+
return -1;
362+
}
363+
364+
if (reg >= 8) {
365+
bytes[i++] = 0x41; /* REX.B prefix */
366+
reg -= 8;
367+
}
368+
369+
modrm |= 0xc0; /* Mod = 3 */
370+
modrm += reg;
371+
372+
bytes[i++] = 0xff; /* opcode */
373+
bytes[i++] = modrm;
374+
375+
return i;
376+
}
377+
378+
/*
379+
* Rewrite the compiler generated retpoline thunk calls.
380+
*
381+
* For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
382+
* indirect instructions, avoiding the extra indirection.
383+
*
384+
* For example, convert:
385+
*
386+
* CALL __x86_indirect_thunk_\reg
387+
*
388+
* into:
389+
*
390+
* CALL *%\reg
391+
*
392+
*/
393+
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
394+
{
395+
retpoline_thunk_t *target;
396+
int reg, i = 0;
397+
398+
target = addr + insn->length + insn->immediate.value;
399+
reg = target - __x86_indirect_thunk_array;
400+
401+
if (WARN_ON_ONCE(reg & ~0xf))
402+
return -1;
403+
404+
/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
405+
BUG_ON(reg == 4);
406+
407+
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
408+
return -1;
409+
410+
i = emit_indirect(insn->opcode.bytes[0], reg, bytes);
411+
if (i < 0)
412+
return i;
413+
414+
for (; i < insn->length;)
415+
bytes[i++] = BYTES_NOP1;
416+
417+
return i;
418+
}
419+
420+
/*
421+
* Generated by 'objtool --retpoline'.
422+
*/
423+
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
424+
{
425+
s32 *s;
426+
427+
for (s = start; s < end; s++) {
428+
void *addr = (void *)s + *s;
429+
struct insn insn;
430+
int len, ret;
431+
u8 bytes[16];
432+
u8 op1, op2;
433+
434+
ret = insn_decode_kernel(&insn, addr);
435+
if (WARN_ON_ONCE(ret < 0))
436+
continue;
437+
438+
op1 = insn.opcode.bytes[0];
439+
op2 = insn.opcode.bytes[1];
440+
441+
switch (op1) {
442+
case CALL_INSN_OPCODE:
443+
case JMP32_INSN_OPCODE:
444+
break;
445+
446+
default:
447+
WARN_ON_ONCE(1);
448+
continue;
449+
}
450+
451+
len = patch_retpoline(addr, &insn, bytes);
452+
if (len == insn.length) {
453+
optimize_nops(bytes, len);
454+
text_poke_early(addr, bytes, len);
455+
}
456+
}
457+
}
458+
459+
#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
460+
461+
/*
 * No-op variant, built when CONFIG_RETPOLINE or CONFIG_STACK_VALIDATION is
 * not defined: the table bounds are accepted and ignored.
 */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
462+
463+
#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
464+
338465
#ifdef CONFIG_SMP
339466
static void alternatives_smp_lock(const s32 *start, const s32 *end,
340467
u8 *text, u8 *text_end)
@@ -642,6 +769,12 @@ void __init alternative_instructions(void)
642769
*/
643770
apply_paravirt(__parainstructions, __parainstructions_end);
644771

772+
/*
773+
* Rewrite the retpolines, must be done before alternatives since
774+
* those can rewrite the retpoline thunks.
775+
*/
776+
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
777+
645778
/*
646779
* Then patch alternatives, such that those paravirt calls that are in
647780
* alternatives can be overwritten by their immediate fragments.

arch/x86/kernel/module.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
251251
struct module *me)
252252
{
253253
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
254-
*para = NULL, *orc = NULL, *orc_ip = NULL;
254+
*para = NULL, *orc = NULL, *orc_ip = NULL,
255+
*retpolines = NULL;
255256
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
256257

257258
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
267268
orc = s;
268269
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
269270
orc_ip = s;
271+
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
272+
retpolines = s;
270273
}
271274

275+
if (retpolines) {
276+
void *rseg = (void *)retpolines->sh_addr;
277+
apply_retpolines(rseg, rseg + retpolines->sh_size);
278+
}
272279
if (alt) {
273280
/* patch .altinstructions */
274281
void *aseg = (void *)alt->sh_addr;

0 commit comments

Comments
 (0)