Skip to content

Commit e52c1dc

Browse files
Peter Zijlstra authored and hansendc committed
x86/its: FineIBT-paranoid vs ITS
FineIBT-paranoid was using the retpoline bytes for the paranoid check,
disabling retpolines, because all parts that have IBT also have eIBRS
and thus don't need no stinking retpolines.

Except... ITS needs the retpolines, because indirect calls must not be in
the first half of a cacheline :-/

So what was the paranoid call sequence:

  <fineibt_paranoid_start>:
   0:   41 ba 78 56 34 12       mov    $0x12345678, %r10d
   6:   45 3b 53 f7             cmp    -0x9(%r11), %r10d
   a:   4d 8d 5b <f0>           lea    -0x10(%r11), %r11
   e:   75 fd                   jne    d <fineibt_paranoid_start+0xd>
  10:   41 ff d3                call   *%r11
  13:   90                      nop

Now becomes:

  <fineibt_paranoid_start>:
   0:   41 ba 78 56 34 12       mov    $0x12345678, %r10d
   6:   45 3b 53 f7             cmp    -0x9(%r11), %r10d
   a:   4d 8d 5b f0             lea    -0x10(%r11), %r11
   e:   2e e8 XX XX XX XX       cs call __x86_indirect_paranoid_thunk_r11

Where the paranoid_thunk looks like:

  1d:   <ea>                    (bad)
  __x86_indirect_paranoid_thunk_r11:
  1e:   75 fd                   jne 1d
  __x86_indirect_its_thunk_r11:
  20:   41 ff eb                jmp *%r11
  23:   cc                      int3

[ dhansen: remove initialization to false ]

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Pawan Gupta <[email protected]>
Signed-off-by: Dave Hansen <[email protected]>
Reviewed-by: Alexandre Chartre <[email protected]>
1 parent 872df34 commit e52c1dc

File tree

5 files changed

+159
-20
lines changed

5 files changed

+159
-20
lines changed

arch/x86/include/asm/alternative.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <linux/stringify.h>
77
#include <linux/objtool.h>
88
#include <asm/asm.h>
9+
#include <asm/bug.h>
910

1011
#define ALT_FLAGS_SHIFT 16
1112

@@ -128,10 +129,17 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
128129
extern void its_init_mod(struct module *mod);
129130
extern void its_fini_mod(struct module *mod);
130131
extern void its_free_mod(struct module *mod);
132+
extern u8 *its_static_thunk(int reg);
131133
#else /* CONFIG_MITIGATION_ITS */
132134
static inline void its_init_mod(struct module *mod) { }
133135
static inline void its_fini_mod(struct module *mod) { }
134136
static inline void its_free_mod(struct module *mod) { }
137+
static inline u8 *its_static_thunk(int reg)
138+
{
139+
WARN_ONCE(1, "ITS not compiled in");
140+
141+
return NULL;
142+
}
135143
#endif
136144

137145
#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL)

arch/x86/kernel/alternative.c

Lines changed: 129 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
127127
#endif
128128
};
129129

130+
#ifdef CONFIG_FINEIBT
131+
static bool cfi_paranoid __ro_after_init;
132+
#endif
133+
130134
#ifdef CONFIG_MITIGATION_ITS
131135

132136
static struct module *its_mod;
@@ -137,8 +141,25 @@ static unsigned int its_offset;
137141
static void *its_init_thunk(void *thunk, int reg)
138142
{
139143
u8 *bytes = thunk;
144+
int offset = 0;
140145
int i = 0;
141146

147+
#ifdef CONFIG_FINEIBT
148+
if (cfi_paranoid) {
149+
/*
150+
* When ITS uses indirect branch thunk the fineibt_paranoid
151+
* caller sequence doesn't fit in the caller site. So put the
152+
* remaining part of the sequence (<ea> + JNE) into the ITS
153+
* thunk.
154+
*/
155+
bytes[i++] = 0xea; /* invalid instruction */
156+
bytes[i++] = 0x75; /* JNE */
157+
bytes[i++] = 0xfd;
158+
159+
offset = 1;
160+
}
161+
#endif
162+
142163
if (reg >= 8) {
143164
bytes[i++] = 0x41; /* REX.B prefix */
144165
reg -= 8;
@@ -147,7 +168,7 @@ static void *its_init_thunk(void *thunk, int reg)
147168
bytes[i++] = 0xe0 + reg; /* jmp *reg */
148169
bytes[i++] = 0xcc;
149170

150-
return thunk;
171+
return thunk + offset;
151172
}
152173

153174
void its_init_mod(struct module *mod)
@@ -217,6 +238,17 @@ static void *its_allocate_thunk(int reg)
217238
int size = 3 + (reg / 8);
218239
void *thunk;
219240

241+
#ifdef CONFIG_FINEIBT
242+
/*
243+
* The ITS thunk contains an indirect jump and an int3 instruction so
244+
* its size is 3 or 4 bytes depending on the register used. If CFI
245+
* paranoid is used then 3 extra bytes are added in the ITS thunk to
246+
* complete the fineibt_paranoid caller sequence.
247+
*/
248+
if (cfi_paranoid)
249+
size += 3;
250+
#endif
251+
220252
if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
221253
its_page = its_alloc();
222254
if (!its_page) {
@@ -240,6 +272,18 @@ static void *its_allocate_thunk(int reg)
240272
return its_init_thunk(thunk, reg);
241273
}
242274

275+
u8 *its_static_thunk(int reg)
276+
{
277+
u8 *thunk = __x86_indirect_its_thunk_array[reg];
278+
279+
#ifdef CONFIG_FINEIBT
280+
/* Paranoid thunk starts 2 bytes before */
281+
if (cfi_paranoid)
282+
return thunk - 2;
283+
#endif
284+
return thunk;
285+
}
286+
243287
#endif
244288

245289
/*
@@ -775,8 +819,17 @@ static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
775819
/* Lower-half of the cacheline? */
776820
return !(addr & 0x20);
777821
}
822+
#else /* CONFIG_MITIGATION_ITS */
823+
824+
#ifdef CONFIG_FINEIBT
825+
static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
826+
{
827+
return false;
828+
}
778829
#endif
779830

831+
#endif /* CONFIG_MITIGATION_ITS */
832+
780833
/*
781834
* Rewrite the compiler generated retpoline thunk calls.
782835
*
@@ -893,6 +946,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
893946
int len, ret;
894947
u8 bytes[16];
895948
u8 op1, op2;
949+
u8 *dest;
896950

897951
ret = insn_decode_kernel(&insn, addr);
898952
if (WARN_ON_ONCE(ret < 0))
@@ -909,6 +963,12 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
909963

910964
case CALL_INSN_OPCODE:
911965
case JMP32_INSN_OPCODE:
966+
/* Check for cfi_paranoid + ITS */
967+
dest = addr + insn.length + insn.immediate.value;
968+
if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) {
969+
WARN_ON_ONCE(cfi_mode != CFI_FINEIBT);
970+
continue;
971+
}
912972
break;
913973

914974
case 0x0f: /* escape */
@@ -1198,8 +1258,6 @@ int cfi_get_func_arity(void *func)
11981258
static bool cfi_rand __ro_after_init = true;
11991259
static u32 cfi_seed __ro_after_init;
12001260

1201-
static bool cfi_paranoid __ro_after_init = false;
1202-
12031261
/*
12041262
* Re-hash the CFI hash with a boot-time seed while making sure the result is
12051263
* not a valid ENDBR instruction.
@@ -1612,6 +1670,19 @@ static int cfi_rand_callers(s32 *start, s32 *end)
16121670
return 0;
16131671
}
16141672

1673+
/*
 * Emit, via __emit_trampoline(), a trampoline for the instruction @insn at
 * @addr into @bytes, targeting the paranoid ITS thunk for register @reg.
 *
 * The default target lies 2 bytes before the static thunk found in
 * __x86_indirect_its_thunk_array[reg]. With CONFIG_MITIGATION_ITS, a thunk
 * obtained from its_allocate_thunk() replaces it whenever that call
 * returns a non-NULL pointer.
 *
 * Returns the value returned by __emit_trampoline().
 */
static int emit_paranoid_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
	u8 *target = (void *)__x86_indirect_its_thunk_array[reg] - 2;

#ifdef CONFIG_MITIGATION_ITS
	u8 *allocated = its_allocate_thunk(reg);

	/* Fall back to the static thunk if the allocation failed. */
	if (allocated)
		target = allocated;
#endif

	return __emit_trampoline(addr, insn, bytes, target, target);
}
1685+
16151686
static int cfi_rewrite_callers(s32 *start, s32 *end)
16161687
{
16171688
s32 *s;
@@ -1653,9 +1724,14 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
16531724
memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size);
16541725
memcpy(bytes + fineibt_caller_hash, &hash, 4);
16551726

1656-
ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
1657-
if (WARN_ON_ONCE(ret != 3))
1658-
continue;
1727+
if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + fineibt_paranoid_ind, 11)) {
1728+
emit_paranoid_trampoline(addr + fineibt_caller_size,
1729+
&insn, 11, bytes + fineibt_caller_size);
1730+
} else {
1731+
ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
1732+
if (WARN_ON_ONCE(ret != 3))
1733+
continue;
1734+
}
16591735

16601736
text_poke_early(addr, bytes, fineibt_paranoid_size);
16611737
}
@@ -1882,29 +1958,66 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32
18821958
return false;
18831959
}
18841960

1961+
static bool is_paranoid_thunk(unsigned long addr)
1962+
{
1963+
u32 thunk;
1964+
1965+
__get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault);
1966+
return (thunk & 0x00FFFFFF) == 0xfd75ea;
1967+
1968+
Efault:
1969+
return false;
1970+
}
1971+
18851972
/*
18861973
* regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[]
1887-
* sequence.
1974+
* sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS
1975+
* thunk.
18881976
*/
18891977
static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type)
18901978
{
18911979
unsigned long addr = regs->ip - fineibt_paranoid_ud;
1892-
u32 hash;
18931980

1894-
if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2))
1981+
if (!cfi_paranoid)
18951982
return false;
18961983

1897-
__get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault);
1898-
*target = regs->r11 + fineibt_preamble_size;
1899-
*type = regs->r10;
1984+
if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) {
1985+
*target = regs->r11 + fineibt_preamble_size;
1986+
*type = regs->r10;
1987+
1988+
/*
1989+
* Since the trapping instruction is the exact, but LOCK prefixed,
1990+
* Jcc.d8 that got us here, the normal fixup will work.
1991+
*/
1992+
return true;
1993+
}
19001994

19011995
/*
1902-
* Since the trapping instruction is the exact, but LOCK prefixed,
1903-
* Jcc.d8 that got us here, the normal fixup will work.
1996+
* The cfi_paranoid + ITS thunk combination results in:
1997+
*
1998+
* 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d
1999+
* 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d
2000+
* a: 4d 8d 5b f0 lea -0x10(%r11), %r11
2001+
* e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11
2002+
*
2003+
* Where the paranoid_thunk looks like:
2004+
*
2005+
* 1d: <ea> (bad)
2006+
* __x86_indirect_paranoid_thunk_r11:
2007+
* 1e: 75 fd jne 1d
2008+
* __x86_indirect_its_thunk_r11:
2009+
* 20: 41 ff eb jmp *%r11
2010+
* 23: cc int3
2011+
*
19042012
*/
1905-
return true;
2013+
if (is_paranoid_thunk(regs->ip)) {
2014+
*target = regs->r11 + fineibt_preamble_size;
2015+
*type = regs->r10;
2016+
2017+
regs->ip = *target;
2018+
return true;
2019+
}
19062020

1907-
Efault:
19082021
return false;
19092022
}
19102023

arch/x86/lib/retpoline.S

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -371,26 +371,35 @@ SYM_FUNC_END(call_depth_return_thunk)
371371

372372
.macro ITS_THUNK reg
373373

374+
/*
375+
* If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b)
376+
* that complete the fineibt_paranoid caller sequence.
377+
*/
378+
1: .byte 0xea
379+
SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL)
380+
UNWIND_HINT_UNDEFINED
381+
ANNOTATE_NOENDBR
382+
jne 1b
374383
SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
375384
UNWIND_HINT_UNDEFINED
376385
ANNOTATE_NOENDBR
377386
ANNOTATE_RETPOLINE_SAFE
378387
jmp *%\reg
379388
int3
380389
.align 32, 0xcc /* fill to the end of the line */
381-
.skip 32, 0xcc /* skip to the next upper half */
390+
.skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */
382391
.endm
383392

384393
/* ITS mitigation requires thunks be aligned to upper half of cacheline */
385394
.align 64, 0xcc
386-
.skip 32, 0xcc
387-
SYM_CODE_START(__x86_indirect_its_thunk_array)
395+
.skip 29, 0xcc
388396

389397
#define GEN(reg) ITS_THUNK reg
390398
#include <asm/GEN-for-each-reg.h>
391399
#undef GEN
392400

393401
.align 64, 0xcc
402+
SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax)
394403
SYM_CODE_END(__x86_indirect_its_thunk_array)
395404

396405
.align 64, 0xcc

arch/x86/net/bpf_jit_comp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
663663

664664
if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
665665
OPTIMIZER_HIDE_VAR(reg);
666-
emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip);
666+
emit_jump(&prog, its_static_thunk(reg), ip);
667667
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
668668
EMIT_LFENCE();
669669
EMIT2(0xFF, 0xE0 + reg);

tools/objtool/arch/x86/decode.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
189189
op2 = ins.opcode.bytes[1];
190190
op3 = ins.opcode.bytes[2];
191191

192+
/*
193+
* XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
194+
*/
195+
if (op1 == 0xea) {
196+
insn->len = 1;
197+
insn->type = INSN_BUG;
198+
return 0;
199+
}
200+
192201
if (ins.rex_prefix.nbytes) {
193202
rex = ins.rex_prefix.bytes[0];
194203
rex_w = X86_REX_W(rex) >> 3;

0 commit comments

Comments
 (0)