Skip to content

Commit 87c87ec

Browse files
author
Peter Zijlstra
committed
bpf,x86: Respect X86_FEATURE_RETPOLINE*
Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and unconditionally emits a thunk call, this is sub-optimal and doesn't match the regular, compiler generated, code. Update the i386 JIT to emit code equal to what the compiler emits for the regular kernel text (IOW. a plain THUNK call). Update the x86_64 JIT to emit code similar to the result of compiler and kernel rewrites as according to X86_FEATURE_RETPOLINE* flags. Inlining RETPOLINE_AMD (lfence; jmp *%reg) and !RETPOLINE (jmp *%reg), while doing a THUNK call for RETPOLINE. This removes the hard-coded retpoline thunks and shrinks the generated code. Leaving a single retpoline thunk definition in the kernel. Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Borislav Petkov <[email protected]> Acked-by: Alexei Starovoitov <[email protected]> Acked-by: Josh Poimboeuf <[email protected]> Tested-by: Alexei Starovoitov <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent dceba08 commit 87c87ec

File tree

3 files changed

+41
-86
lines changed

3 files changed

+41
-86
lines changed

arch/x86/include/asm/nospec-branch.h

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -316,63 +316,4 @@ static inline void mds_idle_clear_cpu_buffers(void)
316316

317317
#endif /* __ASSEMBLY__ */
318318

319-
/*
320-
* Below is used in the eBPF JIT compiler and emits the byte sequence
321-
* for the following assembly:
322-
*
323-
* With retpolines configured:
324-
*
325-
* callq do_rop
326-
* spec_trap:
327-
* pause
328-
* lfence
329-
* jmp spec_trap
330-
* do_rop:
331-
* mov %rcx,(%rsp) for x86_64
332-
* mov %edx,(%esp) for x86_32
333-
* retq
334-
*
335-
* Without retpolines configured:
336-
*
337-
* jmp *%rcx for x86_64
338-
* jmp *%edx for x86_32
339-
*/
340-
#ifdef CONFIG_RETPOLINE
341-
# ifdef CONFIG_X86_64
342-
# define RETPOLINE_RCX_BPF_JIT_SIZE 17
343-
# define RETPOLINE_RCX_BPF_JIT() \
344-
do { \
345-
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
346-
/* spec_trap: */ \
347-
EMIT2(0xF3, 0x90); /* pause */ \
348-
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
349-
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
350-
/* do_rop: */ \
351-
EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
352-
EMIT1(0xC3); /* retq */ \
353-
} while (0)
354-
# else /* !CONFIG_X86_64 */
355-
# define RETPOLINE_EDX_BPF_JIT() \
356-
do { \
357-
EMIT1_off32(0xE8, 7); /* call do_rop */ \
358-
/* spec_trap: */ \
359-
EMIT2(0xF3, 0x90); /* pause */ \
360-
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
361-
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
362-
/* do_rop: */ \
363-
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
364-
EMIT1(0xC3); /* ret */ \
365-
} while (0)
366-
# endif
367-
#else /* !CONFIG_RETPOLINE */
368-
# ifdef CONFIG_X86_64
369-
# define RETPOLINE_RCX_BPF_JIT_SIZE 2
370-
# define RETPOLINE_RCX_BPF_JIT() \
371-
EMIT2(0xFF, 0xE1); /* jmp *%rcx */
372-
# else /* !CONFIG_X86_64 */
373-
# define RETPOLINE_EDX_BPF_JIT() \
374-
EMIT2(0xFF, 0xE2) /* jmp *%edx */
375-
# endif
376-
#endif
377-
378319
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */

arch/x86/net/bpf_jit_comp.c

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,25 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
387387
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
388388
}
389389

390+
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
391+
392+
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
393+
{
394+
u8 *prog = *pprog;
395+
396+
#ifdef CONFIG_RETPOLINE
397+
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
398+
EMIT_LFENCE();
399+
EMIT2(0xFF, 0xE0 + reg);
400+
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
401+
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
402+
} else
403+
#endif
404+
EMIT2(0xFF, 0xE0 + reg);
405+
406+
*pprog = prog;
407+
}
408+
390409
/*
391410
* Generate the following code:
392411
*
@@ -468,7 +487,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
468487
* rdi == ctx (1st arg)
469488
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
470489
*/
471-
RETPOLINE_RCX_BPF_JIT();
490+
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
472491

473492
/* out: */
474493
ctx->tail_call_indirect_label = prog - start;
@@ -1179,8 +1198,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
11791198
/* speculation barrier */
11801199
case BPF_ST | BPF_NOSPEC:
11811200
if (boot_cpu_has(X86_FEATURE_XMM2))
1182-
/* Emit 'lfence' */
1183-
EMIT3(0x0F, 0xAE, 0xE8);
1201+
EMIT_LFENCE();
11841202
break;
11851203

11861204
/* ST: *(u8*)(dst_reg + off) = imm */
@@ -2084,24 +2102,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
20842102
return ret;
20852103
}
20862104

2087-
static int emit_fallback_jump(u8 **pprog)
2088-
{
2089-
u8 *prog = *pprog;
2090-
int err = 0;
2091-
2092-
#ifdef CONFIG_RETPOLINE
2093-
/* Note that this assumes the compiler uses external
2094-
* thunks for indirect calls. Both clang and GCC use the same
2095-
* naming convention for external thunks.
2096-
*/
2097-
err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
2098-
#else
2099-
EMIT2(0xFF, 0xE2); /* jmp rdx */
2100-
#endif
2101-
*pprog = prog;
2102-
return err;
2103-
}
2104-
21052105
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
21062106
{
21072107
u8 *jg_reloc, *prog = *pprog;
@@ -2123,9 +2123,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
21232123
if (err)
21242124
return err;
21252125

2126-
err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
2127-
if (err)
2128-
return err;
2126+
emit_indirect_jump(&prog, 2 /* rdx */, prog);
21292127

21302128
*pprog = prog;
21312129
return 0;

arch/x86/net/bpf_jit_comp32.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <asm/cacheflush.h>
1616
#include <asm/set_memory.h>
1717
#include <asm/nospec-branch.h>
18+
#include <asm/asm-prototypes.h>
1819
#include <linux/bpf.h>
1920

2021
/*
@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
12671268
*pprog = prog;
12681269
}
12691270

1271+
static int emit_jmp_edx(u8 **pprog, u8 *ip)
1272+
{
1273+
u8 *prog = *pprog;
1274+
int cnt = 0;
1275+
1276+
#ifdef CONFIG_RETPOLINE
1277+
EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
1278+
#else
1279+
EMIT2(0xFF, 0xE2);
1280+
#endif
1281+
*pprog = prog;
1282+
1283+
return cnt;
1284+
}
1285+
12701286
/*
12711287
* Generate the following code:
12721288
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
12801296
* goto *(prog->bpf_func + prologue_size);
12811297
* out:
12821298
*/
1283-
static void emit_bpf_tail_call(u8 **pprog)
1299+
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
12841300
{
12851301
u8 *prog = *pprog;
12861302
int cnt = 0;
@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog)
13621378
* eax == ctx (1st arg)
13631379
* edx == prog->bpf_func + prologue_size
13641380
*/
1365-
RETPOLINE_EDX_BPF_JIT();
1381+
cnt += emit_jmp_edx(&prog, ip + cnt);
13661382

13671383
if (jmp_label1 == -1)
13681384
jmp_label1 = cnt;
@@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
21222138
break;
21232139
}
21242140
case BPF_JMP | BPF_TAIL_CALL:
2125-
emit_bpf_tail_call(&prog);
2141+
emit_bpf_tail_call(&prog, image + addrs[i - 1]);
21262142
break;
21272143

21282144
/* cond jump */

0 commit comments

Comments (0)