Commit b07df4d

image-dragon authored and Kernel Patches Daemon committed
bpf,x86: do RSB balance for trampoline
In the origin call case, we skip the "rip" directly before we return, which breaks the RSB: we do the "call" twice, but "ret" only once. Balance the RSB with a pseudo "ret": instead of skipping the "rip", modify it to the address of a "ret" insn that we generate. The performance of "fexit" increases from 76M/s to 84M/s.

Before this optimization, the fexit benchmark results are:

fexit : 76.494 ± 0.216M/s
fexit : 76.319 ± 0.097M/s
fexit : 70.680 ± 0.060M/s
fexit : 75.509 ± 0.039M/s
fexit : 76.392 ± 0.049M/s

After this optimization:

fexit : 86.023 ± 0.518M/s
fexit : 83.388 ± 0.021M/s
fexit : 85.146 ± 0.058M/s
fexit : 85.646 ± 0.136M/s
fexit : 84.040 ± 0.045M/s

Things become a little more complex; not sure if the benefits are worth it :/

Signed-off-by: Menglong Dong <[email protected]>
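For intuition, here is a sketch of the trampoline epilogue before and after this change in the BPF_TRAMP_F_SKIP_FRAME case, written as a kernel-style comment in the spirit of the ones the patch adds. It is paraphrased from the diff below, not taken verbatim from the generated code; "ret_addr" stands for the address of the second "ret", and the register restores between the rip rewrite and "leave" are omitted:

    /* before: the two "call"s that reach the trampoline are paired
     * with only one "ret", which lets the RSB underflow:
     *
     *	leave
     *	add rsp, 8		; drop our own return address
     *	ret			; return straight to the parent
     *
     * after: the saved rip is rewritten to point at a second "ret",
     * so every "call" is matched by a "ret":
     *
     *	movabs rax, ret_addr	; ret_addr = address of the 2nd "ret"
     *	mov [rbp + 8], rax	; rewrite the saved rip on the stack
     *	leave
     *	ret			; lands on the "ret" just below
     *	ret			; the second (real) return, to the parent
     */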
1 parent fe98223 commit b07df4d

1 file changed: +29 −3 lines changed


arch/x86/net/bpf_jit_comp.c

Lines changed: 29 additions & 3 deletions
@@ -3185,6 +3185,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
 	void *orig_call = func_addr;
 	u8 **branches = NULL;
+	u8 *rsb_pos;
 	u8 *prog;
 	bool save_ret;
 
@@ -3431,17 +3432,42 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size);
 	}
 
+	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
+		u64 ret_addr = (u64)(image + (prog - (u8 *)rw_image));
+
+		rsb_pos = prog;
+		/*
+		 * reserve the room to save the return address to rax:
+		 * movabs rax, imm64
+		 *
+		 * this is used to do the RSB balance. For the SKIP_FRAME
+		 * case, we do the "call" twice, but only have one "ret",
+		 * which can break the RSB.
+		 *
+		 * Therefore, instead of skipping the "rip", we make it a
+		 * pseudo return: modify the "rip" in the stack to the
+		 * second "ret" address that we build below.
+		 */
+		emit_mov_imm64(&prog, BPF_REG_0, ret_addr >> 32, (u32)ret_addr);
+		/* mov [rbp + 8], rax */
+		EMIT4(0x48, 0x89, 0x45, 0x08);
+	}
+
 	/* restore return value of orig_call or fentry prog back into RAX */
 	if (save_ret)
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
 	EMIT1(0xC9); /* leave */
+	emit_return(&prog, image + (prog - (u8 *)rw_image));
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
-		/* skip our return address and return to parent */
-		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+		u64 ret_addr = (u64)(image + (prog - (u8 *)rw_image));
+
+		/* fix the return address to the second return address */
+		emit_mov_imm64(&rsb_pos, BPF_REG_0, ret_addr >> 32, (u32)ret_addr);
+		/* this is the second (real) return */
+		emit_return(&prog, image + (prog - (u8 *)rw_image));
 	}
-	emit_return(&prog, image + (prog - (u8 *)rw_image));
 	/* Make sure the trampoline generation logic doesn't overflow */
 	if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
 		ret = -EFAULT;
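One detail that recurs four times in this hunk: the trampoline is assembled into a writable buffer (rw_image) but will run from its final executable mapping (image), so any absolute address baked into the emitted code has to be rebased. A minimal sketch of that computation, using a hypothetical helper name (the patch itself open-codes the expression):

    #include <linux/types.h>

    /* Hypothetical helper, not in the patch: translate the emit cursor
     * 'prog', which points into the writable build buffer, into the
     * address the same byte will have in the executable mapping.
     */
    static inline u64 emitted_addr(void *image, void *rw_image, u8 *prog)
    {
    	/* how far into the trampoline we currently are */
    	unsigned long off = prog - (u8 *)rw_image;

    	/* the same offset, rebased onto the executable mapping */
    	return (u64)(image + off);
    }

With such a helper, ret_addr would be emitted_addr(image, rw_image, prog): the executable address of the instruction about to be emitted, which is exactly the value the patch stores into the saved rip slot.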
