Commit a031538

Saket Kumar Bhaskar authored and Kernel Patches Daemon committed
powerpc64/bpf: Implement PROBE_MEM32 pseudo instructions
Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW] instructions.
They are similar to PROBE_MEM instructions with the following differences:

- PROBE_MEM32 supports store.
- PROBE_MEM32 relies on the verifier to clear the upper 32 bits of the
  src/dst register.
- PROBE_MEM32 adds the 64-bit kern_vm_start address (which is stored in
  _R26 in the prologue). Due to bpf_arena construction, such an
  _R26 + reg + off16 access is guaranteed to be within the arena virtual
  range, so no address check is needed at run time.
- PROBE_MEM32 allows STX and ST. If they fault, the store is a nop. When
  LDX faults, the destination register is zeroed.

To support these on powerpc, we do tmp1 = _R26 + src/dst reg and then use
tmp1 as the new src/dst register. This allows us to reuse most of the code
for normal [LDX | STX | ST].

Additionally, bpf_jit_emit_probe_mem_store() is introduced to emit the
store instruction matching the access size (byte, halfword, word,
doubleword).

The stack layout is adjusted to introduce a new NVR (_R26) and to keep
BPF_PPC_STACKFRAME quadword aligned (local_tmp_var is increased by 8
bytes).

Reviewed-by: Hari Bathini <[email protected]>
Tested-by: Venkat Rao Bagalkote <[email protected]>
Signed-off-by: Saket Kumar Bhaskar <[email protected]>
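As context for the address math (an illustrative sketch only, not part of the patch): the verifier guarantees a PROBE_MEM32 register holds a 32-bit arena offset, and the JIT adds the 64-bit arena base kept in _R26. The standalone C fragment below models that computation; KERN_VM_START and probe_mem32_addr() are made-up names for illustration.

#include <stdint.h>
#include <stdio.h>

/* Made-up arena base for illustration only; the real value comes from
 * bpf_arena_get_kern_vm_start() and is loaded into _R26 in the prologue. */
#define KERN_VM_START 0xc000000001000000ULL

/* Effective address of a PROBE_MEM32 access: 64-bit base + 32-bit offset
 * register + 16-bit signed displacement, i.e. tmp1 = _R26 + reg, then off16. */
static uint64_t probe_mem32_addr(uint32_t reg32, int16_t off16)
{
	return KERN_VM_START + (uint64_t)reg32 + (int64_t)off16;
}

int main(void)
{
	/* e.g. dst = *(u32 *)(src + kern_vm_start + off) with src = 0x1000, off = -8 */
	printf("0x%llx\n", (unsigned long long)probe_mem32_addr(0x1000, -8));
	return 0;
}

Because the verifier has already cleared the upper 32 bits of the offset register, the sum always lands inside the arena's virtual range, which is why the JIT can skip a run-time address check.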
1 parent c04dd5a commit a031538

File tree

4 files changed: +155 −24 lines changed

arch/powerpc/net/bpf_jit.h (3 additions, 2 deletions)

@@ -161,9 +161,10 @@ struct codegen_context {
 	unsigned int seen;
 	unsigned int idx;
 	unsigned int stack_size;
-	int b2p[MAX_BPF_JIT_REG + 2];
+	int b2p[MAX_BPF_JIT_REG + 3];
 	unsigned int exentry_idx;
 	unsigned int alt_exit_addr;
+	u64 arena_vm_start;
 };
 
 #define bpf_to_ppc(r)	(ctx->b2p[r])
@@ -201,7 +202,7 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
 
 int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
 			  struct codegen_context *ctx, int insn_idx,
-			  int jmp_off, int dst_reg);
+			  int jmp_off, int dst_reg, u32 code);
 
 #endif
arch/powerpc/net/bpf_jit_comp.c (7 additions, 3 deletions)

@@ -204,6 +204,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 
 	/* Make sure that the stack is quadword aligned. */
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
+	cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
 
 	/* Scouting faux-generate pass 0 */
 	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
@@ -326,7 +327,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
  */
 int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
 			  struct codegen_context *ctx, int insn_idx, int jmp_off,
-			  int dst_reg)
+			  int dst_reg, u32 code)
 {
 	off_t offset;
 	unsigned long pc;
@@ -355,6 +356,9 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass
 		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
 
 	fixup[0] = PPC_RAW_LI(dst_reg, 0);
+	if (BPF_CLASS(code) == BPF_ST || BPF_CLASS(code) == BPF_STX)
+		fixup[0] = PPC_RAW_NOP();
+
 	if (IS_ENABLED(CONFIG_PPC32))
 		fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
 
@@ -579,7 +583,7 @@ static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_contex
 {
 	if (IS_ENABLED(CONFIG_PPC64)) {
 		/* See bpf_jit_stack_tailcallcnt() */
-		int tailcallcnt_offset = 6 * 8;
+		int tailcallcnt_offset = 7 * 8;
 
 		EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
 		EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
@@ -594,7 +598,7 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont
 {
 	if (IS_ENABLED(CONFIG_PPC64)) {
 		/* See bpf_jit_stack_tailcallcnt() */
-		int tailcallcnt_offset = 6 * 8;
+		int tailcallcnt_offset = 7 * 8;
 
 		EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
 		EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));

arch/powerpc/net/bpf_jit_comp32.c (1 addition, 1 deletion)

@@ -1087,7 +1087,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 			}
 
 			ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
-						    jmp_off, dst_reg);
+						    jmp_off, dst_reg, code);
 			if (ret)
 				return ret;
 		}

arch/powerpc/net/bpf_jit_comp64.c (144 additions, 18 deletions)

@@ -25,25 +25,26 @@
  * with our redzone usage.
  *
  *		[	prev sp		] <-------------
- *		[   nv gpr save area	] 5*8		|
+ *		[   nv gpr save area	] 6*8		|
  *		[    tail_call_cnt	] 8		|
- *		[    local_tmp_var	] 16		|
+ *		[    local_tmp_var	] 24		|
  * fp (r31) -->	[   ebpf stack space	] upto 512	|
  *		[     frame header	] 32/112	|
  * sp (r1) --->	[	stack pointer	] --------------
  */
 
 /* for gpr non volatile registers BPG_REG_6 to 10 */
-#define BPF_PPC_STACK_SAVE	(5*8)
+#define BPF_PPC_STACK_SAVE	(6*8)
 /* for bpf JIT code internal usage */
-#define BPF_PPC_STACK_LOCALS	24
+#define BPF_PPC_STACK_LOCALS	32
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
 #define BPF_PPC_STACKFRAME	(STACK_FRAME_MIN_SIZE + \
 				 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
 
 /* BPF register usage */
 #define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
+#define ARENA_VM_START	(MAX_BPF_JIT_REG + 2)
 
 /* BPF to ppc register mappings */
 void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
@@ -67,10 +68,12 @@ void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
 	ctx->b2p[BPF_REG_AX] = _R12;
 	ctx->b2p[TMP_REG_1] = _R9;
 	ctx->b2p[TMP_REG_2] = _R10;
+	/* non volatile register for kern_vm_start address */
+	ctx->b2p[ARENA_VM_START] = _R26;
 }
 
-/* PPC NVR range -- update this if we ever use NVRs below r27 */
-#define BPF_PPC_NVR_MIN		_R27
+/* PPC NVR range -- update this if we ever use NVRs below r26 */
+#define BPF_PPC_NVR_MIN		_R26
 
 static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 {
@@ -89,22 +92,22 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
  *		[	prev sp		] <-------------
  *		[	  ...		]		|
  * sp (r1) --->	[	stack pointer	] --------------
- *		[   nv gpr save area	] 5*8
+ *		[   nv gpr save area	] 6*8
  *		[    tail_call_cnt	] 8
- *		[    local_tmp_var	] 16
+ *		[    local_tmp_var	] 24
  *		[   unused red zone	] 224
  */
 static int bpf_jit_stack_local(struct codegen_context *ctx)
 {
 	if (bpf_has_stack_frame(ctx))
 		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
 	else
-		return -(BPF_PPC_STACK_SAVE + 24);
+		return -(BPF_PPC_STACK_SAVE + 32);
 }
 
 static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
 {
-	return bpf_jit_stack_local(ctx) + 16;
+	return bpf_jit_stack_local(ctx) + 24;
 }
 
 static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
@@ -170,10 +173,17 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 		if (bpf_is_seen_register(ctx, bpf_to_ppc(i)))
 			EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i))));
 
+	if (ctx->arena_vm_start)
+		EMIT(PPC_RAW_STD(bpf_to_ppc(ARENA_VM_START), _R1,
+				 bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START))));
+
 	/* Setup frame pointer to point to the bpf stack area */
 	if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
 		EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
 				STACK_FRAME_MIN_SIZE + ctx->stack_size));
+
+	if (ctx->arena_vm_start)
+		PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);
 }
 
 static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -185,6 +195,10 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
 		if (bpf_is_seen_register(ctx, bpf_to_ppc(i)))
 			EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i))));
 
+	if (ctx->arena_vm_start)
+		EMIT(PPC_RAW_LD(bpf_to_ppc(ARENA_VM_START), _R1,
+				bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START))));
+
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
 		EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size));
@@ -396,11 +410,11 @@ void bpf_stf_barrier(void);
 asm (
 "		.global bpf_stf_barrier		;"
 "	bpf_stf_barrier:			;"
-"		std	21,-64(1)		;"
-"		std	22,-56(1)		;"
+"		std	21,-80(1)		;"
+"		std	22,-72(1)		;"
 "		sync				;"
-"		ld	21,-64(1)		;"
-"		ld	22,-56(1)		;"
+"		ld	21,-80(1)		;"
+"		ld	22,-72(1)		;"
 "		ori	31,31,0			;"
 "		.rept 14			;"
 "		b	1f			;"
@@ -409,6 +423,36 @@ asm (
 "		blr				;"
 );
 
+static int bpf_jit_emit_probe_mem_store(struct codegen_context *ctx, u32 src_reg, s16 off,
+					u32 code, u32 *image)
+{
+	u32 tmp1_reg = bpf_to_ppc(TMP_REG_1);
+	u32 tmp2_reg = bpf_to_ppc(TMP_REG_2);
+
+	switch (BPF_SIZE(code)) {
+	case BPF_B:
+		EMIT(PPC_RAW_STB(src_reg, tmp1_reg, off));
+		break;
+	case BPF_H:
+		EMIT(PPC_RAW_STH(src_reg, tmp1_reg, off));
+		break;
+	case BPF_W:
+		EMIT(PPC_RAW_STW(src_reg, tmp1_reg, off));
+		break;
+	case BPF_DW:
+		if (off % 4) {
+			EMIT(PPC_RAW_LI(tmp2_reg, off));
+			EMIT(PPC_RAW_STDX(src_reg, tmp1_reg, tmp2_reg));
+		} else {
+			EMIT(PPC_RAW_STD(src_reg, tmp1_reg, off));
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int emit_atomic_ld_st(const struct bpf_insn insn, struct codegen_context *ctx, u32 *image)
 {
 	u32 code = insn.code;
@@ -960,6 +1004,50 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 			}
 			break;
 
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+
+			EMIT(PPC_RAW_ADD(tmp1_reg, dst_reg, bpf_to_ppc(ARENA_VM_START)));
+
+			ret = bpf_jit_emit_probe_mem_store(ctx, src_reg, off, code, image);
+			if (ret)
+				return ret;
+
+			ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+						    ctx->idx - 1, 4, -1, code);
+			if (ret)
+				return ret;
+
+			break;
+
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+
+			EMIT(PPC_RAW_ADD(tmp1_reg, dst_reg, bpf_to_ppc(ARENA_VM_START)));
+
+			if (BPF_SIZE(code) == BPF_W || BPF_SIZE(code) == BPF_DW) {
+				PPC_LI32(tmp2_reg, imm);
+				src_reg = tmp2_reg;
+			} else {
+				EMIT(PPC_RAW_LI(tmp2_reg, imm));
+				src_reg = tmp2_reg;
+			}
+
+			ret = bpf_jit_emit_probe_mem_store(ctx, src_reg, off, code, image);
+			if (ret)
+				return ret;
+
+			ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
						    ctx->idx - 1, 4, -1, code);
+			if (ret)
+				return ret;
+
+			break;
+
 		/*
 		 * BPF_STX ATOMIC (atomic ops)
 		 */
@@ -1112,9 +1200,10 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 			 * Check if 'off' is word aligned for BPF_DW, because
 			 * we might generate two instructions.
 			 */
-			if ((BPF_SIZE(code) == BPF_DW ||
-			    (BPF_SIZE(code) == BPF_B && BPF_MODE(code) == BPF_PROBE_MEMSX)) &&
-					(off & 3))
+			if ((BPF_SIZE(code) == BPF_DW && (off & 3)) ||
+			    (BPF_SIZE(code) == BPF_B &&
+			     BPF_MODE(code) == BPF_PROBE_MEMSX) ||
+			    (BPF_SIZE(code) == BPF_B && BPF_MODE(code) == BPF_MEMSX))
 				PPC_JMP((ctx->idx + 3) * 4);
 			else
 				PPC_JMP((ctx->idx + 2) * 4);
@@ -1160,12 +1249,49 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 
 			if (BPF_MODE(code) == BPF_PROBE_MEM) {
 				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
-							    ctx->idx - 1, 4, dst_reg);
+							    ctx->idx - 1, 4, dst_reg, code);
 				if (ret)
 					return ret;
 			}
 			break;
 
+		/* dst = *(u64 *)(ul) (src + ARENA_VM_START + off) */
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+
+			EMIT(PPC_RAW_ADD(tmp1_reg, src_reg, bpf_to_ppc(ARENA_VM_START)));
+
+			switch (size) {
+			case BPF_B:
+				EMIT(PPC_RAW_LBZ(dst_reg, tmp1_reg, off));
+				break;
+			case BPF_H:
+				EMIT(PPC_RAW_LHZ(dst_reg, tmp1_reg, off));
+				break;
+			case BPF_W:
+				EMIT(PPC_RAW_LWZ(dst_reg, tmp1_reg, off));
+				break;
+			case BPF_DW:
+				if (off % 4) {
+					EMIT(PPC_RAW_LI(tmp2_reg, off));
+					EMIT(PPC_RAW_LDX(dst_reg, tmp1_reg, tmp2_reg));
+				} else {
+					EMIT(PPC_RAW_LD(dst_reg, tmp1_reg, off));
+				}
+				break;
+			}
+
+			if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+
+			ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+						    ctx->idx - 1, 4, dst_reg, code);
+			if (ret)
+				return ret;
+			break;
+
 		/*
 		 * Doubleword load
 		 * 16 byte instruction that uses two 'struct bpf_insn'