@@ -325,6 +325,22 @@ struct jit_context {
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(12 + ENDBR_INSN_SIZE)
 
+static void push_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x51);   /* push r9 */
+	*pprog = prog;
+}
+
+static void pop_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x59);   /* pop r9 */
+	*pprog = prog;
+}
+
 static void push_r12(u8 **pprog)
 {
 	u8 *prog = *pprog;
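For context, push_r9()/pop_r9() follow the JIT's usual emitter pattern: the caller hands in a cursor into the instruction buffer, the helper appends raw opcode bytes (0x41 0x51 is the REX.B-prefixed push of r9), and the advanced cursor is written back. A minimal userspace sketch of that pattern (a model only, not the kernel's EMIT macros):

#include <stdio.h>
#include <stdint.h>

/* append two raw bytes at the cursor and advance it, like the JIT helpers do */
static void emit2(uint8_t **pprog, uint8_t b1, uint8_t b2)
{
	uint8_t *prog = *pprog;

	*prog++ = b1;
	*prog++ = b2;
	*pprog = prog;
}

static void push_r9_model(uint8_t **pprog)
{
	emit2(pprog, 0x41, 0x51);	/* REX.B prefix + push r9 */
}

int main(void)
{
	uint8_t buf[16], *prog = buf;

	push_r9_model(&prog);
	printf("emitted %td bytes: %02x %02x\n", prog - buf, buf[0], buf[1]);
	return 0;
}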
@@ -1404,6 +1420,24 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 	*pprog = prog;
 }
 
+static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
+{
+	u8 *prog = *pprog;
+
+	/* movabs r9, priv_frame_ptr */
+	emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32,
+		       (u32) (__force long) priv_frame_ptr);
+
+#ifdef CONFIG_SMP
+	/* add <r9>, gs:[<off>] */
+	EMIT2(0x65, 0x4c);
+	EMIT3(0x03, 0x0c, 0x25);
+	EMIT((u32)(unsigned long)&this_cpu_off, 4);
+#endif
+
+	*pprog = prog;
+}
+
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
 #define __LOAD_TCC_PTR(off)			\
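emit_priv_frame_ptr() materializes the current CPU's private frame pointer in r9: the movabs loads the per-CPU offset cookie returned by the percpu allocator, and on SMP the gs-relative add of this_cpu_off turns it into the running CPU's address, i.e. the same result per_cpu_ptr() would compute for the current CPU. A userspace sketch of that arithmetic, with illustrative names and addresses only:

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 4

/* stand-in for each CPU's per-CPU area start address (not kernel symbols) */
static uintptr_t cpu_area_base[NR_CPUS] = {
	0x10000, 0x20000, 0x30000, 0x40000
};

static uintptr_t priv_frame_for(uintptr_t percpu_offset, int cpu)
{
	/* what "add r9, gs:[this_cpu_off]" achieves for the running CPU */
	return percpu_offset + cpu_area_base[cpu];
}

int main(void)
{
	uintptr_t frame_off = 0x1234;	/* hypothetical priv_frame_ptr value */

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%d frame ptr: 0x%lx\n", cpu,
		       (unsigned long)priv_frame_for(frame_off, cpu));
	return 0;
}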
@@ -1412,6 +1446,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 #define LOAD_TAIL_CALL_CNT_PTR(stack)				\
 	__LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
 
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ	8
+#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
 		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
 {
@@ -1421,14 +1459,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	int insn_cnt = bpf_prog->len;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+	void __percpu *priv_frame_ptr = NULL;
 	u64 arena_vm_start, user_vm_start;
+	void __percpu *priv_stack_ptr;
 	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
 	u32 stack_depth;
 	int err;
 
 	stack_depth = bpf_prog->aux->stack_depth;
+	priv_stack_ptr = bpf_prog->aux->priv_stack_ptr;
+	if (priv_stack_ptr) {
+		priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up(stack_depth, 8);
+		stack_depth = 0;
+	}
 
 	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
 	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
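With a private stack, stack_depth is forced to 0 so the prologue no longer carves a frame out of the kernel stack; the frame instead lives in the per-CPU buffer laid out as [8-byte guard | round_up(stack_depth, 8) usable bytes | 8-byte guard], with r9 pointing at the top of the usable region so BPF's negative frame-pointer offsets land inside it. A standalone userspace sketch of that layout and the guard words (illustrative values, not the kernel allocation path):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define GUARD_SZ   8
#define GUARD_VAL  0xEB9F12345678eb9fULL

static size_t round_up8(size_t x) { return (x + 7) & ~(size_t)7; }

int main(void)
{
	size_t depth = 100;				/* hypothetical stack_depth */
	size_t alloc = round_up8(depth) + 2 * GUARD_SZ;
	uint8_t *stack = calloc(1, alloc);
	uint64_t guard = GUARD_VAL;

	if (!stack)
		return 1;

	/* guard words sit in the first and last 8 bytes of the buffer */
	memcpy(stack, &guard, GUARD_SZ);
	memcpy(stack + alloc - GUARD_SZ, &guard, GUARD_SZ);

	/* frame pointer (r9) points just past the usable region */
	uint8_t *frame_ptr = stack + GUARD_SZ + round_up8(depth);

	/* a store at FP-8 stays inside the usable region */
	memset(frame_ptr - 8, 0xaa, 8);

	/* both guards are still intact */
	printf("guards ok: %d\n",
	       memcmp(stack, &guard, GUARD_SZ) == 0 &&
	       memcmp(stack + alloc - GUARD_SZ, &guard, GUARD_SZ) == 0);
	free(stack);
	return 0;
}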
@@ -1457,6 +1502,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		emit_mov_imm64(&prog, X86_REG_R12,
 			       arena_vm_start >> 32, (u32) arena_vm_start);
 
+	if (priv_frame_ptr)
+		emit_priv_frame_ptr(&prog, priv_frame_ptr);
+
 	ilen = prog - temp;
 	if (rw_image)
 		memcpy(rw_image + proglen, temp, ilen);
@@ -1476,6 +1524,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		u8 *func;
 		int nops;
 
+		if (priv_frame_ptr) {
+			if (src_reg == BPF_REG_FP)
+				src_reg = X86_REG_R9;
+
+			if (dst_reg == BPF_REG_FP)
+				dst_reg = X86_REG_R9;
+		}
+
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
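Because the private frame no longer sits at rbp, any instruction whose source or destination register is the BPF frame pointer is rewritten up front to use r9, so the normal encoding paths below pick r9 automatically. A toy model of that substitution (illustrative register numbers, not the kernel's struct bpf_insn):

#include <stdio.h>

enum { REG_R1 = 1, REG_FP = 10, REG_SUBST = 99 };

struct insn { int dst_reg, src_reg; };

/* rewrite frame-pointer-relative operands to use the substitute register */
static void remap_fp(struct insn *in)
{
	if (in->src_reg == REG_FP)
		in->src_reg = REG_SUBST;
	if (in->dst_reg == REG_FP)
		in->dst_reg = REG_SUBST;
}

int main(void)
{
	struct insn store = { .dst_reg = REG_FP, .src_reg = REG_R1 };

	remap_fp(&store);	/* *(FP - 8) = r1 becomes *(subst - 8) = r1 */
	printf("dst=%d src=%d\n", store.dst_reg, store.src_reg);
	return 0;
}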
@@ -2136,9 +2192,15 @@ st: if (is_imm8(insn->off))
 			}
 			if (!imm32)
 				return -EINVAL;
+			if (priv_frame_ptr) {
+				push_r9(&prog);
+				ip += 2;
+			}
 			ip += x86_call_depth_emit_accounting(&prog, func, ip);
 			if (emit_call(&prog, func, ip))
 				return -EINVAL;
+			if (priv_frame_ptr)
+				pop_r9(&prog);
 			break;
 		}
 
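r9 is caller-saved in the x86-64 calling convention, so it is saved and restored around helper calls; the 2-byte push also shifts the call instruction forward, which is why ip is bumped by 2 before the relative displacement is computed. A small sketch of why that bookkeeping matters for an E8 near call (illustrative addresses only):

#include <stdio.h>
#include <stdint.h>

static int32_t call_rel32(uint64_t insn_ip, uint64_t target)
{
	/* E8 rel32: displacement is relative to the end of the 5-byte call */
	return (int32_t)(target - (insn_ip + 5));
}

int main(void)
{
	uint64_t ip = 0x1000, target = 0x2000;

	printf("call at ip:     rel32 = %d\n", call_rel32(ip, target));
	/* after a 2-byte "push r9", the call actually sits at ip + 2 */
	printf("call at ip + 2: rel32 = %d\n", call_rel32(ip + 2, target));
	return 0;
}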
@@ -3306,6 +3368,42 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
 }
 
+static const char *bpf_get_prog_name(struct bpf_prog *prog)
+{
+	if (prog->aux->ksym.prog)
+		return prog->aux->ksym.name;
+	return prog->aux->name;
+}
+
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
 struct x64_jit_data {
 	struct bpf_binary_header *rw_header;
 	struct bpf_binary_header *header;
@@ -3323,7 +3421,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	struct bpf_binary_header *rw_header = NULL;
 	struct bpf_binary_header *header = NULL;
 	struct bpf_prog *tmp, *orig_prog = prog;
+	void __percpu *priv_stack_ptr = NULL;
 	struct x64_jit_data *jit_data;
+	int priv_stack_alloc_sz;
 	int proglen, oldproglen = 0;
 	struct jit_context ctx = {};
 	bool tmp_blinded = false;
@@ -3359,6 +3459,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		}
 		prog->aux->jit_data = jit_data;
 	}
+	priv_stack_ptr = prog->aux->priv_stack_ptr;
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+		/* Allocate actual private stack size with verifier-calculated
+		 * stack size plus two memory guards to protect overflow and
+		 * underflow.
+		 */
+		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+				      2 * PRIV_STACK_GUARD_SZ;
+		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL);
+		if (!priv_stack_ptr) {
+			prog = orig_prog;
+			goto out_priv_stack;
+		}
+
+		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+		prog->aux->priv_stack_ptr = priv_stack_ptr;
+	}
 	addrs = jit_data->addrs;
 	if (addrs) {
 		ctx = jit_data->ctx;
@@ -3494,6 +3611,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 			bpf_prog_fill_jited_linfo(prog, addrs + 1);
 out_addrs:
 		kvfree(addrs);
+		if (!image && priv_stack_ptr) {
+			free_percpu(priv_stack_ptr);
+			prog->aux->priv_stack_ptr = NULL;
+		}
+out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
@@ -3532,6 +3654,8 @@ void bpf_jit_free(struct bpf_prog *prog)
 	if (prog->jited) {
 		struct x64_jit_data *jit_data = prog->aux->jit_data;
 		struct bpf_binary_header *hdr;
+		void __percpu *priv_stack_ptr;
+		int priv_stack_alloc_sz;
 
 		/*
 		 * If we fail the final pass of JIT (from jit_subprogs),
@@ -3547,6 +3671,13 @@ void bpf_jit_free(struct bpf_prog *prog)
 		prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
 		hdr = bpf_jit_binary_pack_hdr(prog);
 		bpf_jit_binary_pack_free(hdr, NULL);
+		priv_stack_ptr = prog->aux->priv_stack_ptr;
+		if (priv_stack_ptr) {
+			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+					      2 * PRIV_STACK_GUARD_SZ;
+			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+			free_percpu(prog->aux->priv_stack_ptr);
+		}
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
 	}
 
@@ -3562,6 +3693,11 @@ bool bpf_jit_supports_exceptions(void)
 	return IS_ENABLED(CONFIG_UNWINDER_ORC);
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 #if defined(CONFIG_UNWINDER_ORC)