 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
 
 #define check_imm(bits, imm) do {				\
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
 	[TCCNT_PTR] = A64_R(26),
 	/* temporary register for blinding constants */
 	[BPF_REG_AX] = A64_R(9),
+	/* callee saved register for private stack pointer */
+	[PRIVATE_SP] = A64_R(27),
 	/* callee saved register for kern_vm_start address */
 	[ARENA_VM_START] = A64_R(28),
 };
@@ -86,6 +89,7 @@ struct jit_ctx {
 	u64 user_vm_start;
 	u64 arena_vm_start;
 	bool fp_used;
+	bool priv_sp_used;
 	bool write;
 };
 
@@ -98,6 +102,10 @@ struct bpf_plt {
 #define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
 
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ	16
+#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL
+
 static inline void emit(const u32 insn, struct jit_ctx *ctx)
 {
 	if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
 	if (reg_used & 8)
 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
 
-	if (reg_used & 16)
+	if (reg_used & 16) {
 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
+		if (ctx->priv_sp_used)
+			ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
+	}
 
 	if (ctx->arena_vm_start)
 		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -412,6 +423,7 @@ static void push_callee_regs(struct jit_ctx *ctx)
 		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
 		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
 		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
+		ctx->fp_used = true;
 	} else {
 		find_used_callee_regs(ctx);
 		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
@@ -461,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
 	}
 }
 
+static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
+			    struct jit_ctx *ctx)
+{
+	const u8 tmp = bpf2a64[TMP_REG_1];
+
+	emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
+	if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+		emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+	else
+		emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+	emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
+}
+
 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
 
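Annotation (not part of the diff): emit_percpu_ptr() above forms the per-CPU address of the private stack by adding the current CPU's per-CPU offset, which arm64 keeps in TPIDR_EL1 (or TPIDR_EL2 when running at EL2 with VHE), to the percpu base pointer. A rough C sketch of the same computation follows; the function name is illustrative and only the EL1 case is shown.

	/* Sketch only: mirrors the instruction sequence emitted above. */
	static inline void *percpu_addr_sketch(const void __percpu *ptr)
	{
		unsigned long off;

		asm volatile("mrs %0, tpidr_el1" : "=r" (off));	/* per-CPU offset */
		return (void *)((unsigned long)ptr + off);	/* add dst, dst, tmp */
	}
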
@@ -476,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 	const bool is_main_prog = !bpf_is_subprog(prog);
 	const u8 fp = bpf2a64[BPF_REG_FP];
 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+	const u8 priv_sp = bpf2a64[PRIVATE_SP];
+	void __percpu *priv_stack_ptr;
 	const int idx0 = ctx->idx;
 	int cur_offset;
 
@@ -551,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
 	}
 
-	if (ctx->fp_used)
-		/* Set up BPF prog stack base register */
-		emit(A64_MOV(1, fp, A64_SP), ctx);
-
 	/* Stack must be multiples of 16B */
 	ctx->stack_size = round_up(prog->aux->stack_depth, 16);
 
+	if (ctx->fp_used) {
+		if (ctx->priv_sp_used) {
+			/* Set up private stack pointer */
+			priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+			emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
+			emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
+		} else {
+			/* Set up BPF prog stack base register */
+			emit(A64_MOV(1, fp, A64_SP), ctx);
+		}
+	}
+
 	/* Set up function call stack */
-	if (ctx->stack_size)
+	if (ctx->stack_size && !ctx->priv_sp_used)
 		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
 	if (ctx->arena_vm_start)
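Annotation (not part of the diff): when the private stack is used, the prologue leaves SP untouched and instead points BPF_REG_FP at the high end of this CPU's usable private-stack region, just past the low guard. A sketch of the pointer math, with illustrative names, assuming the allocation size used later in bpf_int_jit_compile() (round_up(stack_depth, 16) + 2 * PRIV_STACK_GUARD_SZ):

	/* Sketch only: layout implied by the prologue above, for one CPU. */
	static void priv_stack_layout_sketch(void *percpu_base, int stack_depth)
	{
		int stack_size = round_up(stack_depth, 16);
		void *priv_sp = percpu_base + PRIV_STACK_GUARD_SZ;	/* above the overflow guard */
		void *fp = priv_sp + stack_size;			/* becomes BPF_REG_FP */

		/* BPF accesses use offsets in [-stack_size, 0) from fp, so they
		 * stay inside the usable region; the low 16-byte guard sits
		 * directly below priv_sp and the high guard begins at fp.
		 */
		(void)fp;
	}
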
@@ -623,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit(A64_STR64I(tcc, ptr, 0), ctx);
 
 	/* restore SP */
-	if (ctx->stack_size)
+	if (ctx->stack_size && !ctx->priv_sp_used)
 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
 	pop_callee_regs(ctx);
@@ -991,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
 	const u8 ptr = bpf2a64[TCCNT_PTR];
 
 	/* We're done with BPF stack */
-	if (ctx->stack_size)
+	if (ctx->stack_size && !ctx->priv_sp_used)
 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
 	pop_callee_regs(ctx);
@@ -1120,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
 	const u8 fp = bpf2a64[BPF_REG_FP];
 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+	const u8 priv_sp = bpf2a64[PRIVATE_SP];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1600,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			src = tmp2;
 		}
 		if (src == fp) {
-			src_adj = A64_SP;
+			src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
 			off_adj = off + ctx->stack_size;
 		} else {
 			src_adj = src;
@@ -1630,17 +1666,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			return ret;
 		break;
 
-	/* speculation barrier */
+	/* speculation barrier against v1 and v4 */
 	case BPF_ST | BPF_NOSPEC:
-		/*
-		 * Nothing required here.
-		 *
-		 * In case of arm64, we rely on the firmware mitigation of
-		 * Speculative Store Bypass as controlled via the ssbd kernel
-		 * parameter. Whenever the mitigation is enabled, it works
-		 * for all of the kernel code with no need to provide any
-		 * additional instructions.
-		 */
+		if (alternative_has_cap_likely(ARM64_HAS_SB)) {
+			emit(A64_SB, ctx);
+		} else {
+			emit(A64_DSB_NSH, ctx);
+			emit(A64_ISB, ctx);
+		}
 		break;
 
 	/* ST: *(size *)(dst + off) = imm */
@@ -1657,7 +1690,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			dst = tmp2;
 		}
 		if (dst == fp) {
-			dst_adj = A64_SP;
+			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
 			off_adj = off + ctx->stack_size;
 		} else {
 			dst_adj = dst;
@@ -1719,7 +1752,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			dst = tmp2;
 		}
 		if (dst == fp) {
-			dst_adj = A64_SP;
+			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
 			off_adj = off + ctx->stack_size;
 		} else {
 			dst_adj = dst;
@@ -1862,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
 	flush_icache_range((unsigned long)start, (unsigned long)end);
 }
 
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
+			       bpf_jit_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
 struct arm64_jit_data {
 	struct bpf_binary_header *header;
 	u8 *ro_image;
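Annotation (not part of the diff): underflow_idx converts a byte offset into a u64 index; since the last PRIV_STACK_GUARD_SZ bytes of the allocation form the underflow guard, (alloc_size - PRIV_STACK_GUARD_SZ) >> 3 is the index of that guard's first 64-bit word. A worked example with an assumed 512-byte stack depth:

	/* Sketch only: guard index arithmetic for stack_depth = 512. */
	int alloc_size    = round_up(512, 16) + 2 * PRIV_STACK_GUARD_SZ;	/* 512 + 32 = 544 */
	int underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;		/* (544 - 16) / 8 = 66 */
	/* stack_ptr[0], stack_ptr[1]   cover bytes   0..15  (overflow guard)  */
	/* stack_ptr[66], stack_ptr[67] cover bytes 528..543 (underflow guard) */
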
@@ -1874,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	int image_size, prog_size, extable_size, extable_align, extable_offset;
 	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header;
-	struct bpf_binary_header *ro_header;
+	struct bpf_binary_header *ro_header = NULL;
 	struct arm64_jit_data *jit_data;
+	void __percpu *priv_stack_ptr = NULL;
 	bool was_classic = bpf_prog_was_classic(prog);
+	int priv_stack_alloc_sz;
 	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
@@ -1908,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		}
 		prog->aux->jit_data = jit_data;
 	}
+	priv_stack_ptr = prog->aux->priv_stack_ptr;
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+		/* Allocate actual private stack size with verifier-calculated
+		 * stack size plus two memory guards to protect overflow and
+		 * underflow.
+		 */
+		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+				      2 * PRIV_STACK_GUARD_SZ;
+		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+		if (!priv_stack_ptr) {
+			prog = orig_prog;
+			goto out_priv_stack;
+		}
+
+		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+		prog->aux->priv_stack_ptr = priv_stack_ptr;
+	}
 	if (jit_data->ctx.offset) {
 		ctx = jit_data->ctx;
 		ro_image_ptr = jit_data->ro_image;
@@ -1931,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
 	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
 
+	if (priv_stack_ptr)
+		ctx.priv_sp_used = true;
+
 	/* Pass 1: Estimate the maximum image size.
 	 *
 	 * BPF line info needs ctx->offset[i] to be the offset of
@@ -2070,7 +2158,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 			ctx.offset[i] *= AARCH64_INSN_SIZE;
 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 out_off:
+		if (!ro_header && priv_stack_ptr) {
+			free_percpu(priv_stack_ptr);
+			prog->aux->priv_stack_ptr = NULL;
+		}
 		kvfree(ctx.offset);
+out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
@@ -2089,6 +2182,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	goto out_off;
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 bool bpf_jit_supports_kfunc_call(void)
 {
 	return true;
@@ -2243,11 +2341,6 @@ static int calc_arg_aux(const struct btf_func_model *m,
 
 	/* the rest arguments are passed through stack */
 	for (; i < m->nr_args; i++) {
-		/* We can not know for sure about exact alignment needs for
-		 * struct passed on stack, so deny those
-		 */
-		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
-			return -ENOTSUPP;
 		stack_slots = (m->arg_size[i] + 7) / 8;
 		a->bstack_for_args += stack_slots * 8;
 		a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
@@ -2911,6 +3004,17 @@ bool bpf_jit_supports_percpu_insn(void)
 	return true;
 }
 
+bool bpf_jit_bypass_spec_v4(void)
+{
+	/* In case of arm64, we rely on the firmware mitigation of Speculative
+	 * Store Bypass as controlled via the ssbd kernel parameter. Whenever
+	 * the mitigation is enabled, it works for all of the kernel code with
+	 * no need to provide any additional instructions. Therefore, skip
+	 * inserting nospec insns against Spectre v4.
+	 */
+	return true;
+}
+
 bool bpf_jit_inlines_helper_call(s32 imm)
 {
 	switch (imm) {
@@ -2928,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
 	if (prog->jited) {
 		struct arm64_jit_data *jit_data = prog->aux->jit_data;
 		struct bpf_binary_header *hdr;
+		void __percpu *priv_stack_ptr;
+		int priv_stack_alloc_sz;
 
 		/*
 		 * If we fail the final pass of JIT (from jit_subprogs),
@@ -2941,6 +3047,13 @@ void bpf_jit_free(struct bpf_prog *prog)
 		}
 		hdr = bpf_jit_binary_pack_hdr(prog);
 		bpf_jit_binary_pack_free(hdr, NULL);
+		priv_stack_ptr = prog->aux->priv_stack_ptr;
+		if (priv_stack_ptr) {
+			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+					      2 * PRIV_STACK_GUARD_SZ;
+			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+			free_percpu(prog->aux->priv_stack_ptr);
+		}
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
 	}