@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/*
 * Register indices outside of the BPF register range, reserved for the
 * JIT's own use. They index the reg2hex[] table like any BPF register:
 *   AUX_REG     - R11, temp register
 *   X86_REG_R9  - R9, 6th function argument
 *   X86_REG_R12 - R12, callee saved
 */
#define AUX_REG		(MAX_BPF_JIT_REG + 1)
#define X86_REG_R9	(MAX_BPF_JIT_REG + 2)
#define X86_REG_R12	(MAX_BPF_JIT_REG + 3)
116117
117118/*
118119 * The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
139140 [BPF_REG_AX ] = 2 , /* R10 temp register */
140141 [AUX_REG ] = 3 , /* R11 temp register */
141142 [X86_REG_R9 ] = 1 , /* R9 register, 6th function argument */
143+ [X86_REG_R12 ] = 4 , /* R12 callee saved */
142144};
143145
144146static const int reg2pt_regs [] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
167169 BIT (BPF_REG_8 ) |
168170 BIT (BPF_REG_9 ) |
169171 BIT (X86_REG_R9 ) |
172+ BIT (X86_REG_R12 ) |
170173 BIT (BPF_REG_AX ));
171174}
172175
@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
205208 return byte ;
206209}
207210
211+ static u8 add_3mod (u8 byte , u32 r1 , u32 r2 , u32 index )
212+ {
213+ if (is_ereg (r1 ))
214+ byte |= 1 ;
215+ if (is_ereg (index ))
216+ byte |= 2 ;
217+ if (is_ereg (r2 ))
218+ byte |= 4 ;
219+ return byte ;
220+ }
221+
208222/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
209223static u8 add_1reg (u8 byte , u32 dst_reg )
210224{
@@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
645659 pop_r12 (& prog );
646660 } else {
647661 pop_callee_regs (& prog , callee_regs_used );
662+ if (bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena ))
663+ pop_r12 (& prog );
648664 }
649665
650666 EMIT1 (0x58 ); /* pop rax */
@@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
704720 pop_r12 (& prog );
705721 } else {
706722 pop_callee_regs (& prog , callee_regs_used );
723+ if (bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena ))
724+ pop_r12 (& prog );
707725 }
708726
709727 EMIT1 (0x58 ); /* pop rax */
@@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
887905 * pprog = prog ;
888906}
889907
908+ static void emit_insn_suffix_SIB (u8 * * pprog , u32 ptr_reg , u32 val_reg , u32 index_reg , int off )
909+ {
910+ u8 * prog = * pprog ;
911+
912+ if (is_imm8 (off )) {
913+ EMIT3 (add_2reg (0x44 , BPF_REG_0 , val_reg ), add_2reg (0 , ptr_reg , index_reg ) /* SIB */ , off );
914+ } else {
915+ EMIT2_off32 (add_2reg (0x84 , BPF_REG_0 , val_reg ), add_2reg (0 , ptr_reg , index_reg ) /* SIB */ , off );
916+ }
917+ * pprog = prog ;
918+ }
919+
890920/*
891921 * Emit a REX byte if it will be necessary to address these registers
892922 */
@@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
968998 * pprog = prog ;
969999}
9701000
1001+ static void emit_ldx_index (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , u32 index_reg , int off )
1002+ {
1003+ u8 * prog = * pprog ;
1004+
1005+ switch (size ) {
1006+ case BPF_B :
1007+ /* movzx rax, byte ptr [rax + r12 + off] */
1008+ EMIT3 (add_3mod (0x40 , src_reg , dst_reg , index_reg ), 0x0F , 0xB6 );
1009+ break ;
1010+ case BPF_H :
1011+ /* movzx rax, word ptr [rax + r12 + off] */
1012+ EMIT3 (add_3mod (0x40 , src_reg , dst_reg , index_reg ), 0x0F , 0xB7 );
1013+ break ;
1014+ case BPF_W :
1015+ /* mov eax, dword ptr [rax + r12 + off] */
1016+ EMIT2 (add_3mod (0x40 , src_reg , dst_reg , index_reg ), 0x8B );
1017+ break ;
1018+ case BPF_DW :
1019+ /* mov rax, qword ptr [rax + r12 + off] */
1020+ EMIT2 (add_3mod (0x48 , src_reg , dst_reg , index_reg ), 0x8B );
1021+ break ;
1022+ }
1023+ emit_insn_suffix_SIB (& prog , src_reg , dst_reg , index_reg , off );
1024+ * pprog = prog ;
1025+ }
1026+
1027+ static void emit_ldx_r12 (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , int off )
1028+ {
1029+ emit_ldx_index (pprog , size , dst_reg , src_reg , X86_REG_R12 , off );
1030+ }
1031+
9711032/* STX: *(u8*)(dst_reg + off) = src_reg */
9721033static void emit_stx (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , int off )
9731034{
@@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
10021063 * pprog = prog ;
10031064}
10041065
1066+ /* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
1067+ static void emit_stx_index (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , u32 index_reg , int off )
1068+ {
1069+ u8 * prog = * pprog ;
1070+
1071+ switch (size ) {
1072+ case BPF_B :
1073+ /* mov byte ptr [rax + r12 + off], al */
1074+ EMIT2 (add_3mod (0x40 , dst_reg , src_reg , index_reg ), 0x88 );
1075+ break ;
1076+ case BPF_H :
1077+ /* mov word ptr [rax + r12 + off], ax */
1078+ EMIT3 (0x66 , add_3mod (0x40 , dst_reg , src_reg , index_reg ), 0x89 );
1079+ break ;
1080+ case BPF_W :
1081+ /* mov dword ptr [rax + r12 + 1], eax */
1082+ EMIT2 (add_3mod (0x40 , dst_reg , src_reg , index_reg ), 0x89 );
1083+ break ;
1084+ case BPF_DW :
1085+ /* mov qword ptr [rax + r12 + 1], rax */
1086+ EMIT2 (add_3mod (0x48 , dst_reg , src_reg , index_reg ), 0x89 );
1087+ break ;
1088+ }
1089+ emit_insn_suffix_SIB (& prog , dst_reg , src_reg , index_reg , off );
1090+ * pprog = prog ;
1091+ }
1092+
1093+ static void emit_stx_r12 (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , int off )
1094+ {
1095+ emit_stx_index (pprog , size , dst_reg , src_reg , X86_REG_R12 , off );
1096+ }
1097+
1098+ /* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
1099+ static void emit_st_index (u8 * * pprog , u32 size , u32 dst_reg , u32 index_reg , int off , int imm )
1100+ {
1101+ u8 * prog = * pprog ;
1102+
1103+ switch (size ) {
1104+ case BPF_B :
1105+ /* mov byte ptr [rax + r12 + off], imm8 */
1106+ EMIT2 (add_3mod (0x40 , dst_reg , 0 , index_reg ), 0xC6 );
1107+ break ;
1108+ case BPF_H :
1109+ /* mov word ptr [rax + r12 + off], imm16 */
1110+ EMIT3 (0x66 , add_3mod (0x40 , dst_reg , 0 , index_reg ), 0xC7 );
1111+ break ;
1112+ case BPF_W :
1113+ /* mov dword ptr [rax + r12 + 1], imm32 */
1114+ EMIT2 (add_3mod (0x40 , dst_reg , 0 , index_reg ), 0xC7 );
1115+ break ;
1116+ case BPF_DW :
1117+ /* mov qword ptr [rax + r12 + 1], imm32 */
1118+ EMIT2 (add_3mod (0x48 , dst_reg , 0 , index_reg ), 0xC7 );
1119+ break ;
1120+ }
1121+ emit_insn_suffix_SIB (& prog , dst_reg , 0 , index_reg , off );
1122+ EMIT (imm , bpf_size_to_x86_bytes (size ));
1123+ * pprog = prog ;
1124+ }
1125+
1126+ static void emit_st_r12 (u8 * * pprog , u32 size , u32 dst_reg , int off , int imm )
1127+ {
1128+ emit_st_index (pprog , size , dst_reg , X86_REG_R12 , off , imm );
1129+ }
1130+
10051131static int emit_atomic (u8 * * pprog , u8 atomic_op ,
10061132 u32 dst_reg , u32 src_reg , s16 off , u8 bpf_size )
10071133{
@@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
10431169 return 0 ;
10441170}
10451171
1172+ #define DONT_CLEAR 1
1173+
10461174bool ex_handler_bpf (const struct exception_table_entry * x , struct pt_regs * regs )
10471175{
10481176 u32 reg = x -> fixup >> 8 ;
10491177
10501178 /* jump over faulting load and clear dest register */
1051- * (unsigned long * )((void * )regs + reg ) = 0 ;
1179+ if (reg != DONT_CLEAR )
1180+ * (unsigned long * )((void * )regs + reg ) = 0 ;
10521181 regs -> ip += x -> fixup & 0xff ;
10531182 return true;
10541183}
@@ -1147,11 +1276,15 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
11471276 bool tail_call_seen = false;
11481277 bool seen_exit = false;
11491278 u8 temp [BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY ];
1279+ u64 arena_vm_start , user_vm_start ;
11501280 int i , excnt = 0 ;
11511281 int ilen , proglen = 0 ;
11521282 u8 * prog = temp ;
11531283 int err ;
11541284
1285+ arena_vm_start = bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena );
1286+ user_vm_start = bpf_arena_get_user_vm_start (bpf_prog -> aux -> arena );
1287+
11551288 detect_reg_usage (insn , insn_cnt , callee_regs_used ,
11561289 & tail_call_seen );
11571290
@@ -1172,8 +1305,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
11721305 push_r12 (& prog );
11731306 push_callee_regs (& prog , all_callee_regs_used );
11741307 } else {
1308+ if (arena_vm_start )
1309+ push_r12 (& prog );
11751310 push_callee_regs (& prog , callee_regs_used );
11761311 }
1312+ if (arena_vm_start )
1313+ emit_mov_imm64 (& prog , X86_REG_R12 ,
1314+ arena_vm_start >> 32 , (u32 ) arena_vm_start );
11771315
11781316 ilen = prog - temp ;
11791317 if (rw_image )
@@ -1213,6 +1351,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
12131351 break ;
12141352
12151353 case BPF_ALU64 | BPF_MOV | BPF_X :
1354+ if (insn -> off == BPF_ADDR_SPACE_CAST &&
1355+ insn -> imm == 1U << 16 ) {
1356+ if (dst_reg != src_reg )
1357+ /* 32-bit mov */
1358+ emit_mov_reg (& prog , false, dst_reg , src_reg );
1359+ /* shl dst_reg, 32 */
1360+ maybe_emit_1mod (& prog , dst_reg , true);
1361+ EMIT3 (0xC1 , add_1reg (0xE0 , dst_reg ), 32 );
1362+
1363+ /* or dst_reg, user_vm_start */
1364+ maybe_emit_1mod (& prog , dst_reg , true);
1365+ if (is_axreg (dst_reg ))
1366+ EMIT1_off32 (0x0D , user_vm_start >> 32 );
1367+ else
1368+ EMIT2_off32 (0x81 , add_1reg (0xC8 , dst_reg ), user_vm_start >> 32 );
1369+
1370+ /* rol dst_reg, 32 */
1371+ maybe_emit_1mod (& prog , dst_reg , true);
1372+ EMIT3 (0xC1 , add_1reg (0xC0 , dst_reg ), 32 );
1373+
1374+ /* xor r11, r11 */
1375+ EMIT3 (0x4D , 0x31 , 0xDB );
1376+
1377+ /* test dst_reg32, dst_reg32; check if lower 32-bit are zero */
1378+ maybe_emit_mod (& prog , dst_reg , dst_reg , false);
1379+ EMIT2 (0x85 , add_2reg (0xC0 , dst_reg , dst_reg ));
1380+
1381+ /* cmove r11, dst_reg; if so, set dst_reg to zero */
1382+ /* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
1383+ maybe_emit_mod (& prog , AUX_REG , dst_reg , true);
1384+ EMIT3 (0x0F , 0x44 , add_2reg (0xC0 , AUX_REG , dst_reg ));
1385+ break ;
1386+ }
1387+ fallthrough ;
12161388 case BPF_ALU | BPF_MOV | BPF_X :
12171389 if (insn -> off == 0 )
12181390 emit_mov_reg (& prog ,
@@ -1564,6 +1736,56 @@ st: if (is_imm8(insn->off))
15641736 emit_stx (& prog , BPF_SIZE (insn -> code ), dst_reg , src_reg , insn -> off );
15651737 break ;
15661738
1739+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B :
1740+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H :
1741+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W :
1742+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW :
1743+ start_of_ldx = prog ;
1744+ emit_st_r12 (& prog , BPF_SIZE (insn -> code ), dst_reg , insn -> off , insn -> imm );
1745+ goto populate_extable ;
1746+
1747+ /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
1748+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B :
1749+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H :
1750+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W :
1751+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW :
1752+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B :
1753+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H :
1754+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W :
1755+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW :
1756+ start_of_ldx = prog ;
1757+ if (BPF_CLASS (insn -> code ) == BPF_LDX )
1758+ emit_ldx_r12 (& prog , BPF_SIZE (insn -> code ), dst_reg , src_reg , insn -> off );
1759+ else
1760+ emit_stx_r12 (& prog , BPF_SIZE (insn -> code ), dst_reg , src_reg , insn -> off );
1761+ populate_extable :
1762+ {
1763+ struct exception_table_entry * ex ;
1764+ u8 * _insn = image + proglen + (start_of_ldx - temp );
1765+ s64 delta ;
1766+
1767+ if (!bpf_prog -> aux -> extable )
1768+ break ;
1769+
1770+ if (excnt >= bpf_prog -> aux -> num_exentries ) {
1771+ pr_err ("mem32 extable bug\n" );
1772+ return - EFAULT ;
1773+ }
1774+ ex = & bpf_prog -> aux -> extable [excnt ++ ];
1775+
1776+ delta = _insn - (u8 * )& ex -> insn ;
1777+ /* switch ex to rw buffer for writes */
1778+ ex = (void * )rw_image + ((void * )ex - (void * )image );
1779+
1780+ ex -> insn = delta ;
1781+
1782+ ex -> data = EX_TYPE_BPF ;
1783+
1784+ ex -> fixup = (prog - start_of_ldx ) |
1785+ ((BPF_CLASS (insn -> code ) == BPF_LDX ? reg2pt_regs [dst_reg ] : DONT_CLEAR ) << 8 );
1786+ }
1787+ break ;
1788+
15671789 /* LDX: dst_reg = *(u8*)(src_reg + off) */
15681790 case BPF_LDX | BPF_MEM | BPF_B :
15691791 case BPF_LDX | BPF_PROBE_MEM | BPF_B :
@@ -2036,6 +2258,8 @@ st: if (is_imm8(insn->off))
20362258 pop_r12 (& prog );
20372259 } else {
20382260 pop_callee_regs (& prog , callee_regs_used );
2261+ if (arena_vm_start )
2262+ pop_r12 (& prog );
20392263 }
20402264 EMIT1 (0xC9 ); /* leave */
20412265 emit_return (& prog , image + addrs [i - 1 ] + (prog - temp ));
@@ -3243,6 +3467,11 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
32433467 }
32443468}
32453469
/* Report that this JIT supports BPF arena: unconditionally true. */
bool bpf_jit_supports_arena(void)
{
	const bool supported = true;

	return supported;
}
3474+
32463475bool bpf_jit_supports_ptr_xchg (void )
32473476{
32483477 return true;
0 commit comments