You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[BPF] Add jump table support with switch statements and computed goto
NOTE 1: We probably need cpu v5 or other flags to enable this feature.
We can add it later when necessary. Let us use cpu v4 for now.
NOTE 2: An option -bpf-min-jump-table-entries is implemented to control the minimum
number of entries required to use a jump table on BPF. The default value is 5, which
makes it easy to test. Eventually we will increase the minimum number of jump table entries to 13.
This patch adds jump table support. A new insn 'gotox <reg>' is
added to allow goto through a register. The register represents
the address in the current section.
Example 1 (switch statement):
=============================
Code:
struct simple_ctx {
int x;
int y;
int z;
};
int ret_user, ret_user2;
void bar(void);
int foo(struct simple_ctx *ctx, struct simple_ctx *ctx2)
{
switch (ctx->x) {
case 1: ret_user = 18; break;
case 20: ret_user = 6; break;
case 16: ret_user = 9; break;
case 6: ret_user = 16; break;
case 8: ret_user = 14; break;
case 30: ret_user = 2; break;
default: ret_user = 1; break;
}
bar();
switch (ctx2->x) {
case 0: ret_user2 = 8; break;
case 31: ret_user2 = 5; break;
case 13: ret_user2 = 8; break;
case 1: ret_user2 = 3; break;
case 11: ret_user2 = 4; break;
default: ret_user2 = 29; break;
}
return 0;
}
Run: clang --target=bpf -mcpu=v4 -O2 -S test.c
The assembly code:
...
# %bb.1: # %entry
r1 <<= 3
r2 = .LJTI0_0 ll
r2 += r1
r1 = *(u64 *)(r2 + 0)
gotox r1
LBB0_2:
w1 = 18
goto LBB0_9
...
# %bb.10: # %sw.epilog
r1 <<= 3
r2 = .LJTI0_1 ll
r2 += r1
r1 = *(u64 *)(r2 + 0)
gotox r1
LBB0_11:
w1 = 8
goto LBB0_16
...
.section .rodata,"a",@progbits
.p2align 3, 0x0
.LJTI0_0:
.quad LBB0_2
.quad LBB0_8
...
.quad LBB0_7
.LJTI0_1:
.quad LBB0_11
.quad LBB0_13
...
Although we do have labels .LJTI0_0 and .LJTI0_1, they have the prefix '.L',
so, unlike other symbols, they won't appear in the .o file.
Run: llvm-objdump -Sr test.o
...
4: 67 01 00 00 03 00 00 00 r1 <<= 0x3
5: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
0000000000000028: R_BPF_64_64 .rodata
7: 0f 12 00 00 00 00 00 00 r2 += r1
...
29: 67 01 00 00 03 00 00 00 r1 <<= 0x3
30: 18 02 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 r2 = 0xf0 ll
00000000000000f0: R_BPF_64_64 .rodata
32: 0f 12 00 00 00 00 00 00 r2 += r1
The size of each jump table is not obvious. libbpf needs to check all relocations
against the .rodata section to get the precise sizes needed to construct bpf
maps.
Example 2 (Simple computed goto):
=================================
Code:
int bar(int a) {
__label__ l1, l2;
void * volatile tgt;
int ret = 0;
if (a)
tgt = &&l1; // synthetic jump table generated here
else
tgt = &&l2; // another synthetic jump table
goto *tgt;
l1: ret += 1;
l2: ret += 2;
return ret;
}
Compile: clang --target=bpf -mcpu=v4 -O2 -c test1.c
Objdump: llvm-objdump -Sr test1.o
0: 18 02 00 00 50 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x50 ll
0000000000000000: R_BPF_64_64 .text
2: 16 01 02 00 00 00 00 00 if w1 == 0x0 goto +0x2 <bar+0x28>
3: 18 02 00 00 40 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x40 ll
0000000000000018: R_BPF_64_64 .text
5: 7b 2a f8 ff 00 00 00 00 *(u64 *)(r10 - 0x8) = r2
6: 79 a1 f8 ff 00 00 00 00 r1 = *(u64 *)(r10 - 0x8)
7: 0d 01 00 00 00 00 00 00 gotox r1
8: b4 00 00 00 03 00 00 00 w0 = 0x3
9: 05 00 01 00 00 00 00 00 goto +0x1 <bar+0x58>
10: b4 00 00 00 02 00 00 00 w0 = 0x2
11: 95 00 00 00 00 00 00 00 exit
For this case, there is no jump table, so it would be hard to track the offset
during verification, especially when the offset needs adjustment. So in practice we
need to create two jump tables, for '&&l1' and '&&l2' respectively.
Example 3 (More complicated computed goto):
===========================================
Code:
int foo(int a, int b) {
__label__ l1, l2, l3, l4;
void *jt1[] = {[0]=&&l1, [1]=&&l2};
void *jt2[] = {[0]=&&l3, [1]=&&l4};
int ret = 0;
goto *jt1[a % 2];
l1: ret += 1;
l2: ret += 3;
goto *jt2[b % 2];
l3: ret += 5;
l4: ret += 7;
return ret;
}
Compile: clang --target=bpf -mcpu=v4 -O2 -S test2.c
Asm code:
...
r3 = (s32)r2
r3 <<= 3
r2 = .L__const.foo.jt2 ll
r2 += r3
r1 = (s32)r1
r1 <<= 3
r3 = .L__const.foo.jt1 ll
r3 += r1
w0 = 0
r1 = *(u64 *)(r3 + 0)
gotox r1
.Ltmp0: # Block address taken
LBB0_1: # %l1
# =>This Inner Loop Header: Depth=1
w0 += 1
w0 += 3
r1 = *(u64 *)(r2 + 0)
gotox r1
.Ltmp1: # Block address taken
LBB0_2: # %l2
...
.type .L__const.foo.jt1,@object # @__const.foo.jt1
.section .rodata,"a",@progbits
.p2align 3, 0x0
.L__const.foo.jt1:
.quad .Ltmp0
.quad .Ltmp1
.size .L__const.foo.jt1, 16
.type .L__const.foo.jt2,@object # @__const.foo.jt2
.p2align 3, 0x0
.L__const.foo.jt2:
.quad .Ltmp2
.quad .Ltmp3
.size .L__const.foo.jt2, 16
Similar to the switch statement case, in the binary the symbols
.L__const.foo.jt* will not show up in the symbol table, and the jump tables
will be in the .rodata section.
We need to resolve the Example 2 case.
Also, with more libbpf work (dealing with .rodata sections, etc.),
everything should work fine for Examples 1 and 3. But we could do
better by
- Replacing symbols like .L<...> with symbols that appear in the
symbol table.
- Adding jump tables to a .jumptables section instead of the .rodata section.
This should make things easier for libbpf. Users can also benefit
from this, as relocations/sections will be easy to check.
The next two patches will fix Example 2 and improve all of the examples as
mentioned above.
0 commit comments