Skip to content

Commit 96c6aa4

Browse files
eddyz87Alexei Starovoitov
authored and committed
bpf: compute SCCs in program control flow graph
Compute strongly connected components in the program CFG. Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. Use Tarjan's algorithm for SCC computation adapted to run non-recursively.

For debug purposes print out computed SCCs as a part of full program dump in compute_live_registers() at log level 2, e.g.:

  func#0 @0
  Live regs before insn:
        0: .......... (b4) w6 = 10
    2   1: ......6... (18) r1 = 0xffff88810bbb5565
    2   3: .1....6... (b4) w2 = 2
    2   4: .12...6... (85) call bpf_trace_printk#6
    2   5: ......6... (04) w6 += -1
    2   6: ......6... (56) if w6 != 0x0 goto pc-6
        7: .......... (b4) w6 = 5
    1   8: ......6... (18) r1 = 0xffff88810bbb5567
    1  10: .1....6... (b4) w2 = 2
    1  11: .12...6... (85) call bpf_trace_printk#6
    1  12: ......6... (04) w6 += -1
    1  13: ......6... (56) if w6 != 0x0 goto pc-6
       14: .......... (b4) w0 = 0
       15: 0......... (95) exit
    ^^^
    SCC number for the instruction

Signed-off-by: Eduard Zingerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent baaebe0 commit 96c6aa4

File tree

2 files changed

+187
-0
lines changed

2 files changed

+187
-0
lines changed

include/linux/bpf_verifier.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,11 @@ struct bpf_insn_aux_data {
609609
* accepts callback function as a parameter.
610610
*/
611611
bool calls_callback;
612+
/*
613+
* CFG strongly connected component this instruction belongs to,
614+
* zero if it is a singleton SCC without a self edge.
615+
*/
616+
u32 scc;
612617
/* registers alive before this instruction. */
613618
u16 live_regs_before;
614619
};

kernel/bpf/verifier.c

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24013,6 +24013,10 @@ static int compute_live_registers(struct bpf_verifier_env *env)
2401324013
if (env->log.level & BPF_LOG_LEVEL2) {
2401424014
verbose(env, "Live regs before insn:\n");
2401524015
for (i = 0; i < insn_cnt; ++i) {
24016+
if (env->insn_aux_data[i].scc)
24017+
verbose(env, "%3d ", env->insn_aux_data[i].scc);
24018+
else
24019+
verbose(env, " ");
2401624020
verbose(env, "%3d: ", i);
2401724021
for (j = BPF_REG_0; j < BPF_REG_10; ++j)
2401824022
if (insn_aux[i].live_regs_before & BIT(j))
@@ -24034,6 +24038,180 @@ static int compute_live_registers(struct bpf_verifier_env *env)
2403424038
return err;
2403524039
}
2403624040

24041+
/*
24042+
* Compute strongly connected components (SCCs) on the CFG.
24043+
* Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
24044+
* If instruction is a sole member of its SCC and there are no self edges,
24045+
* assign it SCC number of zero.
24046+
* Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
24047+
*/
24048+
static int compute_scc(struct bpf_verifier_env *env)
24049+
{
24050+
const u32 NOT_ON_STACK = U32_MAX;
24051+
24052+
struct bpf_insn_aux_data *aux = env->insn_aux_data;
24053+
const u32 insn_cnt = env->prog->len;
24054+
int stack_sz, dfs_sz, err = 0;
24055+
u32 *stack, *pre, *low, *dfs;
24056+
u32 succ_cnt, i, j, t, w;
24057+
u32 next_preorder_num;
24058+
u32 next_scc_id;
24059+
bool assign_scc;
24060+
u32 succ[2];
24061+
24062+
next_preorder_num = 1;
24063+
next_scc_id = 1;
24064+
/*
24065+
* - 'stack' accumulates vertices in DFS order, see invariant comment below;
24066+
* - 'pre[t] == p' => preorder number of vertex 't' is 'p';
24067+
* - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
24068+
* - 'dfs' DFS traversal stack, used to emulate explicit recursion.
24069+
*/
24070+
stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
24071+
pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
24072+
low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
24073+
dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL);
24074+
if (!stack || !pre || !low || !dfs) {
24075+
err = -ENOMEM;
24076+
goto exit;
24077+
}
24078+
/*
24079+
* References:
24080+
* [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
24081+
* [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
24082+
*
24083+
* The algorithm maintains the following invariant:
24084+
* - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
24085+
* - then, vertex 'u' remains on stack while vertex 'v' is on stack.
24086+
*
24087+
* Consequently:
24088+
* - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
24089+
* such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
24090+
* and thus there is an SCC (loop) containing both 'u' and 'v'.
24091+
* - If 'low[v] == pre[v]', loops containing 'v' have been explored,
24092+
* and 'v' can be considered the root of some SCC.
24093+
*
24094+
* Here is a pseudo-code for an explicitly recursive version of the algorithm:
24095+
*
24096+
* NOT_ON_STACK = insn_cnt + 1
24097+
* pre = [0] * insn_cnt
24098+
* low = [0] * insn_cnt
24099+
* scc = [0] * insn_cnt
24100+
* stack = []
24101+
*
24102+
* next_preorder_num = 1
24103+
* next_scc_id = 1
24104+
*
24105+
* def recur(w):
24106+
* nonlocal next_preorder_num
24107+
* nonlocal next_scc_id
24108+
*
24109+
* pre[w] = next_preorder_num
24110+
* low[w] = next_preorder_num
24111+
* next_preorder_num += 1
24112+
* stack.append(w)
24113+
* for s in successors(w):
24114+
* # Note: for classic algorithm the block below should look as:
24115+
* #
24116+
* # if pre[s] == 0:
24117+
* # recur(s)
24118+
* # low[w] = min(low[w], low[s])
24119+
* # elif low[s] != NOT_ON_STACK:
24120+
* # low[w] = min(low[w], pre[s])
24121+
* #
24122+
* # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
24123+
* # does not break the invariant and makes itartive version of the algorithm
24124+
* # simpler. See 'Algorithm #3' from [2].
24125+
*
24126+
* # 's' not yet visited
24127+
* if pre[s] == 0:
24128+
* recur(s)
24129+
* # if 's' is on stack, pick lowest reachable preorder number from it;
24130+
* # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
24131+
* # so 'min' would be a noop.
24132+
* low[w] = min(low[w], low[s])
24133+
*
24134+
* if low[w] == pre[w]:
24135+
* # 'w' is the root of an SCC, pop all vertices
24136+
* # below 'w' on stack and assign same SCC to them.
24137+
* while True:
24138+
* t = stack.pop()
24139+
* low[t] = NOT_ON_STACK
24140+
* scc[t] = next_scc_id
24141+
* if t == w:
24142+
* break
24143+
* next_scc_id += 1
24144+
*
24145+
* for i in range(0, insn_cnt):
24146+
* if pre[i] == 0:
24147+
* recur(i)
24148+
*
24149+
* Below implementation replaces explicit recusion with array 'dfs'.
24150+
*/
24151+
for (i = 0; i < insn_cnt; i++) {
24152+
if (pre[i])
24153+
continue;
24154+
stack_sz = 0;
24155+
dfs_sz = 1;
24156+
dfs[0] = i;
24157+
dfs_continue:
24158+
while (dfs_sz) {
24159+
w = dfs[dfs_sz - 1];
24160+
if (pre[w] == 0) {
24161+
low[w] = next_preorder_num;
24162+
pre[w] = next_preorder_num;
24163+
next_preorder_num++;
24164+
stack[stack_sz++] = w;
24165+
}
24166+
/* Visit 'w' successors */
24167+
succ_cnt = insn_successors(env->prog, w, succ);
24168+
for (j = 0; j < succ_cnt; ++j) {
24169+
if (pre[succ[j]]) {
24170+
low[w] = min(low[w], low[succ[j]]);
24171+
} else {
24172+
dfs[dfs_sz++] = succ[j];
24173+
goto dfs_continue;
24174+
}
24175+
}
24176+
/*
24177+
* Preserve the invariant: if some vertex above in the stack
24178+
* is reachable from 'w', keep 'w' on the stack.
24179+
*/
24180+
if (low[w] < pre[w]) {
24181+
dfs_sz--;
24182+
goto dfs_continue;
24183+
}
24184+
/*
24185+
* Assign SCC number only if component has two or more elements,
24186+
* or if component has a self reference.
24187+
*/
24188+
assign_scc = stack[stack_sz - 1] != w;
24189+
for (j = 0; j < succ_cnt; ++j) {
24190+
if (succ[j] == w) {
24191+
assign_scc = true;
24192+
break;
24193+
}
24194+
}
24195+
/* Pop component elements from stack */
24196+
do {
24197+
t = stack[--stack_sz];
24198+
low[t] = NOT_ON_STACK;
24199+
if (assign_scc)
24200+
aux[t].scc = next_scc_id;
24201+
} while (t != w);
24202+
if (assign_scc)
24203+
next_scc_id++;
24204+
dfs_sz--;
24205+
}
24206+
}
24207+
exit:
24208+
kvfree(stack);
24209+
kvfree(pre);
24210+
kvfree(low);
24211+
kvfree(dfs);
24212+
return err;
24213+
}
24214+
2403724215
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
2403824216
{
2403924217
u64 start_time = ktime_get_ns();
@@ -24155,6 +24333,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
2415524333
if (ret)
2415624334
goto skip_full_check;
2415724335

24336+
ret = compute_scc(env);
24337+
if (ret < 0)
24338+
goto skip_full_check;
24339+
2415824340
ret = compute_live_registers(env);
2415924341
if (ret < 0)
2416024342
goto skip_full_check;

0 commit comments

Comments
 (0)