Skip to content

Commit f732a47

Browse files
jsitnickiKernel Patches Daemon
authored andcommitted
bpf: Realign skb metadata for TC progs using data_meta
After decoupling metadata location from MAC header offset, a gap can appear between metadata and skb->data on L2 decapsulation (e.g., VLAN, GRE). This breaks the BPF data_meta pointer which assumes metadata is directly before skb->data. Introduce bpf_skb_meta_realign() kfunc to close the gap by moving metadata to immediately precede the MAC header. Inject a call to it in tc_cls_act_prologue() when the verifier detects data_meta access (PA_F_DATA_META_LOAD flag). Update skb_data_move() to handle the gap case: on skb_push(), move metadata to the top of the head buffer; on skb_pull() where metadata is already detached, leave it in place. This restores data_meta functionality for TC programs while keeping the performance benefit of avoiding memmove on L2 decapsulation for programs that don't use data_meta. Signed-off-by: Jakub Sitnicki <[email protected]>
1 parent f81f969 commit f732a47

File tree

2 files changed

+70
-10
lines changed

2 files changed

+70
-10
lines changed

include/linux/skbuff.h

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4600,19 +4600,28 @@ static inline void skb_data_move(struct sk_buff *skb, const int len,
46004600
if (!meta_len)
46014601
goto no_metadata;
46024602

4603-
meta_end = skb_metadata_end(skb);
4604-
meta = meta_end - meta_len;
4605-
4606-
if (WARN_ON_ONCE(meta_end + len != skb->data ||
4607-
meta_len > skb_headroom(skb))) {
4603+
/* Not enough headroom left for metadata. Drop it. */
4604+
if (WARN_ONCE(meta_len > skb_headroom(skb),
4605+
"skb headroom smaller than metadata")) {
46084606
skb_metadata_clear(skb);
46094607
goto no_metadata;
46104608
}
46114609

4612-
memmove(meta + len, meta, meta_len + n);
4613-
skb_shinfo(skb)->meta_end += len;
4614-
return;
4610+
meta_end = skb_metadata_end(skb);
4611+
meta = meta_end - meta_len;
46154612

4613+
/* Metadata in front of data before push/pull. Keep it that way. */
4614+
if (meta_end == skb->data - len) {
4615+
memmove(meta + len, meta, meta_len + n);
4616+
skb_shinfo(skb)->meta_end += len;
4617+
return;
4618+
}
4619+
4620+
if (len < 0) {
4621+
/* Data pushed. Move metadata to the top. */
4622+
memmove(skb->head, meta, meta_len);
4623+
skb_shinfo(skb)->meta_end = meta_len;
4624+
}
46164625
no_metadata:
46174626
memmove(skb->data, skb->data - len, n);
46184627
}

net/core/filter.c

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9125,11 +9125,62 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig,
91259125
return insn - insn_buf;
91269126
}
91279127

9128+
__bpf_kfunc_start_defs();
9129+
9130+
__bpf_kfunc void bpf_skb_meta_realign(struct __sk_buff *skb_)
9131+
{
9132+
struct sk_buff *skb = (typeof(skb))skb_;
9133+
u8 *meta_end = skb_metadata_end(skb);
9134+
u8 meta_len = skb_metadata_len(skb);
9135+
u8 *meta;
9136+
int gap;
9137+
9138+
gap = skb_mac_header(skb) - meta_end;
9139+
if (!meta_len || !gap)
9140+
return;
9141+
9142+
if (WARN_ONCE(gap < 0, "skb metadata end past mac header")) {
9143+
skb_metadata_clear(skb);
9144+
return;
9145+
}
9146+
9147+
meta = meta_end - meta_len;
9148+
memmove(meta + gap, meta, meta_len);
9149+
skb_shinfo(skb)->meta_end += gap;
9150+
9151+
bpf_compute_data_pointers(skb);
9152+
}
9153+
9154+
__bpf_kfunc_end_defs();
9155+
9156+
BTF_KFUNCS_START(tc_cls_act_hidden_ids)
9157+
BTF_ID_FLAGS(func, bpf_skb_meta_realign, KF_TRUSTED_ARGS)
9158+
BTF_KFUNCS_END(tc_cls_act_hidden_ids)
9159+
9160+
BTF_ID_LIST_SINGLE(bpf_skb_meta_realign_ids, func, bpf_skb_meta_realign)
9161+
91289162
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, u32 pkt_access_flags,
91299163
const struct bpf_prog *prog)
91309164
{
9131-
return bpf_unclone_prologue(insn_buf, pkt_access_flags, prog,
9132-
TC_ACT_SHOT);
9165+
struct bpf_insn *insn = insn_buf;
9166+
int cnt;
9167+
9168+
if (pkt_access_flags & PA_F_DATA_META_LOAD) {
9169+
/* Realign skb metadata for access through data_meta pointer.
9170+
*
9171+
* r6 = r1; // r6 will be "u64 *ctx"
9172+
* r0 = bpf_skb_meta_realign(r1); // r0 is undefined
9173+
* r1 = r6;
9174+
*/
9175+
*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
9176+
*insn++ = BPF_CALL_KFUNC(0, bpf_skb_meta_realign_ids[0]);
9177+
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
9178+
}
9179+
cnt = bpf_unclone_prologue(insn, pkt_access_flags, prog, TC_ACT_SHOT);
9180+
if (!cnt && insn > insn_buf)
9181+
*insn++ = prog->insnsi[0];
9182+
9183+
return cnt + insn - insn_buf;
91339184
}
91349185

91359186
static bool tc_cls_act_is_valid_access(int off, int size,

0 commit comments

Comments
 (0)