Skip to content

Commit 49500ad

Browse files
mrpre authored and Kernel Patches Daemon committed
tcp_bpf: optimize splice_read with zero-copy for non-slab pages
The previous splice_read implementation copies all data through intermediate pages (alloc_page + memcpy). This is wasteful for skb fragment pages, which are allocated from the page allocator and can be safely referenced via get_page().

Optimize by checking PageSlab() to distinguish between linear skb data (slab-backed) and fragment pages (page allocator-backed):

- For slab pages (skb linear data): copy to a page fragment via sk_page_frag, matching what linear_to_page() does in the standard TCP splice path (skb_splice_bits). get_page() is invalid on slab pages, so a copy is unavoidable here.
- For non-slab pages (skb frags): use get_page() directly for true zero-copy, the same as skb_splice_bits does for fragments.

Both paths use nosteal_pipe_buf_ops. The sk_page_frag approach is more memory-efficient than alloc_page for small linear copies, as multiple copies can share a single page fragment.

Benchmark results with rx-verdict-ingress mode (loopback, 8 CPUs):

  splice(2) + always-copy: ~2770 MB/s (before this patch)
  splice(2) + zero-copy:   ~4270 MB/s (after this patch, +54%)
  read(2):                 ~4292 MB/s (baseline for reference)

Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
1 parent 0eb3c1a commit 49500ad

File tree

1 file changed

+31
-10
lines changed

1 file changed

+31
-10
lines changed

net/ipv4/tcp_bpf.c

Lines changed: 31 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -447,6 +447,7 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
447447

448448
struct tcp_bpf_splice_ctx {
449449
struct pipe_inode_info *pipe;
450+
struct sock *sk;
450451
};
451452

452453
static int sk_msg_splice_actor(void *arg, struct page *page,
@@ -458,13 +459,33 @@ static int sk_msg_splice_actor(void *arg, struct page *page,
458459
};
459460
ssize_t ret;
460461

461-
buf.page = alloc_page(GFP_KERNEL);
462-
if (!buf.page)
463-
return 0;
462+
if (PageSlab(page)) {
463+
/*
464+
* skb linear data is backed by slab memory where
465+
* get_page() is invalid. Copy to a page fragment from
466+
* the socket's page allocator, matching what
467+
* linear_to_page() does in the standard TCP splice
468+
* path (skb_splice_bits).
469+
*/
470+
struct page_frag *pfrag = sk_page_frag(ctx->sk);
471+
472+
if (!sk_page_frag_refill(ctx->sk, pfrag))
473+
return 0;
464474

465-
memcpy(page_address(buf.page), page_address(page) + offset, len);
466-
buf.offset = 0;
467-
buf.len = len;
475+
len = min_t(size_t, len, pfrag->size - pfrag->offset);
476+
memcpy(page_address(pfrag->page) + pfrag->offset,
477+
page_address(page) + offset, len);
478+
buf.page = pfrag->page;
479+
buf.offset = pfrag->offset;
480+
buf.len = len;
481+
pfrag->offset += len;
482+
} else {
483+
buf.page = page;
484+
buf.offset = offset;
485+
buf.len = len;
486+
}
487+
488+
get_page(buf.page);
468489

469490
/*
470491
* add_to_pipe() calls pipe_buf_release() on failure, which
@@ -481,9 +502,9 @@ static ssize_t tcp_bpf_splice_read(struct socket *sock, loff_t *ppos,
481502
struct pipe_inode_info *pipe, size_t len,
482503
unsigned int flags)
483504
{
484-
struct tcp_bpf_splice_ctx ctx = { .pipe = pipe };
485-
int bpf_flags = flags & SPLICE_F_NONBLOCK ? MSG_DONTWAIT : 0;
486505
struct sock *sk = sock->sk;
506+
struct tcp_bpf_splice_ctx ctx = { .pipe = pipe, .sk = sk };
507+
int bpf_flags = flags & SPLICE_F_NONBLOCK ? MSG_DONTWAIT : 0;
487508
struct sk_psock *psock;
488509
int ret;
489510

@@ -508,9 +529,9 @@ static ssize_t tcp_bpf_splice_read_parser(struct socket *sock, loff_t *ppos,
508529
struct pipe_inode_info *pipe,
509530
size_t len, unsigned int flags)
510531
{
511-
struct tcp_bpf_splice_ctx ctx = { .pipe = pipe };
512-
int bpf_flags = flags & SPLICE_F_NONBLOCK ? MSG_DONTWAIT : 0;
513532
struct sock *sk = sock->sk;
533+
struct tcp_bpf_splice_ctx ctx = { .pipe = pipe, .sk = sk };
534+
int bpf_flags = flags & SPLICE_F_NONBLOCK ? MSG_DONTWAIT : 0;
514535
struct sk_psock *psock;
515536
int ret;
516537

0 commit comments

Comments
 (0)