Commit bc57c7d

isilence authored and axboe committed
io_uring/zcrx: add copy fallback
There are scenarios in which the zerocopy path can get a kernel buffer instead of a net_iov and needs to copy it to the user, whether it is because of mis-steering or simply getting an skb with a linear part. In this case, grab a net_iov, copy into it and return it to the user as normal. At the moment the user doesn't get any indication of whether there was a copy or not, which is left for follow-up work.

Reviewed-by: Jens Axboe <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: David Wei <[email protected]>
Acked-by: Jakub Kicinski <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
1 parent 931dfae commit bc57c7d


io_uring/zcrx.c

Lines changed: 114 additions & 6 deletions
@@ -7,6 +7,7 @@
 #include <linux/io_uring.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/skbuff_ref.h>
 
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
@@ -134,6 +135,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
 	atomic_inc(io_get_user_counter(niov));
 }
 
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+	return area->pages[net_iov_idx(niov)];
+}
+
 static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 				 struct io_uring_zcrx_ifq_reg *reg,
 				 struct io_uring_region_desc *rd)
@@ -448,6 +456,11 @@ static void io_zcrx_return_niov(struct net_iov *niov)
 {
 	netmem_ref netmem = net_iov_to_netmem(niov);
 
+	if (!niov->pp) {
+		/* copy fallback allocated niovs */
+		io_zcrx_return_niov_freelist(niov);
+		return;
+	}
 	page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false);
 }
 
@@ -686,13 +699,93 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	return true;
 }
 
+static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+{
+	struct net_iov *niov = NULL;
+
+	spin_lock_bh(&area->freelist_lock);
+	if (area->free_count)
+		niov = __io_zcrx_get_free_niov(area);
+	spin_unlock_bh(&area->freelist_lock);
+
+	if (niov)
+		page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+	return niov;
+}
+
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+				  void *src_base, struct page *src_page,
+				  unsigned int src_offset, size_t len)
+{
+	struct io_zcrx_area *area = ifq->area;
+	size_t copied = 0;
+	int ret = 0;
+
+	while (len) {
+		size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+		const int dst_off = 0;
+		struct net_iov *niov;
+		struct page *dst_page;
+		void *dst_addr;
+
+		niov = io_zcrx_alloc_fallback(area);
+		if (!niov) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		dst_page = io_zcrx_iov_page(niov);
+		dst_addr = kmap_local_page(dst_page);
+		if (src_page)
+			src_base = kmap_local_page(src_page);
+
+		memcpy(dst_addr, src_base + src_offset, copy_size);
+
+		if (src_page)
+			kunmap_local(src_base);
+		kunmap_local(dst_addr);
+
+		if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+			io_zcrx_return_niov(niov);
+			ret = -ENOSPC;
+			break;
+		}
+
+		io_zcrx_get_niov_uref(niov);
+		src_offset += copy_size;
+		len -= copy_size;
+		copied += copy_size;
+	}
+
+	return copied ? copied : ret;
+}
+
+static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+			     const skb_frag_t *frag, int off, int len)
+{
+	struct page *page = skb_frag_page(frag);
+	u32 p_off, p_len, t, copied = 0;
+	int ret = 0;
+
+	off += skb_frag_off(frag);
+
+	skb_frag_foreach_page(frag, off, len,
+			      page, p_off, p_len, t) {
+		ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len);
+		if (ret < 0)
+			return copied ? copied : ret;
+		copied += ret;
+	}
+	return copied;
+}
+
 static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 			     const skb_frag_t *frag, int off, int len)
 {
 	struct net_iov *niov;
 
 	if (unlikely(!skb_frag_is_net_iov(frag)))
-		return -EOPNOTSUPP;
+		return io_zcrx_copy_frag(req, ifq, frag, off, len);
 
 	niov = netmem_to_net_iov(frag->netmem);
 	if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
@@ -719,18 +812,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
 	struct io_zcrx_ifq *ifq = args->ifq;
 	struct io_kiocb *req = args->req;
 	struct sk_buff *frag_iter;
-	unsigned start, start_off;
+	unsigned start, start_off = offset;
 	int i, copy, end, off;
 	int ret = 0;
 
 	if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
 		return -EAGAIN;
 
-	start = skb_headlen(skb);
-	start_off = offset;
+	if (unlikely(offset < skb_headlen(skb))) {
+		ssize_t copied;
+		size_t to_copy;
 
-	if (offset < start)
-		return -EOPNOTSUPP;
+		to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+		copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
+					    offset, to_copy);
+		if (copied < 0) {
+			ret = copied;
+			goto out;
+		}
+		offset += copied;
+		len -= copied;
+		if (!len)
+			goto out;
+		if (offset != skb_headlen(skb))
+			goto out;
+	}
+
+	start = skb_headlen(skb);
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		const skb_frag_t *frag;
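
For readers who want to see the shape of the fallback in isolation, below is a small userspace mock of the per-chunk copy loop added by this patch: split the source into page-sized pieces, take a destination buffer from a freelist for each piece, copy, and post one completion per piece. It only mirrors the structure of io_zcrx_copy_chunk() above; every name in it (FAKE_PAGE_SIZE, fake_alloc_from_freelist, fake_post_cqe) is a made-up stand-in for illustration, not a kernel or liburing API.

/*
 * Illustrative userspace mock of the chunked copy fallback. The real logic
 * lives in io_zcrx_copy_chunk(); all names below are invented for the sketch.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#define FAKE_PAGE_SIZE 4096u	/* stand-in for the kernel's PAGE_SIZE */

/* Stand-in for pulling a niov-backed page off the area freelist. */
static void *fake_alloc_from_freelist(void)
{
	return malloc(FAKE_PAGE_SIZE);
}

/* Stand-in for io_zcrx_queue_cqe(): report one completion per chunk. */
static void fake_post_cqe(void *buf, size_t len)
{
	printf("cqe: buf=%p len=%zu\n", buf, len);
}

/*
 * Mirrors the loop structure of io_zcrx_copy_chunk(): copy at most one page
 * per iteration, stop early when no destination buffer is available, and
 * return whatever was copied so far (or an error if nothing was).
 */
static ssize_t copy_chunks(const char *src, size_t len)
{
	size_t copied = 0;

	while (len) {
		size_t n = len < FAKE_PAGE_SIZE ? len : FAKE_PAGE_SIZE;
		char *dst = fake_alloc_from_freelist();

		if (!dst)
			return copied ? (ssize_t)copied : -1; /* ~ -ENOMEM */

		memcpy(dst, src + copied, n);
		/* ownership of dst moves to the "user"; it is not freed here */
		fake_post_cqe(dst, n);

		copied += n;
		len -= n;
	}
	return copied;
}

int main(void)
{
	static char data[10000];

	printf("copied %zd bytes\n", copy_chunks(data, sizeof(data)));
	return 0;
}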
