Skip to content

Commit 7b80581

Browse files
author
Alexei Starovoitov
committed
Merge branch 'fix-ktls-with-sk_skb_verdict'
John Fastabend says: ==================== If a socket is running a BPF_SK_SKB_SREAM_VERDICT program and KTLS is enabled the data stream may be broken if both TLS stream parser and BPF stream parser try to handle data. Fix this here by making KTLS stream parser run first to ensure TLS messages are received correctly and then calling the verdict program. This analogous to how we handle a similar conflict on the TX side. Note, this is a fix but it doesn't make sense to push this late to bpf tree so targeting bpf-next and keeping fixes tags. ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents df8fe57 + 463bac5 commit 7b80581

File tree

6 files changed

+296
-48
lines changed

6 files changed

+296
-48
lines changed

include/linux/skmsg.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
437437
psock_set_prog(&progs->skb_verdict, NULL);
438438
}
439439

440+
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
441+
442+
static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
443+
{
444+
if (!psock)
445+
return false;
446+
return psock->parser.enabled;
447+
}
440448
#endif /* _LINUX_SKMSG_H */

include/net/tls.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
571571
return !!tls_sw_ctx_tx(ctx);
572572
}
573573

574+
static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
575+
{
576+
struct tls_context *ctx = tls_get_ctx(sk);
577+
578+
if (!ctx)
579+
return false;
580+
return !!tls_sw_ctx_rx(ctx);
581+
}
582+
574583
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
575584
void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
576585

net/core/skmsg.c

Lines changed: 74 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <net/sock.h>
99
#include <net/tcp.h>
10+
#include <net/tls.h>
1011

1112
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
1213
{
@@ -682,13 +683,75 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
682683
return container_of(parser, struct sk_psock, parser);
683684
}
684685

685-
static void sk_psock_verdict_apply(struct sk_psock *psock,
686-
struct sk_buff *skb, int verdict)
686+
static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
687687
{
688688
struct sk_psock *psock_other;
689689
struct sock *sk_other;
690690
bool ingress;
691691

692+
sk_other = tcp_skb_bpf_redirect_fetch(skb);
693+
if (unlikely(!sk_other)) {
694+
kfree_skb(skb);
695+
return;
696+
}
697+
psock_other = sk_psock(sk_other);
698+
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
699+
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
700+
kfree_skb(skb);
701+
return;
702+
}
703+
704+
ingress = tcp_skb_bpf_ingress(skb);
705+
if ((!ingress && sock_writeable(sk_other)) ||
706+
(ingress &&
707+
atomic_read(&sk_other->sk_rmem_alloc) <=
708+
sk_other->sk_rcvbuf)) {
709+
if (!ingress)
710+
skb_set_owner_w(skb, sk_other);
711+
skb_queue_tail(&psock_other->ingress_skb, skb);
712+
schedule_work(&psock_other->work);
713+
} else {
714+
kfree_skb(skb);
715+
}
716+
}
717+
718+
static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
719+
struct sk_buff *skb, int verdict)
720+
{
721+
switch (verdict) {
722+
case __SK_REDIRECT:
723+
sk_psock_skb_redirect(psock, skb);
724+
break;
725+
case __SK_PASS:
726+
case __SK_DROP:
727+
default:
728+
break;
729+
}
730+
}
731+
732+
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
733+
{
734+
struct bpf_prog *prog;
735+
int ret = __SK_PASS;
736+
737+
rcu_read_lock();
738+
prog = READ_ONCE(psock->progs.skb_verdict);
739+
if (likely(prog)) {
740+
tcp_skb_bpf_redirect_clear(skb);
741+
ret = sk_psock_bpf_run(psock, prog, skb);
742+
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
743+
}
744+
rcu_read_unlock();
745+
sk_psock_tls_verdict_apply(psock, skb, ret);
746+
return ret;
747+
}
748+
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
749+
750+
static void sk_psock_verdict_apply(struct sk_psock *psock,
751+
struct sk_buff *skb, int verdict)
752+
{
753+
struct sock *sk_other;
754+
692755
switch (verdict) {
693756
case __SK_PASS:
694757
sk_other = psock->sk;
@@ -707,25 +770,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
707770
}
708771
goto out_free;
709772
case __SK_REDIRECT:
710-
sk_other = tcp_skb_bpf_redirect_fetch(skb);
711-
if (unlikely(!sk_other))
712-
goto out_free;
713-
psock_other = sk_psock(sk_other);
714-
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
715-
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
716-
goto out_free;
717-
ingress = tcp_skb_bpf_ingress(skb);
718-
if ((!ingress && sock_writeable(sk_other)) ||
719-
(ingress &&
720-
atomic_read(&sk_other->sk_rmem_alloc) <=
721-
sk_other->sk_rcvbuf)) {
722-
if (!ingress)
723-
skb_set_owner_w(skb, sk_other);
724-
skb_queue_tail(&psock_other->ingress_skb, skb);
725-
schedule_work(&psock_other->work);
726-
break;
727-
}
728-
/* fall-through */
773+
sk_psock_skb_redirect(psock, skb);
774+
break;
729775
case __SK_DROP:
730776
/* fall-through */
731777
default:
@@ -779,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk)
779825
rcu_read_lock();
780826
psock = sk_psock(sk);
781827
if (likely(psock)) {
782-
write_lock_bh(&sk->sk_callback_lock);
783-
strp_data_ready(&psock->parser.strp);
784-
write_unlock_bh(&sk->sk_callback_lock);
828+
if (tls_sw_has_ctx_rx(sk)) {
829+
psock->parser.saved_data_ready(sk);
830+
} else {
831+
write_lock_bh(&sk->sk_callback_lock);
832+
strp_data_ready(&psock->parser.strp);
833+
write_unlock_bh(&sk->sk_callback_lock);
834+
}
785835
}
786836
rcu_read_unlock();
787837
}

net/tls/tls_sw.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,6 +1742,7 @@ int tls_sw_recvmsg(struct sock *sk,
17421742
long timeo;
17431743
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
17441744
bool is_peek = flags & MSG_PEEK;
1745+
bool bpf_strp_enabled;
17451746
int num_async = 0;
17461747
int pending;
17471748

@@ -1752,6 +1753,7 @@ int tls_sw_recvmsg(struct sock *sk,
17521753

17531754
psock = sk_psock_get(sk);
17541755
lock_sock(sk);
1756+
bpf_strp_enabled = sk_psock_strp_enabled(psock);
17551757

17561758
/* Process pending decrypted records. It must be non-zero-copy */
17571759
err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
@@ -1805,11 +1807,12 @@ int tls_sw_recvmsg(struct sock *sk,
18051807

18061808
if (to_decrypt <= len && !is_kvec && !is_peek &&
18071809
ctx->control == TLS_RECORD_TYPE_DATA &&
1808-
prot->version != TLS_1_3_VERSION)
1810+
prot->version != TLS_1_3_VERSION &&
1811+
!bpf_strp_enabled)
18091812
zc = true;
18101813

18111814
/* Do not use async mode if record is non-data */
1812-
if (ctx->control == TLS_RECORD_TYPE_DATA)
1815+
if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
18131816
async_capable = ctx->async_capable;
18141817
else
18151818
async_capable = false;
@@ -1859,6 +1862,19 @@ int tls_sw_recvmsg(struct sock *sk,
18591862
goto pick_next_record;
18601863

18611864
if (!zc) {
1865+
if (bpf_strp_enabled) {
1866+
err = sk_psock_tls_strp_read(psock, skb);
1867+
if (err != __SK_PASS) {
1868+
rxm->offset = rxm->offset + rxm->full_len;
1869+
rxm->full_len = 0;
1870+
if (err == __SK_DROP)
1871+
consume_skb(skb);
1872+
ctx->recv_pkt = NULL;
1873+
__strp_unpause(&ctx->strp);
1874+
continue;
1875+
}
1876+
}
1877+
18621878
if (rxm->full_len > len) {
18631879
retain_skb = true;
18641880
chunk = len;

tools/testing/selftests/bpf/progs/test_sockmap_kern.h

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,18 @@ struct {
7979

8080
struct {
8181
__uint(type, BPF_MAP_TYPE_ARRAY);
82-
__uint(max_entries, 1);
82+
__uint(max_entries, 2);
8383
__type(key, int);
8484
__type(value, int);
8585
} sock_skb_opts SEC(".maps");
8686

87+
struct {
88+
__uint(type, TEST_MAP_TYPE);
89+
__uint(max_entries, 20);
90+
__uint(key_size, sizeof(int));
91+
__uint(value_size, sizeof(int));
92+
} tls_sock_map SEC(".maps");
93+
8794
SEC("sk_skb1")
8895
int bpf_prog1(struct __sk_buff *skb)
8996
{
@@ -118,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
118125

119126
}
120127

128+
SEC("sk_skb3")
129+
int bpf_prog3(struct __sk_buff *skb)
130+
{
131+
const int one = 1;
132+
int err, *f, ret = SK_PASS;
133+
void *data_end;
134+
char *c;
135+
136+
err = bpf_skb_pull_data(skb, 19);
137+
if (err)
138+
goto tls_out;
139+
140+
c = (char *)(long)skb->data;
141+
data_end = (void *)(long)skb->data_end;
142+
143+
if (c + 18 < data_end)
144+
memcpy(&c[13], "PASS", 4);
145+
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
146+
if (f && *f) {
147+
__u64 flags = 0;
148+
149+
ret = 0;
150+
flags = *f;
151+
#ifdef SOCKMAP
152+
return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
153+
#else
154+
return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
155+
#endif
156+
}
157+
158+
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
159+
if (f && *f)
160+
ret = SK_DROP;
161+
tls_out:
162+
return ret;
163+
}
164+
121165
SEC("sockops")
122166
int bpf_sockmap(struct bpf_sock_ops *skops)
123167
{

0 commit comments

Comments
 (0)