Skip to content

Commit f1fdee3

Browse files
committed
Merge branch 'sockmap fixes picked up by stress tests'
John Fastabend says: ==================== Running stress tests with recent patch to remove an extra lock in sockmap resulted in a couple new issues popping up. It seems only one of them is actually related to the patch: 799aa7f ("skmsg: Avoid lock_sock() in sk_psock_backlog()") The other two issues had existed long before, but I guess the timing with the serialization we had before was too tight to get any of our tests or deployments to hit it. With attached series stress testing sockmap+TCP with workloads that create lots of short-lived connections no more splats like below were seen on upstream bpf branch. [224913.935822] WARNING: CPU: 3 PID: 32100 at net/core/stream.c:208 sk_stream_kill_queues+0x212/0x220 [224913.935841] Modules linked in: fuse overlay bpf_preload x86_pkg_temp_thermal intel_uncore wmi_bmof squashfs sch_fq_codel efivarfs ip_tables x_tables uas xhci_pci ixgbe mdio xfrm_algo xhci_hcd wmi [224913.935897] CPU: 3 PID: 32100 Comm: fgs-bench Tainted: G I 5.14.0-rc1alu+ Rust-for-Linux#181 [224913.935908] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [224913.935914] RIP: 0010:sk_stream_kill_queues+0x212/0x220 [224913.935923] Code: 8b 83 20 02 00 00 85 c0 75 20 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 df e8 2b 11 fe ff eb c3 0f 0b e9 7c ff ff ff 0f 0b eb ce <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 90 0f 1f 44 00 00 41 57 41 [224913.935932] RSP: 0018:ffff88816271fd38 EFLAGS: 00010206 [224913.935941] RAX: 0000000000000ae8 RBX: ffff88815acd5240 RCX: dffffc0000000000 [224913.935948] RDX: 0000000000000003 RSI: 0000000000000ae8 RDI: ffff88815acd5460 [224913.935954] RBP: ffff88815acd5460 R08: ffffffff955c0ae8 R09: fffffbfff2e6f543 [224913.935961] R10: ffffffff9737aa17 R11: fffffbfff2e6f542 R12: ffff88815acd5390 [224913.935967] R13: ffff88815acd5480 R14: ffffffff98d0c080 R15: ffffffff96267500 [224913.935974] FS: 00007f86e6bd1700(0000) GS:ffff888451cc0000(0000) knlGS:0000000000000000 [224913.935981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [224913.935988] CR2: 000000c0008eb000 CR3: 00000001020e0005 CR4: 00000000003706e0 [224913.935994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [224913.936000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [224913.936007] Call Trace: [224913.936016] inet_csk_destroy_sock+0xba/0x1f0 [224913.936033] __tcp_close+0x620/0x790 [224913.936047] tcp_close+0x20/0x80 [224913.936056] inet_release+0x8f/0xf0 [224913.936070] __sock_release+0x72/0x120 v3: make sock_drop inline in skmsg.h v2: init skb to null and fix a space/tab issue. Added Jakub's acks. ==================== Signed-off-by: Andrii Nakryiko <[email protected]>
2 parents d6371c7 + 9635720 commit f1fdee3

File tree

2 files changed

+63
-30
lines changed

2 files changed

+63
-30
lines changed

include/linux/skmsg.h

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
285285
return rcu_dereference_sk_user_data(sk);
286286
}
287287

288+
static inline void sk_psock_set_state(struct sk_psock *psock,
289+
enum sk_psock_state_bits bit)
290+
{
291+
set_bit(bit, &psock->state);
292+
}
293+
294+
static inline void sk_psock_clear_state(struct sk_psock *psock,
295+
enum sk_psock_state_bits bit)
296+
{
297+
clear_bit(bit, &psock->state);
298+
}
299+
300+
static inline bool sk_psock_test_state(const struct sk_psock *psock,
301+
enum sk_psock_state_bits bit)
302+
{
303+
return test_bit(bit, &psock->state);
304+
}
305+
306+
static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
307+
{
308+
sk_drops_add(sk, skb);
309+
kfree_skb(skb);
310+
}
311+
312+
static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
313+
{
314+
if (msg->skb)
315+
sock_drop(psock->sk, msg->skb);
316+
kfree(msg);
317+
}
318+
288319
static inline void sk_psock_queue_msg(struct sk_psock *psock,
289320
struct sk_msg *msg)
290321
{
291322
spin_lock_bh(&psock->ingress_lock);
292-
list_add_tail(&msg->list, &psock->ingress_msg);
323+
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
324+
list_add_tail(&msg->list, &psock->ingress_msg);
325+
else
326+
drop_sk_msg(psock, msg);
293327
spin_unlock_bh(&psock->ingress_lock);
294328
}
295329

@@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
406440
psock->psock_update_sk_prot(sk, psock, true);
407441
}
408442

409-
static inline void sk_psock_set_state(struct sk_psock *psock,
410-
enum sk_psock_state_bits bit)
411-
{
412-
set_bit(bit, &psock->state);
413-
}
414-
415-
static inline void sk_psock_clear_state(struct sk_psock *psock,
416-
enum sk_psock_state_bits bit)
417-
{
418-
clear_bit(bit, &psock->state);
419-
}
420-
421-
static inline bool sk_psock_test_state(const struct sk_psock *psock,
422-
enum sk_psock_state_bits bit)
423-
{
424-
return test_bit(bit, &psock->state);
425-
}
426-
427443
static inline struct sk_psock *sk_psock_get(struct sock *sk)
428444
{
429445
struct sk_psock *psock;

net/core/skmsg.c

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -584,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
584584
return sk_psock_skb_ingress(psock, skb);
585585
}
586586

587-
static void sock_drop(struct sock *sk, struct sk_buff *skb)
587+
static void sk_psock_skb_state(struct sk_psock *psock,
588+
struct sk_psock_work_state *state,
589+
struct sk_buff *skb,
590+
int len, int off)
588591
{
589-
sk_drops_add(sk, skb);
590-
kfree_skb(skb);
592+
spin_lock_bh(&psock->ingress_lock);
593+
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
594+
state->skb = skb;
595+
state->len = len;
596+
state->off = off;
597+
} else {
598+
sock_drop(psock->sk, skb);
599+
}
600+
spin_unlock_bh(&psock->ingress_lock);
591601
}
592602

593603
static void sk_psock_backlog(struct work_struct *work)
594604
{
595605
struct sk_psock *psock = container_of(work, struct sk_psock, work);
596606
struct sk_psock_work_state *state = &psock->work_state;
597-
struct sk_buff *skb;
607+
struct sk_buff *skb = NULL;
598608
bool ingress;
599609
u32 len, off;
600610
int ret;
601611

602612
mutex_lock(&psock->work_mutex);
603-
if (state->skb) {
613+
if (unlikely(state->skb)) {
614+
spin_lock_bh(&psock->ingress_lock);
604615
skb = state->skb;
605616
len = state->len;
606617
off = state->off;
607618
state->skb = NULL;
608-
goto start;
619+
spin_unlock_bh(&psock->ingress_lock);
609620
}
621+
if (skb)
622+
goto start;
610623

611624
while ((skb = skb_dequeue(&psock->ingress_skb))) {
612625
len = skb->len;
@@ -621,9 +634,8 @@ static void sk_psock_backlog(struct work_struct *work)
621634
len, ingress);
622635
if (ret <= 0) {
623636
if (ret == -EAGAIN) {
624-
state->skb = skb;
625-
state->len = len;
626-
state->off = off;
637+
sk_psock_skb_state(psock, state, skb,
638+
len, off);
627639
goto end;
628640
}
629641
/* Hard errors break pipe and stop xmit. */
@@ -722,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
722734
skb_bpf_redirect_clear(skb);
723735
sock_drop(psock->sk, skb);
724736
}
737+
kfree_skb(psock->work_state.skb);
738+
/* We null the skb here to ensure that calls to sk_psock_backlog
739+
* do not pick up the free'd skb.
740+
*/
741+
psock->work_state.skb = NULL;
725742
__sk_psock_purge_ingress_msg(psock);
726743
}
727744

@@ -773,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work)
773790

774791
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
775792
{
776-
sk_psock_stop(psock, false);
777-
778793
write_lock_bh(&sk->sk_callback_lock);
779794
sk_psock_restore_proto(sk, psock);
780795
rcu_assign_sk_user_data(sk, NULL);
@@ -784,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
784799
sk_psock_stop_verdict(sk, psock);
785800
write_unlock_bh(&sk->sk_callback_lock);
786801

802+
sk_psock_stop(psock, false);
803+
787804
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
788805
queue_rcu_work(system_wq, &psock->rwork);
789806
}

0 commit comments

Comments
 (0)