Skip to content

Commit 9812386

Browse files
bcodding-rh authored and kuba-moo committed
Treewide: Stop corrupting socket's task_frag
Since moving to memalloc_nofs_save/restore, SUNRPC has stopped setting the GFP_NOIO flag on sk_allocation which the networking system uses to decide when it is safe to use current->task_frag. The results of this are unexpected corruption in task_frag when SUNRPC is involved in memory reclaim.

The corruption can be seen in crashes, but the root cause is often difficult to ascertain as a crashing machine's stack trace will have no evidence of being near NFS or SUNRPC code. I believe this problem to be much more pervasive than reports to the community may indicate.

Fix this by having kernel users of sockets that may corrupt task_frag due to reclaim set sk_use_task_frag = false. Preemptively correcting this situation for users that still set sk_allocation allows them to convert to memalloc_nofs_save/restore without the same unexpected corruptions that are sure to follow, unlikely to show up in testing, and difficult to bisect.

CC: Philipp Reisner <[email protected]>
CC: Lars Ellenberg <[email protected]>
CC: "Christoph Böhmwalder" <[email protected]>
CC: Jens Axboe <[email protected]>
CC: Josef Bacik <[email protected]>
CC: Keith Busch <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Sagi Grimberg <[email protected]>
CC: Lee Duncan <[email protected]>
CC: Chris Leech <[email protected]>
CC: Mike Christie <[email protected]>
CC: "James E.J. Bottomley" <[email protected]>
CC: "Martin K. Petersen" <[email protected]>
CC: Valentina Manea <[email protected]>
CC: Shuah Khan <[email protected]>
CC: Greg Kroah-Hartman <[email protected]>
CC: David Howells <[email protected]>
CC: Marc Dionne <[email protected]>
CC: Steve French <[email protected]>
CC: Christine Caulfield <[email protected]>
CC: David Teigland <[email protected]>
CC: Mark Fasheh <[email protected]>
CC: Joel Becker <[email protected]>
CC: Joseph Qi <[email protected]>
CC: Eric Van Hensbergen <[email protected]>
CC: Latchesar Ionkov <[email protected]>
CC: Dominique Martinet <[email protected]>
CC: Ilya Dryomov <[email protected]>
CC: Xiubo Li <[email protected]>
CC: Chuck Lever <[email protected]>
CC: Jeff Layton <[email protected]>
CC: Trond Myklebust <[email protected]>
CC: Anna Schumaker <[email protected]>
CC: Steffen Klassert <[email protected]>
CC: Herbert Xu <[email protected]>
Suggested-by: Guillaume Nault <[email protected]>
Signed-off-by: Benjamin Coddington <[email protected]>
Reviewed-by: Guillaume Nault <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent fb87bd4 commit 9812386

File tree

12 files changed

+17
-0
lines changed

12 files changed

+17
-0
lines changed

drivers/block/drbd/drbd_receiver.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1030,6 +1030,9 @@ static int conn_connect(struct drbd_connection *connection)
10301030
sock.socket->sk->sk_allocation = GFP_NOIO;
10311031
msock.socket->sk->sk_allocation = GFP_NOIO;
10321032

1033+
sock.socket->sk->sk_use_task_frag = false;
1034+
msock.socket->sk->sk_use_task_frag = false;
1035+
10331036
sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
10341037
msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
10351038

drivers/block/nbd.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -512,6 +512,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
512512
noreclaim_flag = memalloc_noreclaim_save();
513513
do {
514514
sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
515+
sock->sk->sk_use_task_frag = false;
515516
msg.msg_name = NULL;
516517
msg.msg_namelen = 0;
517518
msg.msg_control = NULL;

drivers/nvme/host/tcp.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1537,6 +1537,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
15371537
queue->sock->sk->sk_rcvtimeo = 10 * HZ;
15381538

15391539
queue->sock->sk->sk_allocation = GFP_ATOMIC;
1540+
queue->sock->sk->sk_use_task_frag = false;
15401541
nvme_tcp_set_queue_io_cpu(queue);
15411542
queue->request = NULL;
15421543
queue->data_remaining = 0;

drivers/scsi/iscsi_tcp.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -738,6 +738,7 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
738738
sk->sk_reuse = SK_CAN_REUSE;
739739
sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
740740
sk->sk_allocation = GFP_ATOMIC;
741+
sk->sk_use_task_frag = false;
741742
sk_set_memalloc(sk);
742743
sock_no_linger(sk);
743744

drivers/usb/usbip/usbip_common.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -315,6 +315,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
315315

316316
do {
317317
sock->sk->sk_allocation = GFP_NOIO;
318+
sock->sk->sk_use_task_frag = false;
318319

319320
result = sock_recvmsg(sock, &msg, MSG_WAITALL);
320321
if (result <= 0)

fs/cifs/connect.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2944,6 +2944,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
29442944
cifs_dbg(FYI, "Socket created\n");
29452945
server->ssocket = socket;
29462946
socket->sk->sk_allocation = GFP_NOFS;
2947+
socket->sk->sk_use_task_frag = false;
29472948
if (sfamily == AF_INET6)
29482949
cifs_reclassify_socket6(socket);
29492950
else

fs/dlm/lowcomms.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -645,6 +645,7 @@ static void add_sock(struct socket *sock, struct connection *con)
645645
if (dlm_config.ci_protocol == DLM_PROTO_SCTP)
646646
sk->sk_state_change = lowcomms_state_change;
647647
sk->sk_allocation = GFP_NOFS;
648+
sk->sk_use_task_frag = false;
648649
sk->sk_error_report = lowcomms_error_report;
649650
release_sock(sk);
650651
}
@@ -1769,6 +1770,7 @@ static int dlm_listen_for_all(void)
17691770
listen_con.sock = sock;
17701771

17711772
sock->sk->sk_allocation = GFP_NOFS;
1773+
sock->sk->sk_use_task_frag = false;
17721774
sock->sk->sk_data_ready = lowcomms_listen_data_ready;
17731775
release_sock(sock->sk);
17741776

fs/ocfs2/cluster/tcp.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1602,6 +1602,7 @@ static void o2net_start_connect(struct work_struct *work)
16021602
sc->sc_sock = sock; /* freed by sc_kref_release */
16031603

16041604
sock->sk->sk_allocation = GFP_ATOMIC;
1605+
sock->sk->sk_use_task_frag = false;
16051606

16061607
myaddr.sin_family = AF_INET;
16071608
myaddr.sin_addr.s_addr = mynode->nd_ipv4_address;

net/9p/trans_fd.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -868,6 +868,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
868868
}
869869

870870
csocket->sk->sk_allocation = GFP_NOIO;
871+
csocket->sk->sk_use_task_frag = false;
871872
file = sock_alloc_file(csocket, 0, NULL);
872873
if (IS_ERR(file)) {
873874
pr_err("%s (%d): failed to map fd\n",

net/ceph/messenger.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ int ceph_tcp_connect(struct ceph_connection *con)
446446
if (ret)
447447
return ret;
448448
sock->sk->sk_allocation = GFP_NOFS;
449+
sock->sk->sk_use_task_frag = false;
449450

450451
#ifdef CONFIG_LOCKDEP
451452
lockdep_set_class(&sock->sk->sk_lock, &socket_class);

0 commit comments

Comments
 (0)