Skip to content

Commit 4f567ac

Browse files
Alexander Aring authored and
teigland committed
fs: dlm: remove socket shutdown handling
Since commit 489d8e5 ("fs: dlm: add reliable connection if reconnect"), the DLM application protocol layer provides functionality similar to what TCP offers for half-closed sockets. This feature is required because a cluster manager event about leaving a resource membership may already have occurred locally while other cluster nodes still have that leave pending in the cluster manager protocol. During this time the local DLM node has already shut down its connection and no longer transmits any new DLM messages, but it still needs to be able to accept DLM messages, because the leave-membership request is still pending in the cluster manager protocol, over which the DLM kernel implementation has no control. We have this functionality on the application layer for two reasons. The main reason is that SCTP does not support such functionality on the socket layer, but we can do it inside the application layer. Another, smaller issue is that this feature is broken in the TCP world because some NAT devices do not implement such functionality correctly. This is the same reason why the reliable connection session layer in DLM exists: we give up on relying on middle devices in the network, which may e.g. send out TCP resets. In DLM we cannot tolerate any dropped messages, and the session layer ensures that this cannot happen. Back to the half-closed graceful shutdown handling: it is no longer necessary to do it on the socket layer (which is only supported for TCP sockets) because we do it on the application layer. This patch removes this handling; if there are still issues, then we have a problem in the application layer's handling. Signed-off-by: Alexander Aring <[email protected]> Signed-off-by: David Teigland <[email protected]>
1 parent 1037c2a commit 4f567ac

File tree

3 files changed

+27
-107
lines changed

3 files changed

+27
-107
lines changed

fs/dlm/lowcomms.c

Lines changed: 22 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@
6565

6666
/* Number of messages to send before rescheduling */
6767
#define MAX_SEND_MSG_COUNT 25
68-
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)
6968

7069
struct connection {
7170
struct socket *sock; /* NULL if not connected */
@@ -79,22 +78,18 @@ struct connection {
7978
#define CF_CLOSE 6
8079
#define CF_APP_LIMITED 7
8180
#define CF_CLOSING 8
82-
#define CF_SHUTDOWN 9
83-
#define CF_CONNECTED 10
84-
#define CF_RECONNECT 11
85-
#define CF_DELAY_CONNECT 12
86-
#define CF_EOF 13
81+
#define CF_CONNECTED 9
82+
#define CF_RECONNECT 10
83+
#define CF_DELAY_CONNECT 11
8784
struct list_head writequeue; /* List of outgoing writequeue_entries */
8885
spinlock_t writequeue_lock;
89-
atomic_t writequeue_cnt;
9086
int retries;
9187
#define MAX_CONNECT_RETRIES 3
9288
struct hlist_node list;
9389
struct connection *othercon;
9490
struct connection *sendcon;
9591
struct work_struct rwork; /* Receive workqueue */
9692
struct work_struct swork; /* Send workqueue */
97-
wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
9893
unsigned char *rx_buf;
9994
int rx_buflen;
10095
int rx_leftover;
@@ -157,10 +152,6 @@ struct dlm_proto_ops {
157152
int (*listen_validate)(void);
158153
void (*listen_sockopts)(struct socket *sock);
159154
int (*listen_bind)(struct socket *sock);
160-
/* What to do to shutdown */
161-
void (*shutdown_action)(struct connection *con);
162-
/* What to do to eof check */
163-
bool (*eof_condition)(struct connection *con);
164155
};
165156

166157
static struct listen_sock_callbacks {
@@ -241,11 +232,6 @@ static struct connection *__find_con(int nodeid, int r)
241232
return NULL;
242233
}
243234

244-
static bool tcp_eof_condition(struct connection *con)
245-
{
246-
return atomic_read(&con->writequeue_cnt);
247-
}
248-
249235
static int dlm_con_init(struct connection *con, int nodeid)
250236
{
251237
con->rx_buflen = dlm_config.ci_buffer_size;
@@ -257,10 +243,8 @@ static int dlm_con_init(struct connection *con, int nodeid)
257243
mutex_init(&con->sock_mutex);
258244
INIT_LIST_HEAD(&con->writequeue);
259245
spin_lock_init(&con->writequeue_lock);
260-
atomic_set(&con->writequeue_cnt, 0);
261246
INIT_WORK(&con->swork, process_send_sockets);
262247
INIT_WORK(&con->rwork, process_recv_sockets);
263-
init_waitqueue_head(&con->shutdown_wait);
264248

265249
return 0;
266250
}
@@ -771,7 +755,6 @@ static void free_entry(struct writequeue_entry *e)
771755
}
772756

773757
list_del(&e->list);
774-
atomic_dec(&e->con->writequeue_cnt);
775758
kref_put(&e->ref, dlm_page_release);
776759
}
777760

@@ -834,56 +817,10 @@ static void close_connection(struct connection *con, bool and_other,
834817
clear_bit(CF_CONNECTED, &con->flags);
835818
clear_bit(CF_DELAY_CONNECT, &con->flags);
836819
clear_bit(CF_RECONNECT, &con->flags);
837-
clear_bit(CF_EOF, &con->flags);
838820
mutex_unlock(&con->sock_mutex);
839821
clear_bit(CF_CLOSING, &con->flags);
840822
}
841823

842-
static void shutdown_connection(struct connection *con)
843-
{
844-
int ret;
845-
846-
flush_work(&con->swork);
847-
848-
mutex_lock(&con->sock_mutex);
849-
/* nothing to shutdown */
850-
if (!con->sock) {
851-
mutex_unlock(&con->sock_mutex);
852-
return;
853-
}
854-
855-
set_bit(CF_SHUTDOWN, &con->flags);
856-
ret = kernel_sock_shutdown(con->sock, SHUT_WR);
857-
mutex_unlock(&con->sock_mutex);
858-
if (ret) {
859-
log_print("Connection %p failed to shutdown: %d will force close",
860-
con, ret);
861-
goto force_close;
862-
} else {
863-
ret = wait_event_timeout(con->shutdown_wait,
864-
!test_bit(CF_SHUTDOWN, &con->flags),
865-
DLM_SHUTDOWN_WAIT_TIMEOUT);
866-
if (ret == 0) {
867-
log_print("Connection %p shutdown timed out, will force close",
868-
con);
869-
goto force_close;
870-
}
871-
}
872-
873-
return;
874-
875-
force_close:
876-
clear_bit(CF_SHUTDOWN, &con->flags);
877-
close_connection(con, false, true, true);
878-
}
879-
880-
static void dlm_tcp_shutdown(struct connection *con)
881-
{
882-
if (con->othercon)
883-
shutdown_connection(con->othercon);
884-
shutdown_connection(con);
885-
}
886-
887824
static int con_realloc_receive_buf(struct connection *con, int newlen)
888825
{
889826
unsigned char *newbuf;
@@ -975,19 +912,8 @@ static int receive_from_sock(struct connection *con)
975912
log_print("connection %p got EOF from %d",
976913
con, con->nodeid);
977914

978-
if (dlm_proto_ops->eof_condition &&
979-
dlm_proto_ops->eof_condition(con)) {
980-
set_bit(CF_EOF, &con->flags);
981-
mutex_unlock(&con->sock_mutex);
982-
} else {
983-
mutex_unlock(&con->sock_mutex);
984-
close_connection(con, false, true, false);
985-
986-
/* handling for tcp shutdown */
987-
clear_bit(CF_SHUTDOWN, &con->flags);
988-
wake_up(&con->shutdown_wait);
989-
}
990-
915+
mutex_unlock(&con->sock_mutex);
916+
close_connection(con, false, true, false);
991917
/* signal to breaking receive worker */
992918
ret = -1;
993919
} else {
@@ -1261,7 +1187,6 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
12611187
kref_get(&e->ref);
12621188
*ppc = page_address(e->page);
12631189
e->end += len;
1264-
atomic_inc(&con->writequeue_cnt);
12651190
if (cb)
12661191
cb(data);
12671192

@@ -1467,20 +1392,6 @@ static void send_to_sock(struct connection *con)
14671392
}
14681393
spin_unlock(&con->writequeue_lock);
14691394

1470-
/* close if we got EOF */
1471-
if (test_and_clear_bit(CF_EOF, &con->flags)) {
1472-
mutex_unlock(&con->sock_mutex);
1473-
close_connection(con, false, false, true);
1474-
1475-
/* handling for tcp shutdown */
1476-
clear_bit(CF_SHUTDOWN, &con->flags);
1477-
wake_up(&con->shutdown_wait);
1478-
} else {
1479-
mutex_unlock(&con->sock_mutex);
1480-
}
1481-
1482-
return;
1483-
14841395
out:
14851396
mutex_unlock(&con->sock_mutex);
14861397
return;
@@ -1680,16 +1591,8 @@ static int work_start(void)
16801591
return 0;
16811592
}
16821593

1683-
static void shutdown_conn(struct connection *con)
1684-
{
1685-
if (dlm_proto_ops->shutdown_action)
1686-
dlm_proto_ops->shutdown_action(con);
1687-
}
1688-
16891594
void dlm_lowcomms_shutdown(void)
16901595
{
1691-
int idx;
1692-
16931596
restore_callbacks(listen_con.sock);
16941597

16951598
if (recv_workqueue)
@@ -1698,9 +1601,25 @@ void dlm_lowcomms_shutdown(void)
16981601
flush_workqueue(send_workqueue);
16991602

17001603
dlm_close_sock(&listen_con.sock);
1604+
}
1605+
1606+
void dlm_lowcomms_shutdown_node(int nodeid, bool force)
1607+
{
1608+
struct connection *con;
1609+
int idx;
17011610

17021611
idx = srcu_read_lock(&connections_srcu);
1703-
foreach_conn(shutdown_conn);
1612+
con = nodeid2con(nodeid, 0);
1613+
if (WARN_ON_ONCE(!con)) {
1614+
srcu_read_unlock(&connections_srcu, idx);
1615+
return;
1616+
}
1617+
1618+
WARN_ON_ONCE(!force && !list_empty(&con->writequeue));
1619+
clean_one_writequeue(con);
1620+
if (con->othercon)
1621+
clean_one_writequeue(con->othercon);
1622+
close_connection(con, true, true, true);
17041623
srcu_read_unlock(&connections_srcu, idx);
17051624
}
17061625

@@ -1912,8 +1831,6 @@ static const struct dlm_proto_ops dlm_tcp_ops = {
19121831
.listen_validate = dlm_tcp_listen_validate,
19131832
.listen_sockopts = dlm_tcp_listen_sockopts,
19141833
.listen_bind = dlm_tcp_listen_bind,
1915-
.shutdown_action = dlm_tcp_shutdown,
1916-
.eof_condition = tcp_eof_condition,
19171834
};
19181835

19191836
static int dlm_sctp_bind(struct socket *sock)

fs/dlm/lowcomms.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ bool dlm_lowcomms_is_running(void);
3434

3535
int dlm_lowcomms_start(void);
3636
void dlm_lowcomms_shutdown(void);
37+
void dlm_lowcomms_shutdown_node(int nodeid, bool force);
3738
void dlm_lowcomms_stop(void);
3839
void dlm_lowcomms_init(void);
3940
void dlm_lowcomms_exit(void);

fs/dlm/midcomms.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,18 +1426,22 @@ static void midcomms_shutdown(struct midcomms_node *node)
14261426
pr_debug("active shutdown timed out for node %d with state %s\n",
14271427
node->nodeid, dlm_state_str(node->state));
14281428
midcomms_node_reset(node);
1429+
dlm_lowcomms_shutdown_node(node->nodeid, true);
14291430
return;
14301431
}
14311432

14321433
pr_debug("active shutdown done for node %d with state %s\n",
14331434
node->nodeid, dlm_state_str(node->state));
1435+
dlm_lowcomms_shutdown_node(node->nodeid, false);
14341436
}
14351437

14361438
void dlm_midcomms_shutdown(void)
14371439
{
14381440
struct midcomms_node *node;
14391441
int i, idx;
14401442

1443+
dlm_lowcomms_shutdown();
1444+
14411445
mutex_lock(&close_lock);
14421446
idx = srcu_read_lock(&nodes_srcu);
14431447
for (i = 0; i < CONN_HASH_SIZE; i++) {
@@ -1455,8 +1459,6 @@ void dlm_midcomms_shutdown(void)
14551459
}
14561460
srcu_read_unlock(&nodes_srcu, idx);
14571461
mutex_unlock(&close_lock);
1458-
1459-
dlm_lowcomms_shutdown();
14601462
}
14611463

14621464
int dlm_midcomms_close(int nodeid)

0 commit comments

Comments
 (0)