Skip to content

Commit 489d8e5

Browse files
Alexander Aring authored and teigland committed
fs: dlm: add reliable connection if reconnect
This patch makes a TCP lowcomms connection reliable even if reconnects occur. This is done by application-layer retransmission handling and sequence numbers in the DLM protocol. There are three new DLM commands: DLM_OPTS: This encapsulates an existing DLM message (and RCOM messages if they don't have their own application-side retransmission handling). Optionally, additional TLVs (type-length-value fields) can be appended. This can be, for example, a sequence number field. However, because the lockspace field is unused in DLM_OPTS and a sequence number is a mandatory field, it isn't made a TLV; instead we put the sequence number inside the lockspace id. The possibility to add optional options is still there for future purposes. DLM_ACK: Just a DLM header to acknowledge the receipt of a DLM_OPTS message to its sender. DLM_FIN: This provides a 4-way handshake for connection termination, including support for half-closed connections. It's provided at the application layer because SCTP doesn't support half-closed sockets, the shutdown() call itself can be interrupted by e.g. TCP resets, and the logic is hard to implement because of the othercon paradigm in lowcomms. The 4-way termination handshake also solves the problem of synchronizing the arrival of the peer's EOF with the cluster manager removing the peer in the node membership handling of DLM. In some cases messages can still be transmitted during this time and we need to wait for the node membership event. To provide a reliable connection the node will retransmit all unacknowledged messages to its peer on reconnect. The receiver will then filter for the next expected message and drop all messages which are duplicates. As RCOM_STATUS and RCOM_NAMES messages are the first messages which are exchanged and they have their own retransmission handling, there exists logic that these messages must be first. When these messages arrive we store the DLM version field. This handling is the same on DLM 3.1 and, after this patch, on 3.2.
Backwards-compatibility handling has been added which seems to work in tests without tcpkill; however, it's not recommended to use DLM 3.1 and 3.2 at the same time, because DLM 3.2 tries to fix long-standing bugs in the DLM protocol. Signed-off-by: Alexander Aring <[email protected]> Signed-off-by: David Teigland <[email protected]>
1 parent 8e2e408 commit 489d8e5

File tree

7 files changed

+1292
-51
lines changed

7 files changed

+1292
-51
lines changed

fs/dlm/dlm_internal.h

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,26 +371,33 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
371371
/* dlm_header is first element of all structs sent between nodes */
372372

373373
#define DLM_HEADER_MAJOR 0x00030000
374-
#define DLM_HEADER_MINOR 0x00000001
374+
#define DLM_HEADER_MINOR 0x00000002
375+
376+
#define DLM_VERSION_3_1 0x00030001
377+
#define DLM_VERSION_3_2 0x00030002
375378

376379
#define DLM_HEADER_SLOTS 0x00000001
377380

378381
#define DLM_MSG 1
379382
#define DLM_RCOM 2
383+
#define DLM_OPTS 3
384+
#define DLM_ACK 4
385+
#define DLM_FIN 5
380386

381387
struct dlm_header {
382388
uint32_t h_version; /* NOTE(review): presumably DLM_HEADER_MAJOR | DLM_HEADER_MINOR -- confirm */
383389
union {
384390
/* for DLM_MSG and DLM_RCOM */
385391
uint32_t h_lockspace;
392+
/* for DLM_ACK and DLM_OPTS: sequence number, shares storage with h_lockspace */
393+
uint32_t h_seq;
386394
} u;
387395
uint32_t h_nodeid; /* nodeid of sender */
388396
uint16_t h_length; /* NOTE(review): presumably the total message length in bytes -- confirm */
389397
uint8_t h_cmd; /* DLM_MSG, DLM_RCOM, DLM_OPTS, DLM_ACK, DLM_FIN */
390398
uint8_t h_pad;
391399
};
392400

393-
394401
#define DLM_MSG_REQUEST 1
395402
#define DLM_MSG_CONVERT 2
396403
#define DLM_MSG_UNLOCK 3
@@ -458,10 +465,29 @@ struct dlm_rcom {
458465
char rc_buf[];
459466
};
460467

468+
struct dlm_opt_header {
469+
uint16_t t_type; /* option type */
470+
uint16_t t_length; /* NOTE(review): presumably the length of t_value in bytes -- confirm */
471+
uint32_t o_pad; /* padding: 2 + 2 + 4 bytes put t_value at offset 8 */
472+
/* need to be 8 byte aligned */
473+
char t_value[]; /* option payload (flexible array member) */
474+
};
475+
476+
/* encapsulation header */
477+
struct dlm_opts {
478+
struct dlm_header o_header; /* common header, placed first like in every other packet */
479+
uint8_t o_nextcmd; /* NOTE(review): presumably the h_cmd of the encapsulated message -- confirm */
480+
uint8_t o_pad;
481+
uint16_t o_optlen; /* NOTE(review): presumably the byte length of the options in o_opts -- confirm */
482+
uint32_t o_pad2;
483+
char o_opts[]; /* NOTE(review): presumably optional struct dlm_opt_header entries, then the encapsulated message -- confirm */
484+
};
485+
461486
union dlm_packet {
462487
struct dlm_header header; /* common to the other members */
463488
struct dlm_message message;
464489
struct dlm_rcom rcom;
490+
struct dlm_opts opts; /* DLM_OPTS encapsulation; begins with o_header */
465491
};
466492

467493
#define DLM_RSF_NEED_SLOTS 0x00000001

fs/dlm/lockspace.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,12 @@ static int new_lockspace(const char *name, const char *cluster,
567567
mutex_init(&ls->ls_requestqueue_mutex);
568568
mutex_init(&ls->ls_clear_proc_locks);
569569

570-
ls->ls_recover_buf = kmalloc(LOWCOMMS_MAX_TX_BUFFER_LEN, GFP_NOFS);
570+
/* Due backwards compatibility with 3.1 we need to use maximum
571+
* possible dlm message size to be sure the message will fit and
572+
* not having out of bounds issues. However on sending side 3.2
573+
* might send less.
574+
*/
575+
ls->ls_recover_buf = kmalloc(DEFAULT_BUFFER_SIZE, GFP_NOFS);
571576
if (!ls->ls_recover_buf)
572577
goto out_lkbidr;
573578

fs/dlm/lowcomms.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1762,8 +1762,10 @@ static void process_send_sockets(struct work_struct *work)
17621762

17631763
clear_bit(CF_WRITE_PENDING, &con->flags);
17641764

1765-
if (test_and_clear_bit(CF_RECONNECT, &con->flags))
1765+
if (test_and_clear_bit(CF_RECONNECT, &con->flags)) {
17661766
close_connection(con, false, false, true);
1767+
dlm_midcomms_unack_msg_resend(con->nodeid);
1768+
}
17671769

17681770
if (con->sock == NULL) { /* not mutex protected so check it inside too */
17691771
if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))

fs/dlm/lowcomms.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
#ifndef __LOWCOMMS_DOT_H__
1313
#define __LOWCOMMS_DOT_H__
1414

15-
#define LOWCOMMS_MAX_TX_BUFFER_LEN 4096
15+
#include "dlm_internal.h"
16+
17+
#define DLM_MIDCOMMS_OPT_LEN sizeof(struct dlm_opts)
18+
#define LOWCOMMS_MAX_TX_BUFFER_LEN (DEFAULT_BUFFER_SIZE - \
19+
DLM_MIDCOMMS_OPT_LEN)
20+
1621
#define CONN_HASH_SIZE 32
1722

1823
/* This is deliberately very simple because most clusters have simple

0 commit comments

Comments
 (0)