
Commit b2dc84b

murphyjacob4 authored and madolson committed
Fix crash that occurs sometimes when aborting a slot migration while child snapshot is active (#2721)
The race condition causes the client to be used, and subsequently double freed, by the slot migration pipe read handler. The order of events is:

1. We kill the slot migration child process during CANCELSLOTMIGRATIONS.
2. We then free the associated client to the target node.
3. Although we kill the child process, the pipe from child to parent is not guaranteed to be empty.
4. If the pipe is not empty, we later read it out in slotMigrationPipeReadHandler.
5. In the pipe read handler, we attempt to write to the connection. If writing to the connection fails, we attempt to free the client.
6. However, the client was already freed, so this is a double free.

Notably, the slot migration abort doesn't need to be triggered by `CANCELSLOTMIGRATIONS`; it can be any failure. To solve this, we simply:

1. Set the slot migration pipe connection to NULL whenever it is unlinked.
2. Bail out early in the slot migration pipe read handler if the connection is NULL.

I also consolidate the killSlotMigrationChild call to one code path, which is executed on client unlink. Before, there were two code paths that would do this twice (once on slot migration job finish, and once on client unlink). Sending the signal twice is fine, but inefficient.

Also, add a test that cancels during the slot migration snapshot to make sure this case is covered (we previously only caught it during the module test).

---------

Signed-off-by: Jacob Murphy <[email protected]>
(cherry picked from commit 28e5dcc)
Signed-off-by: cherukum-amazon <[email protected]>
1 parent e761433 commit b2dc84b
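For illustration, the sketch below distills the guard pattern the fix relies on: a shared pipe-connection pointer is cleared when its client is unlinked, and the pipe read handler bails out when that pointer is NULL, so leftover pipe data after an abort can no longer reach a freed client. All names and types here (connection, unlink_client, pipe_read_handler) are simplified stand-ins, not the server's actual definitions.

```c
/* Minimal sketch of the double-free guard, using simplified stand-in
 * types rather than the server's real client/connection structs. */
#include <stdio.h>
#include <stdlib.h>

typedef struct client { int fd; } client;
typedef struct connection { client *priv; } connection;

/* Shared state, analogous to server.slot_migration_pipe_conn. */
static connection *pipe_conn = NULL;

static void unlink_client(connection *conn) {
    /* Part one of the fix: clear the shared pointer before the client
     * goes away, so later handler runs see NULL, not a stale conn. */
    if (pipe_conn == conn) pipe_conn = NULL;
    free(conn->priv);
    conn->priv = NULL;
}

static void pipe_read_handler(void) {
    /* Part two of the fix: bail out early if the connection is gone. */
    if (!pipe_conn) {
        printf("leftover pipe data ignored: connection already closed\n");
        return;
    }
    printf("forwarding snapshot bytes to client fd %d\n", pipe_conn->priv->fd);
}

int main(void) {
    client *c = malloc(sizeof(*c));
    c->fd = 42;
    connection conn = { .priv = c };
    pipe_conn = &conn;

    pipe_read_handler();  /* live connection: data is forwarded */
    unlink_client(&conn); /* abort path: client freed, pointer cleared */
    pipe_read_handler();  /* residual pipe data: guard prevents use-after-free */
    return 0;
}
```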

File tree

4 files changed: +38 −22 lines changed


src/cluster_migrateslots.c

Lines changed: 0 additions & 3 deletions
@@ -2309,9 +2309,6 @@ void finishSlotMigrationJob(slotMigrationJob *job,
     /* If we finish the export, we should not remain paused */
     job->mf_end = 0;
     slotExportTryUnpause();
-    /* Fast fail the child process, which will be cleaned up fully in
-     * checkChildrenDone. */
-    if (job->state == SLOT_EXPORT_SNAPSHOTTING) killSlotMigrationChild();
 }
 
 /* Imports that are not successful on primaries need to be cleaned up (if

src/networking.c

Lines changed: 10 additions & 7 deletions
@@ -1883,8 +1883,7 @@ void unlinkClient(client *c) {
      * snapshot child may take some time to die, during which the migration will continue past
      * the snapshot state. */
     if (c->repl_data && server.rdb_pipe_conns &&
-        ((c->flag.replica && c->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) ||
-         (c->slot_migration_job && !isImportSlotMigrationJob(c->slot_migration_job)))) {
+        ((c->flag.replica && c->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END))) {
         int i;
         int still_alive = 0;
         for (i = 0; i < server.rdb_pipe_numconns; i++) {
@@ -1895,14 +1894,18 @@ void unlinkClient(client *c) {
             if (server.rdb_pipe_conns[i]) still_alive++;
         }
         if (still_alive == 0) {
-            if (c->slot_migration_job && !isImportSlotMigrationJob(c->slot_migration_job)) {
-                serverLog(LL_NOTICE, "Slot migration snapshot, migration target dropped, killing fork child.");
-            } else {
-                serverLog(LL_NOTICE, "Diskless rdb transfer, last replica dropped, killing fork child.");
-            }
+            serverLog(LL_NOTICE, "Diskless rdb transfer, last replica dropped, killing fork child.");
             killRDBChild();
         }
     }
+    /* Check if this is the slot migration client we are writing to in a
+     * child process. */
+    if (c->slot_migration_job && !isImportSlotMigrationJob(c->slot_migration_job) &&
+        server.slot_migration_pipe_conn == c->conn) {
+        server.slot_migration_pipe_conn = NULL;
+        serverLog(LL_NOTICE, "Slot migration target dropped, killing fork child.");
+        killSlotMigrationChild();
+    }
     /* Only use shutdown when the fork is active and we are the parent. */
     if (server.child_type && !c->flag.repl_rdb_channel) {
         connShutdown(c->conn);
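As an aside on the consolidation: the commit notes that sending the kill signal twice is fine, but inefficient. The hypothetical pid-guarded helper below (not the server's actual killSlotMigrationChild, which leaves reaping to checkChildrenDone) illustrates why a duplicate call is harmless — it becomes a no-op once the pid is cleared.

```c
/* Hypothetical sketch: a pid guard makes repeated kill calls no-ops.
 * The real server clears its child pid when checkChildrenDone reaps it. */
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t snapshot_child_pid = -1; /* assumed global child handle */

static void kill_snapshot_child(void) {
    if (snapshot_child_pid == -1) return; /* duplicate call: nothing to do */
    kill(snapshot_child_pid, SIGKILL);    /* harmless even if already dead */
    waitpid(snapshot_child_pid, NULL, 0); /* reap the zombie */
    snapshot_child_pid = -1;
}

int main(void) {
    snapshot_child_pid = fork();
    if (snapshot_child_pid == 0) { /* child: block until killed */
        pause();
        _exit(0);
    }
    kill_snapshot_child(); /* kills and reaps exactly once */
    kill_snapshot_child(); /* second call is a no-op */
    printf("child cleaned up once; duplicate kill was a no-op\n");
    return 0;
}
```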

src/replication.c

Lines changed: 5 additions & 3 deletions
@@ -1849,15 +1849,17 @@ void slotMigrationPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *c
     UNUSED(eventLoop);
     if (!server.slot_migration_pipe_buff) server.slot_migration_pipe_buff = zmalloc(PROTO_IOBUF_LEN);
 
+    /* No work to do if our connection has been closed. */
+    if (!server.slot_migration_pipe_conn) return;
+
     while (1) {
         server.slot_migration_pipe_bufflen = read(fd, server.slot_migration_pipe_buff, PROTO_IOBUF_LEN);
         if (server.slot_migration_pipe_bufflen < 0) {
             if (errno == EAGAIN || errno == EWOULDBLOCK) return;
             serverLog(LL_WARNING, "Slot migration, read error sending snapshot to target: %s", strerror(errno));
             client *target = connGetPrivateData(server.slot_migration_pipe_conn);
-            freeClient(target);
+            freeClient(target); /* Free client will kill the slot migration child */
             server.slot_migration_pipe_conn = NULL;
-            killSlotMigrationChild();
             return;
         }
 
@@ -1879,7 +1881,7 @@ void slotMigrationPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *c
         if (connGetState(conn) != CONN_STATE_CONNECTED) {
             serverLog(LL_WARNING, "Slot migration transfer, write error sending DB to target: %s",
                       connGetLastError(conn));
-            freeClient(target);
+            freeClient(target); /* Free client will kill the slot migration child */
             server.slot_migration_pipe_conn = NULL;
             return;
         }

tests/unit/cluster/cluster-migrateslots.tcl

Lines changed: 23 additions & 9 deletions
@@ -386,6 +386,15 @@ start_cluster 3 3 {tags {logreqres:skip external:skip cluster} overrides {cluste
         set_debug_prevent_pause 0
     }
 
+    set 0_slot_tag "{06S}"
+    set 1_slot_tag "{Qi}"
+    set 5462_slot_tag "{450}"
+    set 16379_slot_tag "{YY}"
+    set 16380_slot_tag "{wu}"
+    set 16381_slot_tag "{0TG}"
+    set 16382_slot_tag "{4oi}"
+    set 16383_slot_tag "{6ZJ}"
+
     test "Test CLUSTER CANCELSLOTMIGRATIONS" {
         set_debug_prevent_pause 1
         assert_match "OK" [R 2 CLUSTER MIGRATESLOTS SLOTSRANGE 16382 16382 NODE $node0_id]
@@ -407,19 +416,24 @@ start_cluster 3 3 {tags {logreqres:skip external:skip cluster} overrides {cluste
         wait_for_migration_field 0 $jobname1 state failed
         wait_for_migration_field 0 $jobname2 state failed
 
+        # Do it again, but during snapshotting
+        # 50 keys * 100ms/key = 5 sec snapshot time
+        R 2 CONFIG SET rdb-key-save-delay 100000
+        populate 50 "$16383_slot_tag:1:" 1000 -2
+        assert_match "OK" [R 2 CLUSTER MIGRATESLOTS SLOTSRANGE 16383 16383 NODE $node0_id]
+        set jobname [get_job_name 2 16383]
+        wait_for_migration_field 2 $jobname state snapshotting
+        assert_match "OK" [R 2 CLUSTER CANCELSLOTMIGRATIONS]
+        R 2 CONFIG SET rdb-key-save-delay 0
+
+        # Jobs are no longer up, migration logs say cancelled
+        assert {[dict get [get_migration_by_name 2 $jobname] state] eq "cancelled"}
+        wait_for_migration_field 0 $jobname state failed
+
         # Cleanup
         set_debug_prevent_pause 0
     }
 
-    set 0_slot_tag "{06S}"
-    set 1_slot_tag "{Qi}"
-    set 5462_slot_tag "{450}"
-    set 16379_slot_tag "{YY}"
-    set 16380_slot_tag "{wu}"
-    set 16381_slot_tag "{0TG}"
-    set 16382_slot_tag "{4oi}"
-    set 16383_slot_tag "{6ZJ}"
-
     test "Slot migration won't migrate the functions" {
         assert_does_not_resync {
             # R 2 load a function then trigger a slot migration to R 0