Skip to content

Commit 7f1a005

Browse files
authored
read_snapshot_obj: Pass user_ctx by reference to fix race condition (eBay#836)
There is a race condition where read_snapshot_obj may access a freed user_ctx pointer due to concurrent free_user_snp_ctx calls. This occurs because user_ctx was stored in the snapshot_obj struct as an intermediate copy, making it vulnerable to being freed while still in use. Fix by passing user_ctx as a reference parameter, allowing the listener to modify the caller's context directly without intermediate storage. Signed-off-by: Jilong Kou <jkou@ebay.com>
1 parent 377d847 commit 7f1a005

File tree

3 files changed

+9
-8
lines changed

3 files changed

+9
-8
lines changed

conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class HomestoreConan(ConanFile):
1111
name = "homestore"
12-
version = "7.0.4"
12+
version = "7.0.5"
1313

1414
homepage = "https://github.com/eBay/Homestore"
1515
description = "HomeStore Storage Engine"

src/include/homestore/replication/repl_dev.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,13 @@ class snapshot_context {
9999
};
100100

101101
struct snapshot_obj {
102-
void* user_ctx{nullptr};
102+
void *&user_ctx;
103103
uint64_t offset{0};
104104
sisl::io_blob_safe blob;
105105
bool is_first_obj{false};
106106
bool is_last_obj{false};
107+
108+
snapshot_obj(void *&ctx) : user_ctx(ctx) {}
107109
};
108110

109111
// HomeStore has some meta information to be transmitted during the baseline resync,
@@ -402,7 +404,7 @@ class ReplDevListener {
402404
/// times on the leader till all the data is transferred to the follower. is_last_obj in
403405
/// snapshot_obj will be true once all the data has been trasnferred. After this the raft on
404406
/// the follower side can do the incremental resync.
405-
virtual int read_snapshot_obj(shared< snapshot_context > context, shared< snapshot_obj > snp_obj) = 0;
407+
virtual int read_snapshot_obj(shared<snapshot_context> context, shared<snapshot_obj> snp_obj) = 0;
406408

407409
/// @brief Called on the follower when the leader sends the data during the baseline resyc.
408410
/// is_last_obj in in snapshot_obj will be true once all the data has been transfered.

src/lib/replication/repl_dev/raft_state_machine.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,14 +365,12 @@ int RaftStateMachine::read_logical_snp_obj(nuraft::snapshot& s, void*& user_ctx,
365365
return 0;
366366
}
367367
auto snp_ctx = std::make_shared< nuraft_snapshot_context >(s);
368-
auto snp_data = std::make_shared< snapshot_obj >();
369-
snp_data->user_ctx = user_ctx;
368+
auto snp_data = std::make_shared< snapshot_obj >(user_ctx);
370369
snp_data->offset = obj_id;
371370
snp_data->is_last_obj = is_last_obj;
372371

373-
// Listener will read the snapshot data and we pass through the same.
372+
// Listener will read the snapshot data and modify user_ctx through the reference
374373
int ret = m_rd.m_listener->read_snapshot_obj(snp_ctx, snp_data);
375-
user_ctx = snp_data->user_ctx; // Have to pass the user_ctx to NuRaft even if ret<0 to get it freed later
376374
if (ret < 0) return ret;
377375

378376
is_last_obj = snp_data->is_last_obj;
@@ -395,7 +393,8 @@ void RaftStateMachine::save_logical_snp_obj(nuraft::snapshot& s, ulong& obj_id,
395393
return;
396394
}
397395
auto snp_ctx = std::make_shared< nuraft_snapshot_context >(s);
398-
auto snp_data = std::make_shared< snapshot_obj >();
396+
void* dummy_ctx = nullptr;
397+
auto snp_data = std::make_shared< snapshot_obj >(dummy_ctx);
399398
snp_data->offset = obj_id;
400399
snp_data->is_first_obj = is_first_obj;
401400
snp_data->is_last_obj = is_last_obj;

0 commit comments

Comments
 (0)