Skip to content

Commit d6bfb3d

Browse files
yuwmaoyuwmao
authored andcommitted
Add rollback http API for replace_member
1 parent 60152e0 commit d6bfb3d

File tree

11 files changed

+533
-45
lines changed

11 files changed

+533
-45
lines changed

conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class HomeObjectConan(ConanFile):
1212
name = "homeobject"
13-
version = "3.0.14"
13+
version = "3.0.15"
1414

1515
homepage = "https://github.com/eBay/HomeObject"
1616
description = "Blob Store built on HomeStore"

src/include/homeobject/pg_manager.hpp

Lines changed: 87 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ namespace homeobject {
1313
ENUM(PGError, uint16_t, UNKNOWN = 1, INVALID_ARG, TIMEOUT, UNKNOWN_PG, NOT_LEADER, UNKNOWN_PEER, UNSUPPORTED_OP,
1414
CRC_MISMATCH, NO_SPACE_LEFT, DRIVE_WRITE_ERROR, RETRY_REQUEST, SHUTTING_DOWN, ROLL_BACK, CANCELLED,
1515
QUORUM_NOT_MET);
16-
ENUM(PGReplaceMemberTaskStatus, uint16_t, COMPLETED = 0, IN_PROGRESS, NOT_LEADER, TASK_ID_MISMATCH, TASK_NOT_FOUND, UNKNOWN);
16+
ENUM(PGReplaceMemberTaskStatus, uint16_t, COMPLETED = 0, IN_PROGRESS, NOT_LEADER, TASK_ID_MISMATCH, TASK_NOT_FOUND,
17+
UNKNOWN);
1718
// https://github.corp.ebay.com/SDS/nuobject_proto/blob/main/src/proto/pg.proto#L52
1819
ENUM(PGStateMask, uint32_t, HEALTHY = 0, DISK_DOWN = 0x1, SCRUBBING = 0x2, BASELINE_RESYNC = 0x4, INCONSISTENT = 0x8,
1920
REPAIR = 0x10, GC_IN_PROGRESS = 0x20, RESYNCING = 0x40);
@@ -40,6 +41,12 @@ struct PGMember {
4041

4142
using MemberSet = std::set< PGMember >;
4243

44+
struct replace_member_task {
45+
std::string task_id; // Unique task id for this replace member operation
46+
uuid_t replica_out; // The replica which is going to be replaced
47+
uuid_t replica_in; // The replica which is going to be added in place of replica_out
48+
};
49+
4350
struct PGInfo {
4451
explicit PGInfo(pg_id_t _id) : id(_id) {}
4552
pg_id_t id;
@@ -55,14 +62,10 @@ struct PGInfo {
5562

5663
// check if the PGInfo has same id, size and members with the rhs PGInfo.
5764
bool is_equivalent_to(PGInfo const& rhs) const {
58-
if (id != rhs.id || size != rhs.size || members.size() != rhs.members.size()) {
59-
return false;
60-
}
65+
if (id != rhs.id || size != rhs.size || members.size() != rhs.members.size()) { return false; }
6166
for (auto const& m : members) {
6267
auto it = rhs.members.find(m);
63-
if (it == rhs.members.end() || it->priority != m.priority) {
64-
return false;
65-
}
68+
if (it == rhs.members.end() || it->priority != m.priority) { return false; }
6669
}
6770
return true;
6871
}
@@ -72,13 +75,13 @@ struct PGInfo {
7275
uint32_t i = 0ul;
7376
for (auto const& m : members) {
7477
if (i++ > 0) { members_str += ", "; }
75-
members_str += fmt::format("member-{}: id={}, name={}, priority={}",
76-
i, boost::uuids::to_string(m.id), m.name, m.priority);
78+
members_str += fmt::format("member-{}: id={}, name={}, priority={}", i, boost::uuids::to_string(m.id),
79+
m.name, m.priority);
7780
}
7881
return fmt::format("PGInfo: id={}, replica_set_uuid={}, size={}, chunk_size={}, "
7982
"expected_member_num={}, members={}",
80-
id, boost::uuids::to_string(replica_set_uuid), size, chunk_size,
81-
expected_member_num, members_str);
83+
id, boost::uuids::to_string(replica_set_uuid), size, chunk_size, expected_member_num,
84+
members_str);
8285
}
8386
};
8487

@@ -91,25 +94,19 @@ struct peer_info {
9194
};
9295

9396
struct pg_state {
94-
std::atomic<uint64_t> state{0};
97+
std::atomic< uint64_t > state{0};
9598

9699
explicit pg_state(uint64_t s) : state{s} {}
97100

98-
void set_state(PGStateMask mask) {
99-
state.fetch_or(static_cast<uint64_t>(mask), std::memory_order_relaxed);
100-
}
101+
void set_state(PGStateMask mask) { state.fetch_or(static_cast< uint64_t >(mask), std::memory_order_relaxed); }
101102

102-
void clear_state(PGStateMask mask) {
103-
state.fetch_and(~static_cast<uint64_t>(mask), std::memory_order_relaxed);
104-
}
103+
void clear_state(PGStateMask mask) { state.fetch_and(~static_cast< uint64_t >(mask), std::memory_order_relaxed); }
105104

106105
bool is_state_set(PGStateMask mask) const {
107-
return (state.load(std::memory_order_relaxed) & static_cast<uint64_t>(mask)) != 0;
106+
return (state.load(std::memory_order_relaxed) & static_cast< uint64_t >(mask)) != 0;
108107
}
109108

110-
uint64_t get() const {
111-
return state.load(std::memory_order_relaxed);
112-
}
109+
uint64_t get() const { return state.load(std::memory_order_relaxed); }
113110
};
114111

115112
struct PGStats {
@@ -171,12 +168,13 @@ struct PGReplaceMemberStatus {
171168
class PGManager : public Manager< PGError > {
172169
public:
173170
virtual NullAsyncResult create_pg(PGInfo&& pg_info, trace_id_t tid = 0) = 0;
174-
virtual NullAsyncResult replace_member(pg_id_t id, std::string& task_id, peer_id_t const& old_member, PGMember const& new_member,
175-
u_int32_t commit_quorum = 0, trace_id_t tid = 0) = 0;
176-
virtual PGReplaceMemberStatus get_replace_member_status(pg_id_t id, std::string& task_id, const PGMember& old_member,
177-
const PGMember& new_member,
178-
const std::vector< PGMember >& others,
179-
uint64_t trace_id = 0) const = 0;
171+
virtual NullAsyncResult replace_member(pg_id_t id, std::string& task_id, peer_id_t const& old_member,
172+
PGMember const& new_member, u_int32_t commit_quorum = 0,
173+
trace_id_t tid = 0) = 0;
174+
virtual PGReplaceMemberStatus get_replace_member_status(pg_id_t id, std::string& task_id,
175+
const PGMember& old_member, const PGMember& new_member,
176+
const std::vector< PGMember >& others,
177+
uint64_t trace_id = 0) const = 0;
180178

181179
/**
182180
* Retrieves the statistics for a specific PG (Placement Group) identified by its ID.
@@ -201,6 +199,67 @@ class PGManager : public Manager< PGError > {
201199
* @param pg_id The ID of the PG.
202200
*/
203201
virtual void destroy_pg(pg_id_t pg_id) = 0;
202+
203+
/**
204+
* @brief Single member exits a PG (Placement Group) identified by its ID.
205+
* @param pg_id The ID of the PG.
206+
* @param trace_id The trace identifier for logging and tracking purposes.
207+
*/
208+
virtual NullResult exit_pg(pg_id_t pg_id, uint64_t trace_id) = 0;
209+
210+
/**
211+
* @brief Toggle the learner flag for a specified member.
212+
*
213+
* This function changes the state of the learner flag for a given member in the PG.
214+
* It is typically used to revert the learner flag back to false when rolling back pgmove.
215+
*
216+
* @param pg_id The ID of the PG where the member resides.
217+
* @param member_id The ID of the member whose learner flag is to be toggled.
218+
* @param is_learner The new state of the learner flag (true to set as learner, false to unset).
219+
* @param commit_quorum The quorum required for committing the change.
220+
* @param trace_id The trace ID for tracking the operation.
221+
* @return NullAsyncResult indicating the result of the operation.
222+
*/
223+
virtual NullAsyncResult flip_learner_flag(pg_id_t pg_id, peer_id_t const& member_id, bool is_learner,
224+
uint32_t commit_quorum, trace_id_t trace_id) = 0;
225+
226+
/**
227+
* @brief Remove a member from the PG.
228+
*
229+
* This function removes a specified member from the PG, typically used to rollback the pgmove operation.
230+
*
231+
* @param pg_id The ID of the PG from which the member is to be removed.
232+
* @param member_id The ID of the member to be removed.
233+
* @param commit_quorum The quorum required for committing the removal.
234+
* @param trace_id The trace ID for tracking the operation.
235+
* @return NullAsyncResult indicating the result of the operation.
236+
*/
237+
virtual NullAsyncResult remove_member(pg_id_t pg_id, peer_id_t const& member_id, uint32_t commit_quorum,
238+
trace_id_t trace_id) = 0;
239+
240+
/**
241+
* @brief Clean up the replace member task in the PG.
242+
*
243+
* This function cleans up the replace member task, typically used to rollback the pgmove operation.
244+
*
245+
* @param pg_id The ID of the PG where the task is to be cleaned.
246+
* @param task_id The ID of the task to be cleaned.
247+
* @param commit_quorum The quorum required for committing the task cleanup.
248+
* @param trace_id The trace ID for tracking the operation.
249+
* @return NullAsyncResult indicating the result of the operation.
250+
*/
251+
virtual NullAsyncResult clean_replace_member_task(pg_id_t pg_id, std::string& task_id, uint32_t commit_quorum,
252+
trace_id_t trace_id) = 0;
253+
254+
/**
255+
* @brief List all replace member tasks happening on this homeobject instance.
256+
*
257+
* This function retrieves a list of all ongoing tasks on this homeobject instance.
258+
*
259+
* @param trace_id The trace ID for tracking the operation.
260+
* @return Result containing a vector of replace member tasks.
261+
*/
262+
virtual Result< std::vector< replace_member_task > > list_all_replace_member_tasks(trace_id_t trace_id) = 0;
204263
};
205264

206265
} // namespace homeobject

src/lib/homeobject_impl.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,20 @@ class HomeObjectImpl : public HomeObject,
113113

114114
virtual void _destroy_pg(pg_id_t pg_id) = 0;
115115

116+
virtual PGManager::NullResult _exit_pg(pg_id_t pg_id, trace_id_t trace_id) = 0;
117+
118+
virtual PGManager::NullAsyncResult _flip_learner_flag(pg_id_t pg_id, peer_id_t const& member_id, bool is_learner,
119+
uint32_t commit_quorum, trace_id_t trace_id) = 0;
120+
121+
virtual PGManager::NullAsyncResult _remove_member(pg_id_t pg_id, peer_id_t const& member_id,
122+
uint32_t commit_quorum, trace_id_t trace_id) = 0;
123+
124+
virtual PGManager::NullAsyncResult _clean_replace_member_task(pg_id_t pg_id, std::string& task_id,
125+
uint32_t commit_quorum, trace_id_t trace_id) = 0;
126+
127+
virtual PGManager::Result< std::vector< replace_member_task > >
128+
_list_all_replace_member_tasks(trace_id_t trace_id) = 0;
129+
116130
protected:
117131
std::mutex _repl_lock;
118132
peer_id_t _our_id;
@@ -164,6 +178,15 @@ class HomeObjectImpl : public HomeObject,
164178
bool get_stats(pg_id_t id, PGStats& stats) const final;
165179
void get_pg_ids(std::vector< pg_id_t >& pg_ids) const final;
166180
void destroy_pg(pg_id_t pg_id) final;
181+
PGManager::NullResult exit_pg(pg_id_t pg_id, trace_id_t trace_id) final;
182+
PGManager::NullAsyncResult flip_learner_flag(pg_id_t pg_id, peer_id_t const& member_id, bool is_learner,
183+
uint32_t commit_quorum, trace_id_t trace_id) final;
184+
PGManager::NullAsyncResult remove_member(pg_id_t pg_id, peer_id_t const& member_id, uint32_t commit_quorum,
185+
trace_id_t trace_id) final;
186+
PGManager::NullAsyncResult clean_replace_member_task(pg_id_t pg_id, std::string& task_id, uint32_t commit_quorum,
187+
trace_id_t trace_id) final;
188+
PGManager::Result< std::vector< replace_member_task > >
189+
list_all_replace_member_tasks(trace_id_t trace_id) final;
167190

168191
/// ShardManager
169192
ShardManager::AsyncResult< ShardInfo > get_shard(shard_id_t id, trace_id_t tid) const final;

src/lib/homestore_backend/hs_homeobject.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ class HSHomeObject : public HomeObjectImpl {
7676

7777
HomeObjectStats _get_stats() const override;
7878
void _destroy_pg(pg_id_t pg_id) override;
79+
PGManager::NullResult _exit_pg(pg_id_t pg_id, trace_id_t trace_id) override;
80+
81+
PGManager::NullAsyncResult _flip_learner_flag(pg_id_t pg_id, peer_id_t const& member_id, bool is_learner,
82+
uint32_t commit_quorum, trace_id_t trace_id) override;
83+
PGManager::NullAsyncResult _remove_member(pg_id_t pg_id, peer_id_t const& member_id, uint32_t commit_quorum,
84+
trace_id_t trace_id) override;
85+
PGManager::NullAsyncResult _clean_replace_member_task(pg_id_t pg_id, std::string& task_id, uint32_t commit_quorum,
86+
trace_id_t trace_id) override;
87+
PGManager::Result< std::vector< replace_member_task > >
88+
_list_all_replace_member_tasks(trace_id_t trace_id) override;
7989

8090
// Mapping from index table uuid to pg id.
8191
std::shared_mutex index_lock_;
@@ -367,6 +377,11 @@ class HSHomeObject : public HomeObjectImpl {
367377
* Returns all shards
368378
*/
369379
std::vector< Shard > get_chunk_shards(homestore::chunk_num_t v_chunk_id) const;
380+
381+
/**
382+
* Update membership in pg's superblock.
383+
*/
384+
void update_membership(const MemberSet& members);
370385
};
371386

372387
struct HS_Shard : public Shard {
@@ -778,6 +793,8 @@ class HSHomeObject : public HomeObjectImpl {
778793
const homestore::replica_member_info& member_out,
779794
const homestore::replica_member_info& member_in, trace_id_t tid);
780795

796+
void on_remove_member(homestore::group_id_t group_id, const peer_id_t& member, trace_id_t tid = 0);
797+
781798
/**
782799
* @brief Cleans up and recycles resources for the PG identified by the given pg_id on the current node.
783800
*

0 commit comments

Comments
 (0)