Skip to content

Commit c00ab0d

Browse files
committed
flush dc_lsn before persisting shard metablk in on_commit of sealing shard
1 parent a64afa0 commit c00ab0d

File tree

4 files changed

+52
-24
lines changed

4 files changed

+52
-24
lines changed

conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
class HomeObjectConan(ConanFile):
1212
name = "homeobject"
13-
version = "3.0.19"
13+
version = "3.0.20"
1414

1515
homepage = "https://github.com/eBay/HomeObject"
1616
description = "Blob Store built on HomeStore"

src/lib/homestore_backend/gc_manager.cpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ SISL_LOGGING_DECL(gcmgr)
2525
GCManager::GCManager(HSHomeObject* homeobject) :
2626
m_chunk_selector{homeobject->chunk_selector()}, m_hs_home_object{homeobject} {
2727
homestore::meta_service().register_handler(
28-
GCManager::_gc_actor_meta_name,
28+
_gc_actor_meta_name,
2929
[this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
3030
on_gc_actor_meta_blk_found(std::move(buf), voidptr_cast(mblk));
3131
},
3232
nullptr, true);
3333

3434
homestore::meta_service().register_handler(
35-
GCManager::_gc_reserved_chunk_meta_name,
35+
_gc_reserved_chunk_meta_name,
3636
[this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
3737
on_reserved_chunk_meta_blk_found(std::move(buf), voidptr_cast(mblk));
3838
},
@@ -44,7 +44,7 @@ GCManager::GCManager(HSHomeObject* homeobject) :
4444
true);
4545

4646
homestore::meta_service().register_handler(
47-
GCManager::_gc_task_meta_name,
47+
_gc_task_meta_name,
4848
[this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
4949
on_gc_task_meta_blk_found(std::move(buf), voidptr_cast(mblk));
5050
},
@@ -63,8 +63,8 @@ void GCManager::on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta
6363

6464
// here, we are under the protection of the lock of metaservice. however, we will also try to update pg and shard
6565
// metablk and then destroy the gc_task_sb, which will also try to acquire the lock of metaservice, as a result, a
66-
// dead lock will happen. so here we will handle all the gc tasks after read all the their metablks
67-
m_recovered_gc_tasks.emplace_back(GCManager::_gc_task_meta_name);
66+
// deadlock will happen. so here we will handle all the gc tasks after reading all the metablks
67+
m_recovered_gc_tasks.emplace_back(_gc_task_meta_name);
6868
m_recovered_gc_tasks.back().load(buf, meta_cookie);
6969
}
7070

@@ -89,7 +89,7 @@ void GCManager::handle_all_recovered_gc_tasks() {
8989
}
9090

9191
void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
92-
m_gc_actor_sbs.emplace_back(GCManager::_gc_actor_meta_name);
92+
m_gc_actor_sbs.emplace_back(_gc_actor_meta_name);
9393
auto& gc_actor_sb = m_gc_actor_sbs.back();
9494
gc_actor_sb.load(buf, meta_cookie);
9595
auto pdev_id = gc_actor_sb->pdev_id;
@@ -100,8 +100,7 @@ void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* met
100100
}
101101

102102
void GCManager::on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
103-
homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb(
104-
GCManager::_gc_reserved_chunk_meta_name);
103+
homestore::superblk< gc_reserved_chunk_superblk > reserved_chunk_sb(_gc_reserved_chunk_meta_name);
105104
auto chunk_id = reserved_chunk_sb.load(buf, meta_cookie)->chunk_id;
106105
auto EXVchunk = m_chunk_selector->get_extend_vchunk(chunk_id);
107106
if (EXVchunk == nullptr) {
@@ -184,8 +183,7 @@ folly::SemiFuture< bool > GCManager::submit_gc_task(task_priority priority, chun
184183
}
185184

186185
std::shared_ptr< GCManager::pdev_gc_actor >
187-
GCManager::try_create_pdev_gc_actor(uint32_t pdev_id,
188-
const homestore::superblk< GCManager::gc_actor_superblk >& gc_actor_sb) {
186+
GCManager::try_create_pdev_gc_actor(uint32_t pdev_id, const homestore::superblk< gc_actor_superblk >& gc_actor_sb) {
189187
auto const [it, happened] = m_pdev_gc_actors.try_emplace(
190188
pdev_id, std::make_shared< pdev_gc_actor >(gc_actor_sb, m_chunk_selector, m_hs_home_object));
191189
RELEASE_ASSERT((it != m_pdev_gc_actors.end()), "Unexpected error in m_pdev_gc_actors!!!");
@@ -776,7 +774,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data(
776774
#endif
777775
// TODO: involve ratelimiter in the following code, where read/write are scheduled. or do we need a central
778776
// ratelimiter shared by all components except client io?
779-
auto succeed_copying_shard =
777+
const auto succeed_copying_shard =
780778
// 1 write the shard header to move_to_chunk
781779
data_service.async_alloc_write(header_sgs, hints, out_blkids)
782780
.thenValue([this, &hints, &move_to_chunk, &move_from_chunk, &is_last_shard, &shard_id, &blk_size,
@@ -968,11 +966,9 @@ bool GCManager::pdev_gc_actor::copy_valid_data(
968966
move_from_chunk, move_to_chunk);
969967
return false;
970968
}
971-
972969
GCLOGD(task_id, pg_id, shard_id, "successfully copy blobs from move_from_chunk={} to move_to_chunk={}",
973970
move_from_chunk, move_to_chunk);
974971
}
975-
976972
GCLOGD(task_id, pg_id, NO_SHARD_ID, "all valid blobs are copied from move_from_chunk={} to move_to_chunk={}",
977973
move_from_chunk, move_to_chunk);
978974

@@ -1132,17 +1128,14 @@ bool GCManager::pdev_gc_actor::compare_blob_indexes(
11321128
GCLOGW(task_id, pg_id, shard_id, "copied blob: move_to_chunk={}, blob_id={}, pba={}", k.chunk, k.blob,
11331129
v.pbas().to_string());
11341130
}
1135-
11361131
GCLOGW(task_id, pg_id, NO_SHARD_ID, "start printing valid blobs from gc index table:");
11371132
for (const auto& [k, v] : valid_blob_indexes) {
11381133
const auto shard_id = k.key().shard;
11391134
GCLOGW(task_id, pg_id, shard_id, "valid blob: move_to_chunk={}, blob_id={}, pba={}", k.key().chunk,
11401135
k.key().blob, v.pbas().to_string());
11411136
}
1142-
11431137
RELEASE_ASSERT(false, "copied blobs are not the same as the valid blobs got from gc index table");
11441138
}
1145-
11461139
return ret;
11471140
}
11481141

@@ -1325,6 +1318,7 @@ bool GCManager::pdev_gc_actor::process_after_gc_metablk_persisted(
13251318
}
13261319
}
13271320

1321+
// now, all the blob indexes have been replaced successfully, we can destroy the gc task superblk
13281322
gc_task_sb.destroy();
13291323

13301324
const auto reclaimed_blk_count = m_chunk_selector->get_extend_vchunk(move_from_chunk)->get_used_blks() -

src/lib/homestore_backend/hs_homeobject.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,14 @@ void HSHomeObject::init_homestore() {
251251
zpad_bufs_[i] = std::move(sisl::io_blob_safe(uint32_cast(size), io_align));
252252
std::memset(zpad_bufs_[i].bytes(), 0, size);
253253
}
254+
255+
// when reaching here, all the logs have been replayed, we can start gc now.
256+
if (HS_BACKEND_DYNAMIC_CONFIG(enable_gc)) {
257+
LOGI("Starting GC manager");
258+
gc_mgr_->start();
259+
} else {
260+
LOGI("GC is disabled");
261+
}
254262
}
255263

256264
void HSHomeObject::on_replica_restart() {
@@ -349,14 +357,17 @@ void HSHomeObject::on_replica_restart() {
349357
homestore::meta_service().read_sub_sb(GCManager::_gc_reserved_chunk_meta_name);
350358
homestore::meta_service().read_sub_sb(GCManager::_gc_task_meta_name);
351359

352-
gc_mgr_->handle_all_recovered_gc_tasks();
360+
// when reaching here, log replay has not started yet. we need to handle all the recovered gc tasks before
361+
// replaying log. when log replay done, in ReplicationStateMachine::on_log_replay_done, we need
362+
// select_specific_chunk for all the chunks with open shard to mark the states of these chunks to inuse. if
363+
// crash happens after the shard metablk has been updated(the pchunk of this shard is changed to
364+
// move_to_chunk) but before reserved_chunk_superblk has been persisted (move_to_chunk is now still a reserved
365+
// chunk), when log replay is done and try to select_specific_chunk for the chunk with open shard, since the
366+
// state of move_to_chunk is reserved, and thus its state is GC and can not be selected, and will be stuck in
367+
// on_log_replay_done. after handling all the recovered gc tasks, move_to_chunk will be marked to inuse, and
368+
// thus can be selected in on_log_replay_done, and the log replay can be completed successfully.
353369

354-
if (HS_BACKEND_DYNAMIC_CONFIG(enable_gc)) {
355-
LOGI("Starting GC manager");
356-
gc_mgr_->start();
357-
} else {
358-
LOGI("GC is disabled");
359-
}
370+
gc_mgr_->handle_all_recovered_gc_tasks();
360371
});
361372
}
362373

src/lib/homestore_backend/hs_shard_manager.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,29 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, hom
535535
RELEASE_ASSERT(v_chunkID.has_value(), "v_chunk id not found");
536536
bool res = chunk_selector()->release_chunk(pg_id, v_chunkID.value());
537537
RELEASE_ASSERT(res, "Failed to release v_chunk_id={}, pg={}", v_chunkID.value(), pg_id);
538+
539+
// there is a corner case:
540+
// let's say cp_lsn and dc_lsn are 10, lsn 11 is put_blob (blob -> pba-chunk-1), and lsn 12 is
541+
// seal_shard(shard-1 , chunk-1).
542+
543+
// 1 before crash, lsn 11 and lsn 12 are both committed. as a result , we have a blob -> pba-chunk-1 in the
544+
// wbcache of indextable and a persisted superblk of shard-1 with a state sealed.
545+
546+
// 2 crash happens. after restart, blob -> pba-chunk-1 is lost since it only exists in wbcache and was not
547+
// flushed to disk. but shard-1 has a state of sealed since shard superblk is persisted before crash. now,
548+
// since no open shard in chunk-1, chunk-1 is selected for gc and all the blobs of shard-1 are moved to
549+
// chunk-2 , and chunk-1 becomes a reserved chunk.
550+
551+
// 3 since dc_lsn is 10, after log replay, we start committing lsn 11. since blob -> pba-chunk-1 does not
552+
// exist in pg-index-table, on_blob_put_commit will insert a new item blob -> pba-chunk-1 to pg-index-table.
553+
// this is where issue happens. blob belong to shard-1, which has already been moved to chunk-2. but
554+
// on_blob_put_commit adds blob to indextable with a stale pba belongs to chunk-1 , which is now a reserved
555+
// chunk and will be purged later.
556+
557+
// the solution is before persisting the shard state change to SEALED, we persist dc_lsn of this
558+
// repl_dev, so that we can make sure all the logs before this sealing shard will be committed in log
559+
// replay before gc starts.
560+
repl_dev->flush_durable_commit_lsn();
538561
update_shard_in_map(shard_info);
539562
} else
540563
SLOGW(tid, shard_info.id, "try to commit SEAL_SHARD_MSG but shard state is not sealed.");

0 commit comments

Comments
 (0)