Skip to content

Commit 4d66465

Browse files
yawzhangxiaoxichen
authored and committed
support manual trigger snapshot creation function
1 parent 43fd4c1 commit 4d66465

File tree

11 files changed

+155
-11
lines changed

11 files changed

+155
-11
lines changed

conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class HomestoreConan(ConanFile):
1111
name = "homestore"
12-
version = "7.1.0"
12+
version = "7.1.1"
1313

1414
homepage = "https://github.com/eBay/Homestore"
1515
description = "HomeStore Storage Engine"

src/include/homestore/replication/repl_dev.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,9 @@ class ReplDev {
621621
// clear reqs that has allocated blks on the given chunk.
622622
virtual void clear_chunk_req(chunk_num_t chunk_id) = 0;
623623

624+
// create a snapshot manually and try to compact logs upto compact_lsn
625+
virtual void trigger_snapshot_creation(repl_lsn_t compact_lsn, bool wait_for_commit) = 0;
626+
624627
protected:
625628
shared< ReplDevListener > m_listener;
626629

src/include/homestore/replication_service.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,14 @@ class ReplicationService {
110110
/// @param group_id Group where the replace member happens
111111
virtual ReplServiceError destroy_repl_dev(group_id_t group_id, uint64_t trace_id = 0) = 0;
112112

113+
/// @brief Trigger a snapshot creation manually for a given group id
114+
/// @param group_id Group id for which snapshot creation is requested
115+
/// @param compact_lsn LSN upto which the snapshot is expected to compact; -1 means no compaction upper limit is needed
116+
/// @param wait_for_commit default is true, meaning it will wait until the local committed lsn reaches the compact_lsn;
117+
/// if set to false, the snapshot and compaction will be triggered based on the latest committed lsn
118+
virtual void trigger_snapshot_creation(group_id_t group_id, repl_lsn_t compact_lsn = -1,
119+
bool wait_for_commit = true) = 0;
120+
113121
/// @brief Get the repl dev for a given group id if it is already created or opened
114122
/// @param group_id Group id interested in
115123
/// @return ReplDev is opened or ReplServiceError::SERVER_NOT_FOUND if it doesn't exist

src/lib/replication/repl_dev/raft_repl_dev.cpp

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ RaftReplDev::RaftReplDev(RaftReplService& svc, superblk< raft_repl_dev_superblk
4646
});
4747
m_next_dsn = m_rd_sb->last_applied_dsn + 1;
4848
m_commit_upto_lsn = m_rd_sb->durable_commit_lsn;
49-
m_last_flushed_commit_lsn = m_commit_upto_lsn;
49+
m_last_flushed_cp_lsn = m_rd_sb->checkpoint_lsn;
5050
m_compact_lsn = m_rd_sb->compact_lsn;
51+
m_last_flushed_compact_lsn = m_compact_lsn;
5152

5253
m_rdev_name = m_rd_sb->rdev_name;
5354
// Its ok not to do compare exchange, because loading is always single threaded as of now
@@ -818,6 +819,54 @@ void RaftReplDev::on_create_snapshot(nuraft::snapshot& s, nuraft::async_result<
818819
if (when_done) { when_done(ret_val, null_except); }
819820
}
820821

822+
void RaftReplDev::trigger_snapshot_creation(repl_lsn_t compact_lsn, bool wait_for_commit) {
823+
// Step 1. Update truncation boundary if compact_lsn is specified
824+
if (compact_lsn >= 0) {
825+
// Step 1.1 Wait for commit upto compact_lsn if needed
826+
if (wait_for_commit) {
827+
RD_LOGI(NO_TRACE_ID, "Waiting for commit upto compact_lsn={}, current_commit_lsn={}", compact_lsn,
828+
m_commit_upto_lsn.load());
829+
while (compact_lsn > m_commit_upto_lsn.load()) {
830+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
831+
}
832+
}
833+
// Step 1.2 trigger cp_flush to make sure all changes are flushed to disk before updating truncation boundary
834+
hs()->cp_mgr().trigger_cp_flush(true /*force*/).get();
835+
RD_LOGI(NO_TRACE_ID, "cp_flush completed before updating truncation boundary to lsn={}", compact_lsn);
836+
// Step 1.3 Update truncation boundary
837+
RD_LOGI(NO_TRACE_ID, "Updating truncation boundary to lsn={}, current_truncation_boundary={}", compact_lsn,
838+
m_truncation_upper_limit.load());
839+
update_truncation_boundary(compact_lsn);
840+
}
841+
842+
// Step 2. Create snapshot manually
843+
auto result = raft_server()->create_snapshot();
844+
if (result == 0) {
845+
RD_LOGE(NO_TRACE_ID, "Failed to create snapshot - another snapshot may be in progress");
846+
return;
847+
}
848+
RD_LOGI(NO_TRACE_ID, "Successfully created snapshot, result log_idx={}", result);
849+
850+
// Step 3. Make sure logs are compacted upto compact_lsn after snapshot created
851+
// Since we want to compact logs upto compact_lsn after the snapshot created, here we check the compact_lsn and
852+
// trigger a compact manually if needed (since logs will only be truncated upto min(m_truncation_upper_limit,
853+
// snp_idx-snp_distance), there is a chance that real_compact_lsn < compact_lsn after the snapshot is created).
854+
// Note that manual compact may cause concurrency issue with the automatic log compaction in raft server. Currently,
855+
// it doesn't matter because this API is used in op scenarios when members sync up and no new writes are handled. If we
856+
// want to use it in more phases, we need to add a lock to avoid concurrency issues.
857+
if (compact_lsn > 0 && m_compact_lsn.load() < compact_lsn) {
858+
RD_LOGI(NO_TRACE_ID, "Manually compacting logs upto lsn={} after snapshot, current_compact_lsn={}", compact_lsn,
859+
m_compact_lsn.load());
860+
m_data_journal->compact(compact_lsn);
861+
RD_LOGI(NO_TRACE_ID, "Compacted logs upto lsn={} after snapshot", compact_lsn);
862+
}
863+
864+
// Step 4. trigger cp_flush to make sure all changes are flushed to disk after snapshot creation and log compaction
865+
hs()->cp_mgr().trigger_cp_flush(true /*force*/).get();
866+
RD_LOGI(NO_TRACE_ID, "cp_flush completed after snapshot creation and log compaction");
867+
RD_LOGI(NO_TRACE_ID, "snapshot creation and compaction completed");
868+
}
869+
821870
void RaftReplDev::propose_truncate_boundary() {
822871
init_req_counter counter(pending_request_num);
823872
auto repl_status = get_replication_status();
@@ -1591,7 +1640,7 @@ void RaftReplDev::handle_commit(repl_req_ptr_t rreq, bool recovery) {
15911640
complete_replace_member(rreq);
15921641
break;
15931642
case journal_type_t::HS_CTRL_UPDATE_TRUNCATION_BOUNDARY:
1594-
update_truncation_boundary(rreq);
1643+
update_truncation_boundary(r_cast< const truncate_ctx* >(rreq->header().cbytes())->truncation_upper_limit);
15951644
break;
15961645
case journal_type_t::HS_CTRL_REMOVE_MEMBER:
15971646
remove_member(rreq);
@@ -1755,7 +1804,7 @@ void RaftReplDev::clean_replace_member_task(repl_req_ptr_t rreq) {
17551804
RD_LOGI(rreq->traceID(), "Raft repl replace_member_task has been cleared, task_id={}", task_id);
17561805
}
17571806

1758-
void RaftReplDev::update_truncation_boundary(repl_req_ptr_t rreq) {
1807+
void RaftReplDev::update_truncation_boundary(repl_lsn_t truncation_upper_limit) {
17591808
repl_lsn_t cur_checkpoint_lsn = 0;
17601809
{
17611810
std::unique_lock lg{m_sb_mtx};
@@ -1764,8 +1813,7 @@ void RaftReplDev::update_truncation_boundary(repl_req_ptr_t rreq) {
17641813
// expected truncation_upper_limit should not larger than the current checkpoint_lsn, this is to ensure that
17651814
// when a crash happens before index flushed to disk, all the logs larger than checkpoint_lsn are still available
17661815
// to replay.
1767-
auto ctx = r_cast< const truncate_ctx* >(rreq->header().cbytes());
1768-
auto exp_truncation_upper_limit = std::min(ctx->truncation_upper_limit, cur_checkpoint_lsn);
1816+
auto exp_truncation_upper_limit = std::min(truncation_upper_limit, cur_checkpoint_lsn);
17691817
auto cur_truncation_upper_limit = m_truncation_upper_limit.load();
17701818
// exp_truncation_upper_limit might be less or equal to cur_truncation_upper_limit after Baseline Re-sync,
17711819
// we should skip update to ensure the truncation_upper_limit is always increasing.
@@ -2389,8 +2437,9 @@ void RaftReplDev::cp_flush(CP* cp, cshared< ReplDevCPContext > ctx) {
23892437
auto const clsn = ctx->compacted_to_lsn;
23902438
auto const dsn = ctx->last_applied_dsn;
23912439

2392-
if (lsn == m_last_flushed_commit_lsn) {
2393-
// Not dirtied since last flush ignore
2440+
// compact can be triggered manually while no new logs are committed, so both lsn and clsn need to be checked
2441+
if (lsn == m_last_flushed_cp_lsn && clsn == m_last_flushed_compact_lsn) {
2442+
// Not dirtied since last cp flush ignore
23942443
return;
23952444
}
23962445

@@ -2402,7 +2451,8 @@ void RaftReplDev::cp_flush(CP* cp, cshared< ReplDevCPContext > ctx) {
24022451
m_rd_sb->checkpoint_lsn = lsn;
24032452
m_rd_sb->last_applied_dsn = dsn;
24042453
m_rd_sb.write();
2405-
m_last_flushed_commit_lsn = lsn;
2454+
m_last_flushed_cp_lsn = lsn;
2455+
m_last_flushed_compact_lsn = clsn;
24062456
RD_LOGD(NO_TRACE_ID, "cp flush in raft repl dev, lsn={}, clsn={}, next_dsn={}, cp string:{}", lsn, clsn,
24072457
m_next_dsn.load(), cp->to_string());
24082458
}

src/lib/replication/repl_dev/raft_repl_dev.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,8 @@ class RaftReplDev : public ReplDev,
203203

204204
std::mutex m_sb_mtx; // Lock to protect the repl dev superblock
205205

206-
repl_lsn_t m_last_flushed_commit_lsn{0}; // LSN upto which it was flushed to persistent store
206+
repl_lsn_t m_last_flushed_cp_lsn{0}; // LSN upto which it was flushed to persistent store at the checkpoint
207+
repl_lsn_t m_last_flushed_compact_lsn{0}; // LSN upto which it was compacted at the checkpoint
207208
iomgr::timer_handle_t m_sb_flush_timer_hdl;
208209

209210
std::atomic< uint64_t > m_next_dsn{0}; // Data Sequence Number that will keep incrementing for each data entry
@@ -373,6 +374,24 @@ class RaftReplDev : public ReplDev,
373374
*/
374375
void on_create_snapshot(nuraft::snapshot& s, nuraft::async_result< bool >::handler_type& when_done);
375376

377+
/**
378+
* \brief Manually create a snapshot, the snapshot will be created directly based on the latest committed log
379+
* index.
380+
*
381+
* This function is called when we want to create a snapshot and compact the log store manually.
382+
* it provides an optional compact lsn for the log store, which is useful in scenarios like
383+
* the truncation upper limit is reset due to restart (it is an in-memory param) and the previous auto background
384+
* compact was skipped because of the truncation upper limit is zero.
385+
*
386+
* \param compact_lsn The specific truncation upper limit for the log store.
387+
* \param wait_for_commit Whether to wait for the committed lsn to reach the compact_lsn before creating the
388+
* snapshot.
389+
*
390+
* \note This function will trigger cp_flush before creating the snapshot and after compaction to make sure the logs were
391+
* compacted and the changes have been persisted, which might take some time.
392+
*/
393+
void trigger_snapshot_creation(repl_lsn_t compact_lsn, bool wait_for_commit) override;
394+
376395
#if 0
377396
/**
378397
* Truncates the replication log by providing a specified number of reserved entries.
@@ -475,7 +494,7 @@ class RaftReplDev : public ReplDev,
475494
void create_snp_resync_data(raft_buf_ptr_t& data_out);
476495
bool save_snp_resync_data(nuraft::buffer& data, nuraft::snapshot& s);
477496

478-
void update_truncation_boundary(repl_req_ptr_t rreq);
497+
void update_truncation_boundary(repl_lsn_t truncation_upper_limit);
479498
void propose_truncate_boundary();
480499

481500
void report_blk_metrics_if_needed(repl_req_ptr_t rreq);

src/lib/replication/repl_dev/solo_repl_dev.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ class SoloReplDev : public ReplDev {
121121
// solo repl device truncation is triggerred by CP after flushing all the data in its cp_cleanup routine;
122122
void truncate();
123123

124+
void trigger_snapshot_creation(repl_lsn_t compact_lsn, bool wait_for_commit) override { return; }
125+
124126
private:
125127
void write_journal(repl_req_ptr_t rreq);
126128
void on_log_found(logstore_seq_num_t lsn, log_buffer buf, void* ctx);

src/lib/replication/service/generic_repl_svc.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ ReplServiceError SoloReplService::destroy_repl_dev(group_id_t group_id, uint64_t
233233
return remove_repl_dev(group_id).get();
234234
}
235235

236+
void SoloReplService::trigger_snapshot_creation(group_id_t group_id, repl_lsn_t compact_lsn, bool wait_for_commit) {}
237+
236238
std::unique_ptr< CPContext > SoloReplServiceCPHandler::on_switchover_cp(CP* cur_cp, CP* new_cp) {
237239
return std::make_unique< CPContext >(new_cp);
238240
}

src/lib/replication/service/generic_repl_svc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class SoloReplService : public GenericReplService {
112112
const std::vector< replica_member_info >& others,
113113
uint64_t trace_id = 0) const override;
114114
ReplServiceError destroy_repl_dev(group_id_t group_id, uint64_t trace_id = 0) override;
115+
void trigger_snapshot_creation(group_id_t group_id, repl_lsn_t compact_lsn, bool wait_for_commit) override;
115116
};
116117

117118
class SoloReplServiceCPHandler : public CPCallbacks {

src/lib/replication/service/raft_repl_service.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,18 @@ ReplServiceError RaftReplService::destroy_repl_dev(group_id_t group_id, uint64_t
610610
return ReplServiceError::OK;
611611
}
612612

613+
void RaftReplService::trigger_snapshot_creation(group_id_t group_id, repl_lsn_t compact_lsn, bool wait_for_commit) {
614+
auto rdev_result = get_repl_dev(group_id);
615+
if (!rdev_result) {
616+
LOGWARNMOD(replication, "ReplDev group_id={} not found while scheduling snapshot creation",
617+
boost::uuids::to_string(group_id));
618+
return;
619+
}
620+
621+
std::dynamic_pointer_cast< RaftReplDev >(rdev_result.value())
622+
->trigger_snapshot_creation(compact_lsn, wait_for_commit);
623+
}
624+
613625
////////////////////// Reaper Thread related //////////////////////////////////
614626
void RaftReplService::start_repl_service_timers() {
615627
// we need to explicitly cancel the timers before we stop the repl_devs, but we cannot cancel a thread timer

src/lib/replication/service/raft_repl_service.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ class RaftReplService : public GenericReplService,
106106
uint64_t trace_id = 0) const override;
107107
ReplServiceError destroy_repl_dev(group_id_t group_id, uint64_t trace_id = 0) override;
108108

109+
void trigger_snapshot_creation(group_id_t group_id, repl_lsn_t compact_lsn, bool wait_for_commit) override;
110+
109111
private:
110112
RaftReplDev* raft_group_config_found(sisl::byte_view const& buf, void* meta_cookie);
111113
void start_repl_service_timers();

0 commit comments

Comments
 (0)