
Commit 09fe0b9

address comments
1 parent 4fd93bd commit 09fe0b9

File tree

6 files changed: +62 -139 lines changed

src/lib/homestore_backend/heap_chunk_selector.cpp

Lines changed: 1 addition & 2 deletions
@@ -514,7 +514,7 @@ std::shared_ptr< const std::vector< homestore::chunk_num_t > > HeapChunkSelector
     return p_chunk_ids;
 }
 
-std::optional< homestore::chunk_num_t > HeapChunkSelector::get_most_available_blk_chunk(uint64_t ctx, pg_id_t pg_id) {
+std::optional< homestore::chunk_num_t > HeapChunkSelector::pick_most_available_blk_chunk(uint64_t ctx, pg_id_t pg_id) {
     std::shared_lock lock_guard(m_chunk_selector_mtx);
     auto pg_it = m_per_pg_chunks.find(pg_id);
     if (pg_it == m_per_pg_chunks.end()) {
@@ -533,7 +533,6 @@ std::optional< homestore::chunk_num_t > HeapChunkSelector::get_most_available_bl
         LOGWARNMOD(homeobject, "No available chunk for pg={}, ctx=0x{:x}", pg_id, ctx);
         return std::nullopt;
     }
-
     auto v_chunk_id = std::distance(pg_chunks.begin(), max_it);
     LOGDEBUGMOD(homeobject, "Picked v_chunk_id={} : [p_chunk_id={}, avail={}], ctx=0x{:x}", v_chunk_id,
                 pg_chunks[v_chunk_id]->get_chunk_id(), pg_chunks[v_chunk_id]->available_blks(), ctx);
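
Aside on the selection logic this rename touches: a minimal, self-contained sketch of picking the chunk with the most available blocks via std::max_element, mirroring the std::distance/available_blks() usage above. FakeChunk and pick_most_available are illustrative stand-ins, not the real HeapChunkSelector API.

// Sketch only: FakeChunk and pick_most_available are illustrative stand-ins for the
// per-pg chunk entries used by HeapChunkSelector, not its real API.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct FakeChunk {
    uint32_t chunk_id;   // stands in for the physical chunk id (p_chunk_id)
    uint64_t avail_blks; // stands in for available_blks()
    uint64_t available_blks() const { return avail_blks; }
};

// Return the index (the "v_chunk_id") of the chunk with the most available blocks,
// or std::nullopt when the pg has no usable chunk.
std::optional< std::size_t > pick_most_available(const std::vector< FakeChunk >& pg_chunks) {
    auto max_it = std::max_element(pg_chunks.begin(), pg_chunks.end(),
                                   [](const FakeChunk& a, const FakeChunk& b) {
                                       return a.available_blks() < b.available_blks();
                                   });
    if (max_it == pg_chunks.end() || max_it->available_blks() == 0) { return std::nullopt; }
    return static_cast< std::size_t >(std::distance(pg_chunks.begin(), max_it));
}

int main() {
    std::vector< FakeChunk > chunks{{10, 100}, {11, 400}, {12, 250}};
    if (auto v = pick_most_available(chunks)) {
        std::printf("picked v_chunk_id=%zu (p_chunk_id=%u)\n", *v, chunks[*v].chunk_id);
    }
    return 0;
}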

src/lib/homestore_backend/heap_chunk_selector.h

Lines changed: 1 addition & 1 deletion
@@ -126,7 +126,7 @@ class HeapChunkSelector : public homestore::ChunkSelector {
      * @param pg_id The ID of the pg.
      * @return An optional chunk_num_t value representing v_chunk_id, or std::nullopt if no space left.
      */
-    std::optional< chunk_num_t > get_most_available_blk_chunk(uint64_t ctx, pg_id_t pg_id);
+    std::optional< chunk_num_t > pick_most_available_blk_chunk(uint64_t ctx, pg_id_t pg_id);
 
     // this should be called on each pg meta blk found
     bool recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids);

src/lib/homestore_backend/hs_shard_manager.cpp

Lines changed: 37 additions & 72 deletions
@@ -147,7 +147,7 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_create_shard(pg_id_t pg_ow
     SLOGD(tid, new_shard_id, "Create shard request: pg={}, size={}", pg_owner, size_bytes);
 
     // select chunk for shard.
-    const auto v_chunkID = chunk_selector()->get_most_available_blk_chunk(new_shard_id, pg_owner);
+    const auto v_chunkID = chunk_selector()->pick_most_available_blk_chunk(new_shard_id, pg_owner);
 
     if (!v_chunkID.has_value()) {
         SLOGW(tid, new_shard_id, "no availble chunk left to create shard for pg={}", pg_owner);
@@ -176,7 +176,7 @@ ShardManager::AsyncResult< ShardInfo > HSHomeObject::_create_shard(pg_id_t pg_ow
 
         bool res = chunk_selector()->release_chunk(pg_owner, v_chunk_id);
         RELEASE_ASSERT(res, "Failed to release v_chunk_id={}, pg={}", v_chunk_id, pg_owner);
-        // try to gc this chunk to avoid emergent gc in the future
+
        auto gc_mgr = gc_manager();
        if (gc_mgr->is_started()) { gc_manager()->submit_gc_task(task_priority::normal, pchunk_id); }
 
@@ -317,17 +317,17 @@ bool HSHomeObject::on_shard_message_pre_commit(int64_t lsn, sisl::blob const& he
     const auto& shard_id = msg_header->shard_id;
 
     if (msg_header->corrupted()) {
-        LOGE("replication message header is corrupted with crc error when pre_committing shard message, lsn={}, "
-             "traceID={}, shard={}",
-             lsn, tid, shard_id);
+        SLOGE(tid, shard_id,
+              "replication message header is corrupted with crc error when pre_committing shard message, lsn={}", lsn);
         RELEASE_ASSERT(false, "shardID=0x{:x}, pg={}, shard=0x{:x}, failed to pre_commit shard msg", shard_id,
                        (shard_id >> homeobject::shard_width), (shard_id & homeobject::shard_mask));
         if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(ShardError::CRC_MISMATCH)); }
         return false;
     }
 
-    switch (msg_header->msg_type) {
-    case ReplicationMessageType::SEAL_SHARD_MSG: {
+    SLOGD(tid, shard_id, "pre_commit shard message, type={}, lsn= {}", msg_header->msg_type, lsn);
+
+    if (msg_header->msg_type == ReplicationMessageType::SEAL_SHARD_MSG) {
         std::scoped_lock lock_guard(_shard_lock);
         auto iter = _shard_map.find(shard_id);
         RELEASE_ASSERT(iter != _shard_map.end(), "shardID=0x{:x}, pg={}, shard=0x{:x}, shard does not exist", shard_id,
@@ -340,27 +340,8 @@ bool HSHomeObject::on_shard_message_pre_commit(int64_t lsn, sisl::blob const& he
         } else {
             SLOGW(tid, shard_id, "try to seal an unopened shard");
         }
-        break;
-    }
-    // in pre_commit of create_shard, we select_chunk(mark chunk as inuse state) for v_chunk, so that when rollbacking
-    // create_shard, we can also release the chunk.
-    case ReplicationMessageType::CREATE_SHARD_MSG: {
-        const auto v_chunk_id = msg_header->vchunk_id;
-        const auto pg_id = msg_header->pg_id;
-
-        // for leader, the chunk has been selected in create_shard, so when selecting, the chunk will be found as
-        // state::used.
-        auto chunk = chunk_selector_->select_specific_chunk(pg_id, v_chunk_id);
-        RELEASE_ASSERT(chunk != nullptr, "chunk selection failed with v_chunk_id={} in pg={}", v_chunk_id, pg_id);
-        const auto p_chunk_id = homestore::VChunk(chunk).get_chunk_id();
-        SLOGD(tid, shard_id, "pchunk {} is selected for vchunk {} in pg {} for creating shard", p_chunk_id, v_chunk_id,
-              pg_id);
     }
 
-    default: {
-        break;
-    }
-    }
     return true;
 }
 
@@ -372,35 +353,19 @@ void HSHomeObject::on_shard_message_rollback(int64_t lsn, sisl::blob const& head
     }
     auto tid = hs_ctx ? hs_ctx->traceID() : 0;
     const ReplicationMessageHeader* msg_header = r_cast< const ReplicationMessageHeader* >(header.cbytes());
-    const auto shard_id = msg_header->shard_id;
+
     if (msg_header->corrupted()) {
-        LOGW("replication message header is corrupted with crc error, lsn={}, traceID={}", lsn, tid);
-        RELEASE_ASSERT(false, "shardID=0x{:x}, pg={}, shard=0x{:x}, failed to rollback create_shard msg", shard_id,
-                       (shard_id >> homeobject::shard_width), (shard_id & homeobject::shard_mask));
+        RELEASE_ASSERT(false,
+                       "replication message header is corrupted with crc error in on_rollback, lsn={}, traceID={}", lsn,
+                       tid);
        return;
    }
 
+    const auto shard_id = msg_header->shard_id;
     switch (msg_header->msg_type) {
     case ReplicationMessageType::CREATE_SHARD_MSG: {
-        if (ctx) {
-            ctx->promise_.setValue(folly::makeUnexpected(ShardError::RETRY_REQUEST));
-        } else {
-            // we have already added release_chunk logic to thenValue of hoemobject#create_shard in originator, so here
-            // we just need to release_chunk for non-originater case since it will bring a bug if a chunk is released
-            // for two times. for exampele, as a originator:
-
-            // t1 : chunk1 is released in the rollback of create_shard, the chunk state is marked as available
-            // t2 : chunk1 is select by a new create shard (shard1), the chunk state is marked as inuse
-            // t3 : chunk1 is released in thenValue of create_shard, the chunk state is marked as available
-            // t4 : chunk1 is select by a new create shard (shard2), the chunk state is marked as inuse
-            // now, shard1 and shard2 hold the same chunk.
-            bool res = release_chunk_based_on_create_shard_message(header);
-            if (!res) {
-                RELEASE_ASSERT(false,
-                               "shardID=0x{:x}, pg={}, shard=0x{:x}, failed to release chunk based on create shard msg",
-                               shard_id, (shard_id >> homeobject::shard_width), (shard_id & homeobject::shard_mask));
-            }
-        }
+        if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(ShardError::RETRY_REQUEST)); }
+        SLOGD(tid, shard_id, "rollback create_shard message, lsn={}", lsn);
         break;
     }
     case ReplicationMessageType::SEAL_SHARD_MSG: {
@@ -419,8 +384,10 @@ void HSHomeObject::on_shard_message_rollback(int64_t lsn, sisl::blob const& head
 
         // TODO:set a proper error code
         if (ctx) { ctx->promise_.setValue(folly::makeUnexpected(ShardError::RETRY_REQUEST)); }
+        break;
     }
     default: {
+        SLOGE(tid, shard_id, "unsupported op type={} for rollbacking shard message", msg_header->msg_type);
         break;
     }
     }
@@ -455,15 +422,12 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, sha
     }
     auto tid = hs_ctx ? hs_ctx->traceID() : 0;
     auto header = r_cast< const ReplicationMessageHeader* >(h.cbytes());
-    const auto shard_id = header->shard_id;
     if (header->corrupted()) {
-        LOGE("replication message header is corrupted with crc error when committing shard message, lsn={}, "
-             "traceID={}, shard={}",
-             lsn, tid, shard_id);
-        RELEASE_ASSERT(false, "shardID=0x{:x}, pg={}, shard=0x{:x}, failed to pre_commit shard msg", shard_id,
-                       (shard_id >> homeobject::shard_width), (shard_id & homeobject::shard_mask));
+        RELEASE_ASSERT(false, "replication message header is corrupted with crc error in on_commit, lsn={}, traceID={}",
+                       lsn, tid);
         return;
     }
+    const auto shard_id = header->shard_id;
 
     RELEASE_ASSERT(header->msg_type == ReplicationMessageType::CREATE_SHARD_MSG ||
                        header->msg_type == ReplicationMessageType::SEAL_SHARD_MSG,
@@ -539,6 +503,15 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, sha
 
     switch (header->msg_type) {
     case ReplicationMessageType::CREATE_SHARD_MSG: {
+        // 1 select sepecific chunk.
+        // for leader, the chunk has been selected in create_shard, so when selecting, the chunk will be found as
+        // state::used.
+        auto chunk = chunk_selector_->select_specific_chunk(pg_id, vchunk_id);
+        RELEASE_ASSERT(chunk != nullptr, "chunk selection failed with v_chunk_id={} in pg={}", vchunk_id, pg_id);
+        SLOGD(tid, shard_id, "pchunk {} is selected for vchunk {} in pg {} for creating shard", pchunk, vchunk_id,
+              pg_id);
+
+        // 2 add shard meta blk
         shard_info.id = shard_id;
         shard_info.placement_group = pg_id;
         shard_info.created_time = get_current_timestamp();
@@ -566,18 +539,6 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, sha
         if (shard_info.state == ShardInfo::State::SEALED) {
             bool res = chunk_selector()->release_chunk(pg_id, vchunk_id);
             RELEASE_ASSERT(res, "Failed to release v_chunk_id={}, pg={}", vchunk_id, pg_id);
-
-            const static uint64_t shard_super_blk_count{
-                sisl::round_up(sizeof(shard_info_superblk), homestore::data_service().get_blk_size()) /
-                homestore::data_service().get_blk_size()};
-
-            if (vchunk->available_blks() <= shard_super_blk_count * 2) {
-                // try to gc this chunk to avoid emergent gc in the future
-                auto gc_mgr = gc_manager();
-                if (gc_mgr->is_started()) {
-                    gc_manager()->submit_gc_task(task_priority::normal, vchunk->get_chunk_id());
-                }
-            }
             update_shard_in_map(shard_info);
         } else
             RELEASE_ASSERT(false, "try to commit SEAL_SHARD_MSG but shard state is not sealed, shard_id={}", shard_id);
@@ -589,11 +550,15 @@ void HSHomeObject::on_shard_message_commit(int64_t lsn, sisl::blob const& h, sha
         break;
     }
 
-    // write shard header/footer blk
-    // TODO:: check the return status of async if we care about the completeness
-
-    // in log replay case, one more shard header/footer will be written again. it does not matter.
-    homestore::data_service().async_write(generate_shard_super_blk_sg_list(shard_id), blkids);
+    // write shard header/footer blk. in log replay case, one more shard header/footer will be written again. it does
+    // not matter.
+    sisl::sg_list sgs = generate_shard_super_blk_sg_list(shard_id);
+    RELEASE_ASSERT(sgs.iovs.size() == 1, "sgs.iovs.size() for shard header/footer should be 1, but not!");
+    homestore::data_service().async_write(sgs, blkids).thenValue([sgs, lsn, msgtype = header->msg_type](auto&& err) {
+        // it does not matter if fail to write shard header/footer, we never read them
+        if (err) { LOGW("failed to write shard super blk, err={}, lsn={}, msgType={}", err.message(), lsn, msgtype); }
+        iomanager.iobuf_free(reinterpret_cast< uint8_t* >(sgs.iovs[0].iov_base));
+    });
 
     auto hs_pg = get_hs_pg(pg_id);
     RELEASE_ASSERT(hs_pg != nullptr, "shardID=0x{:x}, pg={}, shard=0x{:x}, PG not found", shard_id,
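
Aside on the new shard header/footer write path: the commit replaces a fire-and-forget async_write with a thenValue continuation that logs failures and frees the single iovec buffer. A minimal sketch of that free-in-completion pattern follows, assuming only folly; fake_async_write and the malloc/free pair are hypothetical placeholders for homestore::data_service().async_write() and iomanager's iobuf alloc/free.

// Sketch only: fake_async_write and the malloc/free pair are placeholders for
// homestore::data_service().async_write() and iomanager's iobuf alloc/free.
#include <folly/futures/Future.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <system_error>

// Stand-in for an asynchronous device write that completes with an error_code.
folly::Future< std::error_code > fake_async_write(void* buf, std::size_t len) {
    (void)buf;
    (void)len;
    return folly::makeFuture(std::error_code{});
}

int main() {
    void* buf = std::malloc(4096); // stands in for the shard header/footer staging buffer

    // Keep the buffer alive by capturing it in the completion callback, log (but
    // tolerate) a failed write, and free the buffer exactly once when the write is done.
    auto fut = fake_async_write(buf, 4096).thenValue([buf](std::error_code err) {
        if (err) { std::fprintf(stderr, "write failed: %s\n", err.message().c_str()); }
        std::free(buf);
    });
    std::move(fut).get();
    return 0;
}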

src/lib/homestore_backend/replication_state_machine.cpp

Lines changed: 16 additions & 58 deletions
@@ -131,6 +131,7 @@ void ReplicationStateMachine::on_rollback(int64_t lsn, sisl::blob const& header,
     }
 
     default: {
+        LOGW("unsupported message type in rollback, lsn={}, mesType={}", lsn, msg_header->msg_type);
         break;
     }
     }
@@ -572,51 +573,18 @@ folly::Future< std::error_code > ReplicationStateMachine::on_fetch_data(const in
 
     LOGD("fetch data with lsn={}, msg type={}", lsn, msg_header->msg_type);
 
-    // for nuobject case, we can make this assumption, since we use append_blk_allocator.
-    RELEASE_ASSERT(sgs.iovs.size() == 1, "sgs iovs size should be 1, lsn={}, msg_type={}", lsn, msg_header->msg_type);
+    if (msg_header->msg_type == ReplicationMessageType::PUT_BLOB_MSG) {
+        // for nuobject case, we can make this assumption, since we use append_blk_allocator.
+        RELEASE_ASSERT(sgs.iovs.size() == 1, "sgs iovs size should be 1, lsn={}, msg_type={}", lsn,
+                       msg_header->msg_type);
 
-    auto const total_size = local_blk_id.blk_count() * repl_dev()->get_blk_size();
-    RELEASE_ASSERT(total_size == sgs.size,
-                   "total_blk_size does not match, lsn={}, msg_type={}, expected size={}, given buffer size={}", lsn,
-                   msg_header->msg_type, total_size, sgs.size);
+        auto const total_size = local_blk_id.blk_count() * repl_dev()->get_blk_size();
+        RELEASE_ASSERT(total_size == sgs.size,
+                       "total_blk_size does not match, lsn={}, msg_type={}, expected size={}, given buffer size={}",
+                       lsn, msg_header->msg_type, total_size, sgs.size);
 
-    auto given_buffer = (uint8_t*)(sgs.iovs[0].iov_base);
-    std::memset(given_buffer, 0, total_size);
-
-    // in homeobject, we have three kinds of requests that will write data(thus fetch_data might happen) to a
-    // chunk:
-    // 1 create_shard : will write a shard header to a chunk
-    // 2 seal_shard : will write a shard footer to a chunk
-    // 3 put_blob: will write user data to a chunk
-
-    // for any type that writes data to a chunk, we need to handle the fetch_data request for it.
-
-    switch (msg_header->msg_type) {
-    case ReplicationMessageType::CREATE_SHARD_MSG:
-    case ReplicationMessageType::SEAL_SHARD_MSG: {
-        // this function only returns data, not care about raft related logic, so no need to check the existence of
-        // shard, just return the shard header/footer directly. Also, no need to read the data from disk, generate
-        // it from Header.
-        auto sb =
-            r_cast< HSHomeObject::shard_info_superblk const* >(header.cbytes() + sizeof(ReplicationMessageHeader));
-        auto const raw_size = sizeof(HSHomeObject::shard_info_superblk);
-        auto const expected_size = sisl::round_up(raw_size, repl_dev()->get_blk_size());
-
-        RELEASE_ASSERT(
-            sgs.size == expected_size,
-            "shard metadata size does not match, lsn={}, msg_type={}, expected size={}, given buffer size={}", lsn,
-            msg_header->msg_type, expected_size, sgs.size);
-
-        // TODO::return error_code if assert fails, so it will not crash here because of the assert failure.
-        std::memcpy(given_buffer, sb, raw_size);
-        return folly::makeFuture< std::error_code >(std::error_code{});
-    }
-
-    // TODO: for shard header and footer, follower can generate it itself according to header, no need to fetch
-    // it from leader. this can been done by adding another callback, which will be called before follower tries
-    // to fetch data.
-
-    case ReplicationMessageType::PUT_BLOB_MSG: {
+        auto given_buffer = (uint8_t*)(sgs.iovs[0].iov_base);
+        std::memset(given_buffer, 0, total_size);
 
         const auto blob_id = msg_header->blob_id;
         const auto shard_id = msg_header->shard_id;
@@ -632,23 +600,15 @@ folly::Future< std::error_code > ReplicationStateMachine::on_fetch_data(const in
             throw std::system_error(err);
         }
 
-        // folly future has no machenism to bypass the later thenValue in the then value chain. so for all
-        // the case that no need to schedule the later async_read, we throw a system_error with no error
-        // code to bypass the next thenValue.
-#ifdef _PRERELEASE
-        if (iomgr_flip::instance()->test_flip("local_blk_data_invalid")) {
-            LOGI("Simulating forcing to read by indextable");
-        } else if (validate_blob(shard_id, blob_id, given_buffer, total_size)) {
-            LOGD("local_blk_id matches blob data, lsn={}, blob_id={}, shard=0x{:x}", lsn, blob_id, shard_id);
-            throw std::system_error(std::error_code{});
-        }
-#else
+        // folly future has no machenism to bypass the later thenValue in the then value chain. so for all
+        // the case that no need to schedule the later async_read, we throw a system_error with no error
+        // code to bypass the next thenValue.
+
         // if data matches
         if (validate_blob(shard_id, blob_id, given_buffer, total_size)) {
             LOGD("local_blk_id matches blob data, lsn={}, blob_id={}, shard_id={}", lsn, blob_id, shard_id);
             throw std::system_error(std::error_code{});
         }
-#endif
 
         // if data does not match, try to read data according to the index table. this might happen if the
        // chunk has once been gc.
@@ -729,12 +689,10 @@ folly::Future< std::error_code > ReplicationStateMachine::on_fetch_data(const in
 
             return ec;
         });
-    }
-    default: {
+    } else {
         LOGW("msg type={}, should not happen in fetch_data rpc", msg_header->msg_type);
         return folly::makeFuture< std::error_code >(std::make_error_code(std::errc::operation_not_supported));
     }
-}
 }
 
 bool ReplicationStateMachine::validate_blob(shard_id_t shard_id, blob_id_t blob_id, void* data, size_t size) const {
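
Aside on the comment kept in this hunk about bypassing thenValue: folly futures have no built-in way to skip later thenValue stages, so the code throws a std::system_error carrying an empty error_code and lets a later stage translate it back. A minimal sketch of that control-flow trick follows, assuming only folly; the local-data check (validate_blob) is modeled by a plain bool.

// Sketch only, assuming folly: throwing std::system_error with an empty error_code
// inside a thenValue skips the remaining thenValue stages; a final thenTry turns the
// exception back into an error_code result.
#include <folly/futures/Future.h>
#include <cstdio>
#include <system_error>

int main() {
    bool data_already_matches = true; // stand-in for validate_blob() succeeding

    auto fut =
        folly::makeFuture()
            .thenValue([data_already_matches](folly::Unit) {
                if (data_already_matches) {
                    // No need to schedule the follow-up read: bail out of the chain.
                    throw std::system_error(std::error_code{});
                }
            })
            .thenValue([](folly::Unit) {
                // Models the async_read stage; skipped when the previous stage threw.
                std::puts("reading from disk ...");
            })
            .thenTry([](folly::Try< folly::Unit > t) -> std::error_code {
                if (t.hasException()) {
                    try {
                        t.value(); // rethrows the stored exception
                    } catch (const std::system_error& e) {
                        return e.code(); // empty error_code means "nothing left to do"
                    } catch (...) {
                        return std::make_error_code(std::errc::io_error);
                    }
                }
                return std::error_code{};
            });

    std::printf("final error_code value: %d\n", std::move(fut).get().value());
    return 0;
}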

src/lib/homestore_backend/tests/homeobj_misc_tests.cpp

Lines changed: 4 additions & 3 deletions
@@ -233,8 +233,9 @@ TEST_F(HomeObjectFixture, SnapshotReceiveHandler) {
     for (uint64_t i = 1; i <= num_shards_per_pg; i++) {
         shard_ids.push_back(i);
     }
-    auto pg_entry = CreateResyncPGMetaDataDirect(builder, pg_id, &uuid, pg->pg_info_.size, pg->pg_info_.expected_member_num, pg->pg_info_.chunk_size,
-                                                 blob_seq_num, num_shards_per_pg, &members, &shard_ids);
+    auto pg_entry =
+        CreateResyncPGMetaDataDirect(builder, pg_id, &uuid, pg->pg_info_.size, pg->pg_info_.expected_member_num,
+                                     pg->pg_info_.chunk_size, blob_seq_num, num_shards_per_pg, &members, &shard_ids);
     builder.Finish(pg_entry);
     auto pg_meta = GetResyncPGMetaData(builder.GetBufferPointer());
     auto ret = handler->process_pg_snapshot_data(*pg_meta);
@@ -264,7 +265,7 @@
     shard.total_capacity_bytes = 1024 * Mi;
     shard.lsn = snp_lsn;
 
-    auto v_chunk_id = _obj_inst->chunk_selector()->get_most_available_blk_chunk(shard.id, pg_id);
+    auto v_chunk_id = _obj_inst->chunk_selector()->pick_most_available_blk_chunk(shard.id, pg_id);
 
     auto shard_entry = CreateResyncShardMetaData(builder, shard.id, pg_id, static_cast< uint8_t >(shard.state),
                                                  shard.lsn, shard.created_time, shard.last_modified_time,
