@@ -991,21 +991,25 @@ int InstanceChecker::do_inverted_check() {
991991 }
992992
993993 for (auto file = list_iter->next (); file.has_value (); file = list_iter->next ()) {
994+ const auto & path = file->path ;
995+ if (path == " data/packed_file" || path.starts_with (" data/packed_file/" )) {
996+ continue ; // packed_file has dedicated check logic
997+ }
994998 ++num_scanned;
995- int ret = check_segment_file (file-> path );
999+ int ret = check_segment_file (path);
9961000 if (ret != 0 ) {
9971001 LOG (WARNING) << " failed to check segment file, uri=" << accessor->uri ()
998- << " path=" << file-> path ;
1002+ << " path=" << path;
9991003 if (ret == 1 ) {
10001004 ++num_file_leak;
10011005 } else {
10021006 check_ret = -1 ;
10031007 }
10041008 }
1005- ret = check_inverted_index_file (file-> path );
1009+ ret = check_inverted_index_file (path);
10061010 if (ret != 0 ) {
10071011 LOG (WARNING) << " failed to check index file, uri=" << accessor->uri ()
1008- << " path=" << file-> path ;
1012+ << " path=" << path;
10091013 if (ret == 1 ) {
10101014 ++num_file_leak;
10111015 } else {
@@ -2749,6 +2753,18 @@ int InstanceChecker::do_packed_file_check() {
27492753
27502754 // Step 1: Scan all rowset metas to collect packed_slice_locations references
27512755 // Use efficient range scan instead of iterating through each tablet_id
// Helper shared by the rowset scans in this function: folds the
// packed_slice_locations entries of one rowset meta into the by-reference
// captured maps expected_ref_counts and packed_file_small_files.
// Entries without a packed_file_path, or with an empty one, are ignored.
2756+ auto collect_packed_refs = [&](const doris::RowsetMetaCloudPB& rs_meta) {
2757+ const auto & index_map = rs_meta.packed_slice_locations ();
2758+ for (const auto & [small_file_path, index_pb] : index_map) {
// Skip entries that do not name a packed file; they contribute no reference.
2759+ if (!index_pb.has_packed_file_path () || index_pb.packed_file_path ().empty ()) {
2760+ continue ;
2761+ }
2762+ const std::string& packed_file_path = index_pb.packed_file_path ();
// One more expected reference for this packed file ...
2763+ expected_ref_counts[packed_file_path]++;
// ... and remember which small file produced it.
2764+ packed_file_small_files[packed_file_path].insert (small_file_path);
2765+ }
2766+ };
2767+
27522768 {
27532769 std::string start_key = meta_rowset_key ({instance_id_, 0 , 0 });
27542770 std::string end_key = meta_rowset_key ({instance_id_, INT64_MAX, 0 });
@@ -2788,16 +2804,57 @@ int InstanceChecker::do_packed_file_check() {
27882804
27892805 num_scanned_rowsets++;
27902806
2791- // Check packed_slice_locations in rowset meta
2792- const auto & index_map = rs_meta.packed_slice_locations ();
2793- for (const auto & [small_file_path, index_pb] : index_map) {
2794- if (!index_pb.has_packed_file_path () || index_pb.packed_file_path ().empty ()) {
2795- continue ;
2796- }
2797- const std::string& packed_file_path = index_pb.packed_file_path ();
2798- expected_ref_counts[packed_file_path]++;
2799- packed_file_small_files[packed_file_path].insert (small_file_path);
2807+ collect_packed_refs (rs_meta);
2808+ }
2809+ start_key.push_back (' \x00 ' ); // Update to next smallest key for iteration
2810+ } while (it->more () && !stopped ());
2811+ }
2812+
2813+ // Rowsets in recycle keys may still hold packed file references while ref count
2814+ // updates are pending, so include them when calculating expected references.
2815+ {
2816+ std::string start_key = recycle_rowset_key ({instance_id_, 0 , " " });
2817+ std::string end_key = recycle_rowset_key ({instance_id_, INT64_MAX, " \xff " });
2818+
2819+ std::unique_ptr<RangeGetIterator> it;
2820+ do {
2821+ if (stopped ()) {
2822+ return -1 ;
2823+ }
2824+
2825+ std::unique_ptr<Transaction> txn;
2826+ TxnErrorCode err = txn_kv_->create_txn (&txn);
2827+ if (err != TxnErrorCode::TXN_OK) {
2828+ LOG (WARNING) << " failed to create txn for recycle rowset scan in packed file check" ;
2829+ return -1 ;
2830+ }
2831+
2832+ err = txn->get (start_key, end_key, &it);
2833+ if (err != TxnErrorCode::TXN_OK) {
2834+ LOG (WARNING) << " failed to scan recycle rowset metas, err=" << err;
2835+ check_ret = -1 ;
2836+ break ;
2837+ }
2838+
2839+ while (it->has_next () && !stopped ()) {
2840+ auto [k, v] = it->next ();
2841+ if (!it->has_next ()) {
2842+ start_key = k;
2843+ }
2844+
2845+ RecycleRowsetPB recycle_rowset;
2846+ if (!recycle_rowset.ParseFromArray (v.data (), v.size ())) {
2847+ LOG (WARNING) << " malformed recycle rowset, key=" << hex (k);
2848+ check_ret = -1 ;
2849+ continue ;
28002850 }
2851+
2852+ if (!recycle_rowset.has_rowset_meta ()) {
2853+ continue ;
2854+ }
2855+
2856+ num_scanned_rowsets++;
2857+ collect_packed_refs (recycle_rowset.rowset_meta ());
28012858 }
28022859 start_key.push_back (' \x00 ' ); // Update to next smallest key for iteration
28032860 } while (it->more () && !stopped ());
0 commit comments