@@ -672,7 +672,7 @@ Status VariantColumnWriterImpl::_process_doc_snapshot_column(
672672 return Status::OK ();
673673 }
674674 ptr->reconstruct_doc_snapshot_column ();
675- const int bucket_num = 128 ;
675+ const int bucket_num = std::max ( 1 , _tablet_column-> variant_doc_snapshot_shard_count ()) ;
676676 RETURN_IF_ERROR (
677677 _doc_snapshot_writer.init (_tablet_column, bucket_num, column_id, _opts, _opts.footer ));
678678 RETURN_IF_ERROR (_doc_snapshot_writer.append_data (_tablet_column, *ptr, num_rows, converter));
@@ -972,9 +972,8 @@ Status VariantSubcolumnWriter::append_nullable(const uint8_t* null_map, const ui
972972 return Status::OK ();
973973}
974974
975- VariantCompactionDocSnapshotWriter::VariantCompactionDocSnapshotWriter (const ColumnWriterOptions& opts,
976- const TabletColumn* column,
977- std::unique_ptr<Field> field)
975+ VariantCompactionDocSnapshotWriter::VariantCompactionDocSnapshotWriter (
976+ const ColumnWriterOptions& opts, const TabletColumn* column, std::unique_ptr<Field> field)
978977 : ColumnWriter(std::move(field), opts.meta->is_nullable(), opts.meta) {
979978 _opts = opts;
980979 _tablet_column = column;
@@ -1054,13 +1053,13 @@ Status VariantCompactionDocSnapshotWriter::write_bloom_filter_index() {
10541053 RETURN_IF_ERROR (_doc_snapshot_column_writer->write_bloom_filter_index ());
10551054 return Status::OK ();
10561055}
1057- Status VariantCompactionDocSnapshotWriter::append_nullable (const uint8_t * null_map, const uint8_t ** ptr,
1058- size_t num_rows) {
1056+ Status VariantCompactionDocSnapshotWriter::append_nullable (const uint8_t * null_map,
1057+ const uint8_t ** ptr, size_t num_rows) {
10591058 RETURN_IF_ERROR (append_data (ptr, num_rows));
10601059 return Status::OK ();
10611060}
10621061Status VariantCompactionDocSnapshotWriter::finalize () {
1063- auto * variant_column = assert_cast<vectorized::ColumnVariant*>(_column.get ());
1062+ auto * variant_column = assert_cast<vectorized::ColumnVariant*>(_column.get ());
10641063
10651064 const auto & parent_column =
10661065 _opts.rowset_ctx ->tablet_schema ->column_by_uid (_tablet_column->parent_unique_id ());
@@ -1069,8 +1068,8 @@ Status VariantCompactionDocSnapshotWriter::finalize() {
10691068 auto converter = std::make_unique<vectorized::OlapBlockDataConvertor>();
10701069 int column_id = 0 ;
10711070 int64_t variant_doc_snapshot_min_rows = parent_column.variant_doc_snapshot_min_rows ();
1072- if (variant_doc_snapshot_min_rows >= 0
1073- && num_rows >= static_cast <size_t >(variant_doc_snapshot_min_rows)) {
1071+ if (variant_doc_snapshot_min_rows == 0 ||
1072+ num_rows >= static_cast <size_t >(variant_doc_snapshot_min_rows)) {
10741073 std::unordered_map<std::string_view, vectorized::ColumnVariant::Subcolumn> subcolumns;
10751074
10761075 auto [column_key, column_value] = variant_column->get_doc_snapshot_data_paths_and_values ();
@@ -1128,9 +1127,24 @@ Status VariantCompactionDocSnapshotWriter::finalize() {
11281127 continue ;
11291128 }
11301129 subcolumn.finalize ();
1130+ TabletColumn tablet_column;
1131+ TabletSchema::SubColumnInfo sub_column_info;
1132+ if (vectorized::schema_util::generate_sub_column_info (
1133+ *_opts.rowset_ctx ->tablet_schema , parent_column.unique_id (),
1134+ std::string (path), &sub_column_info)) {
1135+ vectorized::schema_util::inherit_column_attributes (parent_column,
1136+ sub_column_info.column );
1137+ tablet_column = std::move (sub_column_info.column );
1138+ _subcolumns_indexes[column_id] = std::move (sub_column_info.indexes );
1139+ } else {
1140+ tablet_column = generate_column_info (path, subcolumn);
1141+ const auto & indexes =
1142+ _opts.rowset_ctx ->tablet_schema ->inverted_indexs (parent_column.unique_id ());
1143+ vectorized::schema_util::inherit_index (indexes, _subcolumns_indexes[column_id],
1144+ tablet_column);
1145+ }
11311146
11321147 int current_column_id = column_id++;
1133- TabletColumn tablet_column = generate_column_info (path, subcolumn);
11341148 int64_t none_null_value_size = subcolumn.get_non_null_value_size ();
11351149 vectorized::ColumnPtr current_column = subcolumn.get_finalized_column_ptr ()->get_ptr ();
11361150 vectorized::DataTypePtr current_type = subcolumn.get_least_common_type ();
@@ -1166,25 +1180,24 @@ Status VariantCompactionDocSnapshotWriter::finalize() {
11661180 TabletColumn doc_snapshot_column =
11671181 vectorized::schema_util::create_doc_snapshot_column (parent_column, bucket_value);
11681182 _init_column_meta (_opts.meta , column_id, doc_snapshot_column, _opts.compression_type );
1169- RETURN_IF_ERROR (ColumnWriter::create_map_writer (_opts, &doc_snapshot_column,
1170- _opts. file_writer , &_doc_snapshot_column_writer));
1183+ RETURN_IF_ERROR (ColumnWriter::create_map_writer (_opts, &doc_snapshot_column, _opts. file_writer ,
1184+ &_doc_snapshot_column_writer));
11711185 RETURN_IF_ERROR (_doc_snapshot_column_writer->init ());
11721186
11731187 // convert doc snapshot column data from engine format to storage layer format
11741188 converter->add_column_data_convertor (doc_snapshot_column);
11751189 // Convert MutableColumnPtr to ColumnPtr by creating a shared pointer from the raw pointer
11761190 // The ownership is maintained by _column, so this is safe
1177- RETURN_IF_ERROR (converter->set_source_content_with_specifid_column ({variant_column-> get_doc_snapshot_column (), nullptr , " " }, 0 ,
1178- num_rows, column_id));
1191+ RETURN_IF_ERROR (converter->set_source_content_with_specifid_column (
1192+ {variant_column-> get_doc_snapshot_column (), nullptr , " " }, 0 , num_rows, column_id));
11791193 auto [status, column] = converter->convert_column_data (column_id);
11801194 RETURN_IF_ERROR (status);
1181- RETURN_IF_ERROR (
1182- _doc_snapshot_column_writer-> append (column-> get_nullmap (), column-> get_data (), num_rows));
1195+ RETURN_IF_ERROR (_doc_snapshot_column_writer-> append (column-> get_nullmap (), column-> get_data (),
1196+ num_rows));
11831197 converter->clear_source_content (column_id);
11841198
11851199 _opts.meta ->set_num_rows (num_rows);
11861200
1187-
11881201 auto [column_key, column_value] = variant_column->get_doc_snapshot_data_paths_and_values ();
11891202 const auto & column_offsets = variant_column->serialized_doc_snapshot_column_offsets ();
11901203 std::map<StringRef, uint32_t > column_stats;
0 commit comments