Make CompactionMixin::handle_ordered_data_compaction() return false when the
input rowsets do not all report the same schema version, so that link file
compaction is only attempted over rowsets with one schema version.

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 543fad1f3dd61d..4ce6630012993d 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -47,6 +47,7 @@
 #include "io/fs/file_writer.h"
 #include "io/fs/remote_file_system.h"
 #include "io/io_common.h"
+#include "olap/collection_statistics.h"
 #include "olap/cumulative_compaction.h"
 #include "olap/cumulative_compaction_policy.h"
 #include "olap/cumulative_compaction_time_series_policy.h"
@@ -423,6 +424,28 @@ bool CompactionMixin::handle_ordered_data_compaction() {
     if (!config::enable_ordered_data_compaction) {
         return false;
     }
+
+    // If some rowsets have idx files and others do not, we cannot do link file compaction,
+    // since the output rowset would be broken.
+
+    // Use schema version instead of schema hash to check if they are the same,
+    // because light schema change will not change the schema hash on BE, but will increase the schema version
+    // See fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java::2979
+    std::vector<int32_t> schema_versions_of_rowsets;
+
+    for (const auto& input_rowset : _input_rowsets) {
+        schema_versions_of_rowsets.push_back(input_rowset->rowset_meta()->schema_version());
+    }
+
+    // If all rowsets have the same schema version, then we can do link file compaction directly.
+    bool all_same_schema_version =
+            std::all_of(schema_versions_of_rowsets.begin(), schema_versions_of_rowsets.end(),
+                        [&](int32_t v) { return v == schema_versions_of_rowsets.front(); });
+
+    if (!all_same_schema_version) {
+        return false;
+    }
+
     if (compaction_type() == ReaderType::READER_COLD_DATA_COMPACTION ||
         compaction_type() == ReaderType::READER_FULL_COMPACTION) {
         // The remote file system and full compaction does not support to link files.
Add a const accessor RowsetMeta::schema_version() that returns
_rowset_meta_pb.schema_version() as int32_t. It is inserted after the deleted
copy operations and immediately before the class's private: section.

diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 4cd346ce414850..c0dc1fb8c674ae 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -424,6 +424,8 @@ class RowsetMeta : public MetadataAdder {
     RowsetMeta(const RowsetMeta&) = delete;
     RowsetMeta operator=(const RowsetMeta&) = delete;
 
+    int32_t schema_version() const { return _rowset_meta_pb.schema_version(); }
+
 private:
     bool _deserialize_from_pb(std::string_view value);
 