Skip to content

Commit 4925ae1

Browse files
committed
[#27095] docdb: Vector Index: Filter tombstoned reverse mapping during compaction
Summary: The diff updates the compaction logic to correctly handle tombstoned reverse-mapping vector index keys. The same approach as for regular compactions is used for cleaning such entries. Additionally, the following changes are included: - `SubDocKey::DebugSliceToStringAsResult()` is updated to correctly render vector index reverse mappings. - A new term `MetaKey` is introduced to describe different types of internal/metadata records from the DocDB perspective such as `kVectorIndexMapping` and `kTransactionApplyState` (TBD for intents meta keys). It would probably be good to introduce a common term for MetaKey and SubDocKey/DocKey in the future. Jira: DB-16582 Test Plan: ./yb_build.sh --cxx-test pgwrapper_pg_vector_index-test --gtest_filter PgVectorIndexSingleServerTest.ReverseMappingCleanup/Colocated ./yb_build.sh --cxx-test pgwrapper_pg_vector_index-test --gtest_filter PgVectorIndexSingleServerTest.ReverseMappingCleanup/ColocatedHnswlib ./yb_build.sh --cxx-test pgwrapper_pg_vector_index-test --gtest_filter PgVectorIndexSingleServerTest.ReverseMappingCleanup/ColocatedYbHnsw ./yb_build.sh --cxx-test pgwrapper_pg_vector_index-test --gtest_filter PgVectorIndexSingleServerTest.ReverseMappingCleanup/Distributed ./yb_build.sh --cxx-test pgwrapper_pg_vector_index-test --gtest_filter PgVectorIndexSingleServerTest.ReverseMappingCleanup/DistributedHnswlib ./yb_build.sh --cxx-test pgwrapper_pg_vector_index-test --gtest_filter PgVectorIndexSingleServerTest.ReverseMappingCleanup/DistributedYbHnsw Reviewers: sergei, rthallam, timur Reviewed By: sergei, rthallam Subscribers: ybase Differential Revision: https://phorge.dev.yugabyte.com/D47021
1 parent c937939 commit 4925ae1

23 files changed

+893
-451
lines changed

src/yb/common/common_flags.cc

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ TAG_FLAG(wait_for_ysql_backends_catalog_version_client_master_rpc_margin_ms, adv
8787
// TODO(#13369): use this flag in tserver.
8888
DEFINE_NON_RUNTIME_uint32(master_ts_ysql_catalog_lease_ms, 10000, // 10s
8989
"Lease period between master and tserver that guarantees YSQL system catalog is not stale."
90-
" Must be higher than --heartbeat_interval_ms, preferrably many times higher.");
90+
" Must be higher than --heartbeat_interval_ms, preferably many times higher.");
9191
TAG_FLAG(master_ts_ysql_catalog_lease_ms, advanced);
9292
TAG_FLAG(master_ts_ysql_catalog_lease_ms, hidden);
9393

@@ -130,8 +130,8 @@ DEFINE_RUNTIME_AUTO_PG_FLAG(
130130
"Requires yb_enable_replication_commands to be true.");
131131

132132
DEFINE_RUNTIME_PG_PREVIEW_FLAG(bool, yb_enable_consistent_replication_from_hash_range, false,
133-
"Enable consumption of consistent changes via replication slots from "
134-
"a hash range of a table.");
133+
"Enable consumption of consistent changes via replication slots from "
134+
"a hash range of a table.");
135135

136136
DEFINE_NON_RUNTIME_PREVIEW_bool(ysql_yb_enable_implicit_dynamic_tables_logical_replication, false,
137137
"When set to true, modifications to publication will be reflected implicitly. "
@@ -163,7 +163,7 @@ DEFINE_NON_RUNTIME_string(placement_zone, "rack1",
163163
"The cloud availability zone in which this instance is started.");
164164

165165
DEFINE_test_flag(bool, check_catalog_version_overflow, false,
166-
"Check whether received catalog version is unreasonably too big");
166+
"Check whether received catalog version is unreasonably too big");
167167

168168
DEFINE_RUNTIME_PG_FLAG(bool, yb_enable_invalidation_messages, true,
169169
"True to enable invalidation messages");
@@ -197,13 +197,13 @@ DEFINE_validator(TEST_ysql_yb_enable_ddl_savepoint_support,
197197
// is relatively new, it is advisable that we have the flag enabled for now. The value can be
198198
// increased once the feature hardens and the above referred issue is resolved.
199199
DEFINE_RUNTIME_uint64(refresh_waiter_timeout_ms, 30000,
200-
"The maximum amount of time a waiter transaction waits in the wait-queue "
201-
"before its callback is invoked. On invocation, the waiter transaction "
202-
"re-runs conflicts resolution and might enter the wait-queue again with "
203-
"updated blocker(s) information. Setting the value to 0 disables "
204-
"automatically re-running conflict resolution due to timeout. It follows "
205-
"that the waiter callback would only be invoked when a blocker txn commits/ "
206-
"aborts/gets promoted.");
200+
"The maximum amount of time a waiter transaction waits in the wait-queue "
201+
"before its callback is invoked. On invocation, the waiter transaction "
202+
"re-runs conflicts resolution and might enter the wait-queue again with "
203+
"updated blocker(s) information. Setting the value to 0 disables "
204+
"automatically re-running conflict resolution due to timeout. It follows "
205+
"that the waiter callback would only be invoked when a blocker txn commits/ "
206+
"aborts/gets promoted.");
207207
TAG_FLAG(refresh_waiter_timeout_ms, advanced);
208208
TAG_FLAG(refresh_waiter_timeout_ms, hidden);
209209

@@ -282,41 +282,40 @@ DEFINE_NON_RUNTIME_bool(use_fast_backward_scan, true,
282282
"Use backward scan optimization to build a row in the reverse order for YSQL.");
283283

284284
DEFINE_RUNTIME_bool(ysql_enable_auto_analyze_service, false,
285-
"Enable the Auto Analyze service which automatically triggers ANALYZE to "
286-
"update table statistics for tables which have changed more than a "
287-
"configurable threshold.");
285+
"Enable the Auto Analyze service which automatically triggers ANALYZE to "
286+
"update table statistics for tables which have changed more than a "
287+
"configurable threshold.");
288288
TAG_FLAG(ysql_enable_auto_analyze_service, experimental);
289289

290290
DEFINE_RUNTIME_AUTO_bool(cdcsdk_enable_dynamic_table_addition_with_table_cleanup,
291-
kLocalPersisted,
292-
false,
293-
true,
294-
"This flag needs to be true in order to support addition of dynamic tables "
295-
"along with removal of not of interest/expired tables from a CDCSDK "
296-
"stream.");
291+
kLocalPersisted,
292+
false,
293+
true,
294+
"This flag needs to be true in order to support addition of dynamic tables "
295+
"along with removal of not of interest/expired tables from a CDCSDK "
296+
"stream.");
297297
TAG_FLAG(cdcsdk_enable_dynamic_table_addition_with_table_cleanup, advanced);
298298

299299
DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_update_optimization_infra, kLocalPersisted, false, true,
300-
"Enables optimizations of YSQL UPDATE queries. This includes "
301-
"(but not limited to) skipping redundant secondary index updates "
302-
"and redundant constraint checks.");
300+
"Enables optimizations of YSQL UPDATE queries. This includes "
301+
"(but not limited to) skipping redundant secondary index updates "
302+
"and redundant constraint checks.");
303303

304304
DEFINE_RUNTIME_PG_FLAG(bool, yb_skip_redundant_update_ops, true,
305-
"Enables the comparison of old and new values of columns specified in the "
306-
"SET clause of YSQL UPDATE queries to skip redundant secondary index "
307-
"updates and redundant constraint checks.");
305+
"Enables the comparison of old and new values of columns specified in the "
306+
"SET clause of YSQL UPDATE queries to skip redundant secondary index "
307+
"updates and redundant constraint checks.");
308308
TAG_FLAG(ysql_yb_skip_redundant_update_ops, advanced);
309309

310-
DEFINE_RUNTIME_bool(cdc_disable_sending_composite_values,
311-
true,
312-
"When this flag is set to true, cdc service will send null values for columns "
313-
"of composite types");
310+
DEFINE_RUNTIME_bool(cdc_disable_sending_composite_values, true,
311+
"When this flag is set to true, cdc service will send null values for columns "
312+
"of composite types");
314313

315-
DEFINE_UNKNOWN_int32(timestamp_history_retention_interval_sec, 900,
316-
"The time interval in seconds to retain DocDB history for. Point-in-time "
317-
"reads at a hybrid time further than this in the past might not be allowed "
318-
"after a compaction. Set this to be higher than the expected maximum duration "
319-
"of any single transaction in your application.");
314+
DEFINE_RUNTIME_int32(timestamp_history_retention_interval_sec, 900,
315+
"The time interval in seconds to retain DocDB history for. Point-in-time "
316+
"reads at a hybrid time further than this in the past might not be allowed "
317+
"after a compaction. Set this to be higher than the expected maximum duration "
318+
"of any single transaction in your application.");
320319

321320
namespace yb {
322321

src/yb/docdb/doc_boundary_values_extractor.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class DocBoundaryValuesExtractor : public rocksdb::BoundaryValuesExtractor {
3939
virtual ~DocBoundaryValuesExtractor() {}
4040

4141
Status Extract(Slice user_key, rocksdb::UserBoundaryValueRefs* values) override {
42-
if (dockv::IsInternalRecordKeyType(dockv::DecodeKeyEntryType(user_key))) {
42+
if (dockv::IsMetaKeyType(dockv::DecodeKeyEntryType(user_key))) {
4343
// Skipping internal DocDB records.
4444
return Status::OK();
4545
}

src/yb/docdb/doc_operation-test.cc

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,7 @@ class DiscardUntilFileFilterFactory : public rocksdb::CompactionFileFilterFactor
135135
// rocksdb_max_file_size_for_compaction flag if it is set to a positive number, and returns
136136
// the max uint64 otherwise. It does NOT take the schema's table TTL into consideration.
137137
auto MakeExcludeFromCompactionFunction() {
138-
using ExcludeFromCompaction = decltype(std::declval<rocksdb::Options>().exclude_from_compaction);
139-
using ExcludeFromCompactionFunction = typename ExcludeFromCompaction::element_type;
140-
141-
return std::make_shared<ExcludeFromCompactionFunction>([](const rocksdb::FileMetaData& file) {
138+
return std::make_shared<rocksdb::CompactionFileExcluder>([](const rocksdb::FileMetaData& file) {
142139
if (FLAGS_rocksdb_max_file_size_for_compaction > 0) {
143140
return file.fd.GetTotalFileSize() > FLAGS_rocksdb_max_file_size_for_compaction;
144141
}

0 commit comments

Comments
 (0)