yugabyte
diff --git a/‎src/yb/docdb/bounded_rocksdb_iterator.cc‎
Lines changed: 2 additions & 2 deletions b/‎src/yb/docdb/bounded_rocksdb_iterator.cc‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/yb/docdb/bounded_rocksdb_iterator.h‎
Lines changed: 1 addition & 1 deletion b/‎src/yb/docdb/bounded_rocksdb_iterator.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/yb/docdb/conflict_resolution.cc‎
Lines changed: 1 addition & 1 deletion b/‎src/yb/docdb/conflict_resolution.cc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/yb/docdb/doc_operation-test.cc‎
Lines changed: 1 addition & 1 deletion b/‎src/yb/docdb/doc_operation-test.cc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/yb/docdb/doc_read_context.cc‎
Lines changed: 12 additions & 0 deletions b/‎src/yb/docdb/doc_read_context.cc‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎src/yb/docdb/doc_read_context.h‎
Lines changed: 3 additions & 0 deletions b/‎src/yb/docdb/doc_read_context.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/yb/docdb/doc_rowwise_iterator.cc‎
Lines changed: 47 additions & 34 deletions b/‎src/yb/docdb/doc_rowwise_iterator.cc‎
Lines changed: 47 additions & 34 deletions
diff --git a/‎src/yb/docdb/doc_rowwise_iterator.h‎
Lines changed: 9 additions & 11 deletions b/‎src/yb/docdb/doc_rowwise_iterator.h‎
Lines changed: 9 additions & 11 deletions
diff --git a/‎src/yb/docdb/docdb_fwd.h‎
Lines changed: 1 addition & 1 deletion b/‎src/yb/docdb/docdb_fwd.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/yb/docdb/docdb_rocksdb_util.cc‎
Lines changed: 14 additions & 0 deletions b/‎src/yb/docdb/docdb_rocksdb_util.cc‎
Lines changed: 14 additions & 0 deletions
@@ -96,8 +96,8 @@ void BoundedRocksDbIterator::UseFastNext(bool value) {
   iterator_->UseFastNext(value);
 }
 
-void BoundedRocksDbIterator::UpdateFilterKey(Slice user_key_for_filter) {
-  iterator_->UpdateFilterKey(user_key_for_filter);
+void BoundedRocksDbIterator::UpdateFilterKey(Slice user_key_for_filter, Slice seek_key) {
+  iterator_->UpdateFilterKey(user_key_for_filter, seek_key);
 }
 
 }  // namespace yb::docdb
@@ -71,7 +71,7 @@ class BoundedRocksDbIterator final : public rocksdb::Iterator {
   }
 
   void UseFastNext(bool value) override;
-  void UpdateFilterKey(Slice user_key_for_filter) override;
+  void UpdateFilterKey(Slice user_key_for_filter, Slice seek_key) override;
 
  private:
   const rocksdb::KeyValueEntry& FilterEntry(const rocksdb::KeyValueEntry& entry) const;
 
@@ -968,7 +968,7 @@ class StrongConflictChecker {
           /* iterate_upper_bound = */ nullptr,
           rocksdb::CacheRestartBlockKeys::kFalse);
     }
-    value_iter_.UpdateFilterKey(intent_key);
+    value_iter_.UpdateFilterKey(intent_key, Slice());
     const auto* entry = &value_iter_.Seek(intent_key);
 
     VLOG_WITH_PREFIX_AND_FUNC(4)
 
@@ -614,7 +614,7 @@ SubDocKey(DocKey(0x0000, [100], []), [ColumnId(3); HT{ physical: 0 logical: 3000
   DocRowwiseIterator iter(
       projection, doc_read_context, kNonTransactionalOperationContext, doc_db(),
       ReadOperationData::FromReadTime(ReadHybridTime::FromUint64(3000)), pending_op);
-  iter.InitForTableType(YQL_TABLE_TYPE);
+  ASSERT_OK(iter.InitForTableType(YQL_TABLE_TYPE));
   ASSERT_FALSE(ASSERT_RESULT(iter.FetchNext(nullptr)));
 
   // Now verify row exists even with one valid column.
 
@@ -15,6 +15,7 @@
 
 #include "yb/common/ql_type.h"
 
+#include "yb/dockv/doc_key.h"
 #include "yb/dockv/value_type.h"
 
 #include "yb/util/logging.h"
@@ -154,6 +155,17 @@ void DocReadContext::UpdateKeyPrefix() {
   }
 }
 
+Result<bool> DocReadContext::HaveEqualBloomFilterKey(Slice lhs, Slice rhs) const {
+  return dockv::HashedOrFirstRangeComponentsEqual(lhs, rhs);
+}
+
+size_t DocReadContext::NumColumnsUsedByBloomFilterKey() const {
+  // If there are hash columns, then we include the hash code; otherwise the bloom filter
+  // picks the first range component.
+  // So the number of columns used by the bloom filter is always num hash columns + 1.
+  return schema_.num_hash_key_columns() + 1;
+}
+
 DocReadContext DocReadContext::TEST_Create(const Schema& schema) {
   static const auto registry = std::make_shared<dockv::SchemaPackingRegistry>("TEST: ");
   return DocReadContext(
 
@@ -100,6 +100,9 @@ struct DocReadContext {
     return Slice(shared_key_prefix_buffer_.data(), table_key_prefix_len_);
   }
 
+  Result<bool> HaveEqualBloomFilterKey(Slice lhs, Slice rhs) const;
+  size_t NumColumnsUsedByBloomFilterKey() const;
+
   void TEST_SetDefaultTimeToLive(uint64_t ttl_msec) {
     schema_.SetDefaultTimeToLive(ttl_msec);
   }
 
@@ -136,13 +136,13 @@ void DocRowwiseIterator::SetSchema(const Schema& schema) {
   schema_ = &schema;
 }
 
-void DocRowwiseIterator::InitForTableType(
+Status DocRowwiseIterator::InitForTableType(
     TableType table_type, Slice sub_doc_key, SkipSeek skip_seek,
     AddTablePrefixToKey add_table_prefix_to_key) {
   CheckInitOnce();
   table_type_ = table_type;
   ignore_ttl_ = (table_type_ == TableType::PGSQL_TABLE_TYPE);
-  InitIterator(BloomFilterOptions::Inactive());
+  RETURN_NOT_OK(InitIterator(BloomFilterOptions::Inactive()));
 
   if (sub_doc_key.empty() || add_table_prefix_to_key) {
     dockv::DocKeyEncoder(&row_key_).Schema(*schema_);
@@ -154,11 +154,13 @@ void DocRowwiseIterator::InitForTableType(
   has_bound_key_ = false;
 
   scan_choices_ = ScanChoices::CreateEmpty();
+
+  return Status::OK();
 }
 
 Status DocRowwiseIterator::Init(
     const qlexpr::YQLScanSpec& doc_spec, SkipSeek skip_seek,
-    UseVariableBloomFilter use_variable_bloom_filter) {
+    AllowVariableBloomFilter allow_variable_bloom_filter) {
   table_type_ = doc_spec.client_type() == YQL_CLIENT_CQL ? TableType::YQL_TABLE_TYPE
                                                          : TableType::PGSQL_TABLE_TYPE;
   ignore_ttl_ = table_type_ == TableType::PGSQL_TABLE_TYPE;
@@ -172,17 +174,6 @@ Status DocRowwiseIterator::Init(
   VLOG(2) << "DocKey Bounds " << DocKey::DebugSliceToString(bounds.lower.AsSlice()) << ", "
           << DocKey::DebugSliceToString(bounds.upper.AsSlice());
 
-  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
-  const bool is_fixed_point_get =
-      !bounds.lower.empty() &&
-      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(bounds.lower, bounds.upper));
-  auto bloom_filter = BloomFilterOptions::Inactive();
-  if (is_fixed_point_get) {
-    bloom_filter = BloomFilterOptions::Fixed(bounds.lower.AsSlice());
-  } else if (use_variable_bloom_filter) {
-    bloom_filter = BloomFilterOptions::Variable();
-  }
-
   if (is_forward_scan_) {
     has_bound_key_ = !bounds.upper.empty();
     if (has_bound_key_) {
@@ -206,17 +197,21 @@ Status DocRowwiseIterator::Init(
     }
   }
 
-  InitIterator(bloom_filter, doc_spec.QueryId(), CreateFileFilter(doc_spec));
-
   if (has_bound_key_) {
     if (is_forward_scan_) {
       bounds.upper = bound_key_;
     } else {
       bounds.lower = bound_key_;
     }
   }
-  scan_choices_ = ScanChoices::Create(
-      *schema_, doc_spec, bounds, doc_read_context_.table_key_prefix());
+
+  scan_choices_ = VERIFY_RESULT(ScanChoices::Create(
+      doc_read_context_, doc_spec, bounds, doc_read_context_.table_key_prefix(),
+      allow_variable_bloom_filter));
+
+  RETURN_NOT_OK(InitIterator(
+      scan_choices_->BloomFilterOptions(), doc_spec.QueryId(), CreateFileFilter(doc_spec)));
+
   if (!skip_seek) {
     if (is_forward_scan_) {
       Seek(bounds.lower);
@@ -314,7 +309,7 @@ Slice DocRowwiseIterator::GetRowKey() const {
   return row_key_;
 }
 
-void DocRowwiseIterator::SeekTuple(Slice tuple_id) {
+void DocRowwiseIterator::SeekTuple(Slice tuple_id, docdb::UpdateFilterKey update_filter_key) {
   // If cotable id / colocation id is present in the table schema, then
   // we need to prepend it in the tuple key to seek.
   if (schema_->has_cotable_id() || schema_->has_colocation_id()) {
@@ -338,7 +333,9 @@ void DocRowwiseIterator::SeekTuple(Slice tuple_id) {
     tuple_key_->AppendRawBytes(tuple_id);
     tuple_id = *tuple_key_;
   }
-  UpdateFilterKey(tuple_id);
+  if (update_filter_key) {
+    UpdateFilterKey(tuple_id);
+  }
   Seek(tuple_id);
 
   row_key_.Clear();
@@ -505,7 +502,7 @@ Result<DocHybridTime> DocRowwiseIterator::GetTableTombstoneTime(Slice root_doc_k
   return doc_ht;
 }
 
-void DocRowwiseIterator::InitIterator(
+Status DocRowwiseIterator::InitIterator(
     const BloomFilterOptions& bloom_filter,
     const rocksdb::QueryId query_id,
     std::shared_ptr<rocksdb::ReadFileFilter> file_filter) {
@@ -534,16 +531,23 @@ void DocRowwiseIterator::InitIterator(
       FastBackwardScan{use_fast_backward_scan_});
   InitResult();
 
-  auto prefix = shared_key_prefix();
-  if (is_forward_scan_ && has_bound_key_ &&
-      bound_key_.data().data()[0] != dockv::KeyEntryTypeAsChar::kHighest) {
-    DCHECK(bound_key_.AsSlice().starts_with(prefix))
-        << "Bound key: " << bound_key_.AsSlice().ToDebugHexString()
-        << ", prefix: " << prefix.ToDebugHexString();
-    upperbound_scope_.emplace(bound_key_, db_iter_.get());
-  } else {
-    DCHECK(!upperbound().empty());
-    upperbound_scope_.emplace(upperbound(), db_iter_.get());
+  const auto scan_choices_has_upperbound =
+      scan_choices_ &&
+      VERIFY_RESULT(scan_choices_->PrepareIterator(
+          *db_iter_, doc_read_context_.table_key_prefix()));
+
+  if (!scan_choices_has_upperbound) {
+    auto prefix = shared_key_prefix();
+    if (is_forward_scan_ && has_bound_key_ &&
+        bound_key_.data().data()[0] != dockv::KeyEntryTypeAsChar::kHighest) {
+      DCHECK(bound_key_.AsSlice().starts_with(prefix))
+          << "Bound key: " << bound_key_.AsSlice().ToDebugHexString()
+          << ", prefix: " << prefix.ToDebugHexString();
+      upperbound_scope_.emplace(bound_key_, db_iter_.get());
+    } else {
+      DCHECK(!upperbound().empty());
+      upperbound_scope_.emplace(upperbound(), db_iter_.get());
+    }
   }
 
   if (use_fast_backward_scan_) {
@@ -557,6 +561,7 @@ void DocRowwiseIterator::InitIterator(
   }
 
   VLOG_WITH_FUNC(4) << "Initialization done";
+  return Status::OK();
 }
 
 void DocRowwiseIterator::ConfigureForYsql() {
@@ -580,11 +585,13 @@ void DocRowwiseIterator::Refresh(SeekFilter seek_filter) {
 }
 
 void DocRowwiseIterator::UpdateFilterKey(Slice user_key_for_filter) {
+  DCHECK(!scan_choices_ || scan_choices_->BloomFilterOptions().mode() != BloomFilterMode::kInactive)
+      << "Mode: " << scan_choices_->BloomFilterOptions().mode();
   db_iter_->UpdateFilterKey(user_key_for_filter);
 }
 
 void DocRowwiseIterator::Seek(Slice key) {
-  VLOG_WITH_FUNC(3) << " Seeking to " << key << "/" << dockv::DocKey::DebugSliceToString(key);
+  VLOG_WITH_FUNC(3) << key << "/" << dockv::DocKey::DebugSliceToString(key);
 
   DCHECK(!done_);
 
@@ -630,7 +637,7 @@ inline void DocRowwiseIterator::SeekPrevDocKey(Slice key) {
 Status DocRowwiseIterator::AdvanceIteratorToNextDesiredRow(bool row_finished,
                                                            bool current_fetched_row_skipped) {
   if (seek_filter_ == SeekFilter::kAll && !IsFetchedRowStatic() &&
-      VERIFY_RESULT(scan_choices_->AdvanceToNextRow(&row_key_, db_iter_.get(),
+      VERIFY_RESULT(scan_choices_->AdvanceToNextRow(&row_key_, *db_iter_,
                                                     current_fetched_row_skipped))) {
     return Status::OK();
   }
@@ -702,6 +709,12 @@ Result<bool> DocRowwiseIterator::FetchNextImpl(TableRow table_row) {
 
     const auto& key_data = VERIFY_RESULT_REF(db_iter_->Fetch());
     if (!key_data) {
+      // It could happen that the iterator did not find anything because of the upper bound limit
+      // from scan choices. So we need to update it and retry.
+      if (seek_filter_ == SeekFilter::kAll && !IsFetchedRowStatic() &&
+          VERIFY_RESULT(scan_choices_->AdvanceToNextRow(nullptr, *db_iter_, true))) {
+        continue;
+      }
       done_ = true;
       return false;
     }
@@ -744,7 +757,7 @@ Result<bool> DocRowwiseIterator::FetchNextImpl(TableRow table_row) {
 
     bool is_static_column = IsFetchedRowStatic();
     if (!is_static_column &&
-        !VERIFY_RESULT(scan_choices_->InterestedInRow(&row_key_, db_iter_.get()))) {
+        !VERIFY_RESULT(scan_choices_->InterestedInRow(&row_key_, *db_iter_))) {
       continue;
     }
 
 
@@ -41,8 +41,7 @@
 #include "yb/util/operation_counter.h"
 #include "yb/util/status_fwd.h"
 
-namespace yb {
-namespace docdb {
+namespace yb::docdb {
 
 YB_STRONGLY_TYPED_BOOL(AddTablePrefixToKey);
 
@@ -71,14 +70,13 @@ class DocRowwiseIterator final : public YQLRowwiseIteratorIf {
   void SetSchema(const Schema& schema);
 
   // Init scan iterator.
-  void InitForTableType(
+  Status InitForTableType(
       TableType table_type, Slice sub_doc_key = Slice(), SkipSeek skip_seek = SkipSeek::kFalse,
       AddTablePrefixToKey add_table_prefix_to_key = AddTablePrefixToKey::kFalse);
   // Init QL read scan.
   Status Init(
-      const qlexpr::YQLScanSpec& spec,
-      SkipSeek skip_seek = SkipSeek::kFalse,
-      UseVariableBloomFilter use_variable_bloom_filter = UseVariableBloomFilter::kFalse);
+      const qlexpr::YQLScanSpec& spec, SkipSeek skip_seek = SkipSeek::kFalse,
+      AllowVariableBloomFilter allow_variable_bloom_filter = AllowVariableBloomFilter::kFalse);
 
   bool IsFetchedRowStatic() const override;
 
@@ -90,7 +88,8 @@ class DocRowwiseIterator final : public YQLRowwiseIteratorIf {
 
   // Seeks to the given tuple by its id. The tuple id should be the serialized DocKey and without
   // the cotable id.
-  void SeekTuple(Slice tuple_id) override;
+  void SeekTuple(
+      Slice tuple_id, UpdateFilterKey update_filter_key = UpdateFilterKey::kTrue) override;
 
   // Returns true if tuple was fetched, false otherwise.
   Result<bool> FetchTuple(Slice tuple_id, qlexpr::QLTableRow* row) override;
@@ -174,7 +173,7 @@ class DocRowwiseIterator final : public YQLRowwiseIteratorIf {
   Slice shared_key_prefix() const;
   Slice upperbound() const;
 
-  void InitIterator(
+  Status InitIterator(
       const BloomFilterOptions& bloom_filter,
       const rocksdb::QueryId query_id = rocksdb::kDefaultQueryId,
       std::shared_ptr<rocksdb::ReadFileFilter> file_filter = nullptr);
@@ -241,6 +240,7 @@ class DocRowwiseIterator final : public YQLRowwiseIteratorIf {
   bool has_bound_key_ = false;
   dockv::KeyBytes bound_key_;
 
+  std::unique_ptr<IntentAwareIterator> db_iter_;
   std::unique_ptr<ScanChoices> scan_choices_;
 
   // We keep the "pending operation" counter incremented for the lifetime of this iterator so that
@@ -276,7 +276,6 @@ class DocRowwiseIterator final : public YQLRowwiseIteratorIf {
   size_t obsolete_keys_found_ = 0;
   size_t obsolete_keys_found_past_cutoff_ = 0;
 
-  std::unique_ptr<IntentAwareIterator> db_iter_;
   KeyBuffer prefix_buffer_;
   std::optional<IntentAwareIteratorUpperboundScope> upperbound_scope_;
   std::optional<IntentAwareIteratorLowerboundScope> lowerbound_scope_;
@@ -300,5 +299,4 @@ class DocRowwiseIterator final : public YQLRowwiseIteratorIf {
   SeekFilter seek_filter_ = SeekFilter::kAll;
 };
 
-}  // namespace docdb
-}  // namespace yb
+}  // namespace yb::docdb
@@ -100,9 +100,9 @@ using DocVectorIndexesPtr = std::shared_ptr<DocVectorIndexes>;
 using DocVectorIndexInsertEntries = std::vector<DocVectorIndexInsertEntry>;
 
 YB_STRONGLY_TYPED_BOOL(FastBackwardScan);
+YB_STRONGLY_TYPED_BOOL(AllowVariableBloomFilter);
 YB_STRONGLY_TYPED_BOOL(IncludeIntents);
 YB_STRONGLY_TYPED_BOOL(SkipFlush);
 YB_STRONGLY_TYPED_BOOL(SkipSeek);
-YB_STRONGLY_TYPED_BOOL(UseVariableBloomFilter);
 
 }  // namespace yb::docdb
@@ -26,6 +26,7 @@
 #include "yb/docdb/bounded_rocksdb_iterator.h"
 #include "yb/docdb/consensus_frontier.h"
 #include "yb/docdb/doc_ql_filefilter.h"
+#include "yb/docdb/doc_read_context.h"
 #include "yb/docdb/docdb_filter_policy.h"
 #include "yb/docdb/docdb_statistics.h"
 #include "yb/docdb/intent_aware_iterator.h"
@@ -1107,5 +1108,18 @@ std::shared_ptr<rocksdb::RateLimiter> CreateRocksDBRateLimiter() {
   return nullptr;
 }
 
+Result<BloomFilterOptions> BloomFilterOptions::Make(
+    const DocReadContext& doc_read_context, Slice lower, Slice upper, bool allow_variable) {
+  const bool is_fixed_point_get =
+      !lower.empty() && VERIFY_RESULT(doc_read_context.HaveEqualBloomFilterKey(lower, upper));
+  if (is_fixed_point_get) {
+    return BloomFilterOptions::Fixed(lower);
+  }
+  if (allow_variable) {
+    return BloomFilterOptions::Variable();
+  }
+  return BloomFilterOptions::Inactive();
+}
+
 } // namespace docdb
 } // namespace yb
Original file line number	Diff line number	Diff line change
`@@ -96,8 +96,8 @@ void BoundedRocksDbIterator::UseFastNext(bool value) {`
`96`	`96`	`iterator_->UseFastNext(value);`
`97`	`97`	`}`
`98`	`98`
`99`		`-void BoundedRocksDbIterator::UpdateFilterKey(Slice user_key_for_filter) {`
`100`		`- iterator_->UpdateFilterKey(user_key_for_filter);`
	`99`	`+void BoundedRocksDbIterator::UpdateFilterKey(Slice user_key_for_filter, Slice seek_key) {`
	`100`	`+ iterator_->UpdateFilterKey(user_key_for_filter, seek_key);`
`101`	`101`	`}`
`102`	`102`
`103`	`103`	`} // namespace yb::docdb`
Original file line number	Diff line number	Diff line change
`@@ -71,7 +71,7 @@ class BoundedRocksDbIterator final : public rocksdb::Iterator {`
`71`	`71`	`}`
`72`	`72`
`73`	`73`	`void UseFastNext(bool value) override;`
`74`		`- void UpdateFilterKey(Slice user_key_for_filter) override;`
	`74`	`+ void UpdateFilterKey(Slice user_key_for_filter, Slice seek_key) override;`
`75`	`75`
`76`	`76`	`private:`
`77`	`77`	`const rocksdb::KeyValueEntry& FilterEntry(const rocksdb::KeyValueEntry& entry) const;`
Original file line number	Diff line number	Diff line change
`@@ -968,7 +968,7 @@ class StrongConflictChecker {`
`968`	`968`	`/* iterate_upper_bound = */ nullptr,`
`969`	`969`	`rocksdb::CacheRestartBlockKeys::kFalse);`
`970`	`970`	`}`
`971`		`- value_iter_.UpdateFilterKey(intent_key);`
	`971`	`+ value_iter_.UpdateFilterKey(intent_key, Slice());`
`972`	`972`	`const auto* entry = &value_iter_.Seek(intent_key);`
`973`	`973`
`974`	`974`	`VLOG_WITH_PREFIX_AND_FUNC(4)`
Original file line number	Diff line number	Diff line change
`@@ -100,6 +100,9 @@ struct DocReadContext {`
`100`	`100`	`return Slice(shared_key_prefix_buffer_.data(), table_key_prefix_len_);`
`101`	`101`	`}`
`102`	`102`
	`103`	`+ Result<bool> HaveEqualBloomFilterKey(Slice lhs, Slice rhs) const;`
	`104`	`+ size_t NumColumnsUsedByBloomFilterKey() const;`
	`105`	`+`
`103`	`106`	`void TEST_SetDefaultTimeToLive(uint64_t ttl_msec) {`
`104`	`107`	`schema_.SetDefaultTimeToLive(ttl_msec);`
`105`	`108`	`}`