|
| 1 | +/* |
| 2 | + * Copyright 2026-present Alibaba Inc. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +#pragma once |
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
| 22 | + |
| 23 | +#include "paimon/predicate/predicate.h" |
| 24 | +#include "paimon/visibility.h" |
| 25 | + |
| 26 | +namespace paimon { |
| 27 | +/// `VectorSearch` to perform vector similarity search. |
| 28 | +struct PAIMON_EXPORT VectorSearch { |
| 29 | + /// `PreFilter`: A lightweight pre-filtering function applied **before** similarity |
| 30 | + /// scoring. It operates solely on **local row ids** and is typically driven by other global |
| 31 | + /// index, such as bitmap, or range index. This filter enables early pruning of irrelevant |
| 32 | + /// candidates (e.g., "only consider rows with label X"), significantly reducing the search |
| 33 | + /// space. Returns true to include the row in vector search process; false to exclude it. |
| 34 | + /// |
| 35 | + /// @note Must be thread-safe. |
| 36 | + using PreFilter = std::function<bool(int64_t)>; |
| 37 | + |
| 38 | + VectorSearch(const std::string& _field_name, int32_t _limit, const std::vector<float>& _query, |
| 39 | + PreFilter _pre_filter, const std::shared_ptr<Predicate>& _predicate) |
| 40 | + : field_name(_field_name), |
| 41 | + limit(_limit), |
| 42 | + query(_query), |
| 43 | + pre_filter(_pre_filter), |
| 44 | + predicate(_predicate) {} |
| 45 | + |
| 46 | + std::shared_ptr<VectorSearch> ReplacePreFilter(PreFilter _pre_filter) const { |
| 47 | + return std::make_shared<VectorSearch>(field_name, limit, query, _pre_filter, predicate); |
| 48 | + } |
| 49 | + |
| 50 | + /// Search field name. |
| 51 | + std::string field_name; |
| 52 | + /// Number of top results to return. |
| 53 | + int32_t limit; |
| 54 | + /// The query vector (must match the dimensionality of the indexed vectors). |
| 55 | + std::vector<float> query; |
| 56 | + /// A pre-filter based on **local row ids**, implemented by leveraging other global index |
| 57 | + std::function<bool(int64_t)> pre_filter; |
| 58 | + /// A runtime filtering condition that may involve graph traversal of |
| 59 | + /// structured attributes. **Using this parameter often yields better |
| 60 | + /// filtering accuracy** because during index construction, the underlying |
| 61 | + /// graph was built with explicit consideration of field connectivity (e.g., |
| 62 | + /// relationships between attributes). As a result, predicates can leverage |
| 63 | + /// this pre-established semantic structure to perform more meaningful and |
| 64 | + /// context-aware filtering at query time. |
| 65 | + /// @note All fields referenced in the predicate must have been materialized |
| 66 | + /// in the index during build to ensure availability. |
| 67 | + std::shared_ptr<Predicate> predicate; |
| 68 | +}; |
| 69 | +} // namespace paimon |
0 commit comments