@@ -198,3 +198,99 @@ message StorageType {
198198 LocalStorage local = 2 ;
199199 }
200200}
201+
// RPC service exposing three query methods: token lookup, BM25 statistics,
// and posting-list scoring. Each request carries a `StorageType` and a
// `FragmentedTextSegmentPaths` identifying the segment to query.
202+ service IncrementalSearchlight {
203+ // Query a set of tokens against the term dictionary, optionally allowing
204+ // for fuzzy matching and prefix matching. Take the top `K` results with
205+ // respect to `(edit distance, term)` lexicographical order.
// NOTE(review): `K` is presumably `QueryTokensRequest.max_results` - confirm.
206+ rpc QueryTokens (QueryTokensRequest ) returns (QueryTokensResponse );
207+
208+ // For the given index, compute the total number of documents and terms
209+ // in the index. Also, given a list of pointers to terms within the index,
210+ // compute the document frequency of each term.
211+ rpc QueryBm25Stats (QueryBm25StatsRequest ) returns (QueryBm25StatsResponse );
212+
213+ // Given an AND + OR query of term pointers and BM25 statistics for the OR
214+ // terms, return the top `K` results with respect to BM25 score.
// NOTE(review): `K` is presumably `PostingListQuery.max_results` - confirm.
215+ rpc QueryPostingLists (QueryPostingListsRequest ) returns (QueryPostingListsResponse );
216+ }
217+
// Request message for the `IncrementalSearchlight.QueryTokens` RPC.
218+ message QueryTokensRequest {
219+ StorageType storage_type = 1 ; // Storage backend selector (`StorageType` is declared earlier in this file).
220+ FragmentedTextSegmentPaths segment = 2 ; // Storage keys of the segment to query.
221+ SearchIndexConfig index_config = 3 ; // `SearchIndexConfig` is declared outside this section.
222+ repeated TokenQuery token_queries = 4 ; // Token queries; `TokenMatch.token_ord` is an offset into this list.
223+ uint32 max_results = 5 ; // Presumably the `K` in the `QueryTokens` RPC comment - TODO confirm.
224+ }
225+
// Groups the three `StorageKey`s that accompany every request in this
// service (`QueryTokensRequest`, `QueryBm25StatsRequest`,
// `QueryPostingListsRequest`). `StorageKey` is declared outside this section.
// NOTE(review): presumably these locate a text segment's index data, its ID
// tracker, and its deletion records in the selected storage - confirm.
226+ message FragmentedTextSegmentPaths {
227+ StorageKey segment = 1 ;
228+ StorageKey id_tracker = 2 ;
229+ StorageKey deletions = 3 ;
230+ }
231+
// One entry of `QueryTokensRequest.token_queries`.
232+ message TokenQuery {
233+ convex_token.FieldPath field_path = 1 ; // `FieldPath` comes from the `convex_token` package (not shown here).
234+ bytes token = 2 ; // Raw token bytes; encoding is not specified in this section.
235+ uint32 max_distance = 3 ; // Presumably the maximum edit distance allowed for fuzzy matching - TODO confirm.
236+ bool prefix = 4 ; // Presumably enables prefix matching for this token - TODO confirm.
237+ }
238+
// Response message for the `IncrementalSearchlight.QueryTokens` RPC.
// NOTE(review): the only field uses number 2 and number 1 is unused. If a
// field was removed, consider declaring `reserved 1;` - confirm intent.
239+ message QueryTokensResponse {
240+ repeated TokenMatch token_matches = 2 ;
241+ }
242+
// One entry of `QueryTokensResponse.token_matches`.
243+ message TokenMatch {
244+ uint32 distance = 1 ; // Presumably the edit distance between the query token and the matched term - TODO confirm.
245+ bool prefix = 2 ; // Presumably whether the term matched as a prefix - TODO confirm.
246+ bytes tantivy_bytes = 3 ; // Presumably the matched term in Tantivy's byte form - TODO confirm.
247+ // Offset into `QueryTokensRequest.token_queries`.
248+ uint32 token_ord = 4 ;
249+ }
250+
// Request message for the `IncrementalSearchlight.QueryBm25Stats` RPC.
251+ message QueryBm25StatsRequest {
252+ StorageType storage_type = 1 ; // Storage backend selector (`StorageType` is declared earlier in this file).
253+ FragmentedTextSegmentPaths segment = 2 ; // Storage keys of the segment to query.
254+ repeated bytes terms = 3 ; // Per the RPC comment: pointers to terms whose document frequency is computed.
255+ }
256+
// Response message for the `IncrementalSearchlight.QueryBm25Stats` RPC.
// Per the RPC comment it carries the index-wide term and document totals
// plus the document frequency of each requested term.
257+ message QueryBm25StatsResponse {
258+ uint64 num_terms = 1 ;
259+ uint64 num_documents = 2 ;
260+ repeated DocFrequency doc_frequencies = 3 ;
261+ }
262+
// Pairs a term with a frequency count. Used by
// `QueryBm25StatsResponse.doc_frequencies` and
// `PostingListQuery.doc_frequencies`.
263+ message DocFrequency {
264+ bytes term = 1 ; // Presumably the same byte form as `QueryBm25StatsRequest.terms` - TODO confirm.
265+ uint64 frequency = 2 ; // Per the RPC comment: the document frequency of `term`.
266+ }
267+
// Request message for the `IncrementalSearchlight.QueryPostingLists` RPC.
268+ message QueryPostingListsRequest {
269+ StorageType storage_type = 1 ; // Storage backend selector (`StorageType` is declared earlier in this file).
270+ FragmentedTextSegmentPaths segment = 2 ; // Storage keys of the segment to query.
271+ PostingListQuery query = 3 ; // The AND + OR term query and its BM25 statistics.
272+ }
273+
// Query payload of `QueryPostingListsRequest`. Per the `QueryPostingLists`
// RPC comment: an AND + OR query of term pointers, with BM25 statistics for
// the OR terms.
274+ message PostingListQuery {
275+ repeated bytes deleted_internal_ids = 1 ; // Presumably IDs to exclude from the results - TODO confirm.
276+
277+ repeated bytes or_terms = 2 ;
278+ repeated bytes and_terms = 3 ;
279+
// These three fields share names and types with `QueryBm25StatsResponse`.
// NOTE(review): presumably populated from a prior `QueryBm25Stats` call - confirm.
280+ uint64 num_terms = 4 ;
281+ uint64 num_documents = 5 ;
282+ repeated DocFrequency doc_frequencies = 6 ;
283+
284+ uint32 max_results = 7 ; // Presumably the `K` in the `QueryPostingLists` RPC comment - TODO confirm.
285+ }
286+
// Response message for the `IncrementalSearchlight.QueryPostingLists` RPC.
287+ message QueryPostingListsResponse {
288+ repeated PostingListMatch matches = 1 ; // Per the RPC comment: the top results with respect to BM25 score.
289+ }
290+
// One entry of `QueryPostingListsResponse.matches`.
291+ message PostingListMatch {
292+ bytes internal_id = 1 ; // Presumably the same ID form as `PostingListQuery.deleted_internal_ids` - TODO confirm.
293+ uint64 ts = 2 ; // Presumably a timestamp; units are not specified in this section - TODO confirm.
294+ double creation_time = 3 ; // Units and epoch are not specified in this section - TODO confirm.
295+ float bm25_score = 4 ; // BM25 score by which `QueryPostingLists` ranks results.
296+ }
0 commit comments