|
2 | 2 |
|
3 | 3 | import logging |
4 | 4 | from time import monotonic, sleep |
5 | | -from typing import Any, Callable, Dict, List, Optional, Union |
| 5 | +from typing import Any, Callable, Dict, List, Optional |
6 | 6 |
|
7 | 7 | from pymongo.collection import Collection |
8 | | -from pymongo.operations import SearchIndexModel |
| 8 | +from pymongo_search_utils import ( |
| 9 | + create_fulltext_search_index, # noqa: F401 |
| 10 | + create_vector_search_index, # noqa: F401 |
| 11 | + drop_vector_search_index, # noqa: F401 |
| 12 | + update_vector_search_index, # noqa: F401 |
| 13 | +) |
9 | 14 |
|
# Name the logger after the module's import path (not its file path) so it
# joins the package's dotted logger hierarchy and inherits its configuration.
logger = logging.getLogger(__name__)
11 | 16 |
|
@@ -37,139 +42,6 @@ def _vector_search_index_definition( |
37 | 42 | return definition |
38 | 43 |
|
39 | 44 |
|
def create_vector_search_index(
    collection: Collection,
    index_name: str,
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[str]] = None,
    vector_index_options: dict | None = None,
    *,
    wait_until_complete: Optional[float] = None,
    **kwargs: Any,
) -> None:
    """Create a vector search index on a collection (experimental utility).

    The collection is created first when it is not among the database's
    authorized collection names.

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        dimensions (int): Number of dimensions in embedding
        path (str): field with vector embedding
        similarity (str): The similarity score used for the index
        filters (List[str]): Fields/paths to index to allow filtering in $vectorSearch
        vector_index_options (dict | None): Passed through unchanged to
            ``_vector_search_index_definition``.
        wait_until_complete (Optional[float]): If provided (and non-zero), number
            of seconds to wait until search index is ready.
        kwargs: Additional keyword arguments forwarded to
            ``_vector_search_index_definition`` when building the definition.
    """
    logger.info("Creating Search Index %s on %s", index_name, collection.name)

    # Search indexes can only be attached to an existing collection.
    database = collection.database
    known_collections = database.list_collection_names(authorizedCollections=True)
    if collection.name not in known_collections:
        database.create_collection(collection.name)

    index_definition = _vector_search_index_definition(
        dimensions=dimensions,
        path=path,
        similarity=similarity,
        filters=filters,
        vector_index_options=vector_index_options,
        **kwargs,
    )
    index_model = SearchIndexModel(
        definition=index_definition,
        name=index_name,
        type="vectorSearch",
    )
    result = collection.create_search_index(index_model)

    if wait_until_complete:

        def _ready() -> bool:
            return _is_index_ready(collection, index_name)

        _wait_for_predicate(
            predicate=_ready,
            err=f"{index_name=} did not complete in {wait_until_complete}!",
            timeout=wait_until_complete,
        )
    logger.info(result)
94 | | - |
95 | | - |
def drop_vector_search_index(
    collection: Collection,
    index_name: str,
    *,
    wait_until_complete: Optional[float] = None,
) -> None:
    """Drop a created vector search index

    Args:
        collection (Collection): MongoDB Collection with index to be dropped
        index_name (str): Name of the MongoDB index
        wait_until_complete (Optional[float]): If provided (and non-zero), number
            of seconds to wait until the index no longer appears among the
            collection's search indexes.
    """
    logger.info(
        "Dropping Search Index %s from Collection: %s", index_name, collection.name
    )
    collection.drop_search_index(index_name)
    if wait_until_complete:

        def _index_gone() -> bool:
            # Look only for the index being dropped: other search indexes on
            # the same collection must not keep this wait from finishing.
            return not list(collection.list_search_indexes(name=index_name))

        _wait_for_predicate(
            predicate=_index_gone,
            err=f"Index {index_name} did not drop in {wait_until_complete}!",
            timeout=wait_until_complete,
        )
    logger.info("Vector Search index %s.%s dropped", collection.name, index_name)
121 | | - |
122 | | - |
def update_vector_search_index(
    collection: Collection,
    index_name: str,
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[str]] = None,
    vector_index_options: dict | None = None,
    *,
    wait_until_complete: Optional[float] = None,
    **kwargs: Any,
) -> None:
    """Update a search index.

    The existing index definition is replaced with one built from the
    arguments given here.

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        dimensions (int): Number of dimensions in embedding
        path (str): field with vector embedding
        similarity (str): The similarity score used for the index.
        filters (List[str]): Fields/paths to index to allow filtering in $vectorSearch
        vector_index_options (dict | None): Passed through unchanged to
            ``_vector_search_index_definition``.
        wait_until_complete (Optional[float]): If provided (and non-zero), number
            of seconds to wait until search index is ready.
        kwargs: Additional keyword arguments forwarded to
            ``_vector_search_index_definition`` when building the definition.
    """
    logger.info(
        "Updating Search Index %s from Collection: %s", index_name, collection.name
    )

    new_definition = _vector_search_index_definition(
        dimensions=dimensions,
        path=path,
        similarity=similarity,
        filters=filters,
        vector_index_options=vector_index_options,
        **kwargs,
    )
    collection.update_search_index(name=index_name, definition=new_definition)

    if wait_until_complete:

        def _ready() -> bool:
            return _is_index_ready(collection, index_name)

        _wait_for_predicate(
            predicate=_ready,
            err=f"Index {index_name} update did not complete in {wait_until_complete}!",
            timeout=wait_until_complete,
        )
    logger.info("Update succeeded")
171 | | - |
172 | | - |
173 | 45 | def _is_index_ready(collection: Collection, index_name: str) -> bool: |
174 | 46 | """Check for the index name in the list of available search indexes to see if the |
175 | 47 | specified index is of status READY |
@@ -206,50 +78,3 @@ def _wait_for_predicate( |
206 | 78 | if monotonic() - start > timeout: |
207 | 79 | raise TimeoutError(err) |
208 | 80 | sleep(interval) |
209 | | - |
210 | | - |
def create_fulltext_search_index(
    collection: Collection,
    index_name: str,
    field: Union[str, List[str]],
    *,
    wait_until_complete: Optional[float] = None,
    **kwargs: Any,
) -> None:
    """Create an Atlas Search index on a collection (experimental utility).

    The collection is created first when it is not among the database's
    authorized collection names. Each requested field is mapped as a
    ``string`` field, with dynamic mapping turned off.

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        field (Union[str, List[str]]): Field, or list of fields, to index
        wait_until_complete (Optional[float]): If provided (and non-zero), number
            of seconds to wait until search index is ready
        kwargs: Keyword arguments supplying any additional options to SearchIndexModel.
    """
    logger.info("Creating Search Index %s on %s", index_name, collection.name)

    # Search indexes can only be attached to an existing collection.
    database = collection.database
    known_collections = database.list_collection_names(authorizedCollections=True)
    if collection.name not in known_collections:
        database.create_collection(collection.name)

    # Treat a single field name as a one-element list of fields.
    field_names = [field] if isinstance(field, str) else field
    fields_definition = {name: [{"type": "string"}] for name in field_names}
    definition = {"mappings": {"dynamic": False, "fields": fields_definition}}

    index_model = SearchIndexModel(
        definition=definition,
        name=index_name,
        type="search",
        **kwargs,
    )
    result = collection.create_search_index(index_model)

    if wait_until_complete:

        def _ready() -> bool:
            return _is_index_ready(collection, index_name)

        _wait_for_predicate(
            predicate=_ready,
            err=f"{index_name=} did not complete in {wait_until_complete}!",
            timeout=wait_until_complete,
        )
    logger.info(result)
0 commit comments