1313 embedding ,
1414 agreement ,
1515 general ,
16+ record ,
1617)
1718from submodules .model import daemon
1819
@@ -99,7 +100,7 @@ def get_embedding_name(
99100 return name
100101
101102
102- def recreate_embeddings (
103+ def recreate_or_extend_embeddings (
103104 project_id : str , embedding_ids : Optional [List [str ]] = None , user_id : str = None
104105) -> None :
105106 if not embedding_ids :
@@ -126,7 +127,9 @@ def recreate_embeddings(
126127 embedding_item = embedding .get (project_id , embedding_id )
127128 if not embedding_item :
128129 continue
129- embedding_item = __recreate_embedding (project_id , embedding_id )
130+ embedding_item = __recreate_or_extend_embedding (project_id , embedding_id )
131+ if not embedding_item :
132+ continue
130133 new_id = embedding_item .id
131134 time .sleep (2 )
132135 while True :
@@ -179,49 +182,77 @@ def __handle_failed_embedding(
179182 general .commit ()
180183
181184
def __recreate_or_extend_embedding(
    project_id: str, embedding_id: str
) -> Optional[Embedding]:
    """Either rebuild an embedding from scratch or only request a delta run.

    The decision is driven by ``delta_full_recalculation_threshold`` of the
    existing embedding:

    * ``0`` always triggers a full recreation.
    * A positive value triggers a full recreation only when the already
      accumulated delta count plus the records still to calculate
      (``record.count`` minus ``embedding.get_record_ids_count``) exceeds
      ``threshold * record.count``; otherwise the delta counter is increased
      and the existing embedding is extended.
    * A negative value takes neither branch, so the embedding is extended
      without touching the delta counter.

    Args:
        project_id: Project the embedding belongs to.
        embedding_id: Id of the embedding to recreate or extend.

    Returns:
        The newly created embedding on full recreation, the existing
        embedding on a delta run, or ``None`` when no embedding with
        ``embedding_id`` exists in the project.

    Raises:
        ApiTokenImportError: On full recreation of an OPENAI / COHERE / AZURE
            embedding when no agreement is linked to the old embedding. The
            new embedding is marked FAILED and committed before raising.
    """
    old_embedding_item = embedding.get(project_id, embedding_id)
    if not old_embedding_item:
        return None

    # NOTE(review): a threshold of None would raise TypeError on the `> 0`
    # comparison below -- confirm the column can never be NULL.
    threshold = old_embedding_item.delta_full_recalculation_threshold
    needs_full_recreation = False
    if threshold == 0:
        needs_full_recreation = True
    elif threshold > 0:
        already_deltaed = old_embedding_item.current_delta_record_count
        full_count = record.count(project_id)
        current_count = embedding.get_record_ids_count(embedding_id)
        to_calc = full_count - current_count
        if already_deltaed + to_calc > threshold * full_count:
            # only do a full recreation if the delta is larger than the threshold
            needs_full_recreation = True
        else:
            old_embedding_item.current_delta_record_count += to_calc

    if not needs_full_recreation:
        # Delta run: commit (persists a changed delta counter, if any) and
        # request the embedding under its existing id.
        general.commit()
        daemon.run_without_db_token(
            connector.request_embedding, project_id, embedding_id
        )
        return old_embedding_item

    # Full recreation: create the replacement first, then drop the old
    # embedding and its tensors in the same transaction.
    new_embedding_item = embedding.create(
        project_id,
        old_embedding_item.attribute_id,
        old_embedding_item.name,
        old_embedding_item.created_by,
        enums.EmbeddingState.INITIALIZING.value,
        type=old_embedding_item.type,
        model=old_embedding_item.model,
        platform=old_embedding_item.platform,
        api_token=old_embedding_item.api_token,
        filter_attributes=old_embedding_item.filter_attributes,
        additional_data=old_embedding_item.additional_data,
        with_commit=False,
    )
    embedding.delete(project_id, embedding_id, with_commit=False)
    embedding.delete_tensors(embedding_id, with_commit=False)
    general.commit()

    platforms_with_agreement = (
        enums.EmbeddingPlatform.OPENAI.value,
        enums.EmbeddingPlatform.COHERE.value,
        enums.EmbeddingPlatform.AZURE.value,
    )
    if new_embedding_item.platform in platforms_with_agreement:
        # The agreement is still keyed by the old embedding id; re-point it
        # to the freshly created embedding.
        agreement_item = agreement.get_by_xfkey(
            project_id, embedding_id, enums.AgreementType.EMBEDDING.value
        )
        if not agreement_item:
            new_embedding_item.state = enums.EmbeddingState.FAILED.value
            general.commit()
            raise ApiTokenImportError(
                f"No agreement found for embedding {new_embedding_item.name} "
            )
        agreement_item.xfkey = new_embedding_item.id
        general.commit()

    connector.request_deleting_embedding(project_id, embedding_id)

    # request handles delta and full recreation
    daemon.run_without_db_token(
        connector.request_embedding, project_id, new_embedding_item.id
    )
    return new_embedding_item
225256
226257
227258def update_embedding_payload (
@@ -262,3 +293,11 @@ def update_label_payloads_for_neural_search(
262293 embedding_ids = [str (e .id ) for e in relevant_embeddings ],
263294 record_ids = record_ids ,
264295 )
296+
297+
def remove_tensors_by_record_ids(
    project_id: str, record_ids: List[str], embedding_id: Optional[str] = None
) -> None:
    """Delete the tensors of the given records via the embedding module.

    Does nothing when ``record_ids`` is empty or ``None``. ``embedding_id``
    is forwarded unchanged to ``embedding.delete_tensors_by_record_ids``.
    """
    if record_ids:
        embedding.delete_tensors_by_record_ids(project_id, record_ids, embedding_id)
0 commit comments