88 record ,
99 tokenization ,
1010 notification ,
11+ organization ,
1112)
1213import torch
1314import traceback
2425from util import daemon , request_util
2526from util .decorator import param_throttle
2627from util .embedders import get_embedder
27- from util .notification import send_project_update
28+ from util .notification import send_project_update , embedding_warning_templates
2829import os
2930import pandas as pd
30- from submodules .model .business_objects import embedding , general , organization
3131from submodules .s3 import controller as s3
3232
3333logging .basicConfig (level = logging .INFO )
@@ -133,7 +133,7 @@ def prepare_run_encoding(request: data_type.Request, embedding_type: str) -> int
133133 request .project_id ,
134134 request .user_id ,
135135 message ,
136- " ERROR" ,
136+ enums . Notification . ERROR . value ,
137137 enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
138138 True ,
139139 )
@@ -142,11 +142,6 @@ def prepare_run_encoding(request: data_type.Request, embedding_type: str) -> int
142142 f"notification_created:{ request .user_id } " ,
143143 True ,
144144 )
145- embedding .update_embedding_state_failed (
146- request .project_id ,
147- embedding_id ,
148- with_commit = True ,
149- )
150145 doc_ock .post_embedding_failed (
151146 request .user_id , request .config_string
152147 )
@@ -175,7 +170,7 @@ def run_encoding(
175170 request .project_id ,
176171 request .user_id ,
177172 f"Initializing model { request .config_string } . This can take a few minutes." ,
178- " INFO" ,
173+ enums . Notification . INFO . value ,
179174 enums .NotificationType .EMBEDDING_CREATION_STARTED .value ,
180175 True ,
181176 )
@@ -184,7 +179,9 @@ def run_encoding(
184179 )
185180 iso2_code = project .get_blank_tokenizer_from_project (request .project_id )
186181 try :
187- embedder = get_embedder (embedding_type , request .config_string , iso2_code )
182+ embedder = get_embedder (
183+ request .project_id , embedding_type , request .config_string , iso2_code
184+ )
188185 except OSError :
189186 embedding .update_embedding_state_failed (
190187 request .project_id ,
@@ -201,7 +198,7 @@ def run_encoding(
201198 request .project_id ,
202199 request .user_id ,
203200 message ,
204- " ERROR" ,
201+ enums . Notification . ERROR . value ,
205202 enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
206203 True ,
207204 )
@@ -226,7 +223,7 @@ def run_encoding(
226223 request .project_id ,
227224 request .user_id ,
228225 message ,
229- " ERROR" ,
226+ enums . Notification . ERROR . value ,
230227 enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
231228 True ,
232229 )
@@ -240,7 +237,7 @@ def run_encoding(
240237 request .project_id ,
241238 request .user_id ,
242239 f"Could not load model { request .config_string } . Please contact the support." ,
243- " ERROR" ,
240+ enums . Notification . ERROR . value ,
244241 enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
245242 True ,
246243 )
@@ -283,7 +280,7 @@ def run_encoding(
283280 request .project_id ,
284281 request .user_id ,
285282 f"Started encoding { attribute_name } using model { request .config_string } ." ,
286- " INFO" ,
283+ enums . Notification . INFO . value ,
287284 enums .NotificationType .EMBEDDING_CREATION_STARTED .value ,
288285 True ,
289286 )
@@ -324,6 +321,39 @@ def run_encoding(
324321 initial_count ,
325322 )
326323 except Exception :
324+ for warning_type , idx_list in embedder .get_warnings ().items ():
325+ # use last record with warning as example
326+ example_record_id = record_ids [idx_list [- 1 ]]
327+
328+ primary_keys = [
329+ pk .name for pk in attribute .get_primary_keys (request .project_id )
330+ ]
331+ if primary_keys :
332+ example_record_data = record .get (
333+ request .project_id , example_record_id
334+ ).data
335+ example_record_msg = "with primary key: " + ", " .join (
336+ [str (example_record_data [p_key ]) for p_key in primary_keys ]
337+ )
338+ else :
339+ example_record_msg = " with record id: " + str (example_record_id )
340+
341+ warning_msg = embedding_warning_templates [warning_type ].format (
342+ record_number = len (idx_list ), example_record_msg = example_record_msg
343+ )
344+
345+ notification .create (
346+ request .project_id ,
347+ request .user_id ,
348+ warning_msg ,
349+ enums .Notification .WARNING .value ,
350+ enums .NotificationType .EMBEDDING_CREATION_WARNING .value ,
351+ True ,
352+ )
353+ send_project_update (
354+ request .project_id , f"notification_created:{ request .user_id } " , True
355+ )
356+
327357 embedding .update_embedding_state_failed (
328358 request .project_id ,
329359 embedding_id ,
@@ -337,27 +367,51 @@ def run_encoding(
337367 request .project_id ,
338368 request .user_id ,
339369 "Error at runtime. Please contact support." ,
340- " ERROR" ,
370+ enums . Notification . ERROR . value ,
341371 enums .NotificationType .EMBEDDING_CREATION_FAILED .value ,
342372 True ,
343373 )
344374 send_project_update (
345375 request .project_id , f"notification_created:{ request .user_id } " , True
346376 )
347377 print (traceback .format_exc (), flush = True )
348- embedding .update_embedding_state_failed (
349- request .project_id ,
350- embedding_id ,
351- with_commit = True ,
352- )
353- send_project_update (
354- request .project_id ,
355- f"embedding:{ embedding_id } :state:{ enums .EmbeddingState .FAILED .value } " ,
356- )
357378 doc_ock .post_embedding_failed (request .user_id , request .config_string )
358379 return 500
359380
360381 if embedding .get (request .project_id , embedding_id ):
382+ for warning_type , idx_list in embedder .get_warnings ().items ():
383+ # use last record with warning as example
384+ example_record_id = record_ids [idx_list [- 1 ]]
385+
386+ primary_keys = [
387+ pk .name for pk in attribute .get_primary_keys (request .project_id )
388+ ]
389+ if primary_keys :
390+ example_record_data = record .get (
391+ request .project_id , example_record_id
392+ ).data
393+ example_record_msg = "with primary key: " + ", " .join (
394+ [str (example_record_data [p_key ]) for p_key in primary_keys ]
395+ )
396+ else :
397+ example_record_msg = " with record id: " + str (example_record_id )
398+
399+ warning_msg = embedding_warning_templates [warning_type ].format (
400+ record_number = len (idx_list ), example_record_msg = example_record_msg
401+ )
402+
403+ notification .create (
404+ request .project_id ,
405+ request .user_id ,
406+ warning_msg ,
407+ enums .Notification .WARNING .value ,
408+ enums .NotificationType .EMBEDDING_CREATION_WARNING .value ,
409+ True ,
410+ )
411+ send_project_update (
412+ request .project_id , f"notification_created:{ request .user_id } " , True
413+ )
414+
361415 if embedding_type == "classification" :
362416 request_util .post_embedding_to_neural_search (
363417 request .project_id , embedding_id
@@ -376,7 +430,7 @@ def run_encoding(
376430 request .project_id ,
377431 request .user_id ,
378432 f"Finished encoding { attribute_name } using model { request .config_string } ." ,
379- " SUCCESS" ,
433+ enums . Notification . SUCCESS . value ,
380434 enums .NotificationType .EMBEDDING_CREATION_DONE .value ,
381435 True ,
382436 )
0 commit comments