@@ -803,9 +803,11 @@ async def delete_dedupe_index(
803803 if not coll .indexState :
804804 raise HTTPException (status_code = 404 , detail = "no_dedupe_index" )
805805
806- # if index is not idle, can't delete it yet
807- if coll .indexState != "idle" :
808- raise HTTPException (status_code = 400 , detail = "dedupe_index_is_in_use" )
806+ # if index is not idle/ready, check if any crawls running
807+ if coll .indexState not in ("idle" , "ready" ):
808+ # if crawls running using index, can't delete
809+ if await self .crawl_ops .has_active_crawls_with_dedupe_coll (org .id , coll .id ):
810+ raise HTTPException (status_code = 400 , detail = "dedupe_index_is_in_use" )
809811
810812 if coll .indexFile :
811813 if not await self .storage_ops .delete_file_object (org , coll .indexFile ):
@@ -816,17 +818,24 @@ async def delete_dedupe_index(
816818 raise HTTPException (status_code = 400 , detail = "file_deletion_error" )
817819
818820 await self .collections .find_one_and_update (
819- {"_id" : coll .id , "indexState" : "idle" },
821+ {"_id" : coll .id },
820822 {
821823 "$set" : {
822824 "indexStats" : None ,
823825 "indexState" : None ,
824826 "indexFile" : None ,
825827 "indexLastSavedAt" : None ,
828+ "indexDiskSpaceUsed" : None ,
826829 }
827830 },
828831 )
829832
833+ # if not idle, delete k8s dedupe resources
834+ if coll .indexState != "idle" :
835+ await self .crawl_manager .delete_dedupe_index_resources (
836+ str (org .id ), str (coll .id )
837+ )
838+
830839 if remove_from_workflows :
831840 await self .crawl_configs .update_many (
832841 {"oid" : org .id , "dedupeCollId" : coll .id },
@@ -840,7 +849,7 @@ async def update_dedupe_index_stats(
840849 ):
841850 """update dedupe index stats for specified collection"""
842851 self .collections .find_one_and_update (
843- {"_id" : coll_id },
852+ {"_id" : coll_id , "indexState" : { "$ne" : None } },
844853 {
845854 "$set" : {
846855 "indexStats" : stats .dict (),
@@ -855,14 +864,20 @@ async def update_dedupe_index_info(
855864 state : TYPE_DEDUPE_INDEX_STATES ,
856865 index_file : Optional [DedupeIndexFile ] = None ,
857866 dt : Optional [datetime ] = None ,
867+ if_exists = False ,
858868 ):
859869 """update the state, and optionally, dedupe index file info"""
860870 query : dict [str , Any ] = {"indexState" : state }
861871 if index_file and dt :
862872 query ["indexLastSavedAt" ] = dt
863873 query ["indexFile" ] = index_file .model_dump ()
864874
865- res = self .collections .find_one_and_update ({"_id" : coll_id }, {"$set" : query })
875+ match : dict [str , Any ] = {"_id" : coll_id }
876+ # only update if index already exists
877+ if if_exists :
878+ match ["indexState" ] = {"$ne" : None }
879+
880+ res = self .collections .find_one_and_update (match , {"$set" : query })
866881 return res is not None
867882
868883 async def get_dedupe_index_saved (self , coll_id : UUID ) -> Optional [datetime ]:
@@ -886,6 +901,13 @@ async def get_dedupe_index_disk_size(self, coll_id: UUID) -> int:
886901 return coll .get ("indexDiskSpaceUsed" , 0 )
887902 return 0
888903
async def has_dedupe_index(self, coll_id: UUID, oid: UUID) -> bool:
    """Return True if the collection exists in the org and has a dedupe index.

    A dedupe index is considered present when the collection document has a
    non-None ``indexState``.

    :param coll_id: id of the collection to look up
    :param oid: id of the organization the collection must belong to
    :returns: True only if a matching document is found and its
        ``indexState`` is set; False otherwise (including when no
        document matches)
    """
    coll = await self.collections.find_one(
        {"_id": coll_id, "oid": oid}, projection=["indexState"]
    )
    # Compare against None explicitly: `coll and ...` would hand back None
    # (not False) when no document matches, violating the `-> bool` contract.
    return coll is not None and coll.get("indexState") is not None
910+
889911 # END DEDUPE OPS
890912
891913 async def recalculate_org_collection_stats (self , org : Organization ):
0 commit comments