@@ -759,10 +759,17 @@ def add_symbol(self, symbol: Symbol):
759759 document .set_data (pickle .dumps (symbol ))
760760 db .add_document (document )
761761
762- def delete_file (self , file : Path ):
763- """Delete all documents for the given file path."""
764- term_with_prefix = self .schema ["path" ].prefix + str (file )
765- self ._live_writable_db ().delete_document (term_with_prefix )
762+ def replace_symbol (self , old_document : xapian .Document , symbol : Symbol ):
763+ """Replace an existing document."""
764+ db = self ._live_writable_db ()
765+ new_document = xapian .Document ()
766+ self .schema .index_document (new_document , ** asdict (symbol ))
767+ new_document .set_data (pickle .dumps (symbol ))
768+ db .replace_document (old_document .get_docid (), new_document )
769+
770+ def delete_document (self , doc : xapian .Document ):
771+ """Delete the given document from this index."""
772+ self ._live_writable_db ().delete_document (doc .get_docid ())
766773
767774 def all_files (self ) -> Iterator [Path ]:
768775 """Yield all the files indexed in this SymbolIndex."""
@@ -787,6 +794,22 @@ def all_files(self) -> Iterator[Path]:
787794 seen_paths .add (path )
788795 yield path
789796
797+ def get_docs_for_path (self , path : Path ) -> list [xapian .Document ]:
798+ """Get all documents for given path."""
799+ raw_term = self .schema ["path" ].prefix .encode () + str (path ).encode ()
800+ raw_term = raw_term [:MAX_TERM_SIZE ]
801+
802+ db = self ._live_db ()
803+ postlist : xapian .PostingIter = db .postlist (raw_term ) # pyright: ignore
804+
805+ docs = []
806+
807+ for it in postlist :
808+ doc = db .get_document (it .docid )
809+ docs .append (doc )
810+
811+ return docs
812+
790813 def iter_prefix (self , field : str , value_prefix : str ) -> Iterator [str ]:
791814 """Return all the possible values for ``field`` with given prefix."""
792815 db = self ._live_db ()
@@ -896,6 +919,7 @@ def _index_single_file(
896919 file : Path ,
897920 options : IndexingOptions ,
898921 use_compilation_database : bool ,
922+ outdated_documents : list [xapian .Document ],
899923) -> int :
900924 try :
901925 symtab = read_symbols_in_file (
@@ -923,28 +947,33 @@ def _index_single_file(
923947 symbol .mtime ,
924948 )
925949
926- index .add_symbol (symbol )
950+ if outdated_documents :
951+ index .replace_symbol (outdated_documents .pop (), symbol )
952+ else :
953+ index .add_symbol (symbol )
927954
928955 num += 1
929956
930957 if num == 0 :
931958 trace ("{}: No symbols found" , file )
932959 # Add a single document if there are no symbols. Otherwise,
933960 # we would always treat it as unindexed.
934- index .add_symbol (
935- Symbol (
936- path = file ,
937- source = None ,
938- name = "" ,
939- demangled = None ,
940- section = "" ,
941- address = 0 ,
942- size = 0 ,
943- type = SymbolType .NOTYPE ,
944- relocations = list (),
945- mtime = file .stat ().st_mtime_ns ,
946- )
961+ symbol = Symbol (
962+ path = file ,
963+ source = None ,
964+ name = "" ,
965+ demangled = None ,
966+ section = "" ,
967+ address = 0 ,
968+ size = 0 ,
969+ type = SymbolType .NOTYPE ,
970+ relocations = list (),
971+ mtime = file .stat ().st_mtime_ns ,
947972 )
973+ if outdated_documents :
974+ index .replace_symbol (outdated_documents .pop (), symbol )
975+ else :
976+ index .add_symbol (symbol )
948977 num += 1
949978
950979 trace ("{}: Adding {} symbol(s) to index" , file , num )
@@ -960,12 +989,14 @@ def __init__(
960989 options : IndexingOptions ,
961990 should_quit : Callable [[], bool ],
962991 index_path : Path ,
992+ files_to_delete : Collection [Path ],
963993 use_compilation_database : bool ,
964994 dry_run : bool ,
965995 ):
966996 self .options = options
967997 self .should_quit = should_quit
968998 self .index_path = index_path
999+ self .files_to_delete = set (files_to_delete )
9691000 self .use_compilation_database = use_compilation_database
9701001 self .dry_run = dry_run
9711002
@@ -1050,6 +1081,20 @@ def _worker(self):
10501081 SymbolIndex .open_shard (self .index_path ) as index ,
10511082 index .transaction (),
10521083 ):
1084+ deletable_files = set (self .files_to_delete ).intersection (
1085+ set (index .all_files ())
1086+ )
1087+
1088+ outdated_documents : list [xapian .Document ] = []
1089+ for f in deletable_files :
1090+ docs = index .get_docs_for_path (f )
1091+ outdated_documents .extend (docs )
1092+
1093+ debug (
1094+ "There are {} outdated documents to recycle" ,
1095+ len (outdated_documents ),
1096+ )
1097+
10531098 while not self ._stop_event .is_set ():
10541099 parent = mp .parent_process ()
10551100 if parent is not None and not parent .is_alive ():
@@ -1069,10 +1114,20 @@ def _worker(self):
10691114 path ,
10701115 self .options ,
10711116 self .use_compilation_database ,
1117+ outdated_documents ,
10721118 )
10731119
10741120 self ._result_queue .put (result )
10751121
1122+ trace (
1123+ "There are {} outdated documents to delete" ,
1124+ len (outdated_documents ),
1125+ )
1126+
1127+ if not self .dry_run :
1128+ for doc in outdated_documents :
1129+ index .delete_document (doc )
1130+
10761131
10771132def index_binary_directory (
10781133 directory : str | Path ,
@@ -1136,30 +1191,19 @@ def index_binary_directory(
11361191 stats .num_files_changed = len (changed_files )
11371192 stats .num_files_deleted = len (deleted_files )
11381193
1139- def unindex_file (path , is_deleted ):
1140- if dry_run :
1141- if path in existing_files :
1142- if is_deleted :
1143- print (f"unindex-deleted-file { path } " )
1144- else :
1145- print (f"unindex-outdated-file { path } " )
1146- else :
1147- index .delete_file (path )
1194+ def log_unindex_file (path , is_deleted ):
1195+ if dry_run and path in existing_files :
1196+ if is_deleted :
1197+ print (f"unindex-deleted-file { path } " )
1198+ debug ("File deleted: {}" , file )
1199+ else :
1200+ print (f"unindex-outdated-file { path } " )
1201+ debug ("File modified: {}" , file )
11481202
1149- for file in make_progress_bar (
1150- changed_files ,
1151- desc = "Removing outdated files" ,
1152- leave = False ,
1153- ):
1154- unindex_file (file , is_deleted = False )
1155- debug ("File modified: {}" , file )
1156- for file in make_progress_bar (
1157- deleted_files ,
1158- desc = "Removing deleted files" ,
1159- leave = False ,
1160- ):
1161- unindex_file (file , is_deleted = True )
1162- debug ("File deleted: {}" , file )
1203+ for file in changed_files :
1204+ log_unindex_file (file , is_deleted = False )
1205+ for file in deleted_files :
1206+ log_unindex_file (file , is_deleted = True )
11631207
11641208 if options .save_filters :
11651209 saved_exclusions .extend (original_exclusions )
@@ -1172,6 +1216,7 @@ def unindex_file(path, is_deleted):
11721216 options ,
11731217 interrupted ,
11741218 index_path ,
1219+ files_to_delete = changed_files + deleted_files ,
11751220 use_compilation_database = use_compilation_database ,
11761221 dry_run = dry_run ,
11771222 ) as pool ,
0 commit comments