33import json
44import yaml
55from .repo import Repo
6+ from datetime import datetime
67
78class CustomJSONEncoder (json .JSONEncoder ):
89 def default (self , obj ):
@@ -35,8 +36,50 @@ def __init__(self, repos_path, repos):
3536 "experiment" : os .path .join (repos_path , "index_experiment.json" )
3637 }
3738 self .indices = {key : [] for key in self .index_files .keys ()}
39+ self .modified_times_file = os .path .join (repos_path , "modified_times.json" )
40+ self .modified_times = self ._load_modified_times ()
41+ self ._load_existing_index ()
3842 self .build_index ()
3943
def _load_modified_times(self):
    """Read the persisted mtime map back from disk.

    Returns the parsed JSON dict stored at ``self.modified_times_file``,
    or an empty dict when the file is absent or unreadable (best-effort
    load; a corrupt cache simply forces a full re-scan).
    """
    if not os.path.exists(self.modified_times_file):
        return {}
    try:
        # logger.info(f"Loading modified times from {self.modified_times_file}")
        with open(self.modified_times_file, "r") as f:
            return json.load(f)
    except Exception:
        # Deliberate swallow: an unreadable cache is not fatal.
        return {}
56+
def _save_modified_times(self):
    """Persist the in-memory mtime map as pretty-printed JSON."""
    logger.debug(f"Saving modified times to {self.modified_times_file}")
    with open(self.modified_times_file, "w") as out:
        json.dump(self.modified_times, out, indent=4)
64+
def _load_existing_index(self):
    """Seed ``self.indices`` from previously saved per-folder index files.

    For each folder type whose index file exists, the JSON list is loaded
    and serialized repo dicts are rehydrated into ``Repo`` objects. Any
    failure while reading or converting is swallowed so the folder type
    falls back to whatever state was reached (incremental-update fallback).
    """
    for folder_type, file_path in self.index_files.items():
        if not os.path.exists(file_path):
            continue
        try:
            # logger.info(f"Loading existing index for {folder_type}")
            with open(file_path, "r") as f:
                self.indices[folder_type] = json.load(f)
            # Rehydrate serialized repo dicts into Repo instances.
            for entry in self.indices[folder_type]:
                repo_value = entry.get("repo")
                if isinstance(repo_value, dict):
                    entry["repo"] = Repo(**repo_value)
        except Exception:
            pass  # deliberate best-effort: keep empty/partial index
82+
4083 def add (self , meta , folder_type , path , repo ):
4184 if not repo :
4285 logger .error (f"Repo for index add for { path } is none" )
@@ -87,6 +130,16 @@ def rm(self, meta, folder_type, path):
87130 del (self .indices [folder_type ][index ])
88131 self ._save_indices ()
89132
def get_item_mtime(self, file):
    """Return the last-modification timestamp of *file*.

    Args:
        file: Path of the file whose mtime is read.

    Returns:
        float: ``os.path.getmtime(file)``, floored at 0 (a pathological
        pre-epoch mtime never propagates into the stored mtime map).
    """
    # The original kept a `latest = 0` accumulator with an `if t > latest`
    # update — scaffolding from a removed multi-file loop that only ever
    # saw one file. It computed exactly max(0, mtime); say so directly.
    return max(0, os.path.getmtime(file))
142+
def build_index(self):
    """Incrementally (re)build the shared script/cache/experiment indices.

    Walks every registered repository, compares each automation item's
    meta-file mtime against the persisted modified-times map, and only
    re-indexes entries that are new or changed. Items whose meta file
    disappeared are pruned from both the mtime map and the indices.
    A missing index_script.json forces a full rebuild by clearing the
    stored mtimes; a changed repos.json clears the indices outright.

    Returns:
        None
    """
    # Every key (repos.json + each meta file) seen this pass; anything
    # in the stored mtime map but not here was deleted on disk.
    current_item_keys = set()
    changed = False
    repos_changed = False

    # Start from the persisted mtime map so unchanged items are skipped.
    self.modified_times = self._load_modified_times()

    # Missing script index means the on-disk index state is gone: forget
    # all stored mtimes so every item below looks "new" and re-indexes.
    index_json_path = os.path.join(self.repos_path, "index_script.json")
    if not os.path.exists(index_json_path):
        logger.warning("index_script.json missing. Forcing full index rebuild...")
        logger.debug("Resetting modified_times...")
        self.modified_times = {}
        self._save_modified_times()
    else:
        logger.debug("index_script.json exists. Skipping forced rebuild.")

    # repos.json lists the registered repos; if it changed, the repo set
    # may differ, so clear the indices and start over.
    repos_json_path = os.path.join(self.repos_path, "repos.json")
    # Fix: previously raised FileNotFoundError when repos.json is absent;
    # treat a missing file as mtime 0 so the rebuild path still runs.
    repos_mtime = os.path.getmtime(repos_json_path) if os.path.exists(repos_json_path) else 0.0

    key = f"{repos_json_path}"
    stored_repo_entry = self.modified_times.get(key)
    repo_old_mtime = stored_repo_entry["mtime"] if isinstance(stored_repo_entry, dict) else stored_repo_entry

    logger.debug(f"Current repos.json mtime: {repos_mtime}")
    logger.debug(f"Old repos.json mtime: {repo_old_mtime}")
    current_item_keys.add(key)

    if repo_old_mtime is None or repo_old_mtime != repos_mtime:
        logger.debug("repos.json modified. Clearing index ........")
        # Reset indices; every meta file below will be re-indexed.
        self.indices = {folder: [] for folder in self.index_files.keys()}
        # Keep ONLY the repos.json entry in the mtime map so every meta
        # file reads as changed on this pass.
        self.modified_times = {
            key: {
                "mtime": repos_mtime,
                "date_time": datetime.fromtimestamp(repos_mtime).strftime("%Y-%m-%d %H:%M:%S"),
            }
        }
        self._save_indices()
        self._save_modified_times()
        repos_changed = True
    else:
        logger.debug("Repos.json not modified")

    for repo in self.repos:
        repo_path = repo.path
        if not os.path.isdir(repo_path):
            continue
        logger.debug(f"Checking repository: {repo_path}")
        # Filter for relevant directories in the repo
        for folder_type in ["script", "cache", "experiment"]:
            logger.debug(f"Checking folder type: {folder_type}")
            folder_path = os.path.join(repo_path, folder_type)
            if not os.path.isdir(folder_path):
                continue

            # Process each automation directory
            for automation_dir in os.listdir(folder_path):
                automation_path = os.path.join(folder_path, automation_dir)
                if not os.path.isdir(automation_path):
                    logger.debug(f"Skipping non-directory automation path: {automation_path}")
                    continue

                yaml_path = os.path.join(automation_path, "meta.yaml")
                json_path = os.path.join(automation_path, "meta.json")

                if os.path.isfile(yaml_path):
                    config_path = yaml_path
                elif os.path.isfile(json_path):
                    config_path = json_path
                else:
                    logger.debug(f"No config file found in {automation_path}, skipping")
                    # Fix: the mtime map is keyed by FULL config path, not
                    # by the bare directory name the old code looked up
                    # (that key could never match). Purge both candidates.
                    for stale_key in (yaml_path, json_path):
                        self.modified_times.pop(stale_key, None)
                    if any(automation_dir in item["path"] for item in self.indices[folder_type]):
                        logger.debug(f"Removed index entry (if it exists) for {folder_type}: {automation_dir}")
                        self._remove_index_entry(automation_path)
                        self._save_indices()
                    continue

                current_item_keys.add(config_path)
                mtime = self.get_item_mtime(config_path)

                stored = self.modified_times.get(config_path)
                old_mtime = stored["mtime"] if isinstance(stored, dict) else stored

                # Unchanged and no forced rebuild: skip re-indexing.
                # (Fix: was `repos_changed != 1`, a bool-vs-int compare.)
                if old_mtime == mtime and not repos_changed:
                    continue
                if old_mtime is None:
                    logger.debug(f"New config file detected: {config_path}. Adding to index.")
                logger.debug(f"{config_path} is modified, index getting updated")
                if config_path not in self.modified_times:
                    logger.debug(f"*************{config_path} not found in modified_times; creating new entry***************")

                self.modified_times[config_path] = {
                    "mtime": mtime,
                    "date_time": datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S"),
                }
                logger.debug(f"Modified time for {config_path} updated to {mtime}")
                changed = True
                # Meta file changed (or is new), so reindex it.
                self._process_config_file(config_path, folder_type, automation_path, repo)

    # Prune entries whose meta file disappeared since the last run.
    deleted_keys = set(self.modified_times.keys()) - current_item_keys
    for stale in deleted_keys:
        logger.warning(f"Detected deleted item, removing entry form modified times: {stale}")
        del self.modified_times[stale]
        folder_key = os.path.dirname(stale)
        logger.warning(f"Removing index entry for folder: {folder_key}")
        self._remove_index_entry(folder_key)
        changed = True
    logger.debug(f"Deleted keys removed from modified times and indices: {deleted_keys}")

    if changed:
        logger.debug("Changes detected, saving updated index and modified times.")
        self._save_modified_times()
        self._save_indices()
        logger.debug("**************Index updated (changes detected).*************************")
    else:
        logger.debug("**************Index unchanged (no changes detected).********************")
285+
def _remove_index_entry(self, key):
    """Drop, across all folder types, every entry whose path contains *key*."""
    logger.debug(f"Removing index entry for {key}")
    for folder_type in self.indices:
        kept = [entry for entry in self.indices[folder_type] if key not in entry["path"]]
        self.indices[folder_type] = kept
293+
def _delete_by_uid(self, folder_type, uid, alias):
    """Remove any existing entry with this *uid* so re-adding cannot duplicate it."""
    logger.debug(f"Deleting and updating index entry for the script {alias} with UID {uid}")
    remaining = []
    for entry in self.indices[folder_type]:
        if entry["uid"] != uid:
            remaining.append(entry)
    self.indices[folder_type] = remaining
123303
def _process_config_file(self, config_file, folder_type, folder_path, repo):
    """Parse one meta file and (re)register its entry in the shared index.

    Loads the YAML/JSON meta file, validates that it is a non-empty dict
    carrying a 'uid', removes any stale entry with the same uid, then
    appends the fresh entry. All parse/IO errors are logged, not raised.

    Args:
        config_file: Path to meta.yaml/meta.json (None skips silently).
        folder_type: Index bucket — "script", "cache", or "experiment".
        folder_path: Automation directory containing the meta file.
        repo: Repo object this entry belongs to.

    Returns:
        None
    """
    if config_file is None:
        logger.debug(f"No meta file in {folder_path}, skipping")
        return

    try:
        # Determine the file type based on the extension
        if config_file.endswith(".yaml") or config_file.endswith(".yml"):
            with open(config_file, "r") as f:
                data = yaml.safe_load(f) or {}
        elif config_file.endswith(".json"):
            with open(config_file, "r") as f:
                data = json.load(f) or {}
        else:
            logger.warning(f"Skipping {config_file}: Unsupported file format.")
            return

        if not isinstance(data, dict):
            logger.warning(f"Skipping {config_file}: Invalid or empty meta")
            return

        # Single authoritative uid check. (Fix: the old code re-tested
        # `if unique_id:` after this early return, leaving a dead `else`
        # branch that could never run.)
        unique_id = data.get("uid")
        if not unique_id:
            logger.warning(f"Skipping {config_file}: missing uid")
            return
        tags = data.get("tags", [])
        alias = data.get("alias", None)

        # Replace any stale entry with the same uid, then add the new one.
        self._delete_by_uid(folder_type, unique_id, alias)
        self.indices[folder_type].append({
            "uid": unique_id,
            "tags": tags,
            # NOTE(review): the "alias"/"path" fields sat in a diff gap;
            # reconstructed from how _delete_by_uid/_remove_index_entry
            # consume entries — confirm against the full file.
            "alias": alias,
            "path": folder_path,
            "repo": repo,
        })
    except Exception as e:
        logger.error(f"Error processing {config_file}: {e}")
166358
@@ -178,6 +370,6 @@ def _save_indices(self):
178370 try :
179371 with open (output_file , "w" ) as f :
180372 json .dump (index_data , f , indent = 4 , cls = CustomJSONEncoder )
181- # logger.debug(f"Shared index for {folder_type} saved to {output_file}.")
373+ logger .debug (f"Shared index for { folder_type } saved to { output_file } ." )
182374 except Exception as e :
183375 logger .error (f"Error saving shared index for { folder_type } : { e } " )
0 commit comments