Skip to content

Commit 067b815

Browse files
committed
Update index.py to incrementally add and remove index entries of a repo
1 parent ae44d44 commit 067b815

File tree

2 files changed

+132
-141
lines changed

2 files changed

+132
-141
lines changed

mlc/index.py

Lines changed: 115 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def __init__(self, repos_path, repos):
2727
"""
2828
self.repos_path = repos_path
2929
self.repos = repos
30-
#logger.info(repos)
3130

3231
logger.debug(f"Repos path for Index: {self.repos_path}")
3332
self.index_files = {
@@ -140,7 +139,64 @@ def get_item_mtime(self,file):
140139
if t > latest:
141140
latest = t
142141
return latest
143-
142+
143+
def _index_single_repo(self, repo, repos_changed=False, current_item_keys=None):
144+
repo_path = repo.path
145+
if not os.path.isdir(repo_path):
146+
return False
147+
148+
changed = False
149+
150+
for folder_type in ["script", "cache", "experiment"]:
151+
folder_path = os.path.join(repo_path, folder_type)
152+
if not os.path.isdir(folder_path):
153+
continue
154+
155+
for automation_dir in os.listdir(folder_path):
156+
automation_path = os.path.join(folder_path, automation_dir)
157+
if not os.path.isdir(automation_path):
158+
continue
159+
160+
yaml_path = os.path.join(automation_path, "meta.yaml")
161+
json_path = os.path.join(automation_path, "meta.json")
162+
163+
if os.path.isfile(yaml_path):
164+
config_path = yaml_path
165+
elif os.path.isfile(json_path):
166+
config_path = json_path
167+
else:
168+
#logger.debug(f"No config file found in {automation_path}, skipping")
169+
delete_flag = False
170+
if automation_dir in self.modified_times:
171+
del self.modified_times[automation_dir]
172+
if any(automation_dir in item["path"] for item in self.indices[folder_type]):
173+
logger.debug(f"Removed index entry (if it exists) for {folder_type} : {automation_dir}")
174+
delete_flag = True
175+
self._remove_index_entry(automation_path)
176+
if delete_flag:
177+
self._save_indices()
178+
continue
179+
if current_item_keys is not None:
180+
current_item_keys.add(config_path)
181+
mtime = self.get_item_mtime(config_path)
182+
old = self.modified_times.get(config_path)
183+
old_mtime = old["mtime"] if isinstance(old, dict) else old
184+
185+
# skip if unchanged
186+
if old_mtime == mtime and repos_changed != 1:
187+
continue
188+
189+
self.modified_times[config_path] = {
190+
"mtime": mtime,
191+
"date_time": datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
192+
}
193+
194+
# meta file changed, so reindex
195+
self._process_config_file(config_path, folder_type, automation_path, repo)
196+
changed = True
197+
198+
return changed
199+
144200
def build_index(self):
145201
"""
146202
Build shared indices for script, cache, and experiment folders across all repositories.
@@ -152,125 +208,27 @@ def build_index(self):
152208
# track all currently detected item paths
153209
current_item_keys = set()
154210
changed = False
155-
repos_changed = False
156-
157-
# load existing modified times
211+
force_rebuild = False
212+
213+
# load modified times
158214
self.modified_times = self._load_modified_times()
159215

216+
# if missing index file, then force full rebuild
160217
index_json_path = os.path.join(self.repos_path, "index_script.json")
161-
162-
rebuild_index = False
163-
164-
#file does not exist, rebuild
165218
if not os.path.exists(index_json_path):
166219
logger.warning("index_script.json missing. Forcing full index rebuild...")
167-
#logger.debug("Resetting modified_times...")
168220
self.modified_times = {}
169-
self._save_modified_times()
170-
#else:
171-
# logger.debug("index_script.json exists. Skipping forced rebuild.")
172-
173-
#check repos.json mtime
174-
repos_json_path = os.path.join(self.repos_path, "repos.json")
175-
repos_mtime = os.path.getmtime(repos_json_path)
176-
177-
key = f"{repos_json_path}"
178-
old = self.modified_times.get(key)
179-
repo_old_mtime = old["mtime"] if isinstance(old, dict) else old
180-
181-
#logger.debug(f"Current repos.json mtime: {repos_mtime}")
182-
#logger.debug(f"Old repos.json mtime: {repo_old_mtime}")
183-
current_item_keys.add(key)
184-
185-
# if changed, reset indexes
186-
if repo_old_mtime is None or repo_old_mtime != repos_mtime:
187-
logger.debug("repos.json modified. Clearing index ........")
188-
# reset indices
189-
self.indices = {key: [] for key in self.index_files.keys()}
190-
# record repo mtime
191-
self.modified_times[key] = {
192-
"mtime": repos_mtime,
193-
"date_time": datetime.fromtimestamp(repos_mtime).strftime("%Y-%m-%d %H:%M:%S")
194-
}
195-
# clear modified times except for repos.json
196-
self.modified_times = {key: self.modified_times[key]}
197-
self._save_indices()
198-
self._save_modified_times()
199-
repos_changed = True
200-
#else:
201-
# logger.debug("Repos.json not modified")
221+
self.indices = {k: [] for k in self.index_files.keys()}
222+
force_rebuild = True
202223

224+
# index each repo
203225
for repo in self.repos:
204-
repo_path = repo.path #os.path.join(self.repos_path, repo)
205-
if not os.path.isdir(repo_path):
206-
continue
207-
#logger.debug(f"------------Checking repository: {repo_path}---------------")
208-
# Filter for relevant directories in the repo
209-
for folder_type in ["script", "cache", "experiment"]:
210-
#logger.debug(f"Checking folder type: {folder_type}")
211-
folder_path = os.path.join(repo_path, folder_type)
212-
if not os.path.isdir(folder_path):
213-
continue
214-
215-
# Process each automation directory
216-
for automation_dir in os.listdir(folder_path):
217-
# logger.debug(f"Checking automation directory: {automation_dir}")
218-
automation_path = os.path.join(folder_path, automation_dir)
219-
if not os.path.isdir(automation_path):
220-
#logger.debug(f"Skipping non-directory automation path: {automation_path}")
221-
continue
222-
223-
yaml_path = os.path.join(automation_path, "meta.yaml")
224-
json_path = os.path.join(automation_path, "meta.json")
225-
226-
if os.path.isfile(yaml_path):
227-
# logger.debug(f"Found YAML config file: {yaml_path}")
228-
config_path = yaml_path
229-
elif os.path.isfile(json_path):
230-
# logger.debug(f"Found JSON config file: {json_path}")
231-
config_path = json_path
232-
else:
233-
#logger.debug(f"No config file found in {automation_path}, skipping")
234-
delete_flag = False
235-
if automation_dir in self.modified_times:
236-
del self.modified_times[automation_dir]
237-
if any(automation_dir in item["path"] for item in self.indices[folder_type]):
238-
logger.debug(f"Removed index entry (if it exists) for {folder_type} : {automation_dir}")
239-
delete_flag = True
240-
self._remove_index_entry(automation_path)
241-
if delete_flag:
242-
self._save_indices()
243-
continue
244-
current_item_keys.add(config_path)
245-
mtime = self.get_item_mtime(config_path)
246-
247-
old = self.modified_times.get(config_path)
248-
old_mtime = old["mtime"] if isinstance(old, dict) else old
249-
250-
# skip if unchanged
251-
if old_mtime == mtime and repos_changed != 1:
252-
# logger.debug(f"No changes detected for {config_path}, skipping reindexing.")
253-
continue
254-
#if(old_mtime is None):
255-
# logger.debug(f"New meta.yaml file detected: {config_path}. Adding to index.")
256-
257-
# update mtime
258-
#logger.debug(f"{config_path} is modified, index getting updated")
259-
#if config_path not in self.modified_times:
260-
# logger.debug(f"*************{config_path} not found in modified_times; creating new entry***************")
261-
262-
self.modified_times[config_path] = {
263-
"mtime": mtime,
264-
"date_time": datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
265-
}
266-
#logger.debug(f"Modified time for {config_path} updated to {mtime}")
267-
changed = True
268-
# meta file changed, so reindex
269-
self._process_config_file(config_path, folder_type, automation_path, repo)
226+
repo_changed = self._index_single_repo(repo, force_rebuild, current_item_keys)
227+
if repo_changed:
228+
changed = True
270229

271230
# remove deleted scripts
272-
old_keys = set(self.modified_times.keys())
273-
deleted_keys = old_keys - current_item_keys
231+
deleted_keys = set(self.modified_times) - current_item_keys
274232
for key in deleted_keys:
275233
logger.warning(f"Detected deleted item, removing entry from modified times: {key}")
276234
del self.modified_times[key]
@@ -281,13 +239,10 @@ def build_index(self):
281239
if deleted_keys:
282240
logger.debug(f"Deleted keys removed from modified times and indices: {deleted_keys}")
283241

284-
if changed:
242+
if force_rebuild or changed:
285243
logger.debug("Changes detected, saving updated index and modified times.")
286244
self._save_modified_times()
287245
self._save_indices()
288-
#logger.debug("**************Index updated (changes detected).*************************")
289-
#else:
290-
#logger.debug("**************Index unchanged (no changes detected).********************")
291246

292247
def _remove_index_entry(self, key):
293248
logger.debug(f"Removing index entry for {key}")
@@ -379,3 +334,48 @@ def _save_indices(self):
379334
#logger.debug(f"Shared index for {folder_type} saved to {output_file}.")
380335
except Exception as e:
381336
logger.error(f"Error saving shared index for {folder_type}: {e}")
337+
338+
339+
def add_repo(self, repo):
    """
    Index a newly registered repository without rebuilding everything.

    The repo is scanned with reprocessing forced for every meta file found.
    The indices and modified times are written out only when that scan
    reports that something was (re)indexed.

    Args:
        repo: Repository object passed through to `_index_single_repo`.
    """
    if not self._index_single_repo(repo, repos_changed=True):
        # Nothing was indexed, so there is nothing new to persist.
        return

    self._save_indices()
    self._save_modified_times()
348+
349+
350+
def remove_repo_from_index(self, repo_path):
    """
    Remove all index entries and modified times belonging to a repo.
    Called when a repo is unregistered from repos.json.

    A stored path belongs to the repo when it equals the repo root or lies
    below it. The comparison is made on whole path components, so removing
    ".../foo" leaves a sibling repo ".../foo-bar" untouched.

    Args:
        repo_path: Root directory of the repository being removed. An empty
            value is rejected, because an empty prefix would match every
            stored path and clear the whole index.

    Returns:
        None. The indices and modified times are saved only if something
        was actually removed.
    """
    logger.info(f"Removing repo from index: {repo_path}")

    if not repo_path:
        logger.warning("Empty repo path given; nothing removed from index.")
        return

    # Normalise once, then build a prefix that ends in a path separator so
    # the match cannot stop in the middle of a directory name.
    repo_root = os.path.normpath(repo_path)
    repo_prefix = os.path.join(repo_root, "")

    def _inside_repo(path):
        candidate = os.path.normpath(path)
        return candidate == repo_root or candidate.startswith(repo_prefix)

    changed = False

    # remove index entries
    for folder_type in list(self.indices):
        items = self.indices[folder_type]
        kept = [item for item in items if not _inside_repo(item["path"])]
        if len(kept) != len(items):
            self.indices[folder_type] = kept
            changed = True

    # remove modified times (collect the keys first; the dict is mutated below)
    stale_keys = [key for key in self.modified_times if _inside_repo(key)]
    for key in stale_keys:
        del self.modified_times[key]
        changed = True

    if changed:
        self._save_indices()
        self._save_modified_times()

mlc/repo_action.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -166,31 +166,19 @@ def register_repo(self, repo_path, repo_meta, ignore_on_conflict=False):
166166

167167
with open(repos_file_path, 'w') as f:
168168
json.dump(repos_list, f, indent=2)
169-
170-
logger.debug("Forcing Index rebuild ...")
171-
# reload repos list
172-
with open(repos_file_path, 'r') as f:
173-
repos_list = json.load(f)
174-
175-
repos = []
176-
for p in repos_list:
177-
meta = {}
178-
yaml_file = os.path.join(p, "meta.yaml")
179-
json_file = os.path.join(p, "meta.json")
180-
181-
if os.path.exists(yaml_file):
182-
meta = utils.read_yaml(yaml_file)
183-
elif os.path.exists(json_file):
184-
meta = utils.read_json(json_file)
185-
else:
186-
logger.info(f"No meta file found in {self.repos_path}")
187-
continue
188-
189-
repos.append(Repo(path=p, meta=meta))
190-
191-
# rebuild index via constructor
192-
Index(self.repos_path, repos)
193-
logger.debug("repos.json and index file has been updated")
169+
logger.info(f"Updated repos.json at {repos_file_path}")
170+
171+
self.repos = self.load_repos_and_meta()
172+
repo_obj = next(
173+
(r for r in self.repos if r.path == repo_path),
174+
None
175+
)
176+
177+
if repo_obj:
178+
index = Index(self.repos_path, self.repos)
179+
index.add_repo(repo_obj)
180+
logger.debug("Index file has been updated")
181+
194182
return {'return': 0}
195183

196184
def unregister_repo(self, repo_path):
@@ -531,7 +519,8 @@ def rm(self, run_args):
531519
Action: rm
532520
####################################################################################################################
533521
534-
The `rm` action removes a specified repository from MLCFlow, deleting both the repo folder and its registration.
522+
The `rm` action removes a specified repository from MLCFlow, deleting the repository folder, its index entries,
523+
and its registration.
535524
If there are any modified local changes, the user will be prompted for confirmation unless the `-f` flag is used
536525
for force removal.
537526
@@ -578,6 +567,8 @@ def rm(self, run_args):
578567
repos_file_path = os.path.join(self.repos_path, 'repos.json')
579568

580569
force_remove = True if run_args.get('f') else False
570+
index = Index(self.repos_path, self.repos)
571+
index.remove_repo_from_index(repo_path)
581572

582573
return rm_repo(repo_path, repos_file_path, force_remove)
583574

0 commit comments

Comments
 (0)