
Commit 43ff994

Merge pull request #203 from mlcommons/dev ("Merge Dev")

2 parents: 96ad267 + 1cf476e

File tree

10 files changed (+342, -40 lines)

.github/workflows/mlperf-inference-resnet50.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -44,6 +44,7 @@ jobs:
       - name: Test MLPerf inference ResNet50 on Windows (prebuilt loadgen)
         if: runner.os == 'Windows'
         run: |
+          git config --system core.longpaths true
           mlc run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_action --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=100 --target_qps=1 -v --quiet --adr.loadgen.tags=_from-pip --pip_loadgen=yes
 
 
       - name: Test MLPerf inference ResNet50 on Unix systems
```
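The single added line matters on Windows runners: without `core.longpaths`, git fails on checkout paths beyond the legacy 260-character limit, which deep MLPerf submission trees can exceed. A minimal Python sketch of the same guard (a hypothetical helper, not part of this commit):

```python
# Hypothetical helper mirroring the workflow step above. On Windows, git
# rejects paths longer than 260 characters unless core.longpaths is set;
# --system needs admin rights, which GitHub-hosted runners have.
import platform
import subprocess

def enable_git_longpaths() -> None:
    if platform.system() == "Windows":
        subprocess.run(
            ["git", "config", "--system", "core.longpaths", "true"],
            check=True,
        )
```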

.github/workflows/test-mlc-core-actions.yaml

Lines changed: 21 additions & 4 deletions
```diff
@@ -151,13 +151,14 @@ jobs:
           mlc add repo my-new-repo
           mlc add repo https://github.com/mlcommons/inference
           mlc add repo https://mygit.com/myrepo
-
+
+          ' Disabled now as MLCFlow automatically deletes corrupted entries
       - name: Test 13 - rm repo where we have a corrupt entry
         run: |
           rm -r $HOME/MLC/repos/mlcommons@mlperf-automations
           mlc rm repo mlcommons@mlperf-automations
           mlc pull repo mlcommons@mlperf-automations --branch=dev
-
+          '
       - name: Test 14 - add script - Add a new MLC script
         run: |
           mlc add script my-script-1 --tags=my,new-tags-1
@@ -201,14 +202,30 @@
       - name: Test 21 - Test mlc pull repo to checkout based on particular release tag
         run: |
           mlc rm repo mlcommons@mlperf-automations -f
-          mlc pull repo mlcommons@mlperf-automations --tag=mlperf-automations-v1.0.0
+          mlc pull repo mlcommons@mlperf-automations --tag=v1.2.0
+
+      - name: Test 22 - Test silent mode
+        run: |
+          mlcr detect,cpu -j -s --quiet
+          ! mlcr detect,cpu -j -s --quiet 2>&1 | grep -q INFO
+          ! mlcr detect,cpu -j --silent --quiet 2>&1 | grep -q INFO
+
+      - name: Test 23 - Test verbose mode
+        run: |
+          mlcr detect,cpu -j -v 2>&1 | grep -q DEBUG
+          mlcr detect,cpu -j --verbose 2>&1 | grep -q DEBUG
 
-      - name: Test 22 - Test recursive mlc pull repo
+      - name: Test 24 - Test recursive mlc pull repo
         run: |
           export MLC_REPOS=$HOME/test
           mlcp https://github.com/GATEOverflow/GO-PDFs
           mlcr detect,os -j
 
+      - name: Test 25 - Test automatic repo pull
+        run: |
+          mlc rm repo mlcommons@mlperf-automations -f
+          mlcr detect,cpu -j
+
   test_mlc_access_core_actions:
 
     runs-on: ${{ matrix.os }}
```
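Tests 22 and 23 lean on a shell idiom: `cmd 2>&1 | grep -q PATTERN` succeeds when the pattern appears in the merged output, and the leading `!` inverts the result. So silent runs must emit no `INFO` lines, and verbose runs must emit `DEBUG` lines. A rough Python equivalent of those assertions (a sketch assuming `mlcr` is on `PATH`; not part of the commit):

```python
# Sketch of the silent/verbose log assertions from Tests 22 and 23.
import subprocess

def combined_output(cmd: str) -> str:
    """Run a shell command and return stdout and stderr merged."""
    proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    return proc.stdout + proc.stderr

# Silent mode: no INFO-level lines expected.
assert "INFO" not in combined_output("mlcr detect,cpu -j -s --quiet")

# Verbose mode: DEBUG-level lines expected.
assert "DEBUG" in combined_output("mlcr detect,cpu -j -v")
```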

CONTRIBUTORS.md

Lines changed: 2 additions & 2 deletions
```diff
@@ -29,11 +29,11 @@ Once your contribution exceeds 50 lines of code (in total), we will:
 ## Current Contributors
 
 - **[Arjun Suresh](https://github.com/arjunsuresh)** - *Initial Development Discussions, {Script,Cache} Action implementations*
-- **[Anandhu Sooraj](https://github.com/anandhu-eng)** - *Initial Development Discussions, RepoAction implmentation, Github Tests*
+- **[Anandhu Sooraj](https://github.com/anandhu-eng)** - *Initial Development Discussions, RepoAction implementation, Github Tests*
+- **[Sujith Kanakkassery](https://github.com/sujik18)** - *Initial Development Discussions, Persistent index implementation*
 - **[Shaik Masthan](https://github.com/csemasthan)** - *Initial Development Discussions*
 - **[Sahil Avaran](https://github.com/sahilavaran)** - *Initial Development Discussions*, added logging
 - **[R.A Sidharth](https://github.com/Sid9993)** - *Find repo implementation*
-- **[Sujith Kanakkassery](https://github.com/sujik18)** - *Initial Development Discussions*, adding logging to a file
 - **[Your Name Here]** - This could be you! 🎉
 
 ---
```

VERSION

Lines changed: 1 addition & 1 deletion
```diff
@@ -1 +1 @@
-1.1.9
+1.1.14
```

mlc/action.py

Lines changed: 1 addition & 2 deletions
```diff
@@ -106,8 +106,7 @@ def is_curdir_inside_path(base_path):
         # Iterate through the list of repository paths
         for repo_path in repo_paths:
             if not os.path.exists(repo_path):
-                logger.warning(f"""Warning: {repo_path} not found. Considering it as a corrupt entry and deleting automatically...""")
-                logger.warning(f"Deleting the {meta_yaml_path} entry from repos.json")
+                logger.warning(f"""Warning: {repo_path} not found. Considering it as a corrupt entry and deleting from repos.json...""")
                 from .repo_action import rm_repo
                 res = rm_repo(repo_path, os.path.join(self.repos_path, 'repos.json'), True)
```

mlc/index.py

Lines changed: 208 additions & 16 deletions
```diff
@@ -3,6 +3,7 @@
 import json
 import yaml
 from .repo import Repo
+from datetime import datetime
 
 class CustomJSONEncoder(json.JSONEncoder):
     def default(self, obj):
@@ -35,8 +36,50 @@ def __init__(self, repos_path, repos):
             "experiment": os.path.join(repos_path, "index_experiment.json")
         }
         self.indices = {key: [] for key in self.index_files.keys()}
+        self.modified_times_file = os.path.join(repos_path, "modified_times.json")
+        self.modified_times = self._load_modified_times()
+        self._load_existing_index()
         self.build_index()
 
+    def _load_modified_times(self):
+        """
+        Load stored mtimes to check for changes in scripts.
+        """
+        if os.path.exists(self.modified_times_file):
+            try:
+                # logger.info(f"Loading modified times from {self.modified_times_file}")
+                with open(self.modified_times_file, "r") as f:
+                    return json.load(f)
+            except Exception:
+                return {}
+        return {}
+
+    def _save_modified_times(self):
+        """
+        Save updated mtimes in the modified_times JSON file.
+        """
+        logger.debug(f"Saving modified times to {self.modified_times_file}")
+        with open(self.modified_times_file, "w") as f:
+            json.dump(self.modified_times, f, indent=4)
+
+    def _load_existing_index(self):
+        """
+        Load the previously saved index to allow incremental updates.
+        """
+        for folder_type, file_path in self.index_files.items():
+            if os.path.exists(file_path):
+                try:
+                    # logger.info(f"Loading existing index for {folder_type}")
+                    with open(file_path, "r") as f:
+                        self.indices[folder_type] = json.load(f)
+                    # Convert repo dicts back into Repo objects
+                    for item in self.indices[folder_type]:
+                        if isinstance(item.get("repo"), dict):
+                            item["repo"] = Repo(**item["repo"])
+
+                except Exception:
+                    pass  # fall back to empty index
+
     def add(self, meta, folder_type, path, repo):
         if not repo:
             logger.error(f"Repo for index add for {path} is none")
```
```diff
@@ -87,6 +130,16 @@ def rm(self, meta, folder_type, path):
             del(self.indices[folder_type][index])
         self._save_indices()
 
+    def get_item_mtime(self, file):
+        # logger.debug(f"Getting latest modified time for file: {file}")
+        latest = 0
+        t = os.path.getmtime(file)
+        if t > latest:
+            latest = t
+            logger.debug(f"Latest modified time updated to: {latest}")
+        # logger.debug("No changes in modified time detected.")
+        return latest
+
     def build_index(self):
         """
         Build shared indices for script, cache, and experiment folders across all repositories.
@@ -95,31 +148,158 @@ def build_index(self):
             None
         """
 
-        #for repo in os.listdir(self.repos_path):
+        # track all currently detected item paths
+        current_item_keys = set()
+        changed = False
+        repos_changed = False
+
+        # load existing modified times
+        self.modified_times = self._load_modified_times()
+
+        index_json_path = os.path.join(self.repos_path, "index_script.json")
+
+        rebuild_index = False
+
+        # file does not exist, rebuild
+        if not os.path.exists(index_json_path):
+            logger.warning("index_script.json missing. Forcing full index rebuild...")
+            logger.debug("Resetting modified_times...")
+            self.modified_times = {}
+            self._save_modified_times()
+        else:
+            logger.debug("index_script.json exists. Skipping forced rebuild.")
+
+        # check repos.json mtime
+        repos_json_path = os.path.join(self.repos_path, "repos.json")
+        repos_mtime = os.path.getmtime(repos_json_path)
+
+        key = f"{repos_json_path}"
+        old = self.modified_times.get(key)
+        repo_old_mtime = old["mtime"] if isinstance(old, dict) else old
+
+        logger.debug(f"Current repos.json mtime: {repos_mtime}")
+        logger.debug(f"Old repos.json mtime: {repo_old_mtime}")
+        current_item_keys.add(key)
+
+        # if changed, reset indices
+        if repo_old_mtime is None or repo_old_mtime != repos_mtime:
+            logger.debug("repos.json modified. Clearing index...")
+            # reset indices
+            self.indices = {key: [] for key in self.index_files.keys()}
+            # record repo mtime
+            self.modified_times[key] = {
+                "mtime": repos_mtime,
+                "date_time": datetime.fromtimestamp(repos_mtime).strftime("%Y-%m-%d %H:%M:%S")
+            }
+            # clear modified times except for repos.json
+            self.modified_times = {key: self.modified_times[key]}
+            self._save_indices()
+            self._save_modified_times()
+            repos_changed = True
+        else:
+            logger.debug("repos.json not modified")
+
         for repo in self.repos:
-            repo_path = repo.path#os.path.join(self.repos_path, repo)
+            repo_path = repo.path  # os.path.join(self.repos_path, repo)
             if not os.path.isdir(repo_path):
                 continue
-
+            logger.debug(f"Checking repository: {repo_path}")
             # Filter for relevant directories in the repo
             for folder_type in ["script", "cache", "experiment"]:
+                logger.debug(f"Checking folder type: {folder_type}")
                 folder_path = os.path.join(repo_path, folder_type)
                 if not os.path.isdir(folder_path):
                     continue
 
                 # Process each automation directory
                 for automation_dir in os.listdir(folder_path):
+                    # logger.debug(f"Checking automation directory: {automation_dir}")
                    automation_path = os.path.join(folder_path, automation_dir)
                    if not os.path.isdir(automation_path):
+                        logger.debug(f"Skipping non-directory automation path: {automation_path}")
                         continue
+
+                    yaml_path = os.path.join(automation_path, "meta.yaml")
+                    json_path = os.path.join(automation_path, "meta.json")
 
-                    # Check for configuration files (meta.yaml or meta.json)
-                    for config_file in ["meta.yaml", "meta.json"]:
-                        config_path = os.path.join(automation_path, config_file)
-                        if os.path.isfile(config_path):
-                            self._process_config_file(config_path, folder_type, automation_path, repo)
-                            break # Only process one config file per automation_dir
-        self._save_indices()
+                    if os.path.isfile(yaml_path):
+                        # logger.debug(f"Found YAML config file: {yaml_path}")
+                        config_path = yaml_path
+                    elif os.path.isfile(json_path):
+                        # logger.debug(f"Found JSON config file: {json_path}")
+                        config_path = json_path
+                    else:
+                        logger.debug(f"No config file found in {automation_path}, skipping")
+                        if automation_dir in self.modified_times:
+                            del self.modified_times[automation_dir]
+                        if any(automation_dir in item["path"] for item in self.indices[folder_type]):
+                            logger.debug(f"Removed index entry (if it exists) for {folder_type}: {automation_dir}")
+                            self._remove_index_entry(automation_path)
+                            self._save_indices()
+                        continue
+                    current_item_keys.add(config_path)
+                    mtime = self.get_item_mtime(config_path)
+
+                    old = self.modified_times.get(config_path)
+                    old_mtime = old["mtime"] if isinstance(old, dict) else old
+
+                    # skip if unchanged
+                    if old_mtime == mtime and repos_changed != 1:
+                        # logger.debug(f"No changes detected for {config_path}, skipping reindexing.")
+                        continue
+                    if old_mtime is None:
+                        logger.debug(f"New config file detected: {config_path}. Adding to index.")
+                    # update mtime
+                    logger.debug(f"{config_path} is modified, index getting updated")
+                    if config_path not in self.modified_times:
+                        logger.debug(f"{config_path} not found in modified_times; creating new entry")
+
+                    self.modified_times[config_path] = {
+                        "mtime": mtime,
+                        "date_time": datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
+                    }
+                    logger.debug(f"Modified time for {config_path} updated to {mtime}")
+                    changed = True
+                    # meta file changed, so reindex
+                    self._process_config_file(config_path, folder_type, automation_path, repo)
+
+        # remove deleted scripts
+        old_keys = set(self.modified_times.keys())
+        deleted_keys = old_keys - current_item_keys
+        for key in deleted_keys:
+            logger.warning(f"Detected deleted item, removing entry from modified times: {key}")
+            del self.modified_times[key]
+            folder_key = os.path.dirname(key)
+            logger.warning(f"Removing index entry for folder: {folder_key}")
+            self._remove_index_entry(folder_key)
+            changed = True
+        logger.debug(f"Deleted keys removed from modified times and indices: {deleted_keys}")
+
+        if changed:
+            logger.debug("Changes detected, saving updated index and modified times.")
+            self._save_modified_times()
+            self._save_indices()
+            logger.debug("Index updated (changes detected).")
+        else:
+            logger.debug("Index unchanged (no changes detected).")
+
+    def _remove_index_entry(self, key):
+        logger.debug(f"Removing index entry for {key}")
+        for ft in self.indices:
+            self.indices[ft] = [
+                item for item in self.indices[ft]
+                if key not in item["path"]
+            ]
+
+    def _delete_by_uid(self, folder_type, uid, alias):
+        """
+        Delete the old index entry using UID (prevents duplicates).
+        """
+        logger.debug(f"Deleting and updating index entry for the script {alias} with UID {uid}")
+        self.indices[folder_type] = [
+            item for item in self.indices[folder_type]
+            if item["uid"] != uid
+        ]
 
     def _process_config_file(self, config_file, folder_type, folder_path, repo):
         """
```
"""
@@ -133,25 +313,36 @@ def _process_config_file(self, config_file, folder_type, folder_path, repo):
133313
Returns:
134314
None
135315
"""
316+
if config_file is None:
317+
logger.debug(f"No meta file in {folder_path}, skipping")
318+
return
319+
136320
try:
137321
# Determine the file type based on the extension
138322
if config_file.endswith(".yaml") or config_file.endswith(".yml"):
139323
with open(config_file, "r") as f:
140-
data = yaml.safe_load(f)
324+
data = yaml.safe_load(f) or {}
141325
elif config_file.endswith(".json"):
142326
with open(config_file, "r") as f:
143-
data = json.load(f)
327+
data = json.load(f) or {}
144328
else:
145-
logger.info(f"Skipping {config_file}: Unsupported file format.")
329+
logger.warning(f"Skipping {config_file}: Unsupported file format.")
330+
return
331+
332+
if not isinstance(data, dict):
333+
logger.warning(f"Skipping {config_file}: Invalid or empty meta")
146334
return
147-
148335
# Extract necessary fields
149336
unique_id = data.get("uid")
337+
if not unique_id:
338+
logger.warning(f"Skipping {config_file}: missing uid")
339+
return
150340
tags = data.get("tags", [])
151341
alias = data.get("alias", None)
152342

153343
# Validate and add to indices
154344
if unique_id:
345+
self._delete_by_uid(folder_type, unique_id, alias)
155346
self.indices[folder_type].append({
156347
"uid": unique_id,
157348
"tags": tags,
@@ -160,7 +351,8 @@ def _process_config_file(self, config_file, folder_type, folder_path, repo):
160351
"repo": repo
161352
})
162353
else:
163-
logger.info(f"Skipping {config_file}: Missing 'uid' field.")
354+
logger.warning(f"Skipping {config_file}: Missing 'uid' field.")
355+
164356
except Exception as e:
165357
logger.error(f"Error processing {config_file}: {e}")
166358

@@ -178,6 +370,6 @@ def _save_indices(self):
178370
try:
179371
with open(output_file, "w") as f:
180372
json.dump(index_data, f, indent=4, cls=CustomJSONEncoder)
181-
#logger.debug(f"Shared index for {folder_type} saved to {output_file}.")
373+
logger.debug(f"Shared index for {folder_type} saved to {output_file}.")
182374
except Exception as e:
183375
logger.error(f"Error saving shared index for {folder_type}: {e}")
