Skip to content

Commit 9082c8c

Browse files
authored
Merge branch 'dev' into dev
2 parents 455f3db + b4e0feb commit 9082c8c

File tree

4 files changed

+172
-59
lines changed

4 files changed

+172
-59
lines changed

.github/workflows/test-mlc-core-actions.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,10 @@ jobs:
133133
- name: Test 10 - cp script - Copy mlc script
134134
run: |
135135
mlc cp script detect-os my-os-detect
136+
137+
- name: Test 11 - add repo - Add a new MLC repo
138+
run: |
139+
mlc add repo my-new-repo
140+
mlc add repo https://github.com/mlcommons/inference
141+
mlc add repo https://mygit.com/myrepo
136142

mlc/main.py

Lines changed: 154 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@ def is_curdir_inside_path(base_path):
147147

148148
# Iterate through the list of repository paths
149149
for repo_path in repo_paths:
150+
if not os.path.exists(repo_path):
151+
logger.warning(f"""Warning: {repo_path} not found. Consider doing `mlc rm repo {repo_path}`. Skipping...""")
152+
continue
153+
150154
if is_curdir_inside_path(repo_path):
151155
self.current_repo_path = repo_path
152156
repo_path = repo_path.strip() # Remove any extra whitespace or newlines
@@ -198,48 +202,6 @@ def load_repos(self):
198202
logger.error(f"Error reading file: {e}")
199203
return None
200204

201-
def conflicting_repo(self, repo_meta):
202-
for repo_object in self.repos:
203-
if repo_object.meta.get('uid', '') == '':
204-
return {"return": 1, "error": f"UID is not present in file 'meta.yaml' in the repo path {repo_object.path}"}
205-
if repo_meta["uid"] == repo_object.meta.get('uid', ''):
206-
if repo_meta['path'] == repo_object.path:
207-
return {"return": 1, "error": f"Same repo is already registered"}
208-
else:
209-
return {"return": 1, "error": f"Conflicting with repo in the path {repo_object.path}", "conflicting_path": repo_object.path}
210-
return {"return": 0}
211-
212-
def register_repo(self, repo_meta):
213-
# Get the path to the repos.json file in $HOME/MLC
214-
repos_file_path = os.path.join(self.repos_path, 'repos.json')
215-
216-
with open(repos_file_path, 'r') as f:
217-
repos_list = json.load(f)
218-
219-
new_repo_path = repo_meta.get('path')
220-
if new_repo_path and new_repo_path not in repos_list:
221-
repos_list.append(new_repo_path)
222-
logger.info(f"Added new repo path: {new_repo_path}")
223-
224-
with open(repos_file_path, 'w') as f:
225-
json.dump(repos_list, f, indent=2)
226-
logger.info(f"Updated repos.json at {repos_file_path}")
227-
228-
def unregister_repo(self, repo_path):
229-
logger.info(f"Unregistering the repo in path {repo_path}")
230-
repos_file_path = os.path.join(self.repos_path, 'repos.json')
231-
232-
with open(repos_file_path, 'r') as f:
233-
repos_list = json.load(f)
234-
235-
if repo_path in repos_list:
236-
repos_list.remove(repo_path)
237-
with open(repos_file_path, 'w') as f:
238-
json.dump(repos_list, f, indent=2)
239-
logger.info(f"Path: {repo_path} has been removed.")
240-
else:
241-
logger.info(f"Path: {repo_path} not found in {repos_file_path}. Nothing to be unregistered!")
242-
243205

244206
def __init__(self):
245207
setup_logging(log_path=os.getcwd(),log_file='mlc-log.txt')
@@ -330,10 +292,13 @@ def add(self, i):
330292
if res["return"] > 0:
331293
return res
332294

295+
if len(res["list"]) == 0:
296+
return {'return': 1, 'error': f"""The given repo {item_repo} is not registered in MLC"""}
297+
333298
# Determine paths and metadata format
334299
repo = res["list"][0]
335300
repo_path = repo.path
336-
301+
337302
target_name = i.get('target_name', self.action_type)
338303
target_path = os.path.join(repo_path, target_name)
339304
if target_name == "cache":
@@ -434,7 +399,7 @@ def rm(self, i):
434399

435400
def save_new_meta(self, i, item_id, item_name, target_name, item_path, repo):
436401
# Prepare metadata
437-
item_meta = i.get('meta')
402+
item_meta = i.get('meta', {})
438403
item_meta.update({
439404
"alias": item_name,
440405
"uid": item_id,
@@ -546,6 +511,7 @@ def is_uid(self, name):
546511
# Check if the name matches the pattern
547512
return bool(re.fullmatch(hex_uid_pattern, name))
548513

514+
549515
def cp(self, run_args):
550516
action_target = run_args['target']
551517
src_item = run_args['src']
@@ -945,6 +911,100 @@ def __init__(self, parent=None):
945911
self.parent = parent
946912
self.__dict__.update(vars(parent))
947913

914+
915+
def add(self, run_args):
    """
    Register a repository with MLC, creating its folder first when needed.

    ``run_args['repo']`` may be an existing local path, a new folder name
    (created under ``self.repos_path``) or a URL. For a URL containing
    "github.com" the folder name is taken from
    ``self.github_url_to_user_repo_format``; for any other input the last
    path component is used. Nothing is cloned here: for a non-existing
    input only a folder is created. A ``meta.yaml`` with a fresh UID is
    written when the repository does not have one yet.

    Args:
        run_args (dict): Parsed command input; ``repo`` holds the path,
            folder name or URL of the repository to add.

    Returns:
        dict: ``{'return': 0}`` on success, otherwise a dict with
        ``'return' > 0`` and an ``'error'`` message (possibly produced by a
        helper such as ``register_repo``).
    """
    i_repo_path = run_args.get('repo')  # can be a path, folder name or URL
    if not i_repo_path:
        message = "The repository to be added is not specified"
        logger.error(message)
        return {"return": 1, "error": message}

    # Drop trailing separators so "name/" or "https://host/name/" still
    # yields a usable folder name instead of an empty basename.
    repo_folder_name = os.path.basename(i_repo_path.rstrip("/\\"))
    repo_path = os.path.join(self.repos_path, repo_folder_name)

    if os.path.exists(repo_path):
        return {'return': 1, "error": f"""Repo {run_args['repo']} already exists at {repo_path}"""}

    # Registered paths may be stored in absolute form, so compare against
    # both the raw input and its absolute form.
    candidate_paths = {i_repo_path, os.path.abspath(i_repo_path)}
    for repo in self.repos:
        if repo.path in candidate_paths:
            return {'return': 1, "error": f"""Repo {run_args['repo']} already exists at {repo.path}"""}

    if not os.path.exists(i_repo_path):
        # Not a local path: either a URL or the name of a new folder.
        if utils.is_valid_url(i_repo_path) and "github.com" in i_repo_path:
            res = self.github_url_to_user_repo_format(i_repo_path)
            if res['return'] > 0:
                return res
            repo_folder_name = res['value']
            repo_path = os.path.join(self.repos_path, repo_folder_name)
            # The target folder changed, so the earlier existence check no
            # longer covers it; without this os.makedirs would raise.
            if os.path.exists(repo_path):
                return {'return': 1, "error": f"""Repo {run_args['repo']} already exists at {repo_path}"""}

        os.makedirs(repo_path)
    else:
        repo_path = os.path.abspath(i_repo_path)
    logger.info(f"""New repo path: {repo_path}""")

    # Make sure the repository carries MLC metadata.
    meta_file = os.path.join(repo_path, "meta.yaml")
    if not os.path.exists(meta_file):
        meta = {
            'uid': utils.get_new_uid()['uid'],
            'alias': repo_folder_name,
            'git': True,
        }
        utils.save_yaml(meta_file, meta)
    else:
        meta = utils.read_yaml(meta_file)

    # Propagate a registration failure instead of always reporting success.
    res = self.register_repo(repo_path, meta)
    if res['return'] > 0:
        return res

    return {'return': 0}
959+
960+
def conflicting_repo(self, repo_meta):
    """
    Check whether ``repo_meta`` clashes with a repository in ``self.repos``.

    Registered repositories are scanned in order and the first decisive
    finding is returned.

    Args:
        repo_meta (dict): Metadata of the candidate repo; its ``uid`` and
            ``path`` entries are read.

    Returns:
        dict: ``{"return": 0}`` when no registered repo shares the UID.
        Otherwise ``{"return": 1, "error": ...}``; when the UID matches a
        repo at a different path the dict also carries
        ``"conflicting_path"``.
    """
    for registered in self.repos:
        registered_uid = registered.meta.get('uid', '')

        # A registered repo without a UID makes the comparison impossible.
        if registered_uid == '':
            return {"return": 1, "error": f"UID is not present in file 'meta.yaml' in the repo path {registered.path}"}

        if repo_meta["uid"] != registered_uid:
            continue

        # Same UID: either the very same repo or a clash at another path.
        if repo_meta['path'] == registered.path:
            return {"return": 1, "error": "Same repo is already registered"}
        return {"return": 1, "error": f"Conflicting with repo in the path {registered.path}", "conflicting_path": registered.path}

    return {"return": 0}
970+
971+
def register_repo(self, repo_path, repo_meta):
    """
    Pull the repo's dependencies, then record ``repo_path`` in ``repos.json``.

    Args:
        repo_path (str): Path of the repository to register.
        repo_meta (dict): Repository metadata. An optional ``deps`` list is
            read; each entry needs a ``url`` and may carry ``branch`` and
            ``checkout``.

    Returns:
        dict: ``{'return': 0}`` on success, or the failing result dict of
        ``self.pull_repo`` when a dependency could not be pulled (in which
        case ``repos.json`` is left untouched).
    """
    for dep in repo_meta.get('deps') or []:
        res = self.pull_repo(dep['url'], branch=dep.get('branch'), checkout=dep.get('checkout'))
        # Surface a failed dependency pull instead of silently registering
        # a repository whose dependencies are missing.
        if isinstance(res, dict) and res.get('return', 0) > 0:
            return res

    # repos.json lives directly under the MLC repos folder
    repos_file_path = os.path.join(self.repos_path, 'repos.json')

    with open(repos_file_path, 'r') as f:
        repos_list = json.load(f)

    if repo_path not in repos_list:
        repos_list.append(repo_path)
        logger.info(f"Added new repo path: {repo_path}")

    with open(repos_file_path, 'w') as f:
        json.dump(repos_list, f, indent=2)
        logger.info(f"Updated repos.json at {repos_file_path}")
    return {'return': 0}
991+
992+
def unregister_repo(self, repo_path):
    """
    Remove ``repo_path`` from the ``repos.json`` registry, if it is listed.

    Args:
        repo_path (str): Path entry to drop from the registry.

    Returns:
        dict: Always ``{'return': 0}``; a path that is not listed is only
        logged, not treated as an error.
    """
    logger.info(f"Unregistering the repo in path {repo_path}")
    repos_file_path = os.path.join(self.repos_path, 'repos.json')

    with open(repos_file_path, 'r') as registry:
        registered = json.load(registry)

    # Nothing to rewrite when the path was never registered.
    if repo_path not in registered:
        logger.info(f"Path: {repo_path} not found in {repos_file_path}. Nothing to be unregistered!")
        return {'return': 0}

    registered.remove(repo_path)
    with open(repos_file_path, 'w') as registry:
        json.dump(registered, registry, indent=2)
    logger.info(f"Path: {repo_path} has been removed.")
    return {'return': 0}
1007+
9481008
def find(self, run_args):
9491009
try:
9501010
# Get repos_list using the existing method
@@ -1101,10 +1161,12 @@ def pull_repo(self, repo_url, branch=None, checkout = None):
11011161
logger.warning(f"The repo to be cloned has conflict with the repo already in the path: {is_conflict['conflicting_path']}")
11021162
logger.warning(f"The repo currently being pulled will be registered in repos.json and already existing one would be unregistered.")
11031163
self.unregister_repo(is_conflict['conflicting_path'])
1104-
self.register_repo(meta_data)
1164+
self.register_repo(repo_path, meta_data)
11051165
return {"return": 0}
11061166
else:
1107-
self.register_repo(meta_data)
1167+
r = self.register_repo(repo_path, meta_data)
1168+
if r['return'] > 0:
1169+
return r
11081170
return {"return": 0}
11091171

11101172
except subprocess.CalledProcessError as e:
@@ -1185,6 +1247,46 @@ def rm(self, i):
11851247
logger.debug(f"Removing script with input: {i}")
11861248
return self.parent.rm(i)
11871249

1250+
def add(self, i):
    """
    Add a new script by delegating to the parent action's ``add``.

    The script is named by ``i['details']`` when that is set, otherwise by
    ``i['item']``. A name of the form ``<repo>:<script>`` selects the
    target repo; without a colon the repo comes from ``i['item_repo']`` and
    falls back to ``self.local_repo``.

    Args:
        i (dict): Input dictionary. It is modified in place: ``item_repo``
            and ``item`` are set to the resolved values, ``target_name`` is
            set to ``"script"`` and ``yaml`` is forced to ``True`` before
            the call is forwarded.

    Returns:
        dict: ``{'return': 1, 'error': ...}`` when no item is given,
        otherwise whatever ``self.parent.add(i)`` returns.
    """
    # The positional "details" argument takes precedence over "item"
    item = i['details'] if i.get('details') else i.get('item')
    if not item:
        return {'return': 1, 'error': f"""No script item given to add. Please use mlc add script <repo_name>:<script_name> --tags=<script_tags> format to add a script to a given repo"""}

    if ":" in item:
        # "<repo>:<script>" form; anything after a second colon is ignored
        pieces = item.split(":")
        item_repo, item = pieces[0], pieces[1]
    else:
        item_repo = i.get("item_repo", self.local_repo)

    i.update({
        'item_repo': item_repo,
        'item': item,
        'target_name': "script",
        'yaml': True,
    })

    # TODO: post-process the result to update the script meta
    return self.parent.add(i)
1289+
11881290
def dynamic_import_module(self, script_path):
11891291
# Validate the script_path
11901292
if not os.path.exists(script_path):
@@ -1428,16 +1530,8 @@ def main():
14281530
# The chosen subcommand will be stored in the "command" attribute of the parsed arguments.
14291531
subparsers = parser.add_subparsers(dest='command', required=True)
14301532

1431-
for action in ['pull']:
1432-
# Pull parser - handles repo URLs directly
1433-
# The chosen subcommand will be stored in the "pull" attribute of the parsed arguments.
1434-
pull_parser = subparsers.add_parser('pull', help='Pull a repository by URL or target.')
1435-
pull_parser.add_argument('target', choices=['repo'], help='Target type (repo).')
1436-
pull_parser.add_argument('repo', nargs='?', help='Repo to pull in URL format or owner@repo_name format for github repos')
1437-
pull_parser.add_argument('extra', nargs=argparse.REMAINDER, help='Extra options (e.g., -v)')
1438-
14391533
# Script and Cache-specific subcommands
1440-
for action in ['run', 'test', 'show', 'list', 'find', 'search', 'rm', 'cp', 'mv']:
1534+
for action in ['run', 'pull', 'test', 'add', 'show', 'list', 'find', 'search', 'rm', 'cp', 'mv']:
14411535
action_parser = subparsers.add_parser(action, help=f'{action} a target.')
14421536
action_parser.add_argument('target', choices=['repo', 'script', 'cache'], help='Target type (repo, script, cache).')
14431537
# the argument given after target and before any extra options like --tags will be stored in "details"
@@ -1469,15 +1563,17 @@ def main():
14691563
run_args = res['args_dict']
14701564
if hasattr(args, 'repo') and args.repo:
14711565
run_args['repo'] = args.repo
1472-
1473-
1474-
if args.command in ['rm','find']:
1566+
1567+
if args.command in ['pull', 'rm', 'add', 'find']:
14751568
if args.target == "repo":
14761569
run_args['repo'] = args.details
14771570

14781571
if hasattr(args, 'details') and args.details and "," in args.details and not run_args.get("tags") and args.target in ["script", "cache"]:
14791572
run_args['tags'] = args.details
14801573

1574+
if not run_args.get('details') and args.details:
1575+
run_args['details'] = args.details
1576+
14811577
if args.command in ["cp", "mv"]:
14821578
run_args['target'] = args.target
14831579
if hasattr(args, 'details') and args.details:

mlc/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,17 @@ def convert_args_to_dictionary(inp):
425425

426426
return {'return': 0, 'args_dict': args_dict}
427427

428+
# Compiled once at import time. It is applied with fullmatch(), so the whole
# string must be a URL and no ^/$ anchors are needed.
_URL_PATTERN = re.compile(
    r"(https?|ftp)://"      # Protocol (http, https, ftp)
    r"(\S+:\S+@)?"          # Optional username:password@
    r"([a-zA-Z0-9.-]+)"     # Domain
    r"(:\d{2,5})?"          # Optional port
    r"(/[\S]*)?",           # Path
    re.IGNORECASE
)


def is_valid_url(url):
    """
    Tell whether ``url`` looks like an http, https or ftp URL.

    The check is purely syntactic (scheme, optional credentials, host,
    optional port, optional path); nothing is resolved or fetched.

    Args:
        url: Value to test. Anything that is not a ``str`` is rejected.

    Returns:
        bool: ``True`` when the entire string matches the URL pattern,
        ``False`` otherwise (including for ``None`` and other non-strings).
    """
    if not isinstance(url, str):
        return False
    # fullmatch (unlike match with "$") also rejects a trailing newline.
    return _URL_PATTERN.fullmatch(url) is not None
438+
428439

429440
def sub_input(i, keys, reverse=False):
430441
"""

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "mlcflow"
7-
version = "0.1.25"
7+
version = "0.1.26"
88
description = "An automation interface for ML applications"
99
authors = [
1010
{ name = "MLCommons", email = "systems@mlcommons.org" }

0 commit comments

Comments
 (0)