diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index d2cbed22a..7810ec825 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -200,6 +200,12 @@ jobs: run: | mlc rm repo mlcommons@mlperf-automations -f mlc pull repo mlcommons@mlperf-automations --tag=mlperf-automations-v1.0.0 + + - name: Test 22 - Test recursive mlc pull repo + run: | + export MLC_REPOS=$HOME/test + mlc pull repo https://github.com/GATEOverflow/GO-PDFs + mlcr detect,os -j test_mlc_access_core_actions: @@ -245,7 +251,7 @@ jobs: - name: Test for rm cache - invalid cache entry tags(test succeeds if command fails) run: | - ! mlc rm cache --tags=sample,invalid,tags + mlc rm cache --tags=sample,invalid,tags - name: Test for rm cache when the cache folder is empty(only for mlc rm cache without specifying particular script) run: | diff --git a/docs/error_codes.md b/docs/error_codes.md new file mode 100644 index 000000000..f7f6d97c7 --- /dev/null +++ b/docs/error_codes.md @@ -0,0 +1,85 @@ +# Error and Warning Codes in MLCFlow + +MLCFlow uses a standardized system for error and warning codes to provide consistent error handling across the framework. This document explains the error code system and how to use it. + +## Overview + +The error code system consists of two main components: + +1. **WarningCode(1000-1007)**: Enum class for warning codes (return = 0, with warning_code field) +2. **ErrorCode(2000-2007)**: Enum class for error codes (return > 0) + + +## Usage + +### In MLCFlow Framework + +When returning an error: + +```python +from mlc.error_codes import ErrorCode, get_error_message + +return {'return': ErrorCode.UNSUPPORTED_OS.code, 'error': get_error_message(ErrorCode.UNSUPPORTED_OS.description)} +``` + +When returning a warning: + +```python +from mlc.error_codes import WarningCode, get_warning_message + +return {'return': 0, 'warning_code': WarningCode.ELEVATED_PERMISSION_NEEDED.code, 'warning': get_warning_message(WarningCode.ELEVATED_PERMISSION_NEEDED.description)} +``` + +### In Scripts + +When checking for errors or warnings: + +```python +from mlc.error_codes import is_warning_code + +result = mlc_cache.access({'action': 'rm', 'target': 'cache', 'tags': cache_rm_tags, 'f': True}) +if result['return'] != 0 and not is_warning_code(result['return']): + # Handle error + return result +``` + +## Helper Functions + +The error code system provides several helper functions: + +- `get_error_message(error_code)`: Get the description for an error code +- `get_warning_message(warning_code)`: Get the description for a warning code +- `is_warning_code(code)`: Check if a code is a warning code +- `is_error_code(code)`: Check if a code is an error code +- `get_code_type(code)`: Get the type of a code (error, warning, or unknown) + +## Adding New Error or Warning Codes + +To add a new error or warning code, update the appropriate enum class in `mlc/error_codes.py`: + +```python +# For a new error code +NEW_ERROR = (2008, "Description of the new error") + +# For a new warning code +NEW_WARNING = (1007, "Description of the new warning") +``` + +Make sure to follow the category structure and use the next available code in the appropriate range. \ No newline at end of file diff --git a/mlc/action.py b/mlc/action.py index 54d4ba3bd..e3cf63bb4 100644 --- a/mlc/action.py +++ b/mlc/action.py @@ -13,6 +13,7 @@ from .index import Index from .repo import Repo from .item import Item +from .error_codes import WarningCode # Base class for actions class Action: @@ -113,9 +114,9 @@ def is_curdir_inside_path(base_path): return res continue + repo_path = repo_path.strip() # Remove any extra whitespace or newlines if is_curdir_inside_path(repo_path): self.current_repo_path = repo_path - repo_path = repo_path.strip() # Remove any extra whitespace or newlines # Skip empty lines if not repo_path: @@ -260,8 +261,13 @@ def add(self, i): target_name = i.get('target_name', self.action_type) target_path = os.path.join(repo_path, target_name) - if target_name == "cache": - folder_name = f"""{i["script_alias"]}_{item_name or item_id[:8]}""" if i.get("script_alias") else item_name or item_id + if target_name in ["cache", "experiment"]: + extra_tags_suffix=i.get('extra_tags', '').replace(",", "-")[:15] + if extra_tags_suffix != '': + suffix = f"_{extra_tags_suffix}" + else: + suffix = '' + folder_name = f"""{i["script_alias"]}{suffix}_{item_name or item_id[:8]}""" if i.get("script_alias") else item_name or item_id else: folder_name = item_name or item_id @@ -340,9 +346,10 @@ def rm(self, i): # Do not error out if fetch_all is used if inp.get("fetch_all", False) == True: logger.warning(f"{target_name} is empty! nothing to be cleared!") - return {"return": 0} + return {"return": 0, "warnings": [{"code": WarningCode.EMPTY_TARGET.code, "description": f"{target_name} is empty! nothing to be cleared!"}]} else: - return {'return': 16, 'error': f'No {target_name} found for {inp}'} + logger.warning(f"No {target_name} found for {inp}") + return {'return': 0, "warnings": [{"code": WarningCode.EMPTY_TARGET.code, "description": f"No {target_name} found for {inp}"}]} elif len(res['list']) > 1: logger.info(f"More than 1 {target_name} found for {inp}:") if not i.get('all'): @@ -405,7 +412,7 @@ def save_new_meta(self, i, item_id, item_name, target_name, item_path, repo): if save_result["return"] > 0: return save_result - + self.index.add(item_meta, target_name, item_path, repo) return {'return': 0} @@ -422,7 +429,7 @@ def update(self, i): dict: Return code and message. """ # Step 1: Search for items based on input tags - target_name = i.get('target_name',"cache") + target_name = i.get('target_name', i.get('target', "cache")) i['target_name'] = target_name ii = i.copy() @@ -551,16 +558,16 @@ def cp(self, run_args): target_split = target_item.split(":") if len(target_split) > 1: - target_repo = target_split[0].strip() - if target_repo == ".": + target_repo_name = target_split[0].strip() + if target_repo_name == ".": if not self.current_repo_path: return {'return': 1, 'error': f"""Current directory is not inside a registered MLC repo and so using ".:" is not valid"""} - target_repo = self.current_repo_path + target_repo_name = os.path.basename(self.current_repo_path) else: - if not any(os.path.basename(repodata.path) == target_repo for repodata in self.repos): + if not any(os.path.basename(repodata.path) == target_repo_name for repodata in self.repos): return {'return': 1, 'error': f"""The target repo {target_repo} is not registered in MLC. Either register in MLC by cloning from Git through command `mlc pull repo` or create repo using `mlc add repo` command and try to rerun the command again"""} - target_repo_path = os.path.join(self.repos_path, target_repo) - target_repo = Repo(target_repo_path) + target_repo_path = os.path.join(self.repos_path, target_repo_name) + target_repo = next((k for k in self.repos if os.path.basename(k.path) == target_repo_name), None) target_item_name = target_split[1].strip() else: target_repo = result.repo @@ -710,7 +717,7 @@ def search(self, i): if target == "script": non_variation_tags = [t for t in tags_split if not t.startswith("_")] tags_to_match = non_variation_tags - elif target =="cache": + elif target in ["cache", "experiment"]: tags_to_match = tags_split else: return {'return': 1, 'error': f"""Target {target} not handled in mlc yet"""} diff --git a/mlc/action_factory.py b/mlc/action_factory.py index fa930d206..c0076d467 100644 --- a/mlc/action_factory.py +++ b/mlc/action_factory.py @@ -1,6 +1,7 @@ from .repo_action import RepoAction from .script_action import ScriptAction from .cache_action import CacheAction +from .experiment_action import ExperimentAction # Factory to get the appropriate action class @@ -11,5 +12,6 @@ def get_action(target, parent): actions = { 'repo': RepoAction, 'script': ScriptAction, - 'cache': CacheAction + 'cache': CacheAction, + 'experiment': ExperimentAction } diff --git a/mlc/error_codes.py b/mlc/error_codes.py new file mode 100644 index 000000000..e9ba5395c --- /dev/null +++ b/mlc/error_codes.py @@ -0,0 +1,77 @@ +from enum import Enum, auto + +class ErrorCode(Enum): + """Enum class for error codes in MLCFlow""" + # General errors (2000-2007) + AUTOMATION_SCRIPT_NOT_FOUND = (2000, "The specified automation script was not found") + PATH_DOES_NOT_EXIST = (2001, "Provided path does not exists") + FILE_NOT_FOUND = (2002, "Required file was not found") + PERMISSION_DENIED = (2003, "Insufficient permission to execute the script") + IO_Error = (2004, "File I/O operation failed") + AUTOMATION_CUSTOM_ERROR = (2005, "Custom error triggered by the script") + UNSUPPORTED_OS = (2006, "The Operating System is not supported by the script") + MISSING_ENV_VARIABLE = (2007, "Required environment variables are missing") + + def __init__(self, code, description): + self.code = code + self.description = description + +class WarningCode(Enum): + """Enum class for warning codes in MLCFlow""" + # General warnings (1000-1007) + IO_WARNING = (1000, "File I/O operation warning") + AUTOMATION_SCRIPT_NOT_TESTED = (1001, "the script is not tested on the current operatinig system or is in a development state") + AUTOMATION_SCRIPT_SKIPPED = (1002, "The script has been skipped during execution") + AUTOMATION_CUSTOM_ERROR = (1003, "Custom warning triggered by the script") + NON_INTERACTIVE_ENV = (1004, "Non interactive environment detected") + ELEVATED_PERMISSION_NEEDED = (1005, "Elevated permission needed") + EMPTY_TARGET = (1006, "The specified target is empty") + + def __init__(self, code, description): + self.code = code + self.description = description + +def get_error_info(error_code): + """Get the error message for a given error code""" + try: + return {"error_code": ErrorCode(error_code).code, "error_message": ErrorCode(error_code).description} + except ValueError: + return f"Unknown error code: {error_code}" + +def get_warning_info(warning_code): + """Get the warning message for a given warning code""" + try: + return {"warning_code": WarningCode(warning_code).code, "warning_message": WarningCode(warning_code).description} + except ValueError: + return f"Unknown warning code: {warning_code}" + +def is_warning_code(code): + """Check if a given code is a warning code""" + try: + # Check if code is in warning range (2000-2399) + if 2000 <= code <= 2399: + WarningCode(code) + return True + return False + except ValueError: + return False + +def is_error_code(code): + """Check if a given code is an error code""" + try: + # Check if code is in error range (1000-1399) + if 1000 <= code <= 1399: + ErrorCode(code) + return True + return False + except ValueError: + return False + +def get_code_type(code): + """Get the type of a code (error or warning)""" + if is_error_code(code): + return "error" + elif is_warning_code(code): + return "warning" + else: + return "unknown" \ No newline at end of file diff --git a/mlc/experiment_action.py b/mlc/experiment_action.py index b54b8c017..81fc58874 100644 --- a/mlc/experiment_action.py +++ b/mlc/experiment_action.py @@ -5,8 +5,6 @@ class ExperimentAction(Action): def __init__(self, parent=None): - if parent is None: - parent = default_parent #super().__init__(parent) self.parent = parent self.__dict__.update(vars(parent)) diff --git a/mlc/index.py b/mlc/index.py index 2e6abbeff..6ee5b1fb5 100644 --- a/mlc/index.py +++ b/mlc/index.py @@ -38,6 +38,10 @@ def __init__(self, repos_path, repos): self.build_index() def add(self, meta, folder_type, path, repo): + if not repo: + logger.error(f"Repo for index add for {path} is none") + return + unique_id = meta['uid'] alias = meta['alias'] tags = meta['tags'] diff --git a/mlc/main.py b/mlc/main.py index 000f5b513..1b98b0a28 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -97,6 +97,10 @@ def process_console_output(res, target, action, run_args): else: for item in res['list']: logger.info(f"""Item path: {item.path}""") + if "warnings" in res: + logger.warning(f"{len(res['warnings'])} warning(s) found during the execution of the mlc command.") + for warning in res["warnings"]: + logger.warning(f"Warning code: {warning['code']}, Discription: {warning['description']}") if default_parent is None: default_parent = Action() diff --git a/mlc/repo_action.py b/mlc/repo_action.py index c49276ba6..31bf81d50 100644 --- a/mlc/repo_action.py +++ b/mlc/repo_action.py @@ -126,7 +126,7 @@ def register_repo(self, repo_path, repo_meta): if repo_meta.get('deps'): for dep in repo_meta['deps']: - self.pull_repo(dep['url'], branch=dep.get('branch'), checkout=dep.get('checkout')) + self.pull_repo(dep['url'], branch=dep.get('branch'), checkout=dep.get('checkout'), ignore_on_conflict=dep.get('is_alias_okay', True)) # Get the path to the repos.json file in $HOME/MLC repos_file_path = os.path.join(self.repos_path, 'repos.json') @@ -248,7 +248,7 @@ def github_url_to_user_repo_format(self, url): else: return {"return": 0, "value": os.path.basename(url).replace(".git", "")} - def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = None, ssh = None): + def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = None, ssh = None, ignore_on_conflict = False): # Determine the checkout path from environment or default repo_base_path = self.repos_path # either the value will be from 'MLC_REPOS' @@ -269,7 +269,6 @@ def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = No if res["return"] > 0: return res else: - print(res) repo_url = res["url"] @@ -306,8 +305,9 @@ def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = No print(local_changes.stdout.strip()) return {"return": 0, "warning": f"Local changes detected in the already existing repository: {repo_path}, skipping the pull"} else: - logger.info("No local changes detected. Fetching latest changes...") - subprocess.run(['git', '-C', repo_path, 'fetch'], check=True) + logger.info("No local changes detected. Pulling latest changes...") + subprocess.run(['git', '-C', repo_path, 'pull'], check=True) + logger.info("Repository successfully pulled.") if tag: checkout = "tags/"+tag @@ -317,9 +317,9 @@ def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = No logger.info(f"Checking out to {checkout} in {repo_path}...") subprocess.run(['git', '-C', repo_path, 'checkout', checkout], check=True) - if not tag: - subprocess.run(['git', '-C', repo_path, 'pull'], check=True) - logger.info("Repository successfully pulled.") + #if not tag: + # subprocess.run(['git', '-C', repo_path, 'pull'], check=True) + # logger.info("Repository successfully pulled.") logger.info("Registering the repo in repos.json") @@ -341,9 +341,13 @@ def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = No return {"return": 0} elif "already registered" in is_conflict["error"]: #logger.warning(is_conflict["error"]) - logger.info("No changes made to repos.json.") + logger.debug("No changes made to repos.json.") return {"return": 0} else: + if ignore_on_conflict: + logger.debug("Repo alias existing. Ignoring the repo pull") + return {"return": 0} + logger.warning(f"The repo to be cloned has conflict with the repo already in the path: {is_conflict['conflicting_path']}") self.unregister_repo(is_conflict['conflicting_path']) self.register_repo(repo_path, meta_data) diff --git a/mlc/script_action.py b/mlc/script_action.py index 7e3be15a7..190c297f8 100644 --- a/mlc/script_action.py +++ b/mlc/script_action.py @@ -224,7 +224,6 @@ def call_script_module_function(self, function_name, run_args): module_path = os.path.join(script_path, "module.py") module = self.dynamic_import_module(module_path) - # Check if ScriptAutomation is defined in the module if hasattr(module, 'ScriptAutomation'): automation_instance = module.ScriptAutomation(self, module_path) diff --git a/pyproject.toml b/pyproject.toml index 8883ea745..2ecf40bdf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlcflow" -version = "1.0.9" +version = "1.0.13" description = "An automation interface for ML applications" authors = [