Merged

64 commits
4bcd832
change in model for llama3 automotive
anandhu-eng Mar 5, 2026
58e63b2
fix typo
anandhu-eng Mar 5, 2026
95935e5
fix model name
anandhu-eng Mar 5, 2026
b70bc80
Add v1.0 tag
anandhu-eng Mar 6, 2026
0fb4ce4
Update tag
anandhu-eng Mar 6, 2026
0f6da5b
Turn off export_json by default for geekbench
amd-arsuresh Mar 6, 2026
2fb01a0
Turn off export_json by default for geekbench
amd-arsuresh Mar 6, 2026
46f39d3
Turn off export_json by default for geekbench
amd-arsuresh Mar 6, 2026
820d29a
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 6, 2026
3d6508f
Turn off export_json by default for geekbench
amd-arsuresh Mar 6, 2026
5f5b749
Prevent ownership restoration
anandhu-eng Mar 9, 2026
be21782
meta cleanup
anandhu-eng Mar 9, 2026
d776684
prevent the model folder to contain .uri as extension
anandhu-eng Mar 9, 2026
50ec1e8
fix version
anandhu-eng Mar 9, 2026
588cbfc
Fix env assignment
anandhu-eng Mar 9, 2026
87600ff
populate mlc ml model path
anandhu-eng Mar 9, 2026
878cbb1
Use existing path if supplied
anandhu-eng Mar 9, 2026
b60f109
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 9, 2026
ad738d5
add sentencepiece and protobuf as dependencies
anandhu-eng Mar 9, 2026
62e43e7
Merge branch 'llama3automotive' of https://github.com/mlcommons/mlper…
anandhu-eng Mar 9, 2026
c6d211c
add tokenizers
anandhu-eng Mar 9, 2026
aa4c878
Support docker devices
anandhu-eng Mar 9, 2026
01b3d44
fix for GPU visibility
anandhu-eng Mar 9, 2026
e93b162
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 9, 2026
286718d
Fix for GPU visibility inside docker
anandhu-eng Mar 9, 2026
76c30a9
merge
anandhu-eng Mar 9, 2026
e3d0707
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 9, 2026
d1f1b5f
Fixes for GPU visibility
anandhu-eng Mar 9, 2026
55eddb4
merge
anandhu-eng Mar 9, 2026
5c4bac2
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 9, 2026
8a50583
Print mmlu path at end
anandhu-eng Mar 9, 2026
c4cf625
support outdirname
anandhu-eng Mar 10, 2026
5a33d92
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 10, 2026
4cd6ab8
potential fix for submission variation
anandhu-eng Mar 10, 2026
4376a94
fix for submission generation
anandhu-eng Mar 10, 2026
e169c0f
add status arg
anandhu-eng Mar 10, 2026
09616ef
add category as input arg
anandhu-eng Mar 10, 2026
982fa41
add submission checker version
anandhu-eng Mar 10, 2026
350e3d4
use root as user in podman
anandhu-eng Mar 10, 2026
01e4c4a
Merge branch 'dev' into llama3automotive
anandhu-eng Mar 11, 2026
63e3432
Merge pull request #851 from mlcommons/llama3automotive
anandhu-eng Mar 11, 2026
19f569c
[Automated Commit] Document script/get-dataset-mmlu/meta.yaml [skip ci]
github-actions[bot] Mar 11, 2026
5e4f9f5
Convert relative path to absolute path for path inputs
amd-arsuresh Mar 18, 2026
93e0363
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 18, 2026
8388b47
Merge branch 'dev' into dev
amd-arsuresh Mar 18, 2026
e490d81
Merge pull request #863 from amd/dev
arjunsuresh Mar 18, 2026
7e1aa25
Fix MLC_USER_RUN_DIR usage for experiment run
amd-arsuresh Mar 18, 2026
7bf5fbd
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Mar 18, 2026
bba0f6b
Merge pull request #864 from amd/dev
arjunsuresh Mar 18, 2026
4eb72a2
Fixed a bug in cache validation (#865)
amd-arsuresh Mar 18, 2026
07f937a
Improvements to geekbench run
amd-arsuresh Mar 18, 2026
d4f3749
[Automated Commit] Document script/benchmark-program-geekbench/meta.y…
github-actions[bot] Mar 18, 2026
7d898db
Add structured platform details, improved geekbench-benchmark (#867)
amd-arsuresh Mar 19, 2026
758e046
[Automated Commit] Document script/benchmark-program-geekbench/meta.y…
github-actions[bot] Mar 19, 2026
7e0d35e
Improvements to GB script (#869)
amd-arsuresh Mar 20, 2026
518578b
[Automated Commit] Document script/get-geekbench/meta.yaml [skip ci]
github-actions[bot] Mar 20, 2026
12bd124
Round geekbench mean scores to 3 decimal places, Windows run.bat fixes
arjunsuresh Mar 22, 2026
213080b
Support versions for lib-jemalloc
amd-arsuresh Mar 23, 2026
7ae0e99
Fix model path for docker run (#868)
anandhu-eng Mar 23, 2026
1ec1b89
Merge pull request #871 from amd/dev
arjunsuresh Mar 23, 2026
a9468fb
[Automated Commit] Document script/get-lib-jemalloc/meta.yaml [skip ci]
github-actions[bot] Mar 23, 2026
9608881
Enable yolo GH action
anandhu-eng Mar 25, 2026
6c69b59
Merge pull request #872 from mlcommons/anandhu-eng-patch-6
arjunsuresh Mar 25, 2026
a3fef6b
Merge branch 'main' into dev
arjunsuresh Mar 25, 2026
1 change: 0 additions & 1 deletion .github/pull_request_template.md
@@ -16,4 +16,3 @@
- [ ] No secrets or credentials are committed
- [ ] Paths, shell commands, and environment handling are safe and portable


36 changes: 29 additions & 7 deletions .github/workflows/test-mlperf-inference-yolo-closed-div.yml
@@ -6,7 +6,7 @@ on:
schedule:
- cron: '0 0 * * *' # Runs daily at 12 AM UTC
pull_request_target:
branches: [ "main_off", "dev_off" ]
branches: [ "main", "dev" ]
paths:
- '.github/workflows/test-mlperf-inference-yolo.yml'
- '**'
@@ -17,7 +17,7 @@ jobs:
runs-on: ${{ matrix.os }}
env:
MLC_INDEX: "on"
SUBMISSION_DIR: ${{ github.workspace }}/mlperf_inference_results
RESULTS_DIR: ${{ github.workspace }}/mlperf_inference_results
strategy:
fail-fast: false
matrix:
@@ -51,20 +51,22 @@ jobs:
- name: Test MLPerf Inference YOLO-v11 (Linux/macOS)
run: |
mlcr run-mlperf,inference,_full,_find-performance,_all-scenarios,_r6.0-dev --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name="gh_${{ matrix.os }}x86" --model=yolo-99 --implementation=reference --category=edge --backend=${{ matrix.backend }} --framework=pytorch --device=cpu --execution_mode=test -v --quiet
mlcr run-mlperf,inference,_submission,_full,_all-modes,_all-scenarios,_r6.0-dev --division=closed --submission_dir=${{ env.SUBMISSION_DIR }} --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name="gh_${{ matrix.os }}x86" --model=yolo-99 --implementation=reference --category=edge --backend=${{ matrix.backend }} --framework=pytorch --device=cpu --execution_mode=valid --multistream_target_latency=900 --env.MLC_MLPERF_USE_MAX_DURATION=no -v --quiet

mlcr run-mlperf,inference,_compliance,_full,_all-modes,_all-scenarios,_r6.0-dev --division=closed --results_dir=${{ env.RESULTS_DIR }} --model=yolo-99 --implementation=reference --category=edge --backend=${{ matrix.backend }} --framework=pytorch --device=cpu --execution_mode=valid --multistream_target_latency=900 --env.MLC_MLPERF_USE_MAX_DURATION=no -v --quiet --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name="gh_${{ matrix.os }}_python${{ matrix.python-version }}x86"
- name: upload results artifact
uses: actions/upload-artifact@v4
with:
name: mlperf-inference-yolo-results-${{ matrix.os }}-py${{ matrix.python-version }}-bk${{ matrix.backend }}
path: ${{ env.SUBMISSION_DIR }}
path: ${{ env.RESULTS_DIR }}

upload-results-to-github:
needs: mlc-run
runs-on: ubuntu-latest
env:
MLC_INDEX: "on"
SUBMISSION_DIR: ${{ github.workspace }}/mlperf_inference_results
SUBMISSION_DIR: ${{ github.workspace }}/mlperf_inference_submission
ARTIFACT_DOWNLOAD_PATH: ${{ github.workspace }}/gh_artifacts
RESULTS_DIR: ${{ github.workspace }}/mlperf_inference_results

concurrency:
group: upload-results-v6.0
cancel-in-progress: false
@@ -100,7 +102,7 @@ jobs:
- name: Download benchmark artifacts
uses: actions/download-artifact@v4
with:
path: "${{ env.SUBMISSION_DIR }}/closed"
path: "${{ env.ARTIFACT_DOWNLOAD_PATH }}"

- name: Load secrets
id: op-load-secrets
@@ -109,6 +111,26 @@
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
PAT: op://7basd2jirojjckncf6qnq3azai/bzbaco3uxoqs2rcyu42rvuccga/credential

- name: Prepare Results Directory
run: |
set -euo pipefail

mkdir -p "${{ env.RESULTS_DIR }}"

# Iterate over system folders
for system_dir in "${{ env.ARTIFACT_DOWNLOAD_PATH }}/*"; do
if [ -d "${system_dir}/valid_results" ]; then
echo "Processing ${system_dir}/valid_results"
cp -r "${system_dir}/valid_results/"* "${{ env.RESULTS_DIR }}/"
else
echo "Skipping ${system_dir} (no valid_results directory)"
fi
done

tree -a "${{ env.RESULTS_DIR }}"

mlcr generate,inference,submission --adr.compiler.tags=gcc --version=v6.0 --clean --preprocess_submission=yes --submission_base_dir="${{ env.SUBMISSION_DIR}}" --results_dir="${{ env.RESULTS_DIR }}" --run_checker --submitter=MLCommons --tar=yes --division=closed --env.MLC_DETERMINE_MEMORY_CONFIGURATION=yes --extra_checker_args="--skip-extra-accuracy-files-check" --quiet $docker_tags

- name: Push Results
env:
GITHUB_TOKEN: ${{ steps.op-load-secrets.outputs.PAT }}
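One caveat in the `Prepare Results Directory` step above: in bash, a glob enclosed in double quotes (`"$DIR/*"`) is not expanded, so the loop as written iterates once over the literal pattern rather than over the system folders. A minimal Python sketch of the intended copy step (directory layout inferred from the workflow; only `valid_results` is taken from it, everything else is illustrative):

```python
import os
import shutil

def collect_valid_results(artifact_dir, results_dir):
    """Copy each system folder's valid_results tree into a flat results dir."""
    os.makedirs(results_dir, exist_ok=True)
    for name in sorted(os.listdir(artifact_dir)):
        system_dir = os.path.join(artifact_dir, name)
        valid = os.path.join(system_dir, "valid_results")
        if not os.path.isdir(valid):
            # mirrors the workflow's "Skipping ... (no valid_results)" branch
            continue
        for entry in os.listdir(valid):
            src = os.path.join(valid, entry)
            dst = os.path.join(results_dir, entry)
            if os.path.isdir(src):
                # merge into an existing destination, like repeated `cp -r`
                shutil.copytree(src, dst, dirs_exist_ok=True)
            else:
                shutil.copy2(src, dst)
```

`dirs_exist_ok=True` (Python 3.8+) lets results from multiple systems merge into the same destination tree, which is what the repeated `cp -r .../valid_results/* "$RESULTS_DIR"/` calls achieve in the shell version.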
2 changes: 1 addition & 1 deletion README.md
@@ -6,11 +6,11 @@
[![MLPerf Inference ABTF POC Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml)



Welcome to the **MLPerf Automations and Scripts** repository! This repository is your go-to resource for tools, automations, and scripts designed to streamline the execution of **MLPerf benchmarks** — with a strong emphasis on **MLPerf Inference benchmarks**.

Starting **January 2025**, MLPerf automation scripts is powered by [MLCFlow](https://github.com/mlcommons/mlcflow) automation interface. This new and simplified framework replaces the previous [Collective Mind (CM)](https://github.com/mlcommons/ck/tree/master/cm), providing a more robust, efficient, and self-contained solution for benchmarking workflows, making MLPerf automations independent of any external projects.


---

## πŸš€ Key Features
15 changes: 10 additions & 5 deletions automation/script/cache_utils.py
@@ -323,8 +323,10 @@ def run_validate_cache_if_present(i, cached_script):
)

# reconstruct env/state from cached metadata
env_tmp = copy.deepcopy(i['env'])
state_tmp = copy.deepcopy(i['state'])
env = i['env']
state = i['state']
env_saved = copy.deepcopy(env)
state_saved = copy.deepcopy(state)

path_to_cached_state_file = os.path.join(
cached_script.path,
@@ -340,10 +342,10 @@
return None
new_env = cached_meta.get("new_env", {})
if new_env:
env_tmp.update(new_env)
env.update(new_env)
new_state = cached_meta.get("new_state", {})
if new_state:
state_tmp.update(new_state)
state.update(new_state)

# re-run deps
deps = i['meta'].get('deps')
@@ -377,11 +379,14 @@

r = i['self'].run_native_script({
'run_script_input': run_script_input,
'env': env_tmp,
'env': env,
'script_name': 'validate_cache',
'detect_version': True
})

i['env'] = env_saved
i['state'] = state_saved

if r['return'] > 0:
return None

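The cache-validation change above stops working on throwaway copies (`env_tmp`/`state_tmp`) and instead mutates the live `env`/`state`, saving deep copies up front and restoring them after the native validation script runs. A minimal sketch of that snapshot-mutate-restore pattern (simplified; not the exact MLCFlow helper):

```python
import copy

def run_with_temporary_updates(env, updates, action):
    """Apply updates to env only for the duration of action, then restore."""
    env_saved = copy.deepcopy(env)  # snapshot before mutating
    try:
        env.update(updates)         # e.g. new_env from cached metadata
        return action(env)          # e.g. run the validate_cache script
    finally:
        # restore the caller's view in place, dropping temporary keys
        env.clear()
        env.update(env_saved)
```

Deep-copying matters because the cached metadata may update nested values; a shallow copy would let the temporary run leak mutations back into the caller's dictionaries.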
6 changes: 6 additions & 0 deletions automation/script/experiment.py
@@ -40,6 +40,12 @@ def experiment_run(self_module, i):
return prune_result

run_input = prune_result['new_input']

if 'env' not in run_input:
run_input['env'] = {}
current_path = os.path.abspath(os.getcwd())
run_input['env']['MLC_USER_RUN_DIR'] = current_path

if run_input.get('exp'):
del (run_input['exp'])

126 changes: 80 additions & 46 deletions automation/script/module.py
@@ -127,6 +127,20 @@ def __init__(self, action_object, automation_file, run_args={}):
self.main_script_force_new_cache = run_args.get(
'new', False) # only set for the initial script being called

if self.env.get('MLC_USER_RUN_DIR', '') == '':
current_path = os.path.abspath(os.getcwd())
r = _update_env(self.env, 'MLC_USER_RUN_DIR', current_path)
if r['return'] > 0:
return r

if self.const.get('MLC_USER_RUN_DIR', '') == '':
r = _update_env(
self.const,
'MLC_USER_RUN_DIR',
self.env['MLC_USER_RUN_DIR'])
if r['return'] > 0:
return r

def init_run_state(self, run_state):

run_state = run_state or {}
@@ -988,17 +1002,18 @@ def _run(self, i):
if r['return'] > 0:
return r

# Check chain of prehook dependencies on other MLC scripts.
# (No execution of customize.py for cached scripts)
logger.debug(
self.recursion_spaces +
' - Checking prehook dependencies on other MLC scripts:')
if len(prehook_deps):
# Check chain of prehook dependencies on other MLC scripts.
# (No execution of customize.py for cached scripts)
logger.debug(
self.recursion_spaces +
' - Checking prehook dependencies on other MLC scripts:')

r = self._call_run_deps(prehook_deps, self.local_env_keys, local_env_keys_from_meta,
self.recursion_spaces + extra_recursion_spaces,
variation_tags_string, True, debug_script_tags, show_time, extra_recursion_spaces, run_state)
if r['return'] > 0:
return r
r = self._call_run_deps(prehook_deps, self.local_env_keys, local_env_keys_from_meta,
self.recursion_spaces + extra_recursion_spaces,
variation_tags_string, True, debug_script_tags, show_time, extra_recursion_spaces, run_state)
if r['return'] > 0:
return r

# Continue with the selected cached script
cached_script = found_cached_scripts[selection]
@@ -1038,38 +1053,42 @@
'append_unique': True})

utils.merge_dicts(
{'dict1': new_env, 'dict2': const, 'append_lists': True, 'append_unique': True})
utils.merge_dicts({'dict1': new_state,
{'dict1': env, 'dict2': const, 'append_lists': True, 'append_unique': True})
utils.merge_dicts({'dict1': state,
'dict2': const_state,
'append_lists': True,
'append_unique': True})

if not fake_run:
# Check chain of posthook dependencies on other MLC scripts. We consider them same as postdeps when
# script is in cache
logger.debug(
self.recursion_spaces +
' - Checking posthook dependencies on other MLC scripts:')

clean_env_keys_post_deps = meta.get(
'clean_env_keys_post_deps', [])

r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps,
self.recursion_spaces + extra_recursion_spaces,
variation_tags_string, True, debug_script_tags, show_time, extra_recursion_spaces, run_state)
if r['return'] > 0:
return r
# Check chain of posthook dependencies on other MLC scripts. We consider them same as postdeps when
# script is in cache
if len(posthook_deps):
logger.debug(
self.recursion_spaces +
' - Checking posthook dependencies on other MLC scripts:')

r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps,
self.recursion_spaces + extra_recursion_spaces,
variation_tags_string, True, debug_script_tags, show_time, extra_recursion_spaces, run_state)
if r['return'] > 0:
return r

logger.debug(
self.recursion_spaces +
' - Checking post dependencies on other MLC scripts:')
if len(post_deps):
logger.debug(
self.recursion_spaces +
' - Checking post dependencies on other MLC scripts:')

# Check chain of post dependencies on other MLC scripts
r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps,
self.recursion_spaces + extra_recursion_spaces,
variation_tags_string, True, debug_script_tags, show_time, extra_recursion_spaces, run_state)
if r['return'] > 0:
return r
# Check chain of post dependencies on other MLC
# scripts
r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps,
self.recursion_spaces + extra_recursion_spaces,
variation_tags_string, True, debug_script_tags, show_time, extra_recursion_spaces, run_state)
if r['return'] > 0:
return r

if renew or (not found_cached and num_found_cached_scripts == 0):
# Add more tags to cached tags
@@ -1874,19 +1893,6 @@ def _run(self, i):
import json
logger.info(json.dumps(rr, indent=2))

if show_time:
logger.info(
self.recursion_spaces +
' - running time of script "{}": {:.2f} sec.'.format(
','.join(found_script_tags),
elapsed_time))
else:
logger.debug(
self.recursion_spaces +
' - running time of script "{}": {:.2f} sec.'.format(
','.join(found_script_tags),
elapsed_time))

if not recursion and show_space:
stop_disk_stats = shutil.disk_usage("/")

@@ -1908,12 +1914,26 @@

v = new_env.get(p, None)

logger.info('{}: {}'.format(t, str(v)))
logger.info(self.recursion_spaces +
' * {}: {}'.format(t, str(v)))

# Check if print nice versions
if print_versions:
self._print_versions(run_state)

if show_time:
logger.info(
self.recursion_spaces +
' - running time of script "{}": {:.2f} sec.'.format(
','.join(found_script_tags),
elapsed_time))
else:
logger.debug(
self.recursion_spaces +
' - running time of script "{}": {:.2f} sec.'.format(
','.join(found_script_tags),
elapsed_time))

# Check if pause (useful if running a given script in a new terminal
# that may close automatically)
if i.get('pause', False):
@@ -5568,11 +5588,25 @@ def update_env_from_input_mapping(
"""
Internal: update env from input and input_mapping
"""

for key in input_mapping:
if key in inp:
if key in input_description and str(input_description[key].get(
'is_path', '')).lower() in ['1', 'yes', 'on', 'true']:
env[input_mapping[key]] = os.path.expanduser(inp[key])

# 1. Expand the '~' if it exists
path_val = os.path.expanduser(inp[key])

# 2. Check if the path is NOT already absolute
if not os.path.isabs(path_val) and env.get(
'MLC_USER_RUN_DIR', '') != '':
base_dir = env["MLC_USER_RUN_DIR"]

# Join the base dir with the relative path and normalize it
path_val = os.path.abspath(
os.path.join(base_dir, path_val))

env[input_mapping[key]] = path_val
else:
env[input_mapping[key]] = inp[key]

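The `update_env_from_input_mapping` change above anchors relative path inputs at `MLC_USER_RUN_DIR`, so a path given on the command line still resolves correctly after the script changes its working directory. The resolution logic in isolation (a sketch of the same steps as the diff, with the env lookup replaced by an explicit `base_dir` parameter):

```python
import os

def resolve_path_input(value, base_dir=None):
    """Expand '~', then anchor relative paths at base_dir when one is given."""
    # 1. Expand a leading '~' to the user's home directory
    path_val = os.path.expanduser(value)
    # 2. Only rewrite paths that are not already absolute
    if not os.path.isabs(path_val) and base_dir:
        # Join against the base dir and normalize (collapses '..' segments)
        path_val = os.path.abspath(os.path.join(base_dir, path_val))
    return path_val
```

Absolute inputs pass through untouched, and `os.path.abspath` also normalizes `..` segments, so `data/../model.bin` and `model.bin` resolve to the same location.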
2 changes: 1 addition & 1 deletion script/README.md
@@ -1,6 +1,6 @@
# MLCommons Automation Scripts

*Last updated: 2026-03-06 15:50:52*
*Last updated: 2026-03-23 23:44:00*

This directory contains automation scripts for MLPerf benchmarks, AI/ML workflows, and development operations.

8 changes: 4 additions & 4 deletions script/app-mlperf-automotive-mlcommons-python/customize.py
@@ -62,7 +62,7 @@ def preprocess(i):
env['MLC_NUM_THREADS'] = env.get('MLC_HOST_CPU_TOTAL_CORES', '1')

if env.get('MLC_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '' and not env.get(
'MLC_MLPERF_MODEL_SKIP_BATCHING', False) and env['MLC_MODEL'] != "llama3_2-3b":
'MLC_MLPERF_MODEL_SKIP_BATCHING', False) and env['MLC_MODEL'] != "llama3_1-8b":
env['MLC_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + \
str(env['MLC_MLPERF_LOADGEN_MAX_BATCHSIZE'])

@@ -71,7 +71,7 @@
str(env['MLC_MLPERF_LOADGEN_BATCH_SIZE'])

if env.get('MLC_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get(
'MLC_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and env.get('MLC_MLPERF_RUN_STYLE', '') != "valid" and env['MLC_MODEL'] != "llama3_2-3b":
'MLC_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and env.get('MLC_MLPERF_RUN_STYLE', '') != "valid" and env['MLC_MODEL'] != "llama3_1-8b":
env['MLC_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \
env['MLC_MLPERF_LOADGEN_QUERY_COUNT']

@@ -284,8 +284,8 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options,

cmd = f"""{env['MLC_PYTHON_BIN_WITH_PATH']} {os.path.join(run_dir, "main.py")} --output {env['OUTPUT_DIR']} --scenario {env['MLC_MLPERF_LOADGEN_SCENARIO']} --backend {backend} --dataset cognata --device {"cuda" if device == "gpu" else "cpu"} --dataset-path {env['MLC_PREPROCESSED_DATASET_COGNATA_PATH']} --checkpoint {env['MLC_ML_MODEL_DEEPLABV3_PLUS_PATH']} {env['MLC_MLPERF_LOADGEN_EXTRA_OPTIONS']} {scenario_extra_options} {mode_extra_options} {dataset_options}"""

elif env['MLC_MODEL'] in ['llm', 'llama3_2-3b']:
run_dir = env['MLC_MLPERF_INFERENCE_LLAMA3_2_3B_PATH']
elif env['MLC_MODEL'] in ['llama3_1-8b']:
run_dir = env['MLC_MLPERF_INFERENCE_LLAMA3_1_8B_PATH']

env['RUN_DIR'] = run_dir
