mlcommons · hanyunfan · Jan 14, 2026 · Nov 19, 2025 · Nov 25, 2025 · Nov 26, 2025
@@ -35,9 +35,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install mlcflow
-        mlc pull repo mlcommons@mlperf-automations --branch=dev
-#python3 -m pip install mlc-scripts
+        python3 -m pip install mlc-scripts
     - name: Test Resnet50 and end to end submission generation
       run: |
-        mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --adr.inference-src-loadgen.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }}
+        mlcr run,mlperf,inference,generate-run-cmds,_submission,_short,_r6.0-dev --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --adr.inference-src-loadgen.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }}
@@ -33,9 +33,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install mlcflow
-        mlc pull repo mlcommons@mlperf-automations --branch=dev
-#python3 -m pip install mlc-scripts
+        python3 -m pip install mlc-scripts
     - name: Test Retinanet and end to end submission generation
       run: |
-        mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }} --adr.inference-src-loadgen.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom
+        mlcr run,mlperf,inference,generate-run-cmds,_submission,_short,_r6.0-dev --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc  --adr.inference-src.version=custom --adr.accuracy-check-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }} --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }} --adr.inference-src-loadgen.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom
@@ -32,9 +32,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install mlcflow
-        mlc pull repo mlcommons@mlperf-automations --branch=dev
-#python3 -m pip install mlc-scripts
+        python3 -m pip install mlc-scripts
     - name: Test R-GAT and end to end submission generation
       run: |
-        mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --category=datacenter --hw_name=default --model=rgat --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }}
+        mlcr run,mlperf,inference,generate-run-cmds,_submission,_short,_r6.0-dev --quiet --submitter="MLCommons" --category=datacenter --hw_name=default --model=rgat --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }}
@@ -13,9 +13,9 @@ on:
 
 jobs:
   run-tests:
-    uses: mlcommons/mlperf-automations/.github/workflows/test-mlperf-inference-submission-generation.yml@subcheckmod_change
+    uses: mlcommons/mlperf-automations/.github/workflows/test-mlperf-inference-submission-generation.yml@dev
     with:
       inf-branch: ${{ github.event.pull_request.head.ref }}
       inf-repo-url: ${{ github.event.pull_request.head.repo.html_url }}
       automation-repo: mlcommons@mlperf-automations
-      automation-repo-branch: subcheckmod_change
+      automation-repo-branch: dev
@@ -29,7 +29,7 @@ Output directory with submission with truncated `mlperf_log_accuracy.json` files
 ### Summary
 The input submission directory is modified with empty directories removed and low accuracy results inferred. Multistream and offline scenario results are also inferred wherever possible. The original input directory is saved in a timestamped directory.
 
-## `submission_checker.py` (Mandatory)
+## `submission_checker/main.py` (Mandatory)
 ### Inputs
 **input**: Path to the directory containing one or several submissions.<br>
 **version**: Checker version. E.g v1.1, v2.0, v2.1, v3.0, v3.1. <br>
@@ -50,17 +50,73 @@ The below input fields are off by default since v3.1 and are mandatory but can b
 **skip-check-power-measure-files**: Flag to avoid checking if the required power measurement files are present
 
 ### Summary
-Checks a directory that contains one or several submissions. This script can be used by running the following command:
+Checks a directory that contains one or several submissions. This script can be used by running the following command (outside the inference repository):
 ```
-python3 submission_checker.py --input <path-to-folder> 
+python3 -m inference.tools.submission.submission_checker.main 
+    --input <path-to-folder> 
     [--version <version>]
     [--submitter <submitter-name>]
     [--csv <path-to-output>]
     [--skip_compliance]
     [--extra-model-benchmark-map <extra-mapping-string>]
     [--submission-exceptions]
+    [--skip-power-check]
+    [--skip-meaningful-fields-emptiness-check]
+    [--skip-check-power-measure-files]
+    [--skip-empty-files-check]
+    [--skip-extra-files-in-root-check]
+    [--skip-extra-accuracy-files-check]
+    [--scenarios-to-skip]
+    [--skip-all-systems-have-results-check]
+    [--skip-calibration-check]
 ```
 
+### Implemented checks
+**performance**
+- Check performance detailed log exists
+- Check for loadgen errors
+- Check for equal issue mode when it is required
+- Check the performance sample count used for running the benchmark
+- Check loadgen seeds are correct
+- Check latency constraint is met
+- Check minimum query count is met
+- Check minimum duration is met
+- Check network requirements
+- Check LLM latencies are met (if applicable)
+- Check loadgen scenario matches the submission scenario or that the result can be inferred
+
+**accuracy**
+- Check the accuracy metric is correct and over the expected threshold (or within a range, if applicable)
+- Check accuracy json exists and is truncated
+- Check for loadgen errors
+- Check full dataset is used for the accuracy run
+
+**compliance**
+- Check compliance directory exists
+- Run performance checks for compliance run
+- Check accuracy test passes
+- Check performance test passes
+
+**measurements**
+- Check measurements files exist
+- Check the required files are there
+- Check the required fields are there
+
+**power**
+- Check the required power files are there (if the submission has power)
+- Run the external power checks
+- Check power metric can be calculated
+
+**system**
+- Check system json exists
+- Check availability is valid
+- Check system type is valid
+- Check network fields
+- Check required fields are included in the system json file
+- Check submitter is correct
+- Check division is correct
+
+
 ### Outputs
 - CSV file containing all the valid results in the directory.
 - It raises several errors and logs invalid results.

@@ -9,7 +9,7 @@
 import shutil
 import json
 
-import submission_checker as checker
+import submission_checker_old as checker
 
 
 logging.basicConfig(level=logging.INFO)
@@ -120,6 +120,8 @@ def copy_submission_dir(src, dst, filter_submitter, keep_structure=True):
                             src, division, submitter, dir)):
                         target_dir = "results" if dir in [
                             "compliance", "measurements"] else dir
+                        target_dir = "src" if dir in [
+                            "code"] else target_dir
                         shutil.copytree(
                             os.path.join(src, division, submitter, dir),
                             os.path.join(dst, division, submitter, target_dir),
@@ -301,12 +303,10 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json,
                 model_compliance_path = model_results_path
                 model_code_path = os.path.join(
                     change_folder_name_in_path(
-                        log_path, "results", "code"), model)
+                        log_path, "results", "src"), model)
                 if not args.nomove_failed_to_open:
                     target_code_path = change_first_directory_to_open(
                         model_code_path)
-                    target_code_path = change_folder_name_in_path(
-                        log_path, "code", "src")
                     target_results_path = change_first_directory_to_open(
                         model_results_path)
                     target_measurements_path = change_first_directory_to_open(