Update submission_checker.py | Prevent empty accuracy in open division (#2097)

arjunsuresh · web-flow · commit e1d1a4f39b36 · 2025-02-06T17:42:08.000-05:00
* Update submission_checker.py | Prevent empty accuracy in open division

* Update test-submission-checker.yml

* Fix accuracy regex for pointpainting

* Support v4.1 accuracy regex for SDXL
diff --git a/.github/workflows/test-submission-checker.yml b/.github/workflows/test-submission-checker.yml
@@ -31,8 +31,8 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python3 -m pip install cm4mlops
+        python3 -m pip install mlc-scripts
         git clone https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --depth 1
     - name: Test MLPerf inference submission checker
       run: |
-        cm run script --tags=run,mlperf,inference,submission,checker --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --input=`pwd`/mlperf_inference_unofficial_submissions_v5.0 --src_version=v5.0 --extra_args=" --skip-extra-files-in-root-check --skip-extra-accuracy-files-check" --quiet 
+        mlcr run,mlperf,inference,submission,checker --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --input=`pwd`/mlperf_inference_unofficial_submissions_v5.0 --src_version=v5.0 --extra_args=" --skip-extra-files-in-root-check --skip-extra-accuracy-files-check" --quiet 
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
@@ -735,7 +735,7 @@
 ACC_PATTERN = {
     "acc": r"^(?:\{\"accuracy|accuracy)[\": ]*=?\s*([\d\.]+).*",
     "AUC": r"^AUC=([\d\.]+).*",
-    "mAP": r".*(?:mAP=|Total:)\s*([\d.]+)",
+    "mAP": r".*(?:mAP=|'Total':)\s*([\d.]+)",
     "bleu": r"^BLEU\:\s*([\d\.]+).*",
     "F1": r"^{[\"\']exact_match[\"\']\:\s*[\d\.]+,\s*[\"\']f1[\"\']\:\s*([\d\.]+)}",
     "WER": r"Word Error Rate\:.*, accuracy=([0-9\.]+)%",
@@ -746,8 +746,8 @@
     "ROUGELSUM": r".*'rougeLsum':\s([\d.]+).*",
     "GEN_LEN": r".*'gen_len':\s([\d.]+).*",
     "TOKENS_PER_SAMPLE": r".*'tokens_per_sample':\s([\d.]+).*",
-    "CLIP_SCORE": r".*'CLIP_SCORE':\s.([\d.]+).*",
-    "FID_SCORE": r".*'FID_SCORE':\s.([\d.]+).*",
+    "CLIP_SCORE": r".*'CLIP_SCORE':\s+'?([\d.]+).*",
+    "FID_SCORE": r".*'FID_SCORE':\s+'?([\d.]+).*",
     "gsm8k_accuracy": r".*'gsm8k':\s([\d.]+).*",
     "mbxp_accuracy": r".*'mbxp':\s([\d.]+).*",
     "exact_match": r".*'exact_match':\s([\d.]+).*"
@@ -2407,7 +2407,7 @@ def log_result(
                                 .replace('"', "")
                                 .replace("{", "")
                                 .replace("}", "")
-                            )
+                            ).strip()
                             if mlperf_model in REQUIRED_ACC_BENCHMARK:
                                 if (
                                         config.version
@@ -2425,7 +2425,7 @@ def log_result(
                                             missing_files,
                                         )
                                         accuracy_is_valid = False
-                            if not accuracy_is_valid and not is_closed_or_network:
+                            if not accuracy_is_valid and acc and not is_closed_or_network:
                                 if debug:
                                     log.warning(
                                         "%s, accuracy not valid but taken for open",