Commit cea56d4

Authored by aristizabal95, mhmdk0, and hasan7n
Execution model & Model/Evaluation status tracking (#631)
* Rename results to executions
* Create report fields
* Include executions endpoints
* Add note regarding existing results endpoints
* Fix execution endpoint name
* Implement query parameters on main entities
* Revert django update
* WIP: turn result entity into execution entity
* Implement list query filtering in the CLI
* Add list filters to main entities
* Move results code to executions; WIP: integrate exec reporting
* Make results field optional
* Don't send reports on tests; fix evaluation report issue
* Allow updating executions
* Remove owner query for /me/results
* Fix benchmark execution flow
* Rename results tests to executions
* Fix rest tests
* Fix tests that called results
* Fix bugs related to the result -> execution change
* Fix existing tests
* Fix remaining existing tests
* Allow for testing executions
* Fix style issues
* Fix unit tests
* Fix server tests
* Make test False as default
* Allow passing None as execution
* Fix function name in Rest -> upload_execution
* Fix associate dataset test after raising CleanExit on cancellation
* Fix Rest and tests issues due to wrong merging
* Fix some typos
* Update configuration migration
* Add filtering on client side only for all comms
* Update executions logic (mainly about when a user can rerun or create a new execution, etc.)
* Make finalized True for existing instances
* Add new flags to commands, filter latest executions
* Type hints for executions util
* Fix bug in sending model report logic
* Fix bug in migrations
* Update medperf run command
* Fix some bugs
* Preserve predictions using timestamps
* Fix integration tests
* Add a local outputs folder for metrics container
* Update cli tests
* Fix server tests
* Add new server tests
* Fix postgresql dev utility
* Rename execution back to result (it turns out that renaming a db model is complicated)
* Rename remaining execution changes for consistency
* Modify migrations to have existing results finalized
* Update unit test

--------

Co-authored-by: mhmdk0 <mohammadkassim54.mk@gmail.com>
Co-authored-by: hasan7n <hasankassim7@hotmail.com>
1 parent 8a1f142 commit cea56d4


46 files changed: +1229, -624 lines

cli/cli_tests.sh

Lines changed: 23 additions & 16 deletions
@@ -439,33 +439,41 @@ checkFailed "run all outstanding models failed"
 echo "\n"
 
 ##########################################################
-echo "======================================================================================"
-echo "Run failing container with ignore errors (This SHOULD fail since predictions folder exists)"
-echo "======================================================================================"
-print_eval medperf run -b $BMK_UID -d $DSET_A_UID -m $FAILING_MODEL_UID -y --ignore-model-errors
-checkSucceeded "Container ran successfuly but should fail since predictions folder exists"
+echo "====================================================================="
+echo "Run failing container with ignore errors"
+echo "====================================================================="
+print_eval medperf result create -b $BMK_UID -d $DSET_A_UID -m $FAILING_MODEL_UID --ignore-model-errors
+checkFailed "Failing container run with ignore errors failed"
 ##########################################################
 
 echo "\n"
 
 ##########################################################
 echo "====================================================================="
-echo "Run failing container with ignore errors after deleting predictions folder"
+echo "Submit failing container's result"
 echo "====================================================================="
-print_eval rm -rf $MEDPERF_STORAGE/predictions/$SERVER_STORAGE_ID/model-fail/$DSET_A_GENUID
-print_eval medperf run -b $BMK_UID -d $DSET_A_UID -m $FAILING_MODEL_UID -y --ignore-model-errors
+print_eval medperf result submit -b $BMK_UID -d $DSET_A_UID -m $FAILING_MODEL_UID -y
 checkFailed "Failing container run with ignore errors failed"
 ##########################################################
 
 echo "\n"
 
 ##########################################################
-echo "====================================="
-echo "Running logging model without logging env"
-echo "====================================="
-print_eval rm -rf $MEDPERF_STORAGE/predictions/$SERVER_STORAGE_ID/model-log-none/$DSET_A_GENUID
-print_eval medperf run -b $BMK_UID -d $DSET_A_UID -m $MODEL_LOG_NONE_UID -y
-checkFailed "run logging model without logging env failed"
+echo "====================================================================="
+echo "Rerun (execute+submit). This will error out"
+echo "====================================================================="
+print_eval medperf run -b $BMK_UID -d $DSET_A_UID -m $FAILING_MODEL_UID --ignore-model-errors -y
+checkSucceeded "Rerunning should fail, but it succeeded"
+##########################################################
+
+echo "\n"
+
+##########################################################
+echo "====================================================================="
+echo "Rerun (execute+submit) with --new-result flag. This should work."
+echo "====================================================================="
+print_eval medperf run -b $BMK_UID -d $DSET_A_UID -m $FAILING_MODEL_UID --ignore-model-errors --new-result -y
+checkFailed "Rerunning with --new-result failed"
 ##########################################################
 
 echo "\n"
@@ -474,8 +482,7 @@ echo "\n"
 echo "====================================="
 echo "Running logging model with debug logging env"
 echo "====================================="
-print_eval rm -rf $MEDPERF_STORAGE/predictions/$SERVER_STORAGE_ID/model-log-debug/$DSET_A_GENUID
-print_eval medperf --container-loglevel debug run -b $BMK_UID -d $DSET_A_UID -m $MODEL_LOG_DEBUG_UID -y
+print_eval medperf --container-loglevel debug run -b $BMK_UID -d $DSET_A_UID -m $MODEL_LOG_DEBUG_UID --new-result -y
 checkFailed "run logging model with debug logging env failed"
 ##########################################################
 
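The checkFailed/checkSucceeded helpers used throughout this script assert on the exit status of the command that just ran (failing the suite when the status is unexpected, as with the rerun-without---new-result case above). A rough Python analogue of that pattern, with a hypothetical helper name chosen here for illustration:

```python
import subprocess
import sys

# Hypothetical analogue of the shell script's checkFailed/checkSucceeded:
# run a command and fail loudly if its exit code does not match expectations.

def check(cmd, expect_success: bool, msg: str) -> None:
    code = subprocess.run(cmd).returncode
    if (code == 0) != expect_success:
        raise AssertionError(msg)

# A command expected to fail, like the rerun without --new-result above:
check([sys.executable, "-c", "raise SystemExit(1)"], expect_success=False,
      msg="Rerunning should fail, but it succeeded")
```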

cli/medperf/cli.py

Lines changed: 16 additions & 14 deletions
@@ -6,9 +6,9 @@
 from medperf import __version__
 import medperf.config as config
 from medperf.decorators import clean_except, add_inline_parameters
-import medperf.commands.result.result as result
-from medperf.commands.result.create import BenchmarkExecution
-from medperf.commands.result.submit import ResultSubmission
+from medperf.commands.execution import execution
+from medperf.commands.execution.create import BenchmarkExecution
+from medperf.commands.execution.submit import ResultSubmission
 import medperf.commands.mlcube.mlcube as mlcube
 import medperf.commands.dataset.dataset as dataset
 import medperf.commands.auth.auth as auth
@@ -28,7 +28,7 @@
 app = typer.Typer()
 app.add_typer(mlcube.app, name="mlcube", help="Manage mlcubes")
 app.add_typer(mlcube.app, name="container", help="Manage containers")
-app.add_typer(result.app, name="result", help="Manage results")
+app.add_typer(execution.app, name="result", help="Manage results")
 app.add_typer(dataset.app, name="dataset", help="Manage datasets")
 app.add_typer(benchmark.app, name="benchmark", help="Manage benchmarks")
 app.add_typer(association.app, name="association", help="Manage associations")
@@ -65,23 +65,25 @@ def execute(
         "--no-cache",
         help="Ignore existing results. The experiment then will be rerun",
     ),
+    new_result: bool = typer.Option(
+        False,
+        "--new-result",
+        help=(
+            "Works if the result of the execution was already uploaded."
+            "This will rerun and create a new record."
+        ),
+    ),
 ):
     """Runs the benchmark execution step for a given benchmark, prepared dataset and model"""
-    result = BenchmarkExecution.run(
+    BenchmarkExecution.run(
         benchmark_uid,
         data_uid,
         [model_uid],
         ignore_model_errors=ignore_model_errors,
         no_cache=no_cache,
-    )[0]
-    if result.id:  # TODO: use result.is_registered once PR #338 is merged
-        config.ui.print(  # TODO: msg should be colored yellow
-            """An existing registered result for the requested execution has been\n
-            found. If you wish to submit a new result for the same execution,\n
-            please run the command again with the --no-cache option.\n"""
-        )
-    else:
-        ResultSubmission.run(result.local_id, approved=approval)
+        rerun_finalized_executions=new_result,
+    )
+    ResultSubmission.run(benchmark_uid, data_uid, model_uid, approved=approval)
     config.ui.print("✅ Done!")
 
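The new --new-result flag feeds the rerun_finalized_executions argument above. As a minimal sketch of the decision this enables (hypothetical helper, not MedPerf's actual implementation): a cached execution is reused unless --no-cache is given, and a finalized (already-uploaded) execution is only rerun when the new flag is set.

```python
# Hypothetical sketch of the rerun decision behind --no-cache and --new-result.
# Names and logic are illustrative only, not MedPerf's actual code.

def should_rerun(cached: bool, finalized: bool,
                 no_cache: bool, rerun_finalized: bool) -> bool:
    """Decide whether an execution must be run again."""
    if not cached:
        return True  # nothing stored yet: always execute
    if finalized:
        # Result already uploaded: only rerun when the user explicitly
        # asks for a new record (--new-result / --rerun-finalized).
        return rerun_finalized
    # Local, unsubmitted execution: rerun only when caching is disabled.
    return no_cache
```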

cli/medperf/commands/benchmark/benchmark.py

Lines changed: 44 additions & 3 deletions
@@ -8,7 +8,7 @@
 from medperf.commands.view import EntityView
 from medperf.commands.benchmark.submit import SubmitBenchmark
 from medperf.commands.benchmark.associate import AssociateBenchmark
-from medperf.commands.result.create import BenchmarkExecution
+from medperf.commands.execution.create import BenchmarkExecution
 
 app = typer.Typer()
 
@@ -20,13 +20,48 @@ def list(
         False, "--unregistered", help="Get unregistered benchmarks"
     ),
     mine: bool = typer.Option(False, "--mine", help="Get current-user benchmarks"),
+    name: str = typer.Option(None, "--name", help="Filter by name"),
+    owner: int = typer.Option(None, "--owner", help="Filter by owner"),
+    state: str = typer.Option(
+        None, "--state", help="Filter by state (DEVELOPMENT/OPERATION)"
+    ),
+    is_valid: bool = typer.Option(
+        None, "--valid/--invalid", help="Filter by valid status"
+    ),
+    is_active: bool = typer.Option(
+        None, "--active/--inactive", help="Filter by active status"
+    ),
+    data_prep: int = typer.Option(
+        None,
+        "-d",
+        "--data-preparation-container",
+        help="Filter by Data Preparation Container",
+    ),
 ):
     """List benchmarks"""
+    filters = {
+        "name": name,
+        "owner": owner,
+        "state": state,
+        "is_valid": is_valid,
+        "is_active": is_active,
+        "data_preparation_mlcube": data_prep,
+    }
+
     EntityList.run(
         Benchmark,
-        fields=["UID", "Name", "Description", "State", "Approval Status", "Registered"],
+        fields=[
+            "UID",
+            "Name",
+            "Description",
+            "Data Preparation Container",
+            "State",
+            "Approval Status",
+            "Registered",
+        ],
         unregistered=unregistered,
         mine_only=mine,
+        **filters,
     )
 
 
@@ -139,17 +174,23 @@ def run(
         "--no-cache",
         help="Execute even if results already exist",
     ),
+    rerun_finalized: bool = typer.Option(
+        False,
+        "--rerun-finalized",
+        help="Execute even if results have been already uploaded (this will create new records)",
+    ),
 ):
     """Runs the benchmark execution step for a given benchmark, prepared dataset and model"""
     BenchmarkExecution.run(
         benchmark_uid,
         data_uid,
         models_uids=None,
-        no_cache=no_cache,
         models_input_file=file,
         ignore_model_errors=ignore_model_errors,
+        no_cache=no_cache,
         show_summary=True,
         ignore_failed_experiments=True,
+        rerun_finalized_executions=rerun_finalized,
     )
     config.ui.print("✅ Done!")
 
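The filters dict built above is forwarded to EntityList.run, and the commit message notes that filtering is applied on the client side for all comms. A minimal sketch of that pattern (hypothetical function, not MedPerf's code): drop None entries so unset CLI options impose no constraint, then keep entities matching every remaining key.

```python
# Hypothetical sketch of client-side filtering with optional CLI filters.
# None means "option not given", so those keys are dropped before matching.

def filter_entities(entities, **filters):
    active = {k: v for k, v in filters.items() if v is not None}
    return [e for e in entities if all(e.get(k) == v for k, v in active.items())]
```

This keeps the CLI signature simple: every filter option defaults to None and can be passed through unconditionally.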

cli/medperf/commands/compatibility_test/run.py

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 import logging
 
-from medperf.commands.execution import Execution
+from medperf.commands.execution.execution_flow import ExecutionFlow
 from medperf.entities.dataset import Dataset
 from medperf.entities.benchmark import Benchmark
 from medperf.entities.report import TestReport
@@ -265,7 +265,7 @@ def execute(self):
         Returns:
             dict: returns the results of the test execution.
         """
-        execution_summary = Execution.run(
+        execution_summary = ExecutionFlow.run(
             dataset=self.dataset,
             model=self.model_cube,
             evaluator=self.evaluator_cube,

cli/medperf/commands/dataset/associate_benchmark.py

Lines changed: 7 additions & 6 deletions
@@ -2,8 +2,8 @@
 from medperf.entities.dataset import Dataset
 from medperf.entities.benchmark import Benchmark
 from medperf.utils import dict_pretty_print, approval_prompt
-from medperf.commands.result.create import BenchmarkExecution
-from medperf.exceptions import InvalidArgumentError
+from medperf.commands.execution.create import BenchmarkExecution
+from medperf.exceptions import InvalidArgumentError, CleanExit
 
 
 class AssociateBenchmarkDataset:
@@ -29,24 +29,25 @@ def run(data_uid: int, benchmark_uid: int, approved=False, no_cache=False):
                 "The specified dataset wasn't prepared for this benchmark"
             )
 
-        result = BenchmarkExecution.run(
+        execution = BenchmarkExecution.run(
             benchmark_uid,
             data_uid,
             [benchmark.reference_model_mlcube],
             no_cache=no_cache,
         )[0]
+        results = execution.read_results()
         ui.print("These are the results generated by the compatibility test. ")
         ui.print("This will be sent along the association request.")
         ui.print("They will not be part of the benchmark.")
-        dict_pretty_print(result.results)
+        dict_pretty_print(results)
 
         msg = "Please confirm that you would like to associate"
         msg += f" the dataset {dset.name} with the benchmark {benchmark.name}."
        msg += " [Y/n]"
         approved = approved or approval_prompt(msg)
         if approved:
             ui.print("Generating dataset benchmark association")
-            metadata = {"test_result": result.results}
+            metadata = {"test_result": results}
             comms.associate_benchmark_dataset(dset.id, benchmark_uid, metadata)
         else:
-            ui.print("Dataset association operation cancelled.")
+            raise CleanExit("Dataset association operation cancelled.")

cli/medperf/commands/dataset/dataset.py

Lines changed: 8 additions & 0 deletions
@@ -31,6 +31,10 @@ def list(
         "-m",
         help="Get datasets for a given data preparation container",
     ),
+    name: str = typer.Option(None, "--name", help="Filter by name"),
+    owner: int = typer.Option(None, "--owner", help="Filter by owner"),
+    state: str = typer.Option(None, "--state", help="Filter by state (DEVELOPMENT/OPERATION)"),
+    is_valid: bool = typer.Option(None, "--valid/--invalid", help="Filter by valid status"),
 ):
     """List datasets"""
     EntityList.run(
@@ -46,6 +50,10 @@ def list(
         unregistered=unregistered,
         mine_only=mine,
         mlcube=mlcube,
+        name=name,
+        owner=owner,
+        state=state,
+        is_valid=is_valid,
     )
 