mlcommons · anhappdev · Jan 14, 2025 · Dec 26, 2024 · Dec 26, 2024 · Dec 27, 2024
@@ -42,11 +42,7 @@ flutter run
 If you want to run or debug the Flutter app for any platform using graphical user interface,
 you can use [VS Code with Flutter extension](https://docs.flutter.dev/get-started/editor?tab=vscode).
 
-If you want to test something without spending a lot of time on the benchmark,
-you can use flag `--dart-define=FAST_MODE=true` to speed up the benchmark.
-You should not evaluate performance when using this flag.
-
-Add `WITH_<VENDOR>=1` to make commands to build the the app with backends.
+Add `WITH_<VENDOR>=1` to the `make` commands to build the app with the corresponding vendor backends.
 For example:
 
 ```bash

@@ -4,12 +4,26 @@
 task {
   id: "image_classification_v2"
   name: "Image Classification v2"
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 600
   max_throughput: 1000
   max_accuracy: 1.0
   scenario: "SingleStream"
+  runs {
+    normal {
+      min_query_count: 1024
+      min_duration: 60
+      max_duration: 600
+    }
+    quick {
+      min_query_count: 128
+      min_duration: 6
+      max_duration: 60
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: IMAGENET
     full {
@@ -47,12 +61,26 @@ task {
 task {
   id: "object_detection"
   name: "Object Detection"
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 600
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "SingleStream"
+  runs {
+    normal {
+      min_query_count: 1024
+      min_duration: 60
+      max_duration: 600
+    }
+    quick {
+      min_query_count: 128
+      min_duration: 6
+      max_duration: 60
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: COCO
     full {
@@ -90,12 +118,26 @@ task {
 task {
   id: "image_segmentation_v2"
   name: "Image Segmentation v2"
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 600
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "SingleStream"
+  runs {
+    normal {
+      min_query_count: 1024
+      min_duration: 60
+      max_duration: 600
+    }
+    quick {
+      min_query_count: 128
+      min_duration: 6
+      max_duration: 60
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: ADE20K
     full {
@@ -132,12 +174,26 @@ task {
 task {
   id: "natural_language_processing"
   name: "Language Understanding"
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 600
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "SingleStream"
+  runs {
+    normal {
+      min_query_count: 1024
+      min_duration: 60
+      max_duration: 600
+    }
+    quick {
+      min_query_count: 128
+      min_duration: 6
+      max_duration: 60
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: SQUAD
     full {
@@ -171,12 +227,26 @@ task {
 task {
   id: "super_resolution"
   name: "Super Resolution "
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 600
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "SingleStream"
+  runs {
+    normal {
+      min_query_count: 1024
+      min_duration: 60
+      max_duration: 600
+    }
+    quick {
+      min_query_count: 128
+      min_duration: 6
+      max_duration: 60
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: SNUSR
     full {
@@ -212,12 +282,26 @@ task {
 task {
   id: "image_classification_offline_v2"
   name: "Image Classification v2 (Offline)"
-  min_query_count: 24576
-  min_duration: 0
-  max_duration: 0
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "Offline"
+  runs {
+    normal {
+      min_query_count: 24576
+      min_duration: 0
+      max_duration: 0
+    }
+    quick {
+      min_query_count: 2457
+      min_duration: 0
+      max_duration: 0
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: IMAGENET
     full {
@@ -255,12 +339,26 @@ task {
 task {
   id: "stable_diffusion"
   name: "Stable Diffusion"
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 300
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "SingleStream"
+  runs {
+    normal {
+      min_query_count: 1024
+      min_duration: 60
+      max_duration: 300
+    }
+    quick {
+      min_query_count: 128
+      min_duration: 6
+      max_duration: 30
+    }
+    rapid {
+      min_query_count: 64
+      min_duration: 6
+      max_duration: 60
+    }
+  }
   datasets {
     type: COCOGEN
     full {

@@ -31,30 +31,42 @@ message MLPerfConfig {
 // Config of the mlperf tasks.
 // A task is basically a combination of models and a dataset.
 //
-// Next ID: 12
+// Next ID: 13
 message TaskConfig {
   // Must be unique in one task file. Ex: image_classification
   // used to match backend settings
   required string id = 1;
   // Human-readable name. Ex: Image classification.
   required string name = 2;
-  // Minimum number of samples the test should run in the performance mode.
-  required int32 min_query_count = 3;
-  // Minimum duration the test should run in the performance mode, in seconds.
-  required double min_duration = 4 [default = 60];
-  // Maximum duration the test should run in the performance mode, in seconds.
-  required double max_duration = 10 [default = 600];
   // Max expected throughput score
   required float max_throughput = 5;
   // Max expected accuracy
   required float max_accuracy = 6;
   // LoadGen parameter. Allowed values: SingleStream, Offline
   required string scenario = 7;
+  required RunConfig runs = 12;  // Query-count and duration limits, one set per run variant (normal, quick, rapid).
   required DatasetConfig datasets = 8;
   required ModelConfig model = 9;
   repeated CustomConfig custom_config = 11;
 }
 
+// Run configurations of a task: one OneRunConfig per run variant; the app picks one via the run mode. Next ID: 4
+message RunConfig {
+  required OneRunConfig normal = 1;  // Limits with the largest min_query_count in the accompanying task definitions.
+  required OneRunConfig quick = 2;  // Limits with a smaller min_query_count than `normal` in the accompanying task definitions.
+  required OneRunConfig rapid = 3;  // Limits with the smallest min_query_count (64) in the accompanying task definitions.
+}
+
+// Config of one run: the query-count and duration limits applied in the performance mode. Next ID: 11
+message OneRunConfig {
+  // Minimum number of samples the test should run in the performance mode.
+  required int32 min_query_count = 3;  // NOTE(review): tags 3, 4 and 10 match the fields removed from TaskConfig; presumably kept on purpose - confirm.
+  // Minimum duration the test should run in the performance mode, in seconds.
+  required double min_duration = 4 [default = 60];  // NOTE(review): a default on a `required` field looks redundant - confirm.
+  // Maximum duration the test should run in the performance mode, in seconds.
+  required double max_duration = 10 [default = 600];
+}
+
 // Datasets for a task
 //
 // Next ID: 5

@@ -19,17 +19,12 @@ void main() {
   binding.framePolicy = LiveTestWidgetsFlutterBindingFramePolicy.fullyLive;
 
   final prefs = <String, Object>{
-    StoreConstants.testMode: true,
     StoreConstants.selectedBenchmarkRunMode:
-        BenchmarkRunModeEnum.submissionRun.name,
-    StoreConstants.testMinDuration: 1,
-    StoreConstants.testMinQueryCount: 4,
+        BenchmarkRunModeEnum.integrationTestRun.name,
+    StoreConstants.cooldown: true,
+    StoreConstants.cooldownDuration:
+        BenchmarkRunModeEnum.integrationTestRun.cooldownDuration,
   };
-  if (DartDefine.perfTestEnabled) {
-    prefs[StoreConstants.testMinDuration] = 15;
-    prefs[StoreConstants.testMinQueryCount] = 64;
-    prefs[StoreConstants.testCooldownDuration] = 2;
-  }
   SharedPreferences.setMockInitialValues(prefs);
 
   group('integration tests', () {
@@ -67,9 +62,7 @@ void checkTasks(ExtendedResult extendedResult) {
     expect(benchmarkResult.performanceRun!.throughput, isNotNull);
 
     checkAccuracy(benchmarkResult);
-    if (DartDefine.perfTestEnabled) {
-      checkThroughput(benchmarkResult, extendedResult.environmentInfo);
-    }
+    checkThroughput(benchmarkResult, extendedResult.environmentInfo);
   }
 }
 

@@ -5,9 +5,6 @@ class DartDefine {
       bool.fromEnvironment('OFFICIAL_BUILD', defaultValue: false);
   static const firebaseCrashlyticsEnabled =
       bool.fromEnvironment('FIREBASE_CRASHLYTICS_ENABLED', defaultValue: false);
-  static const isFastMode =
-      bool.fromEnvironment('FAST_MODE', defaultValue: false);
-
   static const perfTestEnabled =
       bool.fromEnvironment('PERF_TEST', defaultValue: false);
 }

@@ -1,6 +1,5 @@
 import 'package:collection/collection.dart';
 
-import 'package:mlperfbench/app_constants.dart';
 import 'package:mlperfbench/backend/bridge/run_settings.dart';
 import 'package:mlperfbench/backend/loadgen_info.dart';
 import 'package:mlperfbench/benchmark/info.dart';
@@ -69,24 +68,13 @@ class Benchmark {
     required List<pb.CommonSetting> commonSettings,
     required String backendLibName,
     required String logDir,
-    required int testMinDuration,
-    required int testMinQueryCount,
   }) async {
     final dataset = runMode.chooseDataset(taskConfig);
+    final runConfig = runMode.chooseRunConfig(taskConfig);
 
-    int minQueryCount;
-    double minDuration;
-    if (testMinDuration != 0) {
-      minQueryCount = testMinQueryCount;
-      minDuration = testMinDuration.toDouble();
-    } else if (DartDefine.isFastMode) {
-      minQueryCount = 8;
-      minDuration = 1.0;
-    } else {
-      minQueryCount = taskConfig.minQueryCount;
-      minDuration = taskConfig.minDuration;
-    }
-    double maxDuration = taskConfig.maxDuration;
+    int minQueryCount = runConfig.minQueryCount;
+    double minDuration = runConfig.minDuration;
+    double maxDuration = runConfig.maxDuration;
 
     final settings = pb.SettingList(
       setting: commonSettings,
@@ -114,7 +102,7 @@ class Benchmark {
       model_image_width: taskConfig.model.imageWidth,
       model_image_height: taskConfig.model.imageHeight,
       scenario: taskConfig.scenario,
-      mode: runMode.loadgenMode,
+      mode: runMode.loadgenMode.name,
       batch_size: selectedDelegate.batchSize,
       min_query_count: minQueryCount,
       min_duration: minDuration,