Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions docs/build-and-run.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,7 @@ flutter run
If you want to run or debug the Flutter app for any platform using graphical user interface,
you can use [VS Code with Flutter extension](https://docs.flutter.dev/get-started/editor?tab=vscode).

If you want to test something without spending a lot of time on the benchmark,
you can use flag `--dart-define=FAST_MODE=true` to speed up the benchmark.
You should not evaluate performance when using this flag.

Add `WITH_<VENDOR>=1` to make commands to build the the app with backends.
Add `WITH_<VENDOR>=1` to make commands to build the app with certain backends.
For example:

```bash
Expand Down
140 changes: 119 additions & 21 deletions flutter/assets/tasks.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,26 @@
task {
id: "image_classification_v2"
name: "Image Classification v2"
min_query_count: 1024
min_duration: 60
max_duration: 600
max_throughput: 1000
max_accuracy: 1.0
scenario: "SingleStream"
runs {
normal {
min_query_count: 1024
min_duration: 60
max_duration: 600
}
quick {
min_query_count: 128
min_duration: 6
max_duration: 60
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
Comment on lines +10 to +26
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can define the settings for each run mode in the tasks.pbtxt file.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please change `min_duration: 6` to `min_duration: 10` for the quick mode.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Stable Diffusion, the max duration should be 40 seconds.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes will be in #947

datasets {
type: IMAGENET
full {
Expand Down Expand Up @@ -47,12 +61,26 @@ task {
task {
id: "object_detection"
name: "Object Detection"
min_query_count: 1024
min_duration: 60
max_duration: 600
max_throughput: 2000
max_accuracy: 1.0
scenario: "SingleStream"
runs {
normal {
min_query_count: 1024
min_duration: 60
max_duration: 600
}
quick {
min_query_count: 128
min_duration: 6
max_duration: 60
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
datasets {
type: COCO
full {
Expand Down Expand Up @@ -90,12 +118,26 @@ task {
task {
id: "image_segmentation_v2"
name: "Image Segmentation v2"
min_query_count: 1024
min_duration: 60
max_duration: 600
max_throughput: 2000
max_accuracy: 1.0
scenario: "SingleStream"
runs {
normal {
min_query_count: 1024
min_duration: 60
max_duration: 600
}
quick {
min_query_count: 128
min_duration: 6
max_duration: 60
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
datasets {
type: ADE20K
full {
Expand Down Expand Up @@ -132,12 +174,26 @@ task {
task {
id: "natural_language_processing"
name: "Language Understanding"
min_query_count: 1024
min_duration: 60
max_duration: 600
max_throughput: 2000
max_accuracy: 1.0
scenario: "SingleStream"
runs {
normal {
min_query_count: 1024
min_duration: 60
max_duration: 600
}
quick {
min_query_count: 128
min_duration: 6
max_duration: 60
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
datasets {
type: SQUAD
full {
Expand Down Expand Up @@ -171,12 +227,26 @@ task {
task {
id: "super_resolution"
name: "Super Resolution "
min_query_count: 1024
min_duration: 60
max_duration: 600
max_throughput: 2000
max_accuracy: 1.0
scenario: "SingleStream"
runs {
normal {
min_query_count: 1024
min_duration: 60
max_duration: 600
}
quick {
min_query_count: 128
min_duration: 6
max_duration: 60
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
datasets {
type: SNUSR
full {
Expand Down Expand Up @@ -212,12 +282,26 @@ task {
task {
id: "image_classification_offline_v2"
name: "Image Classification v2 (Offline)"
min_query_count: 24576
min_duration: 0
max_duration: 0
max_throughput: 2000
max_accuracy: 1.0
scenario: "Offline"
runs {
normal {
min_query_count: 24576
min_duration: 0
max_duration: 0
}
quick {
min_query_count: 2457
min_duration: 0
max_duration: 0
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
datasets {
type: IMAGENET
full {
Expand Down Expand Up @@ -255,12 +339,26 @@ task {
task {
id: "stable_diffusion"
name: "Stable Diffusion"
min_query_count: 1024
min_duration: 60
max_duration: 300
max_throughput: 2000
max_accuracy: 1.0
scenario: "SingleStream"
runs {
normal {
min_query_count: 1024
min_duration: 60
max_duration: 300
}
quick {
min_query_count: 128
min_duration: 6
max_duration: 30
}
rapid {
min_query_count: 64
min_duration: 6
max_duration: 60
}
}
datasets {
type: COCOGEN
full {
Expand Down
26 changes: 19 additions & 7 deletions flutter/cpp/proto/mlperf_task.proto
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,42 @@ message MLPerfConfig {
// Config of the mlperf tasks.
// A task is basically a combination of models and a dataset.
//
// Next ID: 12
// Next ID: 13
message TaskConfig {
// Must be unique in one task file. Ex: image_classification
// used to match backend settings
required string id = 1;
// Human-readable name. Ex: Image classification.
required string name = 2;
// Minimum number of samples the test should run in the performance mode.
required int32 min_query_count = 3;
// Minimum duration the test should run in the performance mode, in seconds.
required double min_duration = 4 [default = 60];
// Maximum duration the test should run in the performance mode, in seconds.
required double max_duration = 10 [default = 600];
// Max expected throughput score
required float max_throughput = 5;
// Max expected accuracy
required float max_accuracy = 6;
// LoadGen parameter. Allowed values: SingleStream, Offline
required string scenario = 7;
required RunConfig runs = 12;
required DatasetConfig datasets = 8;
required ModelConfig model = 9;
repeated CustomConfig custom_config = 11;
}

// Run configurations
message RunConfig {
required OneRunConfig normal = 1;
required OneRunConfig quick = 2;
required OneRunConfig rapid = 3;
}

// Config of one run
message OneRunConfig {
// Minimum number of samples the test should run in the performance mode.
required int32 min_query_count = 3;
// Minimum duration the test should run in the performance mode, in seconds.
required double min_duration = 4 [default = 60];
// Maximum duration the test should run in the performance mode, in seconds.
required double max_duration = 10 [default = 600];
}

// Datasets for a task
//
// Next ID: 5
Expand Down
17 changes: 5 additions & 12 deletions flutter/integration_test/first_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,12 @@ void main() {
binding.framePolicy = LiveTestWidgetsFlutterBindingFramePolicy.fullyLive;

final prefs = <String, Object>{
StoreConstants.testMode: true,
StoreConstants.selectedBenchmarkRunMode:
BenchmarkRunModeEnum.submissionRun.name,
StoreConstants.testMinDuration: 1,
StoreConstants.testMinQueryCount: 4,
BenchmarkRunModeEnum.integrationTestRun.name,
StoreConstants.cooldown: true,
StoreConstants.cooldownDuration:
BenchmarkRunModeEnum.integrationTestRun.cooldownDuration,
};
if (DartDefine.perfTestEnabled) {
prefs[StoreConstants.testMinDuration] = 15;
prefs[StoreConstants.testMinQueryCount] = 64;
prefs[StoreConstants.testCooldownDuration] = 2;
}
SharedPreferences.setMockInitialValues(prefs);

group('integration tests', () {
Expand Down Expand Up @@ -67,9 +62,7 @@ void checkTasks(ExtendedResult extendedResult) {
expect(benchmarkResult.performanceRun!.throughput, isNotNull);

checkAccuracy(benchmarkResult);
if (DartDefine.perfTestEnabled) {
checkThroughput(benchmarkResult, extendedResult.environmentInfo);
}
checkThroughput(benchmarkResult, extendedResult.environmentInfo);
}
}

Expand Down
3 changes: 0 additions & 3 deletions flutter/lib/app_constants.dart
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ class DartDefine {
bool.fromEnvironment('OFFICIAL_BUILD', defaultValue: false);
static const firebaseCrashlyticsEnabled =
bool.fromEnvironment('FIREBASE_CRASHLYTICS_ENABLED', defaultValue: false);
static const isFastMode =
bool.fromEnvironment('FAST_MODE', defaultValue: false);

static const perfTestEnabled =
bool.fromEnvironment('PERF_TEST', defaultValue: false);
}
Expand Down
22 changes: 5 additions & 17 deletions flutter/lib/benchmark/benchmark.dart
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import 'package:collection/collection.dart';

import 'package:mlperfbench/app_constants.dart';
import 'package:mlperfbench/backend/bridge/run_settings.dart';
import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/benchmark/info.dart';
Expand Down Expand Up @@ -69,24 +68,13 @@ class Benchmark {
required List<pb.CommonSetting> commonSettings,
required String backendLibName,
required String logDir,
required int testMinDuration,
required int testMinQueryCount,
}) async {
final dataset = runMode.chooseDataset(taskConfig);
final runConfig = runMode.chooseRunConfig(taskConfig);

int minQueryCount;
double minDuration;
if (testMinDuration != 0) {
minQueryCount = testMinQueryCount;
minDuration = testMinDuration.toDouble();
} else if (DartDefine.isFastMode) {
minQueryCount = 8;
minDuration = 1.0;
} else {
minQueryCount = taskConfig.minQueryCount;
minDuration = taskConfig.minDuration;
}
double maxDuration = taskConfig.maxDuration;
int minQueryCount = runConfig.minQueryCount;
double minDuration = runConfig.minDuration;
double maxDuration = runConfig.maxDuration;

final settings = pb.SettingList(
setting: commonSettings,
Expand Down Expand Up @@ -114,7 +102,7 @@ class Benchmark {
model_image_width: taskConfig.model.imageWidth,
model_image_height: taskConfig.model.imageHeight,
scenario: taskConfig.scenario,
mode: runMode.loadgenMode,
mode: runMode.loadgenMode.name,
batch_size: selectedDelegate.batchSize,
min_query_count: minQueryCount,
min_duration: minDuration,
Expand Down
Loading
Loading