Skip to content

Commit 06666e1

Browse files
authored
feat: add checksum verification for datasets (#939)
* Add input_checksum and groundtruth_checksum
* Verify datasets checksum in app
1 parent 8ebf840 commit 06666e1

File tree

6 files changed

+68
-16
lines changed

6 files changed

+68
-16
lines changed

flutter/assets/tasks.pbtxt

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,23 @@ task {
1515
full {
1616
name: "Imagenet classification validation set"
1717
input_path: "local:///mlperf_datasets/imagenet/img"
18+
input_checksum: ""
1819
groundtruth_path: "local:///mlperf_datasets/imagenet/imagenet_val_full.txt"
20+
groundtruth_checksum: ""
1921
}
2022
lite {
2123
name: "Open images subset for classification"
2224
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v0_7/imagenet.zip"
25+
input_checksum: "fdcce28e06475321dc9b1c63a5539a4e"
2326
groundtruth_path: ""
27+
groundtruth_checksum: ""
2428
}
2529
tiny {
2630
name: "Imagenet dataset for integration test"
2731
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v0_7/imagenet_tiny.zip"
32+
input_checksum: "42cb83d80d0341d6719d68b32322adf2"
2833
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_0/imagenet_tiny-groundtruth.txt"
34+
groundtruth_checksum: "cc6d83ca25daeb2475f36c079e53bc26"
2935
}
3036
}
3137
model {
@@ -52,17 +58,23 @@ task {
5258
full {
5359
name: "COCO 2017 validation set"
5460
input_path: "local:///mlperf_datasets/coco/img"
61+
input_checksum: ""
5562
groundtruth_path: "local:///mlperf_datasets/coco/coco_val_full.pbtxt"
63+
groundtruth_checksum: ""
5664
}
5765
lite {
5866
name: "Open images subset for detection"
5967
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v0_7/coco.zip"
68+
input_checksum: "c630e2ddf96a1e9482c4a9e1d4aced70"
6069
groundtruth_path: ""
70+
groundtruth_checksum: ""
6171
}
6272
tiny {
6373
name: "Coco dataset for integration test"
6474
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_0/coco-test.zip"
75+
input_checksum: "7440da62aee7043b825d3ac1c6732e0a"
6576
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v1_0/coco_val_tiny.pbtxt"
77+
groundtruth_checksum: "0bae0c934cae7fa41f178c085bf31c7b"
6678
}
6779
}
6880
model {
@@ -89,17 +101,23 @@ task {
89101
full {
90102
name: "ADE20K validation set"
91103
input_path: "local:///mlperf_datasets/ade20k/images"
104+
input_checksum: ""
92105
groundtruth_path: "local:///mlperf_datasets/ade20k/annotations"
106+
groundtruth_checksum: ""
93107
}
94108
lite {
95109
name: "Open images subset for segmentation"
96110
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v0_7/ade20k.zip"
111+
input_checksum: "97f48388e5ca3b4f19047e0b7a062c88"
97112
groundtruth_path: ""
113+
groundtruth_checksum: ""
98114
}
99115
tiny {
100116
name: "ADE20K validation subset for integration test"
101117
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_1/ade20k_tiny.zip"
118+
input_checksum: "544a538eed16283586bd583dd39c44db"
102119
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_1/ade20k_tiny_annotations.zip"
120+
groundtruth_checksum: "a0e4b79dceac96a784b7abaa59aa76df"
103121
}
104122
}
105123
model {
@@ -125,17 +143,23 @@ task {
125143
full {
126144
name: "Squad V1.1 validation set"
127145
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v1_0/squad_eval_mini.tfrecord"
146+
input_checksum: "3c9a1c4c373037889938d4b13d41f4c2"
128147
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v1_0/squad_groundtruth.tfrecord"
148+
groundtruth_checksum: "f9d6d209ec9a05a522749a6bd597a75c"
129149
}
130150
lite {
131151
name: "Squad V1.1 mini set"
132152
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v1_0/squad_eval_mini.tfrecord"
153+
input_checksum: "3c9a1c4c373037889938d4b13d41f4c2"
133154
groundtruth_path: ""
155+
groundtruth_checksum: ""
134156
}
135157
tiny {
136158
name: "Squad V1.1 mini set"
137159
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v1_0/squad_eval_tiny.tfrecord"
160+
input_checksum: "fa018f599d0016ed385b31aee7b00bed"
138161
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v1_0/squad_groundtruth.tfrecord"
162+
groundtruth_checksum: "f9d6d209ec9a05a522749a6bd597a75c"
139163
}
140164
}
141165
model {
@@ -158,17 +182,23 @@ task {
158182
full {
159183
name: "SNUSR dataset for accuracy test"
160184
input_path: "local:///mlperf_datasets/snusr/lr"
185+
input_checksum: ""
161186
groundtruth_path: "local:///mlperf_datasets/snusr/hr"
187+
groundtruth_checksum: ""
162188
}
163189
lite {
164190
name: "SNUSR dataset for performance test"
165191
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_1/snusr_lr_png.zip"
192+
input_checksum: "38bf296e38cba0a8229d946fc1a5095a"
166193
groundtruth_path: ""
194+
groundtruth_checksum: ""
167195
}
168196
tiny {
169197
name: "SNUSR dataset for integration test"
170198
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_1/snusr_lr_png_tiny.zip"
199+
input_checksum: "134e9f96e4bbd5e925093e30a4433548"
171200
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_1/snusr_hr_png_tiny.zip"
201+
groundtruth_checksum: "c8eac83e32d163552ca8e317c2dd57c3"
172202
}
173203
}
174204
model {
@@ -193,17 +223,23 @@ task {
193223
full {
194224
name: "Imagenet classification validation set"
195225
input_path: "local:///mlperf_datasets/imagenet/img"
226+
input_checksum: ""
196227
groundtruth_path: "local:///mlperf_datasets/imagenet/imagenet_val_full.txt"
228+
groundtruth_checksum: ""
197229
}
198230
lite {
199231
name: "Open images subset for classification"
200232
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v0_7/imagenet.zip"
233+
input_checksum: "fdcce28e06475321dc9b1c63a5539a4e"
201234
groundtruth_path: ""
235+
groundtruth_checksum: ""
202236
}
203237
tiny {
204238
name: "Imagenet dataset for integration test"
205239
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v0_7/imagenet_tiny.zip"
240+
input_checksum: "42cb83d80d0341d6719d68b32322adf2"
206241
groundtruth_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v3_0/imagenet_tiny-groundtruth.txt"
242+
groundtruth_checksum: "cc6d83ca25daeb2475f36c079e53bc26"
207243
}
208244
}
209245
model {
@@ -230,17 +266,23 @@ task {
230266
full {
231267
name: "COCO validation set for Stable Diffusion"
232268
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v4_1/coco_gen_test.tfrecord"
233-
groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512.tflite"
269+
input_checksum: "b564d2c228a867148fa7d6df415a0368"
270+
groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512_openai-clip-vit-large-patch14.tflite"
271+
groundtruth_checksum: "39a07ffaea0806ee6148874ef228cc77"
234272
}
235273
lite {
236274
name: "COCO validation set for Stable Diffusion"
237275
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v4_1/coco_gen_full.tfrecord"
276+
input_checksum: "5cf967d2b2128edeb1b4d6eca6e8d94d"
238277
groundtruth_path: ""
278+
groundtruth_checksum: ""
239279
}
240280
tiny {
241281
name: "COCO validation set for Stable Diffusion"
242-
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v4_1/coco_gen.tfrecord"
243-
groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512.tflite"
282+
input_path: "https://mobile.mlcommons-storage.org/app-resources/datasets/v4_1/coco_gen_test.tfrecord"
283+
input_checksum: "b564d2c228a867148fa7d6df415a0368"
284+
groundtruth_path: "local:///mlperf_models/stable-diffusion/clip_model_512x512_openai-clip-vit-large-patch14.tflite"
285+
groundtruth_checksum: "39a07ffaea0806ee6148874ef228cc77"
244286
}
245287
}
246288
model {

flutter/cpp/proto/mlperf_task.proto

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,14 @@ message DatasetConfig {
8585
message OneDatasetConfig {
8686
// Human-readable name of the dataset
8787
required string name = 1;
88-
// URL or local path to dataset input files
88+
// URL or local path to dataset input file
8989
required string input_path = 2;
90-
// URL or local path to dataset groundtruth files
90+
// MD5 checksum to validate the input file
91+
required string input_checksum = 4;
92+
// URL or local path to dataset groundtruth file
9193
required string groundtruth_path = 3;
94+
// MD5 checksum to validate the groundtruth file
95+
required string groundtruth_checksum = 5;
9296
}
9397

9498
// Config of a model.

flutter/lib/benchmark/benchmark.dart

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,14 @@ class BenchmarkStore {
162162
for (var mode in modes) {
163163
final dataset = mode.chooseDataset(b.taskConfig);
164164
final data = Resource(
165-
path: dataset.inputPath,
166165
type: ResourceTypeEnum.datasetData,
166+
path: dataset.inputPath,
167+
md5Checksum: dataset.inputChecksum,
167168
);
168169
final groundtruth = Resource(
169-
path: dataset.groundtruthPath,
170170
type: ResourceTypeEnum.datasetGroundtruth,
171+
path: dataset.groundtruthPath,
172+
md5Checksum: dataset.groundtruthChecksum,
171173
);
172174
result.addAll([data, groundtruth]);
173175
}

flutter/lib/resources/resource.dart

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
enum ResourceTypeEnum { model, datasetData, datasetGroundtruth }
22

33
class Resource {
4-
final String path;
54
final ResourceTypeEnum type;
6-
final String? md5Checksum;
5+
final String path;
6+
final String md5Checksum;
77

88
Resource({
9-
required this.path,
109
required this.type,
11-
this.md5Checksum,
10+
required this.path,
11+
required this.md5Checksum,
1212
});
1313

1414
@override

flutter/lib/resources/resource_manager.dart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ class ResourceManager {
200200
final checksumFailedResources = <Resource>[];
201201
for (final resource in resources) {
202202
final md5Checksum = resource.md5Checksum;
203-
if (md5Checksum == null || md5Checksum.isEmpty) continue;
203+
if (md5Checksum.isEmpty) continue;
204204
String? localPath;
205205
if (cacheManager.isResourceAnArchive(resource.path)) {
206206
localPath = cacheManager.getArchive(resource.path);

flutter/unit_test/benchmark/benchmark_store_test.dart

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,16 @@ void main() {
116116
expect(
117117
resources,
118118
contains(Resource(
119-
path: task1.datasets.full.inputPath,
120119
type: ResourceTypeEnum.datasetData,
120+
path: task1.datasets.full.inputPath,
121+
md5Checksum: task1.datasets.full.inputChecksum,
121122
)));
122123
expect(
123124
resources,
124125
contains(Resource(
125-
path: task1.datasets.full.groundtruthPath,
126126
type: ResourceTypeEnum.datasetGroundtruth,
127+
path: task1.datasets.full.groundtruthPath,
128+
md5Checksum: task1.datasets.full.groundtruthChecksum,
127129
)));
128130
expect(
129131
resources,
@@ -153,8 +155,9 @@ void main() {
153155
expect(
154156
resources,
155157
contains(Resource(
156-
path: task1.datasets.lite.inputPath,
157158
type: ResourceTypeEnum.datasetData,
159+
path: task1.datasets.lite.inputPath,
160+
md5Checksum: task1.datasets.lite.inputChecksum,
158161
)));
159162
expect(
160163
resources,
@@ -187,8 +190,9 @@ void main() {
187190
expect(
188191
resources,
189192
contains(Resource(
190-
path: task1.datasets.tiny.inputPath,
191193
type: ResourceTypeEnum.datasetData,
194+
path: task1.datasets.tiny.inputPath,
195+
md5Checksum: task1.datasets.tiny.inputChecksum,
192196
)));
193197
expect(
194198
resources,

0 commit comments

Comments
 (0)