Skip to content
This repository was archived by the owner on Dec 21, 2023. It is now read-only.

Commit 1f6deba

Browse files
authored
Clean up OD CoreML export some more (#3138)
Add some experimental support for the NMS NN layer added in Catalina
1 parent cc25435 commit 1f6deba

File tree

4 files changed

+187
-71
lines changed

4 files changed

+187
-71
lines changed

src/toolkits/coreml_export/neural_net_models_exporter.cpp

Lines changed: 153 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@ using CoreML::Specification::ArrayFeatureType;
1717
using CoreML::Specification::FeatureDescription;
1818
using CoreML::Specification::ImageFeatureType;
1919
using CoreML::Specification::ImageFeatureType_ImageSizeRange;
20+
using CoreML::Specification::Model;
2021
using CoreML::Specification::ModelDescription;
2122
using CoreML::Specification::NeuralNetworkLayer;
2223
using CoreML::Specification::NeuralNetworkPreprocessing;
24+
using CoreML::Specification::NonMaximumSuppressionLayerParams;
2325
using CoreML::Specification::SizeRange;
2426
using turi::coreml::MLModelWrapper;
2527

@@ -28,6 +30,8 @@ namespace turi {
2830

2931
namespace {
3032

33+
// Box limit handed to add_non_maximum_suppression_layer as `max_boxes` when the
// exporter uses the NMS neural-network layer; it ends up in the layer's
// maxBoxes parameter.
constexpr size_t MAX_NUM_BOXES_FOR_NMS_LAYER = 64;
34+
3135
constexpr char CONFIDENCE_STR[] = "Boxes × Class confidence (see user-defined metadata \"classes\")";
3236
constexpr char COORDINATES_STR[] = "Boxes × [x, y, width, height] (relative to image size)";
3337

@@ -164,36 +168,13 @@ ImageFeatureType* set_image_feature(
164168
return image_feature;
165169
}
166170

167-
} //namespace
168-
169-
std::shared_ptr<MLModelWrapper> export_object_detector_model(
170-
neural_net::pipeline_spec raw_pipeline, size_t num_classes,
171-
size_t num_predictions, flex_list class_labels,
172-
std::map<std::string, flexible_type> options) {
173-
// Set up Pipeline
174-
CoreML::Specification::Model model_pipeline;
175-
model_pipeline.set_specificationversion(3);
176-
ModelDescription* pipeline_desc = model_pipeline.mutable_description();
177-
178-
// Adopt the model pipeline passed to us as input.
179-
std::unique_ptr<CoreML::Specification::Pipeline> raw_pipeline_spec =
180-
std::move(raw_pipeline).move_coreml_spec();
181-
model_pipeline.mutable_pipeline()->Swap(raw_pipeline_spec.get());
182-
183-
if (!options["include_non_maximum_suppression"].to<bool>()){
184-
// Only support this case for models supporting spec version 1, which means
185-
// no pipeline models.
186-
ASSERT_EQ(model_pipeline.pipeline().models_size(), 1);
187-
188-
auto model_wrapper = std::make_shared<MLModelWrapper>(
189-
std::make_shared<CoreML::Model>(model_pipeline.pipeline().models(0)));
190-
191-
return model_wrapper;
192-
}
193-
194-
// Add Non Maximum Suppression model to pipeline
195-
auto* model_nms = model_pipeline.mutable_pipeline()->add_models();
196-
model_nms->set_specificationversion(3);
171+
void set_non_maximum_suppression_model(Model* model_nms,
172+
ModelDescription* pipeline_desc,
173+
float num_classes, float num_predictions,
174+
const flex_list& class_labels,
175+
float confidence_threshold,
176+
float iou_threshold) {
177+
model_nms->set_specificationversion(CoreML::MLMODEL_SPECIFICATION_VERSION);
197178

198179
ModelDescription* nms_desc = model_nms->mutable_description();
199180

@@ -229,31 +210,26 @@ std::shared_ptr<MLModelWrapper> export_object_detector_model(
229210
}
230211

231212
//Write Features for Non Maximum Suppression
232-
first_layer_nms->set_iouthreshold(options["iou_threshold"]);
233-
first_layer_nms->set_confidencethreshold(options["confidence_threshold"]);
213+
first_layer_nms->set_iouthreshold(iou_threshold);
214+
first_layer_nms->set_confidencethreshold(confidence_threshold);
234215
first_layer_nms->set_confidenceinputfeaturename("raw_confidence");
235216
first_layer_nms->set_coordinatesinputfeaturename("raw_coordinates");
236217
first_layer_nms->set_iouthresholdinputfeaturename("iouThreshold");
237218
first_layer_nms->set_confidencethresholdinputfeaturename("confidenceThreshold");
238219
first_layer_nms->set_confidenceoutputfeaturename("confidence");
239220
first_layer_nms->set_coordinatesoutputfeaturename("coordinates");
240221

241-
// Copy input feature descriptions from the first model in the pipeline.
242-
*pipeline_desc->mutable_input() =
243-
model_pipeline.pipeline().models(0).description().input();
244-
245222
// Write FeatureDescription for the IOU Threshold input.
246-
FeatureDescription* iou_threshold = pipeline_desc->add_input();
247-
set_threshold_feature(iou_threshold, "iouThreshold",
248-
iou_threshold_description(options["iou_threshold"]));
249-
set_feature_optional(iou_threshold);
223+
FeatureDescription* iou_threshold_desc = pipeline_desc->add_input();
224+
set_threshold_feature(iou_threshold_desc, "iouThreshold",
225+
iou_threshold_description(iou_threshold));
226+
set_feature_optional(iou_threshold_desc);
250227

251228
// Write FeatureDescription for the Confidence Threshold input.
252-
FeatureDescription* confidence_threshold = pipeline_desc->add_input();
253-
set_threshold_feature(
254-
confidence_threshold, "confidenceThreshold",
255-
confidence_threshold_description(options["confidence_threshold"]));
256-
set_feature_optional(confidence_threshold);
229+
FeatureDescription* confidence_threshold_desc = pipeline_desc->add_input();
230+
set_threshold_feature(confidence_threshold_desc, "confidenceThreshold",
231+
confidence_threshold_description(confidence_threshold));
232+
set_feature_optional(confidence_threshold_desc);
257233

258234
// Write FeatureDescription for the Confidence output.
259235
set_predictions_feature(pipeline_desc->add_output(), "confidence", num_predictions, num_classes,
@@ -262,7 +238,136 @@ std::shared_ptr<MLModelWrapper> export_object_detector_model(
262238
// Write FeatureDescription for the Coordinates output.
263239
set_predictions_feature(pipeline_desc->add_output(), "coordinates", num_predictions, 4,
264240
false, true, COORDINATES_STR);
241+
}
242+
243+
/**
 * Appends a NonMaximumSuppression layer to the neural network held by
 * `model_nn` and registers the feature descriptions that go with it.
 *
 * The new layer consumes "raw_coordinates", "raw_confidence", "iouThreshold"
 * and "confidenceThreshold", and produces "coordinates", "confidence",
 * "indicesOfBoxes" and "numberOfBoxes". Each feature description created here
 * is appended to `pipeline_desc` and then copied into the description of
 * `model_nn`, so both levels list the same additional inputs and outputs.
 *
 * \param model_nn Model to modify; asserted to contain a NeuralNetwork.
 * \param pipeline_desc Description of the enclosing pipeline model.
 * \param num_classes Class count forwarded to the "confidence" output
 *     description.
 * \param max_boxes Written to the layer's maxBoxes parameter and forwarded as
 *     the box count of the output descriptions.
 * \param confidence_threshold Written to the layer's score threshold and used
 *     to build the "confidenceThreshold" input description text.
 * \param iou_threshold Written to the layer's IOU threshold and used to build
 *     the "iouThreshold" input description text.
 */
void add_non_maximum_suppression_layer(Model* model_nn,
                                       ModelDescription* pipeline_desc,
                                       size_t num_classes, size_t max_boxes,
                                       float confidence_threshold,
                                       float iou_threshold) {
  // Nothing below makes sense unless the target model is a NeuralNetwork.
  ASSERT_TRUE(model_nn->has_neuralnetwork());

  // Blob names wired into the layer, in the order they are registered.
  const char* const layer_inputs[] = {"raw_coordinates", "raw_confidence",
                                      "iouThreshold", "confidenceThreshold"};
  const char* const layer_outputs[] = {"coordinates", "confidence",
                                       "indicesOfBoxes", "numberOfBoxes"};

  // Create the layer at the end of the network and connect its blobs.
  NeuralNetworkLayer* layer = model_nn->mutable_neuralnetwork()->add_layers();
  layer->set_name("nonmaximumsuppression");
  for (const char* input_name : layer_inputs) {
    layer->add_input(input_name);
  }
  for (const char* output_name : layer_outputs) {
    layer->add_output(output_name);
  }

  // Configure the suppression itself.
  NonMaximumSuppressionLayerParams* params =
      layer->mutable_nonmaximumsuppression();
  params->set_iouthreshold(iou_threshold);
  params->set_scorethreshold(confidence_threshold);
  params->set_maxboxes(static_cast<::_tc_google::protobuf::uint64>(max_boxes));
  params->set_perclasssuppression(false);

  // Every description written to the pipeline is duplicated on the NN model so
  // that the model's own interface reflects the new inputs and outputs.
  ModelDescription* nn_desc = model_nn->mutable_description();
  const auto copy_to_nn_inputs = [nn_desc](const FeatureDescription* desc) {
    nn_desc->add_input()->CopyFrom(*desc);
  };
  const auto copy_to_nn_outputs = [nn_desc](const FeatureDescription* desc) {
    nn_desc->add_output()->CopyFrom(*desc);
  };

  // Input: IOU threshold (optional).
  FeatureDescription* iou_desc = pipeline_desc->add_input();
  set_array_feature(iou_desc, "iouThreshold",
                    iou_threshold_description(iou_threshold), {1});
  set_feature_optional(iou_desc);
  copy_to_nn_inputs(iou_desc);

  // Input: confidence threshold (optional).
  FeatureDescription* conf_threshold_desc = pipeline_desc->add_input();
  set_array_feature(conf_threshold_desc, "confidenceThreshold",
                    confidence_threshold_description(confidence_threshold),
                    {1});
  set_feature_optional(conf_threshold_desc);
  copy_to_nn_inputs(conf_threshold_desc);

  // Output: per-box class confidences.
  FeatureDescription* conf_out_desc = pipeline_desc->add_output();
  set_predictions_feature(conf_out_desc, "confidence", max_boxes, num_classes,
                          false, true, CONFIDENCE_STR);
  copy_to_nn_outputs(conf_out_desc);

  // Output: per-box coordinates.
  FeatureDescription* coords_out_desc = pipeline_desc->add_output();
  set_predictions_feature(coords_out_desc, "coordinates", max_boxes, 4, false,
                          true, COORDINATES_STR);
  copy_to_nn_outputs(coords_out_desc);

  // Output: how many of the emitted boxes are valid.
  FeatureDescription* box_count_desc = pipeline_desc->add_output();
  set_array_feature(box_count_desc, "numberOfBoxes",
                    "The number of valid output bounding boxes", {1});
  copy_to_nn_outputs(box_count_desc);

  // Output: index of the source box behind each emitted box.
  FeatureDescription* box_indices_desc = pipeline_desc->add_output();
  set_array_feature(box_indices_desc, "indicesOfBoxes",
                    "For each output bounding box, the index of the "
                    "corresponding input bounding box",
                    {max_boxes});
  copy_to_nn_outputs(box_indices_desc);
}
322+
323+
} // namespace
324+
325+
std::shared_ptr<MLModelWrapper> export_object_detector_model(
326+
neural_net::pipeline_spec raw_pipeline, size_t num_classes,
327+
size_t num_predictions, flex_list class_labels, float confidence_threshold,
328+
float iou_threshold, bool include_non_maximum_suppression,
329+
bool use_nms_layer) {
330+
// Set up Pipeline
331+
CoreML::Specification::Model model_pipeline;
332+
model_pipeline.set_specificationversion(
333+
CoreML::MLMODEL_SPECIFICATION_VERSION);
334+
ModelDescription* pipeline_desc = model_pipeline.mutable_description();
335+
336+
// Adopt the model pipeline passed to us as input.
337+
std::unique_ptr<CoreML::Specification::Pipeline> raw_pipeline_spec =
338+
std::move(raw_pipeline).move_coreml_spec();
339+
model_pipeline.mutable_pipeline()->Swap(raw_pipeline_spec.get());
340+
341+
if (!include_non_maximum_suppression) {
342+
// Only support this case for models supporting spec version 1, which means
343+
// no pipeline models.
344+
ASSERT_EQ(model_pipeline.pipeline().models_size(), 1);
345+
346+
auto model_wrapper = std::make_shared<MLModelWrapper>(
347+
std::make_shared<CoreML::Model>(model_pipeline.pipeline().models(0)));
348+
349+
return model_wrapper;
350+
}
351+
352+
// Copy input feature descriptions from the first model in the pipeline.
353+
*pipeline_desc->mutable_input() =
354+
model_pipeline.pipeline().models(0).description().input();
355+
356+
if (use_nms_layer) {
357+
int num_models = model_pipeline.pipeline().models_size();
358+
ASSERT_GT(num_models, 0);
359+
Model* model_nn =
360+
model_pipeline.mutable_pipeline()->mutable_models(num_models - 1);
361+
add_non_maximum_suppression_layer(model_nn, pipeline_desc, num_classes,
362+
MAX_NUM_BOXES_FOR_NMS_LAYER,
363+
confidence_threshold, iou_threshold);
364+
} else {
365+
// Add Non Maximum Suppression model to pipeline
366+
auto* model_nms = model_pipeline.mutable_pipeline()->add_models();
367+
set_non_maximum_suppression_model(model_nms, pipeline_desc, num_classes,
368+
num_predictions, class_labels,
369+
confidence_threshold, iou_threshold);
370+
}
266371

267372
// Wrap the pipeline
268373
auto pipeline_wrapper = std::make_shared<MLModelWrapper>(
@@ -278,7 +383,7 @@ std::shared_ptr<MLModelWrapper> export_activity_classifier_model(
278383
const flex_list& class_labels, const flex_string& target)
279384
{
280385
CoreML::Specification::Model model;
281-
model.set_specificationversion(1);
386+
model.set_specificationversion(CoreML::MLMODEL_SPECIFICATION_VERSION);
282387

283388
// Write the model description.
284389
ModelDescription* model_desc = model.mutable_description();
@@ -338,7 +443,7 @@ std::shared_ptr<coreml::MLModelWrapper> export_style_transfer_model(
338443
std::string content_feature, std::string style_feature, size_t num_styles) {
339444

340445
CoreML::Specification::Model model;
341-
model.set_specificationversion(3);
446+
model.set_specificationversion(CoreML::MLMODEL_SPECIFICATION_VERSION);
342447

343448
ModelDescription* model_desc = model.mutable_description();
344449

@@ -398,7 +503,7 @@ std::shared_ptr<coreml::MLModelWrapper> export_drawing_classifier_model(
398503
const flex_list& class_labels, const flex_string& target)
399504
{
400505
CoreML::Specification::Model model;
401-
model.set_specificationversion(1);
506+
model.set_specificationversion(CoreML::MLMODEL_SPECIFICATION_VERSION);
402507

403508
// Write the model description.
404509
ModelDescription* model_desc = model.mutable_description();

src/toolkits/coreml_export/neural_net_models_exporter.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ namespace turi {
2929
*/
3030
std::shared_ptr<coreml::MLModelWrapper> export_object_detector_model(
3131
neural_net::pipeline_spec pipeline, size_t num_classes,
32-
size_t num_predictions, flex_list class_labels,
33-
std::map<std::string, flexible_type> options);
32+
size_t num_predictions, flex_list class_labels, float confidence_threshold,
33+
float iou_threshold, bool include_non_maximum_suppression,
34+
bool use_nms_layer);
3435

3536
/** Wraps a trained activity classifier model_spec as a complete MLModel. */
3637
std::shared_ptr<coreml::MLModelWrapper> export_activity_classifier_model(

src/toolkits/object_detection/object_detector.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -741,19 +741,30 @@ std::shared_ptr<MLModelWrapper> object_detector::export_to_coreml(
741741
std::string confidence_str = "confidence";
742742

743743
// No options provided defaults to include Non Maximum Suppression.
744-
if (opts.find("include_non_maximum_suppression") == opts.end()) {
745-
opts["include_non_maximum_suppression"] = 1;
744+
bool include_non_maximum_suppression = true;
745+
bool use_nms_layer = false;
746+
float iou_threshold = DEFAULT_NON_MAXIMUM_SUPPRESSION_THRESHOLD;
747+
float confidence_threshold = DEFAULT_CONFIDENCE_THRESHOLD_PREDICT;
748+
auto opts_it = opts.find("include_non_maximum_suppression");
749+
if (opts_it != opts.end()) {
750+
include_non_maximum_suppression = opts_it->second.to<bool>();
746751
}
747-
748-
if (opts["include_non_maximum_suppression"].to<bool>()){
752+
if (include_non_maximum_suppression) {
749753
coordinates_str = "raw_coordinates";
750754
confidence_str = "raw_confidence";
751-
//Set default values if thresholds not provided.
752-
if (opts.find("iou_threshold") == opts.end()) {
753-
opts["iou_threshold"] = DEFAULT_NON_MAXIMUM_SUPPRESSION_THRESHOLD;
755+
756+
// Read user-provided options.
757+
opts_it = opts.find("iou_threshold");
758+
if (opts_it != opts.end()) {
759+
iou_threshold = opts_it->second.to<float>();
760+
}
761+
opts_it = opts.find("confidence_threshold");
762+
if (opts_it != opts.end()) {
763+
confidence_threshold = opts_it->second.to<float>();
754764
}
755-
if (opts.find("confidence_threshold") == opts.end()) {
756-
opts["confidence_threshold"] = DEFAULT_CONFIDENCE_THRESHOLD_PREDICT;
765+
opts_it = opts.find("use_nms_layer");
766+
if (opts_it != opts.end()) {
767+
use_nms_layer = opts_it->second.to<bool>();
757768
}
758769
}
759770

@@ -783,7 +794,7 @@ std::shared_ptr<MLModelWrapper> object_detector::export_to_coreml(
783794
user_defined_metadata.emplace_back(kvp.first, kvp.second);
784795
}
785796

786-
if (opts["include_non_maximum_suppression"].to<bool>()){
797+
if (include_non_maximum_suppression) {
787798
user_defined_metadata.emplace_back("include_non_maximum_suppression", "True");
788799
user_defined_metadata.emplace_back("confidence_threshold", opts["confidence_threshold"]);
789800
user_defined_metadata.emplace_back("iou_threshold", opts["iou_threshold"]);
@@ -797,7 +808,8 @@ std::shared_ptr<MLModelWrapper> object_detector::export_to_coreml(
797808
std::shared_ptr<MLModelWrapper> model_wrapper = export_object_detector_model(
798809
std::move(spec), class_labels.size(),
799810
grid_height * grid_width * anchor_boxes().size(), std::move(class_labels),
800-
std::move(opts));
811+
confidence_threshold, iou_threshold, include_non_maximum_suppression,
812+
use_nms_layer);
801813

802814
model_wrapper->add_metadata({
803815
{"user_defined", std::move(user_defined_metadata)},

test/unity/toolkits/coreml_export/test_neural_nets_model_exporter.cxx

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,8 @@ BOOST_AUTO_TEST_CASE(test_object_detector_export_coreml_with_nms) {
2828
const std::string test_image_name = "test_image";
2929
const std::vector<std::string> test_class_labels = { "label1", "label2" };
3030
static constexpr size_t test_max_iterations = 4;
31-
double test_iou_threshold = 0.55;
32-
double test_confidence_threshold = 0.15;
33-
34-
std::map<std::string, flexible_type> options;
35-
options["include_non_maximum_suppression"] = 1;
36-
options["iou_threshold"] = test_iou_threshold;
37-
options["confidence_threshold"] = test_confidence_threshold;
31+
float test_iou_threshold = 0.55f;
32+
float test_confidence_threshold = 0.15f;
3833

3934
flex_dict user_defined_metadata;
4035
user_defined_metadata.emplace_back("model", "model");
@@ -45,8 +40,9 @@ BOOST_AUTO_TEST_CASE(test_object_detector_export_coreml_with_nms) {
4540
user_defined_metadata.emplace_back("annotations", test_annotations_name);
4641
user_defined_metadata.emplace_back("classes", "label1, label2");
4742
user_defined_metadata.emplace_back("type", "object_detector");
48-
user_defined_metadata.emplace_back("confidence_threshold", options["confidence_threshold"]);
49-
user_defined_metadata.emplace_back("iou_threshold", options["iou_threshold"]);
43+
user_defined_metadata.emplace_back("confidence_threshold",
44+
test_confidence_threshold);
45+
user_defined_metadata.emplace_back("iou_threshold", test_iou_threshold);
5046

5147
// Create an arbitrary pipeline with one model with one input description.
5248
std::unique_ptr<CoreML::Specification::Pipeline> model_to_export;
@@ -60,7 +56,9 @@ BOOST_AUTO_TEST_CASE(test_object_detector_export_coreml_with_nms) {
6056
export_object_detector_model(
6157
neural_net::pipeline_spec(std::move(model_to_export)),
6258
test_class_labels.size(), 13 * 13 * 15, std::move(t_class_labels),
63-
std::move(options));
59+
test_confidence_threshold, test_iou_threshold,
60+
/* include_non_maximum_suppression */ true,
61+
/* use_nms_layer */ false);
6462
std::shared_ptr<CoreML::Model> c_model = model_wrapper->coreml_model();
6563
auto p_model = c_model->getProto();
6664

0 commit comments

Comments
 (0)