Skip to content

Commit 20a45dd

Browse files
wejoncy and edgchen1
authored
[CoreML ML Program] support accelerators selector (#22383)
### Description For now, CoreML only supports running mlmodels on CPU/ALL. However, sometimes CPU_GPU would be a lot faster. We support the option to select different hardware to boost performance in this PR. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> --------- Co-authored-by: Edward Chen <[email protected]>
1 parent 8c21680 commit 20a45dd

File tree

12 files changed

+46
-8
lines changed

12 files changed

+46
-8
lines changed

csharp/src/Microsoft.ML.OnnxRuntime/ProviderOptions.shared.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ public enum CoreMLFlags : uint
330330
COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004,
331331
COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008,
332332
COREML_FLAG_CREATE_MLPROGRAM = 0x010,
333-
COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM,
333+
COREML_FLAG_USE_CPU_AND_GPU = 0x020,
334+
COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU,
334335
}
335336

336337
/// <summary>

include/onnxruntime/core/providers/coreml/coreml_provider_factory.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,14 @@ enum COREMLFlags {
3131
// Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later.
3232
COREML_FLAG_CREATE_MLPROGRAM = 0x010,
3333

34+
// Exclude ANE as sometimes this decrease performance
35+
// https://developer.apple.com/documentation/coreml/mlcomputeunits?language=objc
36+
// there are four compute units:
37+
// MLComputeUnitsCPUAndNeuralEngine|MLComputeUnitsCPUAndGPU|MLComputeUnitsCPUOnly|MLComputeUnitsAll
38+
COREML_FLAG_USE_CPU_AND_GPU = 0x020,
3439
// Keep COREML_FLAG_LAST at the end of the enum definition
3540
// And assign the last COREMLFlag to it
36-
COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM,
41+
COREML_FLAG_LAST = COREML_FLAG_USE_CPU_AND_GPU,
3742
};
3843

3944
#ifdef __cplusplus

java/src/main/java/ai/onnxruntime/providers/CoreMLFlags.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ public enum CoreMLFlags implements OrtFlags {
2525
* Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or
2626
* later.
2727
*/
28-
CREATE_MLPROGRAM(16); // COREML_FLAG_CREATE_MLPROGRAM(0x010)
28+
CREATE_MLPROGRAM(16), // COREML_FLAG_CREATE_MLPROGRAM(0x010)
29+
/** exclude ANE */
30+
CPU_AND_GPU(32); // COREML_FLAG_USE_CPU_AND_GPU(0x020)
2931

3032
/** The native value of the enum. */
3133
public final int value;

js/common/lib/inference-session.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ export declare namespace InferenceSession {
320320
* COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004
321321
* COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008
322322
* COREML_FLAG_CREATE_MLPROGRAM = 0x010
323+
* COREML_FLAG_USE_CPU_AND_GPU = 0x020
323324
* ```
324325
*
325326
* See include/onnxruntime/core/providers/coreml/coreml_provider_factory.h for more details.
@@ -333,6 +334,7 @@ export declare namespace InferenceSession {
333334
* This setting is available only in ONNXRuntime (react-native).
334335
*/
335336
useCPUOnly?: boolean;
337+
useCPUAndGPU?: boolean;
336338
/**
337339
* Specify whether to enable CoreML EP on subgraph.
338340
*

js/react_native/ios/OnnxruntimeModule.mm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,8 @@ - (NSDictionary*)run:(NSString*)url
389389
if (useOptions) {
390390
if ([[executionProvider objectForKey:@"useCPUOnly"] boolValue]) {
391391
coreml_flags |= COREML_FLAG_USE_CPU_ONLY;
392+
} else if ([[executionProvider objectForKey:@"useCPUAndGPU"] boolValue]) {
393+
coreml_flags |= COREML_FLAG_USE_CPU_AND_GPU;
392394
}
393395
if ([[executionProvider objectForKey:@"enableOnSubgraph"] boolValue]) {
394396
coreml_flags |= COREML_FLAG_ENABLE_ON_SUBGRAPH;

objectivec/include/ort_coreml_execution_provider.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ NS_ASSUME_NONNULL_BEGIN
2929
* Whether the CoreML execution provider should run on CPU only.
3030
*/
3131
@property BOOL useCPUOnly;
32-
32+
/**
33+
* exclude ANE in CoreML.
34+
*/
35+
@property BOOL useCPUAndGPU;
3336
/**
3437
* Whether the CoreML execution provider is enabled on subgraphs.
3538
*/

objectivec/ort_coreml_execution_provider.mm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ - (BOOL)appendCoreMLExecutionProviderWithOptions:(ORTCoreMLExecutionProviderOpti
2525
try {
2626
const uint32_t flags =
2727
(options.useCPUOnly ? COREML_FLAG_USE_CPU_ONLY : 0) |
28+
(options.useCPUAndGPU ? COREML_FLAG_USE_CPU_AND_GPU : 0) |
2829
(options.enableOnSubgraphs ? COREML_FLAG_ENABLE_ON_SUBGRAPH : 0) |
2930
(options.onlyEnableForDevicesWithANE ? COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE : 0) |
3031
(options.onlyAllowStaticInputShapes ? COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES : 0) |

onnxruntime/core/providers/coreml/coreml_execution_provider.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ CoreMLExecutionProvider::CoreMLExecutionProvider(uint32_t coreml_flags)
3232
LOGS_DEFAULT(ERROR) << "CoreML EP is not supported on this platform.";
3333
}
3434

35+
// check if only one flag is set
36+
if ((coreml_flags & COREML_FLAG_USE_CPU_ONLY) && (coreml_flags & COREML_FLAG_USE_CPU_AND_GPU)) {
37+
// multiple device options selected
38+
ORT_THROW(
39+
"Multiple device options selected, you should use at most one of the following options:"
40+
"COREML_FLAG_USE_CPU_ONLY or COREML_FLAG_USE_CPU_AND_GPU or not set");
41+
}
42+
3543
#if defined(COREML_ENABLE_MLPROGRAM)
3644
if (coreml_version_ < MINIMUM_COREML_MLPROGRAM_VERSION &&
3745
(coreml_flags_ & COREML_FLAG_CREATE_MLPROGRAM) != 0) {

onnxruntime/core/providers/coreml/model/model.mm

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -395,9 +395,15 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
395395
compiled_model_path_ = [compileUrl path];
396396

397397
MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
398-
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
399-
? MLComputeUnitsCPUOnly
400-
: MLComputeUnitsAll;
398+
399+
if (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY) {
400+
config.computeUnits = MLComputeUnitsCPUOnly;
401+
} else if (coreml_flags_ & COREML_FLAG_USE_CPU_AND_GPU) {
402+
config.computeUnits = MLComputeUnitsCPUAndGPU;
403+
} else {
404+
config.computeUnits = MLComputeUnitsAll;
405+
}
406+
401407
model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];
402408

403409
if (error != nil || model_ == nil) {

onnxruntime/python/onnxruntime_pybind_state.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,8 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
12131213

12141214
if (flags_str.find("COREML_FLAG_USE_CPU_ONLY") != std::string::npos) {
12151215
coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPU_ONLY;
1216+
} else if (flags_str.find("COREML_FLAG_USE_CPU_AND_GPU") != std::string::npos) {
1217+
coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPU_AND_GPU;
12161218
}
12171219

12181220
if (flags_str.find("COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES") != std::string::npos) {

0 commit comments

Comments
 (0)