Make tflite interpreter threads a model parameter

jishminor · jishminor · commit 84e7b921a72d · 2021-09-17T15:09:12.000-05:00
diff --git a/README.md b/README.md
@@ -120,6 +120,20 @@ optimization { execution_accelerators {
 }}
 ```
 
+### Default Optimization Options
+Optimization parameters for the default tflite interpreter can be passed using the `parameters` section of the model configuration.
+
+By default, the tflite interpreter will use the maximum number of threads available to the system.
+To set the number of threads available to the tflite interpreter, add the following section to your model configuration:
+```
+parameters: {
+key: "tflite_num_threads"
+value: {
+string_value:"<num_threads>"
+}
+}
+```
+
 ### ArmNN Delegate Optimization Options
 Users also have the ability to specify ArmNN specific optimizations. 
 The following options are available for CPU:
diff --git a/src/tflite.cc b/src/tflite.cc
@@ -60,6 +60,9 @@ class ModelState : public BackendModel {
   // Validate that model configuration is supported by this backend.
   // TRITONSERVER_Error* ValidateModelConfig();
 
+  // Default TFLite runtime options
+  int32_t tflite_num_threads_ = int32_t(std::thread::hardware_concurrency());
+
 #ifdef ARMNN_DELEGATE_ENABLE
   // ArmNN Delegate options
   bool use_armnn_delegate_cpu_ = false;
@@ -136,6 +139,37 @@ ModelState::LoadModel(
         ("failed to load model " + Name()).c_str());
   }
 
+  // Handle tflite default interpreter options set in parameters
+  {
+    triton::common::TritonJson::Value params;
+    if (ModelConfig().Find("parameters", &params)) {
+      // Handle tflite_num_threads parameter
+      std::string value_str;
+      auto err = GetParameterValue(params, "tflite_num_threads", &value_str);
+
+      // tflite_num_threads is not required so clear error if not found
+      if (err != nullptr) {
+        if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
+          return err;
+        } else {
+          TRITONSERVER_ErrorDelete(err);
+        }
+      } else {
+        RETURN_IF_ERROR(ParseIntValue(value_str, &tflite_num_threads_));
+
+        if (tflite_num_threads_ < 0) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INVALID_ARG,
+              (std::string(
+                   "parameter 'tflite_num_threads' must be non-negative "
+                   "number for tflite model '") +
+               Name() + "'")
+                  .c_str());
+        }
+      }
+    }
+  }
+
   // Handle tflite optimizations from model config
   {
     triton::common::TritonJson::Value optimization;
@@ -536,7 +570,7 @@ ModelInstanceState::BuildInterpreter()
   }
 
   // Tell interpreter to use max threads available to system
-  if (interpreter_->SetNumThreads(std::thread::hardware_concurrency()) !=
+  if (interpreter_->SetNumThreads(model_state_->tflite_num_threads_) !=
       kTfLiteOk) {
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL,