@@ -635,7 +635,7 @@ ModelInstanceState::BuildInterpreter()
             TfLiteXNNPackDelegateDelete(xnnpack_delegate);
           });

-      // Instruct the Interpreter to use the xnn pack
+      // Instruct the Interpreter to use the XNNPACK delegate
       if (interpreter_->ModifyGraphWithDelegate(std::move(xnnpack_delegate)) !=
           kTfLiteOk) {
         return TRITONSERVER_ErrorNew(
@@ -644,6 +644,15 @@ ModelInstanceState::BuildInterpreter()
     }
   }

+  // Allocate memory for input and output tensors
+  if (interpreter_->AllocateTensors() != kTfLiteOk) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INTERNAL,
+        ("TfLite interpreter failed to allocate tensor inputs for model " +
+         Name())
+            .c_str());
+  }
+
   return nullptr;
 }

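For readers skimming the patch: the net effect in BuildInterpreter() is delegate application followed by a single, build-time AllocateTensors(). Below is a minimal standalone sketch of that lifecycle, assuming the stock TFLite C++ API; it is not the backend source, and the model path and function name are illustrative.

#include <memory>
#include <utility>

#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

// Hedged sketch (hypothetical helper, not the backend's code): build an
// interpreter, apply the XNNPACK delegate, then allocate tensors once.
std::unique_ptr<tflite::Interpreter>
BuildXnnpackInterpreter(const char* model_path)
{
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
  if (model == nullptr) {
    return nullptr;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) !=
      kTfLiteOk) {
    return nullptr;
  }

  // Same delegate-with-custom-deleter pattern the diff above uses.
  TfLiteXNNPackDelegateOptions opts = TfLiteXNNPackDelegateOptionsDefault();
  tflite::Interpreter::TfLiteDelegatePtr xnnpack_delegate(
      TfLiteXNNPackDelegateCreate(&opts),
      [](TfLiteDelegate* d) { TfLiteXNNPackDelegateDelete(d); });
  if (interpreter->ModifyGraphWithDelegate(std::move(xnnpack_delegate)) !=
      kTfLiteOk) {
    return nullptr;
  }

  // Allocate once at build time rather than on every request, as the
  // patch does.
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    return nullptr;
  }
  return interpreter;
}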
@@ -964,15 +973,6 @@ ModelInstanceState::SetInputTensors(
     batchn_shape[0] = total_batch_size;
   }

-  // Allocate memory for tensors
-  if (interpreter_->AllocateTensors() != kTfLiteOk) {
-    SendErrorForResponses(
-        responses, request_count,
-        TRITONSERVER_ErrorNew(
-            TRITONSERVER_ERROR_INTERNAL,
-            "TfLite interpreter failed to allocate tensor inputs"));
-  }
-
   // Even if running on MALI GPU, we use CPU memory
   std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>> alloc_perference;
   alloc_perference = {{TRITONSERVER_MEMORY_CPU, 0}};
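The deletion above is safe because AllocateTensors() now runs once in BuildInterpreter(); a request path only needs to reallocate if it actually resizes an input. A hedged sketch of that check follows, assuming the stock TFLite C++ API; the helper name, the use of the first input tensor, and the parameter types are illustrative assumptions, not the backend's real layout.

#include <cstddef>
#include <cstring>
#include <vector>

#include "tensorflow/lite/interpreter.h"

// Hypothetical helper: copy request data into an input tensor, resizing and
// reallocating only when the incoming batch shape differs from the tensor's
// current shape. Otherwise the build-time AllocateTensors() still holds.
TfLiteStatus
FillInput(
    tflite::Interpreter* interpreter, const std::vector<int>& batchn_shape,
    const void* data, std::size_t byte_size)
{
  const int input_index = interpreter->inputs()[0];  // first input, for illustration
  TfLiteTensor* tensor = interpreter->tensor(input_index);

  // Compare the tensor's current dims against the requested batch shape.
  bool shape_changed =
      tensor->dims->size != static_cast<int>(batchn_shape.size());
  for (int i = 0; !shape_changed && i < tensor->dims->size; ++i) {
    shape_changed = tensor->dims->data[i] != batchn_shape[i];
  }

  if (shape_changed) {
    if (interpreter->ResizeInputTensor(input_index, batchn_shape) !=
        kTfLiteOk) {
      return kTfLiteError;
    }
    if (interpreter->AllocateTensors() != kTfLiteOk) {
      return kTfLiteError;
    }
    tensor = interpreter->tensor(input_index);  // buffer may move after realloc
  }

  std::memcpy(tensor->data.raw, data, byte_size);
  return kTfLiteOk;
}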