@@ -635,7 +635,7 @@ ModelInstanceState::BuildInterpreter()
             TfLiteXNNPackDelegateDelete(xnnpack_delegate);
           });

-      // Instruct the Interpreter to use the xnn pack
+      // Instruct the Interpreter to use the XNNPACK delegate
       if (interpreter_->ModifyGraphWithDelegate(std::move(xnnpack_delegate)) !=
           kTfLiteOk) {
         return TRITONSERVER_ErrorNew(
@@ -644,6 +644,15 @@ ModelInstanceState::BuildInterpreter()
     }
   }

+  // Allocate memory for input and output tensors
+  if (interpreter_->AllocateTensors() != kTfLiteOk) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INTERNAL,
+        ("TfLite interpreter failed to allocate tensor inputs for model " +
+         Name())
+            .c_str());
+  }
+
   return nullptr;
 }

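For readers skimming the patch: the net effect in BuildInterpreter() is delegate application followed by a single, build-time AllocateTensors(). Below is a minimal standalone sketch of that lifecycle, assuming the stock TFLite C++ API; it is not the backend source, and the model path and function name are illustrative.

#include <memory>
#include <utility>

#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

// Hedged sketch (hypothetical helper, not the backend's code): build an
// interpreter, apply the XNNPACK delegate, then allocate tensors once.
std::unique_ptr<tflite::Interpreter>
BuildXnnpackInterpreter(const char* model_path)
{
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
  if (model == nullptr) {
    return nullptr;
  }

  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) !=
      kTfLiteOk) {
    return nullptr;
  }

  // Same delegate-with-custom-deleter pattern the diff above uses.
  TfLiteXNNPackDelegateOptions opts = TfLiteXNNPackDelegateOptionsDefault();
  tflite::Interpreter::TfLiteDelegatePtr xnnpack_delegate(
      TfLiteXNNPackDelegateCreate(&opts),
      [](TfLiteDelegate* d) { TfLiteXNNPackDelegateDelete(d); });
  if (interpreter->ModifyGraphWithDelegate(std::move(xnnpack_delegate)) !=
      kTfLiteOk) {
    return nullptr;
  }

  // Allocate once at build time rather than on every request, as the
  // patch does.
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    return nullptr;
  }
  return interpreter;
}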
@@ -964,15 +973,6 @@ ModelInstanceState::SetInputTensors(
     batchn_shape[0] = total_batch_size;
   }

-  // Allocate memory for tensors
-  if (interpreter_->AllocateTensors() != kTfLiteOk) {
-    SendErrorForResponses(
-        responses, request_count,
-        TRITONSERVER_ErrorNew(
-            TRITONSERVER_ERROR_INTERNAL,
-            "TfLite interpreter failed to allocate tensor inputs"));
-  }
-
   // Even if running on MALI GPU, we use CPU memory
   std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>> alloc_perference;
   alloc_perference = {{TRITONSERVER_MEMORY_CPU, 0}};
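The deletion above is safe because AllocateTensors() now runs once in BuildInterpreter(); a request path only needs to reallocate if it actually resizes an input. A hedged sketch of that check follows, assuming the stock TFLite C++ API; the helper name, the use of the first input tensor, and the parameter types are illustrative assumptions, not the backend's real layout.

#include <cstddef>
#include <cstring>
#include <vector>

#include "tensorflow/lite/interpreter.h"

// Hypothetical helper: copy request data into an input tensor, resizing and
// reallocating only when the incoming batch shape differs from the tensor's
// current shape. Otherwise the build-time AllocateTensors() still holds.
TfLiteStatus
FillInput(
    tflite::Interpreter* interpreter, const std::vector<int>& batchn_shape,
    const void* data, std::size_t byte_size)
{
  const int input_index = interpreter->inputs()[0];  // first input, for illustration
  TfLiteTensor* tensor = interpreter->tensor(input_index);

  // Compare the tensor's current dims against the requested batch shape.
  bool shape_changed =
      tensor->dims->size != static_cast<int>(batchn_shape.size());
  for (int i = 0; !shape_changed && i < tensor->dims->size; ++i) {
    shape_changed = tensor->dims->data[i] != batchn_shape[i];
  }

  if (shape_changed) {
    if (interpreter->ResizeInputTensor(input_index, batchn_shape) !=
        kTfLiteOk) {
      return kTfLiteError;
    }
    if (interpreter->AllocateTensors() != kTfLiteOk) {
      return kTfLiteError;
    }
    tensor = interpreter->tensor(input_index);  // buffer may move after realloc
  }

  std::memcpy(tensor->data.raw, data, byte_size);
  return kTfLiteOk;
}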