diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..9bbf7264c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "examples/tesnorflow_c_handler/bindings/pybind11"]
+	path = examples/tesnorflow_c_handler/bindings/pybind11
+	url = https://github.com/pybind/pybind11.git
diff --git a/examples/tesnorflow_c_handler/README.md b/examples/tesnorflow_c_handler/README.md
new file mode 100644
index 000000000..6d49eb3f2
--- /dev/null
+++ b/examples/tesnorflow_c_handler/README.md
@@ -0,0 +1,53 @@
+# TensorFlow Model inference using C API
+
+This example demonstrates adding a custom handler for TensorFlow model inference using the TensorFlow C API. Since custom
+handlers need to be written in Python, we use the [pybind11](https://github.com/pybind/pybind11) library for Python to C++ binding.
+
+
+## Prerequisites
+
+* TensorFlow C API
+* A compiler with C++17 support
+* CMake >= 3.10
+
+
+## Generate Python Module
+
+We use the [CppFlow](https://github.com/serizba/cppflow) source code to invoke the TensorFlow C API; CppFlow is a thin
+C++ wrapper that runs models through that API. Download the TensorFlow C library from [here](https://www.tensorflow.org/install/lang_c).
+You can install the library system-wide or place it in a 'libtensorflow' directory in your home directory.
+The [CMakeLists.txt](bindings/CMakeLists.txt) assumes 'libtensorflow' is in your home directory.
+
+## Steps to create a custom handler
+1. Create the pybind module
+Use the commands below to build a pybind11 Python module which can be imported in a Python program.
+This creates a Python module called "tf_c_inference".
+
+```bash
+git clone --recursive https://github.com/awslabs/multi-model-server.git
+cd multi-model-server/examples/tesnorflow_c_handler
+
+# If you didn't clone with the --recursive option, use the command below to fetch the pybind11 submodule
+git submodule update --init
+
+pip install ./bindings
+```
+2. Create a Python handler
+Create a Python custom handler which invokes the 'load_model' and 'run_model' APIs of "tf_c_inference".
+Here is the example [handler](handler.py).
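+A minimal sketch of such a handler is shown below (simplified: it assumes the MobileNet graph and operation names used
+in this example, and skips the request parsing that the full [handler](handler.py) performs):
+
+```python
+import tf_c_inference
+
+# Load the frozen graph once; the pybind module keeps it in a singleton
+tf_c_inference.load_model("mobilenet_v2_1.4_224_frozen.pb",
+                          "input",
+                          "MobilenetV2/Predictions/Reshape_1")
+
+
+def handle(data, context):
+    # assumes 'data' was already decoded into a flat float list plus its shape
+    flat_input, input_shape = data
+    return tf_c_inference.run_model(flat_input, input_shape)
+```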
+3. Create a MAR file
+Use the model-archiver utility to create a MAR file using handler.py. The handler uses the
+[MobileNet model](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz); however, you can use
+any other TensorFlow model.
+If you want to ship the model to a different machine, either install "tf_c_inference" there using step 1, or ship the built .so file to that machine and ensure it is on the Python path.
+
+## Details about the pybind code
+The binding code is specified in the [tf_c_inference.cpp](bindings/src/tf_c_inference.cpp) file.
+The APIs exposed are 'load_model' and 'run_model'. A singleton is used so that the model is loaded only once.
+The APIs invoke the CppFlow C++ API, which is a wrapper over the TensorFlow C API. Feel free to modify the code as per your needs.
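+Once the module is built, you can smoke-test it directly from Python (a sketch; it assumes the frozen MobileNet graph
+is in the current directory):
+
+```python
+import numpy as np
+import tf_c_inference
+
+tf_c_inference.load_model("mobilenet_v2_1.4_224_frozen.pb",
+                          "input", "MobilenetV2/Predictions/Reshape_1")
+
+x = np.random.uniform(size=(1, 224, 224, 3)).astype("float32")
+scores = tf_c_inference.run_model(x.flatten().tolist(), list(x.shape))
+print(len(scores))  # one score per ImageNet class
+```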
+
+## Reference Links
+1. [pybind11](https://github.com/pybind/pybind11)
+2. [cmake_example](https://github.com/pybind/cmake_example)
+3. [CppFlow](https://github.com/serizba/cppflow)
+4. [Tensorflow C API](https://www.tensorflow.org/install/lang_c)
\ No newline at end of file
diff --git a/examples/tesnorflow_c_handler/bindings/CMakeLists.txt b/examples/tesnorflow_c_handler/bindings/CMakeLists.txt
new file mode 100644
index 000000000..fe44ef2cf
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.10)
+project(tf_c_inference)
+
+find_library(TENSORFLOW_LIB tensorflow HINTS $ENV{HOME}/libtensorflow/lib)
+
+set(CMAKE_CXX_STANDARD 17)
+add_subdirectory(pybind11)
+pybind11_add_module(tf_c_inference src/tf_c_inference.cpp src/Model.cpp src/Tensor.cpp)
+
+target_include_directories(tf_c_inference PRIVATE include $ENV{HOME}/libtensorflow/include)
+
+target_link_libraries(tf_c_inference PRIVATE "${TENSORFLOW_LIB}")
\ No newline at end of file
diff --git a/examples/tesnorflow_c_handler/bindings/include/Model.h b/examples/tesnorflow_c_handler/bindings/include/Model.h
new file mode 100644
index 000000000..c23e7f76d
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/include/Model.h
@@ -0,0 +1,69 @@
+//
+// Created by sergio on 12/05/19.
+//
+
+#ifndef CPPFLOW_MODEL_H
+#define CPPFLOW_MODEL_H
+
+#include <tensorflow/c/c_api.h>
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include "Tensor.h"
+
+class Tensor;
+
+class Model {
+public:
+    // Pass a path to the model file and optional Tensorflow config options. See examples/load_model/main.cpp.
+    explicit Model(const std::string& model_filename, const std::vector<uint8_t>& config_options = {});
+
+    // Rule of five: moving is easy as the pointers can be copied; copying is deleted,
+    // since a shallow copy of the TF_Graph/TF_Session pointers would not be a deep copy
+    Model(const Model &model) = delete;
+    Model(Model &&model) = default;
+    Model& operator=(const Model &model) = delete;
+    Model& operator=(Model &&model) = default;
+
+    ~Model();
+
+    void init();
+    void restore(const std::string& ckpt);
+    void save(const std::string& ckpt);
+    std::vector<std::string> get_operations() const;
+
+    // Original Run
+    void run(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs);
+
+    // Run with references
+    void run(Tensor& input, const std::vector<Tensor*>& outputs);
+    void run(const std::vector<Tensor*>& inputs, Tensor& output);
+    void run(Tensor& input, Tensor& output);
+
+    // Run with pointers
+    void run(Tensor* input, const std::vector<Tensor*>& outputs);
+    void run(const std::vector<Tensor*>& inputs, Tensor* output);
+    void run(Tensor* input, Tensor* output);
+
+private:
+    TF_Graph* graph;
+    TF_Session* session;
+    TF_Status* status;
+
+    // Read a binary file into a TF_Buffer
+    static TF_Buffer* read(const std::string&);
+
+    bool status_check(bool throw_exc) const;
+    void error_check(bool condition, const std::string &error) const;
+
+public:
+    friend class Tensor;
+};
+
+#endif //CPPFLOW_MODEL_H
diff --git a/examples/tesnorflow_c_handler/bindings/include/Tensor.h b/examples/tesnorflow_c_handler/bindings/include/Tensor.h
new file mode 100644
index 000000000..501b78ece
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/include/Tensor.h
@@ -0,0 +1,66 @@
+//
+// Created by sergio on 13/05/19.
+//
+
+#ifndef CPPFLOW_TENSOR_H
+#define CPPFLOW_TENSOR_H
+
+#include <tensorflow/c/c_api.h>
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <vector>
+#include "Model.h"
+
+class Model;
+
+class Tensor {
+public:
+    Tensor(const Model& model, const std::string& operation);
+
+    // Rule of five: moving is easy as the pointers can be copied; copying is deleted,
+    // since a shallow copy of the TF_Tensor pointer would not be a deep copy
+    Tensor(const Tensor &tensor) = delete;
+    Tensor(Tensor &&tensor) = default;
+    Tensor& operator=(const Tensor &tensor) = delete;
+    Tensor& operator=(Tensor &&tensor) = default;
+
+    ~Tensor();
+
+    void clean();
+
+    template<typename T>
+    void set_data(std::vector<T> new_data);
+
+    template<typename T>
+    void set_data(std::vector<T> new_data, const std::vector<int64_t>& new_shape);
+
+    template<typename T>
+    std::vector<T> get_data();
+
+    std::vector<int64_t> get_shape();
+
+private:
+    TF_Tensor* val;
+    TF_Output op;
+    TF_DataType type;
+    std::vector<int64_t> shape;
+    std::unique_ptr<std::vector<int64_t>> actual_shape;
+    void* data;
+    int flag;   // 0 = empty, 1 = full, -1 = invalid
+
+    // Aux functions
+    void error_check(bool condition, const std::string& error);
+
+    template<typename T>
+    static TF_DataType deduce_type();
+
+    void deduce_shape();
+
+public:
+    friend class Model;
+};
+
+#endif //CPPFLOW_TENSOR_H
diff --git a/examples/tesnorflow_c_handler/bindings/pybind11 b/examples/tesnorflow_c_handler/bindings/pybind11
new file mode 160000
index 000000000..2a5a5ec0a
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/pybind11
@@ -0,0 +1 @@
+Subproject commit 2a5a5ec0a47c245fbf1bb1a8a90b4c3278e01693
diff --git a/examples/tesnorflow_c_handler/bindings/setup.py b/examples/tesnorflow_c_handler/bindings/setup.py
new file mode 100644
index 000000000..8ad1ec7ed
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/setup.py
@@ -0,0 +1,73 @@
+import os
+import re
+import sys
+import platform
+import subprocess
+
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext
+from distutils.version import LooseVersion
+
+
+class CMakeExtension(Extension):
+    def __init__(self, name, sourcedir=''):
+        Extension.__init__(self, name, sources=[])
+        self.sourcedir = os.path.abspath(sourcedir)
+
+
+class CMakeBuild(build_ext):
+    def run(self):
+        try:
+            out = subprocess.check_output(['cmake', '--version'])
+        except OSError:
+            raise RuntimeError("CMake must be installed to build the following extensions: " +
+                               ", ".join(e.name for e in self.extensions))
+
+        if platform.system() == "Windows":
+            cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
+            if cmake_version < '3.1.0':
+                raise RuntimeError("CMake >= 3.1.0 is required on Windows")
+
+        for ext in self.extensions:
+            self.build_extension(ext)
+
+    def build_extension(self, ext):
+        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
+        # required for auto-detection of auxiliary "native" libs
+        if not extdir.endswith(os.path.sep):
+            extdir += os.path.sep
+
+        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
+                      '-DPYTHON_EXECUTABLE=' + sys.executable]
+
+        cfg = 'Debug' if self.debug else 'Release'
+        build_args = ['--config', cfg]
+
+        if platform.system() == "Windows":
+            cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
+            if sys.maxsize > 2**32:
+                cmake_args += ['-A', 'x64']
+            build_args += ['--', '/m']
+        else:
+            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
+            build_args += ['--', '-j2']
+
+        env = os.environ.copy()
+        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''),
+                                                              self.distribution.get_version())
+        if not os.path.exists(self.build_temp):
+            os.makedirs(self.build_temp)
+        subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
+        subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
+
+
+setup(
+    name='tf_c_inference',
+    version='0.0.1',
+    author='',
+    author_email='',
+    description='Pybind module to use the TensorFlow C API',
+    long_description='',
+    ext_modules=[CMakeExtension('tf_c_inference')],
+    cmdclass=dict(build_ext=CMakeBuild),
+    zip_safe=False,
+)
diff --git a/examples/tesnorflow_c_handler/bindings/src/Model.cpp b/examples/tesnorflow_c_handler/bindings/src/Model.cpp
new file mode 100644
index 000000000..30e94f2cf
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/src/Model.cpp
@@ -0,0 +1,267 @@
+//
+// Created by sergio on 12/05/19.
+//
+
+#include "../include/Model.h"
+
+
+Model::Model(const std::string& model_filename, const std::vector<uint8_t>& config_options) {
+    this->status = TF_NewStatus();
+    this->graph = TF_NewGraph();
+
+    // Create the session
+    TF_SessionOptions* sess_opts = TF_NewSessionOptions();
+
+    if (!config_options.empty()) {
+        TF_SetConfig(sess_opts, static_cast<const void*>(config_options.data()), config_options.size(), this->status);
+        this->status_check(true);
+    }
+
+    this->session = TF_NewSession(this->graph, sess_opts, this->status);
+    TF_DeleteSessionOptions(sess_opts);
+
+    // Check the status
+    this->status_check(true);
+
+    // Import the graph definition
+    TF_Buffer* def = read(model_filename);
+    this->error_check(def != nullptr, "An error occurred reading the model");
+
+    TF_ImportGraphDefOptions* graph_opts = TF_NewImportGraphDefOptions();
+    TF_GraphImportGraphDef(this->graph, def, graph_opts, this->status);
+    TF_DeleteImportGraphDefOptions(graph_opts);
+    TF_DeleteBuffer(def);
+
+    this->status_check(true);
+}
+
+Model::~Model() {
+    TF_DeleteSession(this->session, this->status);
+    TF_DeleteGraph(this->graph);
+    this->status_check(true);
+    TF_DeleteStatus(this->status);
+}
+
+
+void Model::init() {
+    TF_Operation* init_op[1] = {TF_GraphOperationByName(this->graph, "init")};
+
+    this->error_check(init_op[0] != nullptr, "Error: No operation named \"init\" exists");
+
+    TF_SessionRun(this->session, nullptr, nullptr, nullptr, 0, nullptr, nullptr, 0, init_op, 1, nullptr, this->status);
+    this->status_check(true);
+}
+
+void Model::save(const std::string &ckpt) {
+    // Encode the checkpoint path into a TF_STRING tensor
+    size_t size = 8 + TF_StringEncodedSize(ckpt.length());
+    TF_Tensor* t = TF_AllocateTensor(TF_STRING, nullptr, 0, size);
+    char* data = static_cast<char*>(TF_TensorData(t));
+    memset(data, 0, 8);  // 8-byte offset of the first (only) string
+    TF_StringEncode(ckpt.c_str(), ckpt.length(), data + 8, size - 8, status);
+
+    // Check errors
+    if (!this->status_check(false)) {
+        TF_DeleteTensor(t);
+        std::cerr << "Error during filename " << ckpt << " encoding" << std::endl;
+        this->status_check(true);
+    }
+
+    TF_Output output_file;
+    output_file.oper = TF_GraphOperationByName(this->graph, "save/Const");
+    output_file.index = 0;
+    TF_Output inputs[1] = {output_file};
+
+    TF_Tensor* input_values[1] = {t};
+    const TF_Operation* save_op[1] = {TF_GraphOperationByName(this->graph, "save/control_dependency")};
+    if (!save_op[0]) {
+        TF_DeleteTensor(t);
+        this->error_check(false, "Error: No operation named \"save/control_dependency\" exists");
+    }
+
+    TF_SessionRun(this->session, nullptr, inputs, input_values, 1, nullptr, nullptr, 0, save_op, 1, nullptr, this->status);
+    TF_DeleteTensor(t);
+
+    this->status_check(true);
+}
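+
+// Note: TF1.x TF_STRING tensors begin with an 8-byte offset table before the
+// TF_StringEncode'd bytes; save() and restore() therefore zero the first 8
+// bytes and encode the checkpoint path at data + 8.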
+
+void Model::restore(const std::string& ckpt) {
+
+    // Encode the checkpoint path into a TF_STRING tensor
+    size_t size = 8 + TF_StringEncodedSize(ckpt.size());
+    TF_Tensor* t = TF_AllocateTensor(TF_STRING, nullptr, 0, size);
+    char* data = static_cast<char*>(TF_TensorData(t));
+    memset(data, 0, 8);  // 8-byte offset of the first (only) string
+    TF_StringEncode(ckpt.c_str(), ckpt.size(), data + 8, size - 8, status);
+
+    // Check errors
+    if (!this->status_check(false)) {
+        TF_DeleteTensor(t);
+        std::cerr << "Error during filename " << ckpt << " encoding" << std::endl;
+        this->status_check(true);
+    }
+
+    TF_Output output_file;
+    output_file.oper = TF_GraphOperationByName(this->graph, "save/Const");
+    output_file.index = 0;
+    TF_Output inputs[1] = {output_file};
+
+    TF_Tensor* input_values[1] = {t};
+    const TF_Operation* restore_op[1] = {TF_GraphOperationByName(this->graph, "save/restore_all")};
+    if (!restore_op[0]) {
+        TF_DeleteTensor(t);
+        this->error_check(false, "Error: No operation named \"save/restore_all\" exists");
+    }
+
+    TF_SessionRun(this->session, nullptr, inputs, input_values, 1, nullptr, nullptr, 0, restore_op, 1, nullptr, this->status);
+    TF_DeleteTensor(t);
+
+    this->status_check(true);
+}
+
+TF_Buffer* Model::read(const std::string& filename) {
+    std::ifstream file(filename, std::ios::binary | std::ios::ate);
+
+    // Error opening the file
+    if (!file.is_open()) {
+        std::cerr << "Unable to open file: " << filename << std::endl;
+        return nullptr;
+    }
+
+    // Cursor is at the end, so tellg() gives the size
+    auto size = file.tellg();
+
+    // Move the cursor to the beginning and read
+    file.seekg(0, std::ios::beg);
+    auto data = new char[size];
+    file.read(data, size);
+
+    // Error reading the file
+    if (!file) {
+        std::cerr << "Unable to read the full file: " << filename << std::endl;
+        delete[] data;
+        return nullptr;
+    }
+
+    // Create a tensorflow buffer from the read data
+    TF_Buffer* buffer = TF_NewBufferFromString(data, size);
+
+    // Close the file and free the local copy
+    file.close();
+    delete[] data;
+
+    return buffer;
+}
+
+std::vector<std::string> Model::get_operations() const {
+    std::vector<std::string> result;
+    size_t pos = 0;
+    TF_Operation* oper;
+
+    // Iterate through the operations of the graph
+    while ((oper = TF_GraphNextOperation(this->graph, &pos)) != nullptr) {
+        result.emplace_back(TF_OperationName(oper));
+    }
+
+    return result;
+}
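+
+// Note on run(): every input Tensor must be "full" (flag == 1). The wrappers
+// are unpacked into the raw TF_Output / TF_Tensor* arrays that TF_SessionRun
+// expects; afterwards each output Tensor takes ownership of the returned
+// TF_Tensor and the inputs are cleaned, so input data must be set again
+// before the next run.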
+
+void Model::run(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs) {
+
+    this->error_check(std::all_of(inputs.begin(), inputs.end(), [](const Tensor* i){return i->flag == 1;}),
+                      "Error: Not all elements from the inputs are full");
+
+    this->error_check(std::all_of(outputs.begin(), outputs.end(), [](const Tensor* o){return o->flag != -1;}),
+                      "Error: Not all output Tensors are valid");
+
+    // Clean previously stored outputs
+    std::for_each(outputs.begin(), outputs.end(), [](Tensor* o){o->clean();});
+
+    // Get input operations
+    std::vector<TF_Output> io(inputs.size());
+    std::transform(inputs.begin(), inputs.end(), io.begin(), [](const Tensor* i) {return i->op;});
+
+    // Get input values
+    std::vector<TF_Tensor*> iv(inputs.size());
+    std::transform(inputs.begin(), inputs.end(), iv.begin(), [](const Tensor* i) {return i->val;});
+
+    // Get output operations
+    std::vector<TF_Output> oo(outputs.size());
+    std::transform(outputs.begin(), outputs.end(), oo.begin(), [](const Tensor* o) {return o->op;});
+
+    // Prepare output recipients
+    auto ov = new TF_Tensor*[outputs.size()];
+
+    TF_SessionRun(this->session, nullptr, io.data(), iv.data(), inputs.size(), oo.data(), ov, outputs.size(), nullptr, 0, nullptr, this->status);
+    this->status_check(true);
+
+    // Save results on outputs and mark them as full
+    for (std::size_t i = 0; i < outputs.size(); i++) {
+        outputs[i]->val = ov[i];
+        outputs[i]->flag = 1;
+        outputs[i]->deduce_shape();
+    }
+
+    // Mark inputs as empty
+    std::for_each(inputs.begin(), inputs.end(), [](Tensor* i) {i->clean();});
+
+    delete[] ov;
+}
+
+void Model::run(Tensor &input, Tensor &output) {
+    this->run(&input, &output);
+}
+
+void Model::run(const std::vector<Tensor*> &inputs, Tensor &output) {
+    this->run(inputs, &output);
+}
+
+void Model::run(Tensor &input, const std::vector<Tensor*> &outputs) {
+    this->run(&input, outputs);
+}
+
+void Model::run(Tensor *input, Tensor *output) {
+    this->run(std::vector<Tensor*>({input}), std::vector<Tensor*>({output}));
+}
+
+void Model::run(const std::vector<Tensor*> &inputs, Tensor *output) {
+    this->run(inputs, std::vector<Tensor*>({output}));
+}
+
+void Model::run(Tensor *input, const std::vector<Tensor*> &outputs) {
+    this->run(std::vector<Tensor*>({input}), outputs);
+}
+
+bool Model::status_check(bool throw_exc) const {
+    if (TF_GetCode(this->status) != TF_OK) {
+        if (throw_exc) {
+            throw std::runtime_error(TF_Message(status));
+        } else {
+            return false;
+        }
+    }
+    return true;
+}
+
+void Model::error_check(bool condition, const std::string &error) const {
+    if (!condition) {
+        throw std::runtime_error(error);
+    }
+}
diff --git a/examples/tesnorflow_c_handler/bindings/src/Tensor.cpp b/examples/tesnorflow_c_handler/bindings/src/Tensor.cpp
new file mode 100644
index 000000000..26d0ea49e
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/src/Tensor.cpp
@@ -0,0 +1,247 @@
+//
+// Created by sergio on 13/05/19.
+//
+
+#include "../include/Tensor.h"
+
+#include <cstdlib>
+#include <functional>
+
+Tensor::Tensor(const Model& model, const std::string& operation) {
+
+    // Get the operation by name
+    this->op.oper = TF_GraphOperationByName(model.graph, operation.c_str());
+    this->op.index = 0;
+
+    // The operation does not exist
+    error_check(this->op.oper != nullptr, "No operation named \"" + operation + "\" exists");
+
+    // DIMENSIONS
+
+    // Get the number of dimensions
+    int n_dims = TF_GraphGetTensorNumDims(model.graph, this->op, model.status);
+
+    // DataType
+    this->type = TF_OperationOutputType(this->op);
+
+    // If it is not a scalar
+    if (n_dims > 0) {
+        // Get the dimensions
+        auto* dims = new int64_t[n_dims];
+        TF_GraphGetTensorShape(model.graph, this->op, dims, n_dims, model.status);
+
+        // Check error on the Model status
+        model.status_check(true);
+
+        this->shape = std::vector<int64_t>(dims, dims + n_dims);
+
+        // Only one dimension can be unknown using this constructor
+        // error_check(std::count(this->shape.begin(), this->shape.end(), -1) <= 1, "At most one dimension can be unknown");
+
+        delete[] dims;
+    }
+
+    this->flag = 0;
+    this->val = nullptr;
+    this->data = nullptr;
+}
+
+Tensor::~Tensor() {
+    this->clean();
+}
+
+
+void Tensor::clean() {
+    if (this->flag == 1) {
+        TF_DeleteTensor(this->val);
+        this->flag = 0;
+    }
+    this->data = nullptr;
+}
+
+void Tensor::error_check(bool condition, const std::string &error) {
+    if (!condition) {
+        this->flag = -1;
+        throw std::runtime_error(error);
+    }
+}
+
+template<typename T>
+void Tensor::set_data(std::vector<T> new_data) {
+
+    // Non-empty tensor: release the previous value
+    if (this->flag == 1) {
+        TF_DeleteTensor(this->val);
+        this->flag = 0;
+    }
+
+    // Check the Tensor is valid
+    this->error_check(this->flag != -1, "Tensor is not valid");
+
+    // Check the type
+    this->error_check(deduce_type<T>() == this->type, "Provided type is different from Tensor expected type");
+
+    // Dimensions must be known
+    this->error_check(!this->shape.empty(), "Shape of the input Tensor is not known, please provide a shape");
+
+    // At most one dimension can be unknown
+    this->error_check(std::count(this->shape.begin(), this->shape.end(), -1) <= 1, "At most one dimension can be unknown, please provide a shape");
+
+    // Check the number of elements
+    auto exp_size = std::abs(std::accumulate(this->shape.begin(), this->shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>()));
+
+    this->error_check(new_data.size() % exp_size == 0, "Expected and provided number of elements do not match");
+
+    // Deallocator for the copied data
+    auto d = [](void* ddata, size_t, void*) {free(ddata);};
+
+    // Calculate the actual shape, filling in the unknown dimension
+    this->actual_shape = std::make_unique<std::vector<int64_t>>(shape.begin(), shape.end());
+    std::replace_if(actual_shape->begin(), actual_shape->end(), [](int64_t r) {return r == -1;}, new_data.size() / exp_size);
+
+    // Save a copy of the data on the class
+    this->data = malloc(sizeof(T) * new_data.size());
+    memcpy(this->data, new_data.data(), sizeof(T) * new_data.size());
+
+    this->val = TF_NewTensor(this->type, actual_shape->data(), actual_shape->size(), this->data, sizeof(T) * new_data.size(), d, nullptr);
+
+    this->error_check(this->val != nullptr, "An error occurred allocating the Tensor memory");
+
+    this->flag = 1;
+}
+
+template<typename T>
+void Tensor::set_data(std::vector<T> new_data, const std::vector<int64_t>& new_shape) {
+
+    this->error_check(this->shape.empty() || this->shape.size() == new_shape.size(), "Provided shape has a different number of dimensions");
+    auto old_shape = this->shape;
+
+    // Temporarily override the shape deduced from the graph
+    this->shape = new_shape;
+    this->set_data(new_data);
+
+    this->shape = old_shape;
+}
+
+template<typename T>
+std::vector<T> Tensor::get_data() {
+
+    // Check the Tensor is valid
+    this->error_check(this->flag != -1, "Tensor is not valid");
+
+    // Check the type
+    this->error_check(deduce_type<T>() == this->type, "Expected return type is different from Tensor type");
+
+    // The Tensor must not be empty
+    this->error_check(this->flag != 0, "Tensor is empty");
+
+    // Check the tensor data is not empty
+    auto raw_data = TF_TensorData(this->val);
+    this->error_check(raw_data != nullptr, "Tensor data is empty");
+
+    size_t size = TF_TensorByteSize(this->val) / TF_DataTypeSize(TF_TensorType(this->val));
+
+    // Convert to the correct type
+    const auto T_data = static_cast<T*>(raw_data);
+    return std::vector<T>(T_data, T_data + size);
+}
+
+std::vector<int64_t> Tensor::get_shape() {
+    return shape;
+}
+
+template<typename T>
+TF_DataType Tensor::deduce_type() {
+    if (std::is_same<T, float>::value)
+        return TF_FLOAT;
+    if (std::is_same<T, double>::value)
+        return TF_DOUBLE;
+    if (std::is_same<T, int32_t>::value)
+        return TF_INT32;
+    if (std::is_same<T, uint8_t>::value)
+        return TF_UINT8;
+    if (std::is_same<T, int16_t>::value)
+        return TF_INT16;
+    if (std::is_same<T, int8_t>::value)
+        return TF_INT8;
+    if (std::is_same<T, int64_t>::value)
+        return TF_INT64;
+//  if (std::is_same<T, bool>::value)
+//      return TF_BOOL;
+    if (std::is_same<T, uint16_t>::value)
+        return TF_UINT16;
+    if (std::is_same<T, uint32_t>::value)
+        return TF_UINT32;
+    if (std::is_same<T, uint64_t>::value)
+        return TF_UINT64;
+
+    throw std::runtime_error{"Could not deduce type!"};
+}
+
+void Tensor::deduce_shape() {
+    // Get the number of dimensions
+    int n_dims = TF_NumDims(this->val);
+
+    // If it is not a scalar
+    if (n_dims > 0) {
+        // Get the dimensions
+        this->shape = std::vector<int64_t>(n_dims, -1);
+        for (int i = 0; i < n_dims; i++) {
+            this->shape[i] = TF_Dim(this->val, i);
+        }
+    }
+}
+
+
+// VALID deduce_type TEMPLATES
+template TF_DataType Tensor::deduce_type<float>();
+template TF_DataType Tensor::deduce_type<double>();
+//template TF_DataType Tensor::deduce_type<bool>();
+template TF_DataType Tensor::deduce_type<int8_t>();
+template TF_DataType Tensor::deduce_type<int16_t>();
+template TF_DataType Tensor::deduce_type<int32_t>();
+template TF_DataType Tensor::deduce_type<int64_t>();
+template TF_DataType Tensor::deduce_type<uint8_t>();
+template TF_DataType Tensor::deduce_type<uint16_t>();
+template TF_DataType Tensor::deduce_type<uint32_t>();
+template TF_DataType Tensor::deduce_type<uint64_t>();
+
+// VALID get_data TEMPLATES
+template std::vector<float> Tensor::get_data<float>();
+template std::vector<double> Tensor::get_data<double>();
+template std::vector<bool> Tensor::get_data<bool>();
+template std::vector<int8_t> Tensor::get_data<int8_t>();
+template std::vector<int16_t> Tensor::get_data<int16_t>();
+template std::vector<int32_t> Tensor::get_data<int32_t>();
+template std::vector<int64_t> Tensor::get_data<int64_t>();
+template std::vector<uint8_t> Tensor::get_data<uint8_t>();
+template std::vector<uint16_t> Tensor::get_data<uint16_t>();
+template std::vector<uint32_t> Tensor::get_data<uint32_t>();
+template std::vector<uint64_t> Tensor::get_data<uint64_t>();
+
+// VALID set_data TEMPLATES
+template void Tensor::set_data<float>(std::vector<float> new_data);
+template void Tensor::set_data<double>(std::vector<double> new_data);
+//template void Tensor::set_data<bool>(std::vector<bool> new_data);
+template void Tensor::set_data<int8_t>(std::vector<int8_t> new_data);
+template void Tensor::set_data<int16_t>(std::vector<int16_t> new_data);
+template void Tensor::set_data<int32_t>(std::vector<int32_t> new_data);
+template void Tensor::set_data<int64_t>(std::vector<int64_t> new_data);
+template void Tensor::set_data<uint8_t>(std::vector<uint8_t> new_data);
+template void Tensor::set_data<uint16_t>(std::vector<uint16_t> new_data);
+template void Tensor::set_data<uint32_t>(std::vector<uint32_t> new_data);
+template void Tensor::set_data<uint64_t>(std::vector<uint64_t> new_data);
+
+// VALID set_data (with shape) TEMPLATES
+template void Tensor::set_data<float>(std::vector<float> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<double>(std::vector<double> new_data, const std::vector<int64_t>& new_shape);
+//template void Tensor::set_data<bool>(std::vector<bool> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<int8_t>(std::vector<int8_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<int16_t>(std::vector<int16_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<int32_t>(std::vector<int32_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<int64_t>(std::vector<int64_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<uint8_t>(std::vector<uint8_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<uint16_t>(std::vector<uint16_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<uint32_t>(std::vector<uint32_t> new_data, const std::vector<int64_t>& new_shape);
+template void Tensor::set_data<uint64_t>(std::vector<uint64_t> new_data, const std::vector<int64_t>& new_shape);
diff --git a/examples/tesnorflow_c_handler/bindings/src/tf_c_inference.cpp b/examples/tesnorflow_c_handler/bindings/src/tf_c_inference.cpp
new file mode 100644
index 000000000..50fdc423d
--- /dev/null
+++ b/examples/tesnorflow_c_handler/bindings/src/tf_c_inference.cpp
@@ -0,0 +1,105 @@
+#include "../include/Model.h"
+#include "../include/Tensor.h"
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+namespace py = pybind11;
+
+class Singleton {
+private:
+    static bool instanceFlag;
+    static Singleton *single;
+    Model model;
+    Tensor input, output;
+
+    Singleton(const std::string &fname, const std::string &inputName,
+              const std::string &outputName) :
+            model{fname}, input{model, inputName}, output{model, outputName} {
+    }
+
+public:
+    static Singleton *getInstance();
+
+    static Singleton *getInstance(const std::string &fname, const std::string &inputName,
+                                  const std::string &outputName);
+
+    std::vector<float> run_model(std::vector<float> input_data, std::vector<int64_t> input_data_shape);
+
+    ~Singleton() {
+        instanceFlag = false;
+    }
+};
+
+bool Singleton::instanceFlag = false;
+Singleton *Singleton::single = nullptr;
+
+Singleton *Singleton::getInstance(const std::string &fname, const std::string &inputName,
+                                  const std::string &outputName) {
+    if (!instanceFlag) {
+        single = new Singleton(fname, inputName, outputName);
+        instanceFlag = true;
+    }
+    return single;
+}
+
+Singleton *Singleton::getInstance() {
+    return single;
+}
+
+std::vector<float> Singleton::run_model(std::vector<float> input_data, std::vector<int64_t> input_data_shape) {
+    input.set_data(input_data, input_data_shape);
+    model.run({&input}, output);
+    return output.get_data<float>();
+}
+
+
+void load_model(const std::string &fname, const std::string &inputName,
+                const std::string &outputName) {
+    Singleton::getInstance(fname, inputName, outputName);
+}
+
+std::vector<float> run_model(std::vector<float> input_data, std::vector<int64_t> input_data_shape) {
+    return Singleton::getInstance()->run_model(input_data, input_data_shape);
+}
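+
+// Note: the Singleton is created lazily by the first load_model() call and
+// lives for the rest of the process. Each MMS worker is a separate process,
+// so every worker loads its own copy of the model.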
+
+PYBIND11_MODULE(tf_c_inference, m) {
+    m.doc() = R"pbdoc(
+        Pybind module to invoke TensorFlow C API inference
+        -----------------------
+
+        .. currentmodule:: tf_c_inference
+
+        .. autosummary::
+           :toctree: _generate
+
+           load_model
+           run_model
+    )pbdoc";
+
+    m.def("load_model", &load_model, "Load Model");
+    m.def("run_model", &run_model, "Run inference");
+
+#ifdef VERSION_INFO
+    m.attr("__version__") = VERSION_INFO;
+#else
+    m.attr("__version__") = "dev";
+#endif
+}
diff --git a/examples/tesnorflow_c_handler/handler.py b/examples/tesnorflow_c_handler/handler.py
new file mode 100644
index 000000000..04811e867
--- /dev/null
+++ b/examples/tesnorflow_c_handler/handler.py
@@ -0,0 +1,171 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# A copy of the License is located at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# or in the "license" file accompanying this file. This file is distributed
+# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+"""
+Defines a custom MMS handler that runs TensorFlow inference through the
+pybind module of the TensorFlow C API.
+"""
+import json
+import os
+import logging
+import pathlib
+
+import numpy as np
+
+import tf_c_inference  # import pybind module
+
+
+class ModelHandler(object):
+    """
+    A base Model handler implementation.
+    """
+
+    def __init__(self):
+        self.error = None
+        self._context = None
+        self._batch_size = 0
+        self.initialized = False
+
+    def initialize(self, context):
+        """
+        Initialize the model. This will be called during model loading time.
+        :param context: Initial context containing model server system properties.
+        :return:
+        """
+        self._context = context
+        self._batch_size = context.system_properties["batch_size"]
+
+        # Import the graph definition through the pybind module.
+        # The MobileNet model is downloaded from
+        # https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz
+        tf_c_inference.load_model("mobilenet_v2_1.4_224_frozen.pb",
+                                  "input",
+                                  "MobilenetV2/Predictions/Reshape_1")
+
+        self.initialized = True
+
+    def preprocess(self, request):
+        """
+        Decode all input tensors from the request bodies.
+
+        Note: This implementation doesn't properly handle error cases in batch mode.
+        If one of the inputs is corrupted, all requests in the batch will fail.
+
+        :param request: list of request dicts, each with a "body" or "data" entry
+        :return: tuple of (list of flattened inputs, list of input shapes), or None on error
+        """
+
+        img_list = []
+        img_shape_list = []
+
+        logging.info("Worker: {} received {} requests with batch size {}".format(
+            os.getpid(), len(request), self._batch_size))
+        for data in request:
+            img = data.get("body")
+
+            if img is None:
+                img = data.get("data")
+
+            if img is None or len(img) == 0:
+                self.error = "Empty image input"
+                return None
+
+            if isinstance(img, (bytes, bytearray, str)):
+                img = json.loads(img)
+
+            img_shape = img["inputs"][0]["shape"]
+            arr = np.array(img["inputs"][0]["data"]).flatten().tolist()
+            img_list.append(arr)
+            img_shape_list.append(img_shape)
+
+        return img_list, img_shape_list
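+
+    # A single request body is expected to look like the payload built in the
+    # __main__ self-test below, i.e. roughly:
+    # {"id": "1",
+    #  "inputs": [{"name": "input", "shape": [1, 224, 224, 3],
+    #              "datatype": "FP32", "parameters": {}, "data": [...]}]}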
+ """ + # Assuming MMS batch size 1 + return tf_c_inference.run_model(input_data[0], input_shape[0]) + + def postprocess(self, inference_output): + """ + Return predict result in batch. + :param inference_output: list of inference output + :return: list of predict results + """ + # Take output from network and post-process to desired format + return inference_output + + def handle(self, data, context): + """ + Call preprocess, inference and post-process functions + :param data: input data + :param context: mms context + """ + input_data, input_shape = self.preprocess(data) + model_out = self.inference(input_data, input_shape) + return self.postprocess(model_out) + + +_service = ModelHandler() + + +def handle(data, context): + try: + if not _service.initialized: + _service.initialize(context) + + if data is None: + return None + return _service.handle(data, context) + + except Exception as e: + import traceback + return [[str(e), traceback.format_exc()]] + + +if __name__ == "__main__": + import mms + import numpy as np + from mms.context import Context + + source_path = pathlib.Path(__file__).parent.absolute() + model_dir = '{}/saved'.format(source_path) + context = Context('tf_model', model_dir, '', 1, 'gpu', mms.__version__) + + data = np.random.uniform(size=(3, 224, 224, 3)).astype('float32') + + json_data = {"id": "1", + "inputs": [{"name": "input", + "shape": [3, 224, 224, 3], + "datatype": "FP32", "parameters": {}, + "data": data.tolist()} + ] + } + data = [{"body": json.dumps(json_data)}] + + print(handle(data, context)) + +# model-archiver --model-name c_model --handler c_model:handle --export-path ~/tmp/models --model-path c_model -f +# multi-model-server --start --model-store=/users/demo/tmp/models/ +# curl -s -X DELETE http://localhost:8081/models/c_model +# curl -s -X POST "http://localhost:8081/models?url=c_model.mar&batch_size=4&max_batch_delay=30" +# curl -s -X PUT "http://localhost:8081/models/c_model?min_worker=2&synchronous=true" +# curl -X POST http://127.0.0.1:8080/predictions/c_model --data @car.json -H "Content-Type: application/json" +# curl -X POST http://127.0.0.1:8080/predictions/c_model --data @coffe-mug.json -H "Content-Type: application/json" +# curl -X POST http://127.0.0.1:8080/predictions/c_model --data @valture.json -H "Content-Type: application/json" + + +## tar.gz format +## model-archiver --model-name c_model --handler c_model:handle --export-path ~/tmp/models --model-path c_model --archive-format tgz