Skip to content

Commit c36e868

Browse files
committed
[onert] Export dedicated OneRT data types to Python
NNFW types and numpy data types do not map one to one, because NNFW has quantized types represented as uint8 or int16. Because of that, it is more practical to export a custom data type object which maps between these two type systems. Additionally, for convenience, the dedicated types are exported in the top-level onert Python module, so one can use them as follows: > import numpy as np, onert > np.array([2, 42, 42], dtype=onert.float32) ONE-DCO-1.0-Signed-off-by: Arkadiusz Bokowy <a.bokowy@samsung.com>
1 parent 1de77d5 commit c36e868

File tree

6 files changed

+101
-71
lines changed

6 files changed

+101
-71
lines changed

runtime/onert/api/python/include/nnfw_api_wrapper.h

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,24 @@ namespace python
3333

3434
namespace py = pybind11;
3535

36+
/**
37+
* @brief Data type mapping between NNFW_TYPE and numpy dtype.
38+
*/
39+
struct dtype
40+
{
41+
NNFW_TYPE nnfw_type;
42+
py::dtype py_dtype;
43+
// The name of the dtype, e.g., "float32", "int32", etc.
44+
// This is mainly for the __repr__ implementation.
45+
const char *name;
46+
47+
dtype() = default;
48+
explicit dtype(NNFW_TYPE type);
49+
50+
bool operator==(const struct dtype &other) const { return nnfw_type == other.nnfw_type; }
51+
bool operator!=(const struct dtype &other) const { return nnfw_type != other.nnfw_type; }
52+
};
53+
3654
/**
3755
* @brief tensor info describes the type and shape of tensors
3856
*
@@ -48,7 +66,7 @@ namespace py = pybind11;
4866
struct tensorinfo
4967
{
5068
/** The data type */
51-
const char *dtype;
69+
struct dtype dtype;
5270
/** The number of dimensions (rank) */
5371
int32_t rank;
5472
/**
@@ -75,22 +93,6 @@ void ensure_status(NNFW_STATUS status);
7593
*/
7694
NNFW_LAYOUT getLayout(const char *layout = "");
7795

78-
/**
79-
* Convert the type with string to NNFW_TYPE
80-
*
81-
* @param[in] type type to be converted
82-
* @return proper type if exists
83-
*/
84-
NNFW_TYPE getType(const char *type = "");
85-
86-
/**
87-
* Convert the type with NNFW_TYPE to string
88-
*
89-
* @param[in] type type to be converted
90-
* @return proper type
91-
*/
92-
const char *getStringType(NNFW_TYPE type);
93-
9496
/**
9597
* @brief Get the total number of elements in nnfw_tensorinfo->dims.
9698
*
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# Define the public API of the onert package
2-
__all__ = ["infer", "tensorinfo", "experimental"]
2+
__all__ = ["dtype", "infer", "tensorinfo", "experimental"]
3+
4+
# Import and expose tensorinfo and tensor data types
5+
from .native.libnnfw_api_pybind import dtype, tensorinfo
6+
from .native.libnnfw_api_pybind.dtypes import *
37

48
# Import and expose the infer module's functionalities
59
from . import infer
610

7-
# Import and expose tensorinfo
8-
from .common import tensorinfo
9-
1011
# Import and expose the experimental module's functionalities
1112
from . import experimental

runtime/onert/api/python/src/bindings/nnfw_tensorinfo_bindings.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
#include "nnfw_api_wrapper.h"
2020

21+
#include <pybind11/operators.h>
22+
2123
namespace onert::api::python
2224
{
2325

@@ -26,6 +28,36 @@ namespace py = pybind11;
2628
// Bind the `tensorinfo` class
2729
void bind_tensorinfo(py::module_ &m)
2830
{
31+
32+
static const struct dtype dtypes[] = {
33+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_FLOAT32),
34+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_INT32),
35+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM),
36+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_UINT8),
37+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_BOOL),
38+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_INT64),
39+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED),
40+
dtype(NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED),
41+
};
42+
43+
// Export dedicated OneRT type for tensor types. The presence of the "dtype"
44+
// property allows this type to be used directly with numpy, e.g.:
45+
// >>> np.array([3, 6, 3], dtype=onert.float32)
46+
py::class_<dtype>(m, "dtype", "Defines the type of the OneRT tensor.", py::module_local())
47+
.def(py::self == py::self)
48+
.def(py::self != py::self)
49+
.def("__repr__", [](const dtype &dt) { return std::string("onert.") + dt.name; })
50+
.def_readonly("name", &dtype::name, "The name of the data type.")
51+
.def_readonly("dtype", &dtype::py_dtype, "A corresponding numpy data type.")
52+
.def_property_readonly(
53+
"itemsize", [](const dtype &dt) { return dt.py_dtype.itemsize(); },
54+
"The element size of this data-type object.");
55+
56+
// Export OneRT dtypes in a submodule, so we can batch import them
57+
auto m_dtypes = m.def_submodule("dtypes", "OneRT tensor data types");
58+
for (const auto &dt : dtypes)
59+
m_dtypes.attr(dt.name) = dt;
60+
2961
py::class_<tensorinfo>(m, "tensorinfo", "tensorinfo describes the type and shape of tensors",
3062
py::module_local())
3163
.def(py::init<>(), "The constructor of tensorinfo")

runtime/onert/api/python/src/wrapper/nnfw_api_wrapper.cc

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -51,57 +51,54 @@ NNFW_LAYOUT getLayout(const char *layout)
5151
{
5252
if (std::strcmp(layout, "NCHW") == 0)
5353
return NNFW_LAYOUT::NNFW_LAYOUT_CHANNELS_FIRST;
54-
else if (std::strcmp(layout, "NHWC") == 0)
54+
if (std::strcmp(layout, "NHWC") == 0)
5555
return NNFW_LAYOUT::NNFW_LAYOUT_CHANNELS_LAST;
56-
else if (std::strcmp(layout, "NONE") == 0)
56+
if (std::strcmp(layout, "NONE") == 0)
5757
return NNFW_LAYOUT::NNFW_LAYOUT_NONE;
58-
else
59-
throw NnfwError(std::string("Unknown layout type: '") + layout + "'");
58+
throw NnfwError(std::string("Unknown layout type: '") + layout + "'");
6059
}
6160

62-
NNFW_TYPE getType(const char *type)
63-
{
64-
if (std::strcmp(type, "float32") == 0)
65-
return NNFW_TYPE::NNFW_TYPE_TENSOR_FLOAT32;
66-
else if (std::strcmp(type, "int32") == 0)
67-
return NNFW_TYPE::NNFW_TYPE_TENSOR_INT32;
68-
else if (std::strcmp(type, "bool") == 0)
69-
return NNFW_TYPE::NNFW_TYPE_TENSOR_UINT8;
70-
else if (std::strcmp(type, "bool") == 0)
71-
return NNFW_TYPE::NNFW_TYPE_TENSOR_BOOL;
72-
else if (std::strcmp(type, "int64") == 0)
73-
return NNFW_TYPE::NNFW_TYPE_TENSOR_INT64;
74-
else if (std::strcmp(type, "int8") == 0)
75-
return NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
76-
else if (std::strcmp(type, "int16") == 0)
77-
return NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED;
78-
else
79-
throw NnfwError(std::string("Cannot convert string to NNFW_TYPE: '") + type + "'");
80-
}
81-
82-
const char *getStringType(NNFW_TYPE type)
61+
dtype::dtype(NNFW_TYPE type) : nnfw_type(type)
8362
{
8463
switch (type)
8564
{
8665
case NNFW_TYPE::NNFW_TYPE_TENSOR_FLOAT32:
87-
return "float32";
66+
py_dtype = py::dtype("float32");
67+
name = "float32";
68+
return;
8869
case NNFW_TYPE::NNFW_TYPE_TENSOR_INT32:
89-
return "int32";
70+
py_dtype = py::dtype("int32");
71+
name = "int32";
72+
return;
9073
case NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM:
74+
py_dtype = py::dtype("uint8");
75+
name = "quint8";
76+
return;
9177
case NNFW_TYPE::NNFW_TYPE_TENSOR_UINT8:
92-
return "uint8";
78+
py_dtype = py::dtype("uint8");
79+
name = "uint8";
80+
return;
9381
case NNFW_TYPE::NNFW_TYPE_TENSOR_BOOL:
94-
return "bool";
82+
py_dtype = py::dtype("bool");
83+
name = "bool";
84+
return;
9585
case NNFW_TYPE::NNFW_TYPE_TENSOR_INT64:
96-
return "int64";
86+
py_dtype = py::dtype("int64");
87+
name = "int64";
88+
return;
9789
case NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
98-
return "int8";
90+
py_dtype = py::dtype("int8");
91+
name = "qint8";
92+
return;
9993
case NNFW_TYPE::NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
100-
return "int16";
101-
default:
102-
throw NnfwError(std::string("Cannot convert NNFW_TYPE enum to string (value=") +
103-
std::to_string(static_cast<int>(type)) + ")");
94+
py_dtype = py::dtype("int16");
95+
name = "qint16sym";
96+
return;
10497
}
98+
// This code should not be reached because compiler will generate a warning
99+
// if some type is not handled in the switch block above.
100+
throw NnfwError(std::string("Cannot convert NNFW_TYPE enum to onert.dtype (value=") +
101+
std::to_string(static_cast<int>(type)) + ")");
105102
}
106103

107104
uint64_t num_elems(const nnfw_tensorinfo *tensor_info)
@@ -153,10 +150,11 @@ void NNFW_SESSION::close_session()
153150
ensure_status(nnfw_close_session(this->session));
154151
this->session = nullptr;
155152
}
153+
156154
void NNFW_SESSION::set_input_tensorinfo(uint32_t index, const tensorinfo *tensor_info)
157155
{
158156
nnfw_tensorinfo ti;
159-
ti.dtype = getType(tensor_info->dtype);
157+
ti.dtype = tensor_info->dtype.nnfw_type;
160158
ti.rank = tensor_info->rank;
161159
for (int i = 0; i < NNFW_MAX_RANK; i++)
162160
{
@@ -187,25 +185,27 @@ void NNFW_SESSION::set_input_layout(uint32_t index, const char *layout)
187185
NNFW_LAYOUT nnfw_layout = getLayout(layout);
188186
ensure_status(nnfw_set_input_layout(session, index, nnfw_layout));
189187
}
188+
190189
tensorinfo NNFW_SESSION::input_tensorinfo(uint32_t index)
191190
{
192191
nnfw_tensorinfo tensor_info = nnfw_tensorinfo();
193192
ensure_status(nnfw_input_tensorinfo(session, index, &tensor_info));
194193
tensorinfo ti;
195-
ti.dtype = getStringType(tensor_info.dtype);
194+
ti.dtype = dtype(tensor_info.dtype);
196195
ti.rank = tensor_info.rank;
197196
for (int i = 0; i < NNFW_MAX_RANK; i++)
198197
{
199198
ti.dims[i] = tensor_info.dims[i];
200199
}
201200
return ti;
202201
}
202+
203203
tensorinfo NNFW_SESSION::output_tensorinfo(uint32_t index)
204204
{
205205
nnfw_tensorinfo tensor_info = nnfw_tensorinfo();
206206
ensure_status(nnfw_output_tensorinfo(session, index, &tensor_info));
207207
tensorinfo ti;
208-
ti.dtype = getStringType(tensor_info.dtype);
208+
ti.dtype = dtype(tensor_info.dtype);
209209
ti.rank = tensor_info.rank;
210210
for (int i = 0; i < NNFW_MAX_RANK; i++)
211211
{
@@ -234,13 +234,10 @@ py::array NNFW_SESSION::get_output(uint32_t index)
234234
num_elements *= static_cast<size_t>(out_info.dims[i]);
235235
}
236236

237+
const auto type = dtype(out_info.dtype);
237238
// Wrap the raw buffer in a numpy array;
238-
auto np = py::module_::import("numpy");
239-
py::dtype dt = np.attr("dtype")(py::str(getStringType(out_info.dtype))).cast<py::dtype>();
240-
size_t itemsize = dt.attr("itemsize").cast<size_t>();
241-
242-
py::array arr(dt, shape);
243-
std::memcpy(arr.mutable_data(), out_buffer, num_elements * itemsize);
239+
py::array arr(type.py_dtype, shape);
240+
std::memcpy(arr.mutable_data(), out_buffer, num_elements * type.py_dtype.itemsize());
244241
arr.attr("flags").attr("writeable") = false;
245242

246243
return arr;

runtime/onert/sample/minimal-python/inference_benchmark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33
import psutil
44
import os
5-
from typing import List
5+
from typing import List, Optional
66
from onert import infer, tensorinfo
77

88

@@ -45,8 +45,8 @@ def get_validated_input_tensorinfos(sess: infer.session,
4545
return updated_infos
4646

4747

48-
def benchmark_inference(nnpackage_path: str, backends: str, input_shapes: List[List[int]],
49-
repeat: int):
48+
def benchmark_inference(nnpackage_path: str, backends: str,
49+
input_shapes: Optional[List[List[int]]], repeat: int):
5050
mem_before_kb = get_memory_usage_mb() * 1024
5151

5252
sess = infer.session(path=nnpackage_path, backends=backends)

runtime/onert/sample/minimal-python/minimal.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@ def main(nnpackage_path, backends="cpu"):
1212
input_infos = session.get_inputs_tensorinfo()
1313
dummy_inputs = []
1414
for info in input_infos:
15-
# Retrieve the dimensions list from tensorinfo property.
16-
dims = list(info.dims)
1715
# Build the shape tuple from tensorinfo dimensions.
18-
shape = tuple(dims[:info.rank])
16+
shape = tuple(info.dims[:info.rank])
1917
# Create a dummy numpy array filled with zeros.
2018
dummy_inputs.append(np.zeros(shape, dtype=info.dtype))
2119

0 commit comments

Comments
 (0)