diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index 445744e9918..d412e18cc1d 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -97,7 +97,10 @@ std::pair getOutputMinMax(const NodePtr node) noexcept { } /* -Converts flatbuffer xnn data type to xnnpack data type +Converts flatbuffer xnn data type to xnnpack data type. + +NOTE: +Flatbuffer Enum Values are not the same as XNNPACK's datatype enum values. */ xnn_datatype getDataType(const DataType& data_type) { switch (data_type) { @@ -121,6 +124,14 @@ xnn_datatype getDataType(const DataType& data_type) { return xnn_datatype::xnn_datatype_qdint8; case DataType::xnn_datatype_qbint4: return xnn_datatype::xnn_datatype_qbint4; + case DataType::xnn_datatype_qpint8: + return xnn_datatype::xnn_datatype_qpint8; + case DataType::xnn_datatype_int32: + return xnn_datatype::xnn_datatype_int32; + case DataType::xnn_datatype_pfp32: + return xnn_datatype::xnn_datatype_pfp32; + case DataType::xnn_datatype_bf16: + return xnn_datatype::xnn_datatype_bf16; default: return xnn_datatype::xnn_datatype_invalid; } diff --git a/backends/xnnpack/serialization/runtime_schema.fbs b/backends/xnnpack/serialization/runtime_schema.fbs index f10ba3d1b81..99f9e4e5fbd 100644 --- a/backends/xnnpack/serialization/runtime_schema.fbs +++ b/backends/xnnpack/serialization/runtime_schema.fbs @@ -29,6 +29,15 @@ enum XNNDatatype : short { xnn_datatype_qdint8 = 9, /// Quantized 4-bit signed integer with shared blockwise quantization parameters. xnn_datatype_qbint4 = 10, + /// Dynamically quantized 8-bit signed integers packed with their per-row + /// quantization parameters. + xnn_datatype_qpint8 = 11, + /// 32-bit signed integers. + xnn_datatype_int32 = 12, + /// IEEE754 single-precision packed floating-point. + xnn_datatype_pfp32 = 13, + /// BFloat16, i.e. the upper 16 bits of a float32. + xnn_datatype_bf16 = 14, } // type of quantization diff --git a/backends/xnnpack/serialization/schema.fbs b/backends/xnnpack/serialization/schema.fbs index 565eb4c3bba..e3ed4061e94 100644 --- a/backends/xnnpack/serialization/schema.fbs +++ b/backends/xnnpack/serialization/schema.fbs @@ -29,6 +29,15 @@ enum XNNDatatype : short { xnn_datatype_qdint8 = 9, /// Quantized 4-bit signed integer with shared blockwise quantization parameters. xnn_datatype_qbint4 = 10, + /// Dynamically quantized 8-bit signed integers packed with their per-row + /// quantization parameters. + xnn_datatype_qpint8 = 11, + /// 32-bit signed integers. + xnn_datatype_int32 = 12, + /// IEEE754 single-precision packed floating-point. + xnn_datatype_pfp32 = 13, + /// BFloat16, i.e. the upper 16 bits of a float32. + xnn_datatype_bf16 = 14, } // type of quantization diff --git a/backends/xnnpack/serialization/xnnpack_graph_schema.py b/backends/xnnpack/serialization/xnnpack_graph_schema.py index 2a3ccaf2a0a..4e23e199dec 100644 --- a/backends/xnnpack/serialization/xnnpack_graph_schema.py +++ b/backends/xnnpack/serialization/xnnpack_graph_schema.py @@ -413,6 +413,10 @@ class XNNDatatype(IntEnum): xnn_datatype_qcint4 = 8 xnn_datatype_qdint8 = 9 xnn_datatype_qbint4 = 10 + xnn_datatype_qpint8 = 11 + xnn_datatype_int32 = 12 + xnn_datatype_pfp32 = 13 + xnn_datatype_bf16 = 14 @dataclass