9 changes: 5 additions & 4 deletions tensorflow/compiler/mlir/lite/core/c/builtin_op_data.h
@@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

/// WARNING: Users of TensorFlow Lite should not include this file directly,
/// but should instead include
/// "third_party/tensorflow/lite/c/builtin_op_data.h".
/// Only the TensorFlow Lite implementation itself should include this
/// file directly.
/// only the TensorFlow Lite implementation itself should.

// IWYU pragma: private, include "third_party/tensorflow/lite/c/builtin_op_data.h"

#ifndef TENSORFLOW_COMPILER_MLIR_LITE_CORE_C_BUILTIN_OP_DATA_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_CORE_C_BUILTIN_OP_DATA_H_

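For completeness, the include the warning prescribes, as it would appear in user code (a minimal sketch, assuming an ordinary TFLite client):

// User code: include the public TFLite header, never this internal MLIR copy.
#include "third_party/tensorflow/lite/c/builtin_op_data.h"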
4 changes: 2 additions & 2 deletions tensorflow/lite/core/api/tensor_utils.cc
@@ -33,8 +33,8 @@ TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
}
// TODO(b/139446230): Provide a platform header to better handle these
// specific scenarios.
#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
defined(__i386) || defined(__x86__) || defined(__X86__) || \
#if defined(__ANDROID__) || defined(__x86_64__) || defined(__i386__) || \
defined(__i386) || defined(__x86__) || defined(__X86__) || \
defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
memset(tensor->data.raw, value, tensor->bytes);
#else
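The only substantive change in this hunk is wrapping __ANDROID__ in defined(). In a #if expression, an undefined identifier silently evaluates to 0; that happens to work, but it trips -Wundef (or -Werror=undef) builds, while defined() states the intent explicitly. A minimal standalone illustration, not taken from the PR:

// With -Wundef enabled, the first form warns on non-Android toolchains
// where __ANDROID__ is not defined; the second is always well-formed.
#if __ANDROID__                 // relies on the undefined-macro -> 0 fallback
// Android-only path.
#endif

#if defined(__ANDROID__)        // explicit existence check, warning-free
// Android-only path.
#endif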
17 changes: 14 additions & 3 deletions tensorflow/lite/core/c/common.cc
@@ -113,14 +113,25 @@ TfLiteQuantization TfLiteQuantizationClone(const TfLiteQuantization& src) {
case kTfLiteAffineQuantization: {
dst.params = calloc(1, sizeof(TfLiteAffineQuantization));
const TfLiteAffineQuantization* const src_params =
(TfLiteAffineQuantization*)(src.params);
reinterpret_cast<TfLiteAffineQuantization*>(src.params);
TfLiteAffineQuantization* const dst_params =
(TfLiteAffineQuantization*)(dst.params);
reinterpret_cast<TfLiteAffineQuantization*>(dst.params);
dst_params->quantized_dimension = src_params->quantized_dimension;
dst_params->scale = TfLiteFloatArrayCopy(src_params->scale);
dst_params->zero_point = TfLiteIntArrayCopy(src_params->zero_point);
break;
}
case kTfLiteBlockwiseQuantization: {
dst.params = calloc(1, sizeof(TfLiteBlockwiseQuantization));
const TfLiteBlockwiseQuantization* const src_params =
(TfLiteBlockwiseQuantization*)(src.params);
TfLiteBlockwiseQuantization* const dst_params =
(TfLiteBlockwiseQuantization*)(dst.params);
dst_params->blocksize = src_params->blocksize;
dst_params->scale = src_params->scale;
dst_params->zero_point = src_params->zero_point;
break;
}
}
return dst;
}
@@ -225,7 +236,7 @@ void TfLiteTensorDataFree(TfLiteTensor* t) {
void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
if (quantization->type == kTfLiteAffineQuantization) {
TfLiteAffineQuantization* q_params =
(TfLiteAffineQuantization*)(quantization->params);
reinterpret_cast<TfLiteAffineQuantization*>(quantization->params);
if (q_params->scale) {
TfLiteFloatArrayFree(q_params->scale);
q_params->scale = nullptr;
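Unlike the affine branch, the new blockwise branch can copy field by field: scale and zero_point are int32_t tensor indices rather than heap-allocated arrays, so there is no analog of TfLiteFloatArrayCopy to call, and TfLiteQuantizationFree needs no new array-freeing case. A hedged sketch of building a value this clone path handles (field values are illustrative, not from the PR):

#include <cstdlib>

TfLiteQuantization MakeBlockwiseQuantization() {
  TfLiteQuantization q;
  q.type = kTfLiteBlockwiseQuantization;
  auto* p = static_cast<TfLiteBlockwiseQuantization*>(
      calloc(1, sizeof(TfLiteBlockwiseQuantization)));
  p->scale = 1;       // index of the scales tensor (made up)
  p->zero_point = 2;  // index of the zero-points tensor (made up)
  p->blocksize = 32;  // 32 weights per scale (made up)
  q.params = p;       // TfLiteQuantizationClone can now copy q safely
  return q;
}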
16 changes: 16 additions & 0 deletions tensorflow/lite/core/c/common.h
@@ -328,6 +328,8 @@ typedef enum TfLiteQuantizationType : int {
/// Affine quantization (with support for per-channel quantization).
/// Corresponds to TfLiteAffineQuantization.
kTfLiteAffineQuantization = 1,
/// Blockwise quantization.
kTfLiteBlockwiseQuantization = 2,
} TfLiteQuantizationType;

/// Structure specifying the quantization used by the tensor, if-any.
@@ -353,6 +355,20 @@ typedef struct TfLiteAffineQuantization {
int32_t quantized_dimension;
} TfLiteAffineQuantization;

/// Parameters for blockwise quantization across the output channels dimension.
/// For a particular value in quantized_dimension, quantized values can be
/// converted back to float using:
/// `real_value = scale * (quantized_value - zero_point)`
typedef struct TfLiteBlockwiseQuantization {
// Index of the tensor containing the scales.
int32_t scale;
// Index of the tensor containing the zero points.
int32_t zero_point;
// Quantization blocksize.
int32_t blocksize;
int32_t quantized_dimension;
} TfLiteBlockwiseQuantization;

/// A union of pointers that points to memory for a given tensor.
///
/// Do not access these members directly, if possible, use
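Note the asymmetry with TfLiteAffineQuantization above: there, scale and zero_point are arrays owned by the struct; here they are indices of other tensors in the model, and quantized_dimension is left uncommented (it presumably mirrors the affine meaning). Under that reading, the doc comment's formula applies block by block along the quantized dimension; a sketch, assuming int8 data and scale/zero-point tensors already fetched by index (helper name and layout are assumptions, not TFLite API):

#include <cstdint>
#include <vector>

std::vector<float> DequantizeBlockwise(const int8_t* quantized, int n,
                                       const float* scales,
                                       const int32_t* zero_points,
                                       int32_t blocksize) {
  std::vector<float> real(n);
  for (int i = 0; i < n; ++i) {
    const int block = i / blocksize;  // each block shares one scale/zero point
    real[i] = scales[block] * (quantized[i] - zero_points[block]);
  }
  return real;
}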
109 changes: 109 additions & 0 deletions tensorflow/lite/kernels/internal/reference/fully_connected.h
@@ -16,6 +16,8 @@ limitations under the License.
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
@@ -62,6 +64,59 @@ inline void FullyConnected(
}
}

// This implementation receives the scales in float and performs requant in
// float to avoid loss of precision.
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
float input_scale, float output_scale, float filter_scale,
uint8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
// TODO(b/62193649): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
const double effective_output_scale = static_cast<double>(input_scale) *
static_cast<double>(filter_scale) /
static_cast<double>(output_scale);
int32_t acc_scaled = static_cast<int32_t>(
round(static_cast<double>(acc) * effective_output_scale));
acc_scaled += output_offset;
acc_scaled = std::max(acc_scaled, output_activation_min);
acc_scaled = std::min(acc_scaled, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc_scaled);
}
}
}

inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
@@ -164,6 +219,60 @@ inline void FullyConnected(
}
}

// This implementation receives the scales in float and performs requant in
// float to avoid loss of precision.
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
float input_scale, float output_scale, float filter_scale,
int16_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;

TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(output_offset, 0);
// TODO(b/62193649): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32_t accum = bias_data[out_c];
// Accumulation loop.
for (int d = 0; d < accum_depth; ++d) {
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
int16_t filter_val =
filter_data[out_c * accum_depth + d] + filter_offset;
accum += filter_val * input_val;
}
const double effective_output_scale = static_cast<double>(input_scale) *
static_cast<double>(filter_scale) /
static_cast<double>(output_scale);
int32_t acc_scaled = static_cast<int32_t>(
round(static_cast<double>(accum) * effective_output_scale));
// Saturate, cast to int16_t, and store to output array.
acc_scaled = std::max(acc_scaled, output_activation_min - output_offset);
acc_scaled = std::min(acc_scaled, output_activation_max - output_offset);
acc_scaled += output_offset;
output_data[out_c + output_depth * b] = acc_scaled;
}
}
}

inline void ShuffledFullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& weights_shape,
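Every overload added to this file shares one requantization recipe: rather than a precomputed fixed-point multiplier and shift, as the existing paths use via MultiplyByQuantizedMultiplier, the effective scale input_scale * filter_scale / output_scale is formed in double at run time and applied with a single rounding. Worked numbers: with input_scale = 0.5, filter_scale = 0.25, output_scale = 10, the effective scale is 0.0125, so an accumulator of 2000 maps to round(2000 * 0.0125) = 25 before offset and clamping. A standalone sketch of that shared tail (the helper name is an invention, not the PR's):

#include <algorithm>
#include <cmath>
#include <cstdint>

int32_t RequantizeViaFloat(int32_t acc, float input_scale, float filter_scale,
                           float output_scale, int32_t output_offset,
                           int32_t act_min, int32_t act_max) {
  const double effective_output_scale = static_cast<double>(input_scale) *
                                        static_cast<double>(filter_scale) /
                                        static_cast<double>(output_scale);
  int32_t v = static_cast<int32_t>(
      std::round(static_cast<double>(acc) * effective_output_scale));
  v += output_offset;
  return std::min(std::max(v, act_min), act_max);
}

The int16 overload above clamps against offset-adjusted bounds before adding output_offset, which is equivalent; its offset is DCHECKed to zero in any case.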
tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
@@ -16,6 +16,8 @@ limitations under the License.
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/common.h"

@@ -74,6 +76,61 @@ void FullyConnectedPerChannel(
}
}

// This implementation receives the scales in float and performs requant in
// float to avoid loss of precision.
template <typename InputType, typename WeightType, typename OutputType,
typename BiasType>
void FullyConnectedPerChannel(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const InputType* input_data, const RuntimeShape& filter_shape,
const WeightType* filter_data, const RuntimeShape& bias_shape,
const BiasType* bias_data, const RuntimeShape& output_shape,
float input_scale, float output_scale, const float* filter_scales,
OutputType* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();

const int output_dim_count = output_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = output_shape.Dims(output_dim_count - 1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
BiasType acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += filter_val * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}

const float scale = filter_scales[out_c];
const double filter_scale = static_cast<double>(scale);
const double effective_output_scale = static_cast<double>(input_scale) *
filter_scale /
static_cast<double>(output_scale);
int32_t acc_scaled = static_cast<int32_t>(
round(static_cast<double>(acc) * effective_output_scale));

acc_scaled += output_offset;
acc_scaled = std::max(acc_scaled, output_activation_min);
acc_scaled = std::min(acc_scaled, output_activation_max);
output_data[out_c + output_depth * b] =
static_cast<OutputType>(acc_scaled);
}
}
}

template <typename InputType, typename WeightType, typename OutputType,
typename BiasType>
void FullyConnected(const FullyConnectedParams& params,
@@ -122,6 +179,59 @@ void FullyConnected(const FullyConnectedParams& params,
}
}

// This implementation receives the scales in float and performs requant in
// float to avoid loss of precision.
template <typename InputType, typename WeightType, typename OutputType,
typename BiasType>
void FullyConnected(const FullyConnectedParams& params,
const RuntimeShape& input_shape,
const InputType* input_data,
const RuntimeShape& filter_shape,
const WeightType* filter_data,
const RuntimeShape& bias_shape, const BiasType* bias_data,
const RuntimeShape& output_shape, float input_scale,
float output_scale, float filter_scale,
OutputType* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int output_dim_count = output_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = output_shape.Dims(output_dim_count - 1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
BiasType acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
const double effective_output_scale = static_cast<double>(input_scale) *
static_cast<double>(filter_scale) /
static_cast<double>(output_scale);
int32_t acc_scaled = static_cast<int32_t>(
round(static_cast<double>(acc) * effective_output_scale));
acc_scaled += output_offset;
acc_scaled = std::max(acc_scaled, output_activation_min);
acc_scaled = std::min(acc_scaled, output_activation_max);
output_data[out_c + output_depth * b] =
static_cast<OutputType>(acc_scaled);
}
}
}

} // namespace reference_integer_ops
} // namespace tflite

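The per-channel variants differ from the per-tensor ones only in where the filter scale comes from: filter_scales[out_c] varies per output channel, so the effective scale genuinely must be recomputed inside the channel loop, whereas in the per-tensor overloads it is loop-invariant and could be hoisted. A hedged standalone sketch of the per-channel pattern for an int8 output (bounds and names are illustrative):

#include <algorithm>
#include <cmath>
#include <cstdint>

void RequantizePerChannel(const int32_t* acc, int num_channels,
                          float input_scale, const float* filter_scales,
                          float output_scale, int32_t output_offset,
                          int8_t* out) {
  for (int c = 0; c < num_channels; ++c) {
    const double eff = static_cast<double>(input_scale) *
                       static_cast<double>(filter_scales[c]) /  // per channel
                       static_cast<double>(output_scale);
    int32_t v =
        static_cast<int32_t>(std::round(acc[c] * eff)) + output_offset;
    v = std::max(v, static_cast<int32_t>(-128));  // int8 range, illustrative
    v = std::min(v, static_cast<int32_t>(127));
    out[c] = static_cast<int8_t>(v);
  }
}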
14 changes: 4 additions & 10 deletions tensorflow/lite/tools/flatbuffer_utils.py
@@ -21,7 +21,6 @@
"""

import copy
import functools
import random
import re
import struct
@@ -490,12 +489,7 @@ def get_options_as(
):
raise err

@functools.singledispatch
def _get_opts(unused_op):
return None

@_get_opts.register
def _(op: schema_fb.Operator):
if isinstance(op, schema_fb.Operator):
if not is_opt_1_type:
enum_val = getattr(schema_fb.BuiltinOptions2, base_type_name)
opts_creator = schema_fb.BuiltinOptions2Creator
@@ -510,8 +504,7 @@ def _(op: schema_fb.Operator):
return None
return opts_creator(enum_val, raw_ops)

@_get_opts.register
def _(op: schema_fb.OperatorT):
elif isinstance(op, schema_fb.OperatorT):
if is_opt_1_type:
raw_ops_t = op.builtinOptions
else:
@@ -520,4 +513,5 @@ def _(op: schema_fb.OperatorT):
return None
return raw_ops_t

return _get_opts(op)
else:
return None