Commit 5f0db34

Automated sync from github.com/tensorflow/tensorflow (#3060)
BUG=automated sync from upstream
NO_CHECK_TFLITE_FILES=automated sync from upstream
1 parent 12eee9b commit 5f0db34

File tree

7 files changed: +260 -19 lines

tensorflow/compiler/mlir/lite/core/c/builtin_op_data.h

Lines changed: 5 additions & 4 deletions

@@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+
 /// WARNING: Users of TensorFlow Lite should not include this file directly,
-/// but should instead include
-/// "third_party/tensorflow/lite/c/builtin_op_data.h".
-/// Only the TensorFlow Lite implementation itself should include this
-/// file directly.
+/// only the TensorFlow Lite implementation itself should.
+
+// IWYU pragma: private, include "third_party/tensorflow/lite/c/builtin_op_data.h"
+
 #ifndef TENSORFLOW_COMPILER_MLIR_LITE_CORE_C_BUILTIN_OP_DATA_H_
 #define TENSORFLOW_COMPILER_MLIR_LITE_CORE_C_BUILTIN_OP_DATA_H_
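For readers unfamiliar with it, the IWYU pragma added above tells include-what-you-use tooling that this header is private and names the public header to suggest in its place. A generic sketch of the pattern (the file names here are hypothetical, not from this commit):

    // foo_internal.h
    // IWYU pragma: private, include "foo/public/foo.h"
    //
    // Tools that honor IWYU pragmas will flag direct includes of
    // foo_internal.h and suggest foo/public/foo.h instead.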

tensorflow/lite/core/api/tensor_utils.cc

Lines changed: 2 additions & 2 deletions

@@ -33,8 +33,8 @@ TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
   }
   // TODO(b/139446230): Provide a platform header to better handle these
   // specific scenarios.
-#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
-    defined(__i386) || defined(__x86__) || defined(__X86__) || \
+#if defined(__ANDROID__) || defined(__x86_64__) || defined(__i386__) || \
+    defined(__i386) || defined(__x86__) || defined(__X86__) || \
     defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
   memset(tensor->data.raw, value, tensor->bytes);
 #else
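The change matters because `#if __ANDROID__` evaluates a possibly-undefined identifier (silently 0, and a -Wundef warning), while `defined(__ANDROID__)` is well-formed on every platform. A minimal standalone sketch of the corrected pattern (the FAST_MEMSET macro is invented for illustration):

    #include <cstdio>

    // Prefer defined(MACRO) over evaluating a macro that may not exist.
    #if defined(__ANDROID__) || defined(__x86_64__) || defined(_M_X64)
    #define FAST_MEMSET 1
    #else
    #define FAST_MEMSET 0
    #endif

    int main() {
      std::printf("fast memset path enabled: %d\n", FAST_MEMSET);
      return 0;
    }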

tensorflow/lite/core/c/common.cc

Lines changed: 14 additions & 3 deletions

@@ -113,14 +113,25 @@ TfLiteQuantization TfLiteQuantizationClone(const TfLiteQuantization& src) {
     case kTfLiteAffineQuantization: {
       dst.params = calloc(1, sizeof(TfLiteAffineQuantization));
       const TfLiteAffineQuantization* const src_params =
-          (TfLiteAffineQuantization*)(src.params);
+          reinterpret_cast<TfLiteAffineQuantization*>(src.params);
       TfLiteAffineQuantization* const dst_params =
-          (TfLiteAffineQuantization*)(dst.params);
+          reinterpret_cast<TfLiteAffineQuantization*>(dst.params);
       dst_params->quantized_dimension = src_params->quantized_dimension;
       dst_params->scale = TfLiteFloatArrayCopy(src_params->scale);
       dst_params->zero_point = TfLiteIntArrayCopy(src_params->zero_point);
       break;
     }
+    case kTfLiteBlockwiseQuantization: {
+      dst.params = calloc(1, sizeof(TfLiteBlockwiseQuantization));
+      const TfLiteBlockwiseQuantization* const src_params =
+          (TfLiteBlockwiseQuantization*)(src.params);
+      TfLiteBlockwiseQuantization* const dst_params =
+          (TfLiteBlockwiseQuantization*)(dst.params);
+      dst_params->blocksize = src_params->blocksize;
+      dst_params->scale = src_params->scale;
+      dst_params->zero_point = src_params->zero_point;
+      break;
+    }
   }
   return dst;
 }
@@ -225,7 +236,7 @@ void TfLiteTensorDataFree(TfLiteTensor* t) {
 void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
   if (quantization->type == kTfLiteAffineQuantization) {
     TfLiteAffineQuantization* q_params =
-        (TfLiteAffineQuantization*)(quantization->params);
+        reinterpret_cast<TfLiteAffineQuantization*>(quantization->params);
     if (q_params->scale) {
       TfLiteFloatArrayFree(q_params->scale);
       q_params->scale = nullptr;
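Note the asymmetry the new case relies on: affine params own heap-allocated arrays and are deep-copied, while blockwise params hold plain int32 tensor indices, so a member-wise copy suffices. A hedged sketch of what cloning a blockwise quantization does (values invented, error handling omitted):

    // src.params points at a TfLiteBlockwiseQuantization whose fields are
    // tensor indices, not arrays.
    TfLiteQuantization src;
    src.type = kTfLiteBlockwiseQuantization;
    auto* p = static_cast<TfLiteBlockwiseQuantization*>(
        calloc(1, sizeof(TfLiteBlockwiseQuantization)));
    p->scale = 7;       // index of the scales tensor
    p->zero_point = 8;  // index of the zero-points tensor
    p->blocksize = 32;
    src.params = p;

    // dst.params is a fresh allocation holding the same three indices.
    TfLiteQuantization dst = TfLiteQuantizationClone(src);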

tensorflow/lite/core/c/common.h

Lines changed: 16 additions & 0 deletions

@@ -328,6 +328,8 @@ typedef enum TfLiteQuantizationType : int {
   /// Affine quantization (with support for per-channel quantization).
   /// Corresponds to TfLiteAffineQuantization.
   kTfLiteAffineQuantization = 1,
+  /// Blockwise quantization.
+  kTfLiteBlockwiseQuantization = 2,
 } TfLiteQuantizationType;
 
 /// Structure specifying the quantization used by the tensor, if-any.
@@ -353,6 +355,20 @@ typedef struct TfLiteAffineQuantization {
   int32_t quantized_dimension;
 } TfLiteAffineQuantization;
 
+/// Parameters for blockwise quantization across the output channels dimension.
+/// For a particular value in quantized_dimension, quantized values can be
+/// converted back to float using:
+/// `real_value = scale * (quantized_value - zero_point)`
+typedef struct TfLiteBlockwiseQuantization {
+  // Index of the tensor containing the scales.
+  int32_t scale;
+  // Index of the tensor containing the zero points.
+  int32_t zero_point;
+  // Quantization blocksize.
+  int32_t blocksize;
+  int32_t quantized_dimension;
+} TfLiteBlockwiseQuantization;
+
 /// A union of pointers that points to memory for a given tensor.
 ///
 /// Do not access these members directly, if possible, use
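Since the struct stores tensor indices rather than inline arrays, a kernel must first fetch the scale and zero-point tensors before applying the formula above. A minimal sketch of per-block dequantization under that assumption (the helper and its names are hypothetical, not part of this commit; int8 values assumed):

    #include <cstdint>

    // Dequantize one row whose elements are grouped into blocks of
    // `blocksize`, each block sharing one scale and one zero point.
    void DequantizeRowBlockwise(const int8_t* quantized, int row_len,
                                const float* scales,
                                const int32_t* zero_points,
                                int32_t blocksize, float* out) {
      for (int i = 0; i < row_len; ++i) {
        const int block = i / blocksize;
        // real_value = scale * (quantized_value - zero_point)
        out[i] = scales[block] * (quantized[i] - zero_points[block]);
      }
    }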

tensorflow/lite/kernels/internal/reference/fully_connected.h

Lines changed: 109 additions & 0 deletions

@@ -16,6 +16,8 @@ limitations under the License.
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
 
 #include <algorithm>
+#include <cmath>
+#include <cstdint>
 
 #include "ruy/profiler/instrumentation.h"  // from @ruy
 #include "tensorflow/lite/kernels/internal/common.h"
@@ -62,6 +64,59 @@ inline void FullyConnected(
   }
 }
 
+// This implementation receives the scales in float and performs requant in
+// float to avoid loss of precision.
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8_t* input_data, const RuntimeShape& filter_shape,
+    const uint8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    float input_scale, float output_scale, float filter_scale,
+    uint8_t* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  // TODO(b/62193649): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      int32_t acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      const double effective_output_scale = static_cast<double>(input_scale) *
+                                            static_cast<double>(filter_scale) /
+                                            static_cast<double>(output_scale);
+      int32_t acc_scaled = static_cast<int32_t>(
+          round(static_cast<double>(acc) * effective_output_scale));
+      acc_scaled += output_offset;
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc_scaled);
+    }
+  }
+}
+
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8_t* input_data, const RuntimeShape& filter_shape,
@@ -164,6 +219,60 @@ inline void FullyConnected(
   }
 }
 
+// This implementation receives the scales in float and performs requant in
+// float to avoid loss of precision.
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8_t* input_data, const RuntimeShape& filter_shape,
+    const uint8_t* filter_data, const RuntimeShape& bias_shape,
+    const int32_t* bias_data, const RuntimeShape& output_shape,
+    float input_scale, float output_scale, float filter_scale,
+    int16_t* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  TFLITE_DCHECK_EQ(output_offset, 0);
+  // TODO(b/62193649): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32_t accum = bias_data[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < accum_depth; ++d) {
+        int16_t input_val = input_data[b * accum_depth + d] + input_offset;
+        int16_t filter_val =
+            filter_data[out_c * accum_depth + d] + filter_offset;
+        accum += filter_val * input_val;
+      }
+      const double effective_output_scale = static_cast<double>(input_scale) *
+                                            static_cast<double>(filter_scale) /
+                                            static_cast<double>(output_scale);
+      int32_t acc_scaled = static_cast<int32_t>(
+          round(static_cast<double>(accum) * effective_output_scale));
+      // Saturate, cast to int16_t, and store to output array.
+      acc_scaled = std::max(acc_scaled, output_activation_min - output_offset);
+      acc_scaled = std::min(acc_scaled, output_activation_max - output_offset);
+      acc_scaled += output_offset;
+      output_data[out_c + output_depth * b] = acc_scaled;
+    }
+  }
+}
+
 inline void ShuffledFullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const uint8_t* input_data, const RuntimeShape& weights_shape,
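The requantization step shared by both new overloads boils down to one double-precision multiply per output. A standalone sketch of that arithmetic with made-up values (0.5 * 0.25 / 1.0 gives an effective scale of 0.125):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // In the kernels above these come from the quantized tensors.
      const float input_scale = 0.5f, filter_scale = 0.25f, output_scale = 1.0f;
      const int32_t acc = 1234;         // raw int32 sum of products
      const int32_t output_offset = 3;  // output zero point
      const double effective_output_scale =
          static_cast<double>(input_scale) * static_cast<double>(filter_scale) /
          static_cast<double>(output_scale);
      int32_t acc_scaled = static_cast<int32_t>(
          std::round(static_cast<double>(acc) * effective_output_scale));
      acc_scaled += output_offset;
      // Clamp to the uint8 activation range, as the uint8_t overload does.
      const int32_t qmin = 0, qmax = 255;
      acc_scaled = std::max(acc_scaled, qmin);
      acc_scaled = std::min(acc_scaled, qmax);
      std::printf("requantized: %d\n", acc_scaled);  // 154.25 -> 154, +3 = 157
      return 0;
    }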

tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h

Lines changed: 110 additions & 0 deletions

@@ -16,6 +16,8 @@ limitations under the License.
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
 
 #include <algorithm>
+#include <cmath>
+#include <cstdint>
 
 #include "tensorflow/lite/kernels/internal/common.h"
 
@@ -74,6 +76,61 @@ void FullyConnectedPerChannel(
   }
 }
 
+// This implementation receives the scales in float and performs requant in
+// float to avoid loss of precision.
+template <typename InputType, typename WeightType, typename OutputType,
+          typename BiasType>
+void FullyConnectedPerChannel(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const InputType* input_data, const RuntimeShape& filter_shape,
+    const WeightType* filter_data, const RuntimeShape& bias_shape,
+    const BiasType* bias_data, const RuntimeShape& output_shape,
+    float input_scale, float output_scale, const float* filter_scales,
+    OutputType* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = output_shape.Dims(output_dim_count - 1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      BiasType acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += filter_val * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+
+      const float scale = filter_scales[out_c];
+      const double filter_scale = static_cast<double>(scale);
+      const double effective_output_scale = static_cast<double>(input_scale) *
+                                            filter_scale /
+                                            static_cast<double>(output_scale);
+      int32_t acc_scaled = static_cast<int32_t>(
+          round(static_cast<double>(acc) * effective_output_scale));
+
+      acc_scaled += output_offset;
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] =
+          static_cast<OutputType>(acc_scaled);
+    }
+  }
+}
+
 template <typename InputType, typename WeightType, typename OutputType,
           typename BiasType>
 void FullyConnected(const FullyConnectedParams& params,
@@ -122,6 +179,59 @@ void FullyConnected(const FullyConnectedParams& params,
   }
 }
 
+// This implementation receives the scales in float and performs requant in
+// float to avoid loss of precision.
+template <typename InputType, typename WeightType, typename OutputType,
+          typename BiasType>
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape,
+                    const InputType* input_data,
+                    const RuntimeShape& filter_shape,
+                    const WeightType* filter_data,
+                    const RuntimeShape& bias_shape, const BiasType* bias_data,
+                    const RuntimeShape& output_shape, float input_scale,
+                    float output_scale, float filter_scale,
+                    OutputType* output_data) {
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = output_shape.Dims(output_dim_count - 1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b) {
+    for (int out_c = 0; out_c < output_depth; ++out_c) {
+      BiasType acc = 0;
+      for (int d = 0; d < accum_depth; ++d) {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data) {
+        acc += bias_data[out_c];
+      }
+      const double effective_output_scale = static_cast<double>(input_scale) *
+                                            static_cast<double>(filter_scale) /
+                                            static_cast<double>(output_scale);
+      int32_t acc_scaled = static_cast<int32_t>(
+          round(static_cast<double>(acc) * effective_output_scale));
+      acc_scaled += output_offset;
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] =
+          static_cast<OutputType>(acc_scaled);
+    }
+  }
+}
+
 }  // namespace reference_integer_ops
 }  // namespace tflite
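A hedged example of how the new per-channel overload might be invoked; the shapes, data, and scales below are invented for illustration, only the signature comes from this diff. With these inputs both output channels come out as 5 (10 * 0.5 and 20 * 0.25):

    tflite::FullyConnectedParams params = {};
    params.input_offset = 0;
    params.output_offset = 0;
    params.quantized_activation_min = -128;
    params.quantized_activation_max = 127;

    const tflite::RuntimeShape input_shape({1, 4});   // 1 batch, depth 4
    const tflite::RuntimeShape filter_shape({2, 4});  // 2 output channels
    const tflite::RuntimeShape bias_shape({2});
    const tflite::RuntimeShape output_shape({1, 2});

    const int8_t input_data[4] = {1, 2, 3, 4};
    const int8_t filter_data[8] = {1, 1, 1, 1, 2, 2, 2, 2};
    const int32_t bias_data[2] = {0, 0};
    const float filter_scales[2] = {0.5f, 0.25f};  // one scale per channel
    int8_t output_data[2];

    tflite::reference_integer_ops::FullyConnectedPerChannel(
        params, input_shape, input_data, filter_shape, filter_data,
        bias_shape, bias_data, output_shape, /*input_scale=*/1.0f,
        /*output_scale=*/1.0f, filter_scales, output_data);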

tensorflow/lite/tools/flatbuffer_utils.py

Lines changed: 4 additions & 10 deletions

@@ -21,7 +21,6 @@
 """
 
 import copy
-import functools
 import random
 import re
 import struct
@@ -490,12 +489,7 @@ def get_options_as(
 ):
   raise err
 
-  @functools.singledispatch
-  def _get_opts(unused_op):
-    return None
-
-  @_get_opts.register
-  def _(op: schema_fb.Operator):
+  if isinstance(op, schema_fb.Operator):
     if not is_opt_1_type:
       enum_val = getattr(schema_fb.BuiltinOptions2, base_type_name)
       opts_creator = schema_fb.BuiltinOptions2Creator
@@ -510,8 +504,7 @@ def _(op: schema_fb.Operator):
       return None
     return opts_creator(enum_val, raw_ops)
 
-  @_get_opts.register
-  def _(op: schema_fb.OperatorT):
+  elif isinstance(op, schema_fb.OperatorT):
    if is_opt_1_type:
       raw_ops_t = op.builtinOptions
     else:
@@ -520,4 +513,5 @@ def _(op: schema_fb.OperatorT):
       return None
     return raw_ops_t
 
-  return _get_opts(op)
+  else:
+    return None
