
Commit 992ffd8

[Tensorrt] update features and bug fixes (#8961)
#8863 #8943 #8942 #8940 #8954 #8953

1 parent: 92bc457

Note: large commits have some content hidden by default, so several file paths below are not shown.

59 files changed: +1809 −277 lines

lite/backends/nnadapter/nnadapter/include/nnadapter/nnadapter.h

Lines changed: 21 additions & 0 deletions
@@ -1106,6 +1106,27 @@ typedef enum {
    */
   NNADAPTER_LOG,
 
+  /**
+   * Computes the log of softmax values for input.
+   * The output is calculated using this formula:
+   *     output = log(exp(input) / reduce_sum(exp(input), axis=axis,
+   *     keepdims=true))
+   *
+   * Inputs:
+   * * 0: input, a NNADAPTER_FLOAT32,
+   *      NNADAPTER_QUANT_INT8_SYMM_PER_LAYER tensor.
+   * * 1: axis, a NNADAPTER_INT32 scalar. Defaults to 1. It represents the
+   *      dimension along which softmax will be performed. It should be in
+   *      range [-R, R), where R is the rank of input, negative value works
+   *      the same way as axis+R.
+   *
+   * Outputs:
+   * * 0: output, a tensor with the same shape and type as input.
+   *
+   * Available since version 1.
+   */
+  NNADAPTER_LOG_SOFTMAX,
+
   /**
    * Applies the Lp Normalization to the input tensor element-wise.
    * The output is calculated using this formula:
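
The formula in the new enum comment is standard log-softmax. As a quick sanity check, here is a minimal standalone sketch (not part of this commit) that evaluates it for a 1-D vector, using the same max-subtraction trick for numerical stability that the reference implementation added later in this commit uses:

// Illustrative only: numerically stable log_softmax over a 1-D vector.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f};
  // Subtract the max before exponentiating to avoid overflow.
  float max_value = *std::max_element(x.begin(), x.end());
  float sum = 0.0f;
  for (float v : x) sum += std::exp(v - max_value);
  for (float v : x) {
    // log_softmax(x_i) = (x_i - max) - log(sum_j exp(x_j - max))
    std::printf("%f\n", (v - max_value) - std::log(sum));
  }
  return 0;
}

For the input {1, 2, 3} this prints approximately -2.4076, -1.4076, -0.4076.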
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+namespace nnadapter {
+namespace operation {
+
+#define LOG_SOFTMAX_OPERATION_EXTRACT_INPUTS_OUTPUTS                     \
+  auto& input_operands = operation->input_operands;                     \
+  auto& output_operands = operation->output_operands;                   \
+  auto input_count = input_operands.size();                             \
+  auto output_count = output_operands.size();                           \
+  NNADAPTER_CHECK_EQ(input_count, 2);                                   \
+  NNADAPTER_CHECK_EQ(output_count, 1);                                  \
+  /* Input */                                                           \
+  auto input_operand = input_operands[0];                               \
+  NNADAPTER_VLOG(5) << "input: " << OperandToString(input_operand);     \
+  /* Axis */                                                            \
+  auto axis = *reinterpret_cast<int32_t*>(input_operands[1]->buffer);   \
+  if (axis < 0) {                                                       \
+    axis += input_operand->type.dimensions.count;                       \
+  }                                                                     \
+  NNADAPTER_VLOG(5) << "axis=" << axis;                                 \
+  /* Output */                                                          \
+  auto output_operand = output_operands[0];                             \
+  NNADAPTER_VLOG(5) << "output: " << OperandToString(output_operand);
+
+}  // namespace operation
+}  // namespace nnadapter
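
This macro is expanded at the top of each driver's converter; a minimal sketch of the pattern (illustrative only, the Huawei Ascend NPU converter later in this commit is the real consumer):

int ConvertLogSoftmax(Converter* converter, core::Operation* operation) {
  LOG_SOFTMAX_OPERATION_EXTRACT_INPUTS_OUTPUTS
  // input_operand, a non-negative axis (the macro adds the input rank to
  // negative values), and output_operand are now in scope; backend-specific
  // conversion code follows here.
  return NNADAPTER_NO_ERROR;
}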
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+#include "operation/math/dequantize.h"
+#include "operation/math/quantize.h"
+#include "operation/math/utility.h"
+
+namespace nnadapter {
+namespace operation {
+namespace math {
+
+template <typename T>
+static int log_softmax(const T* input_data,
+                       const std::vector<int32_t>& input_shape,
+                       int axis,
+                       T* output_data) {
+  if (!input_data || !output_data) {
+    return -1;
+  }
+  auto input_rank = input_shape.size();
+  if (axis < 0) {
+    axis += input_rank;
+  }
+  auto axis_count = input_shape[axis];
+  auto outer_count = shape_production(shape_slice(input_shape, 0, axis));
+  auto inner_count =
+      shape_production(shape_slice(input_shape, axis + 1, input_rank));
+  auto compute_count = outer_count * inner_count;
+  for (int64_t i = 0; i < compute_count; i++) {
+    auto inner_index = i % inner_count;
+    auto outer_index = (i / inner_count) * axis_count;
+    auto start = outer_index * inner_count + inner_index;
+    auto offset = start;
+    auto max_value = std::numeric_limits<T>::lowest();
+    for (int j = 0; j < axis_count; j++) {
+      max_value =
+          input_data[offset] > max_value ? input_data[offset] : max_value;
+      offset += inner_count;
+    }
+    offset = start;
+    T sum_value = 0;
+    for (int j = 0; j < axis_count; j++) {
+      output_data[offset] = std::exp(input_data[offset] - max_value);
+      sum_value += output_data[offset];
+      offset += inner_count;
+    }
+    offset = start;
+    for (int j = 0; j < axis_count; j++) {
+      output_data[offset] /= sum_value;
+      output_data[offset] = std::log(output_data[offset]);
+      offset += inner_count;
+    }
+  }
+  return 0;
+}
+
+int log_softmax(const int8_t* input_data,
+                const std::vector<int32_t>& input_shape,
+                float input_scale,
+                int axis,
+                int8_t* output_data,
+                float output_scale);
+
+}  // namespace math
+}  // namespace operation
+}  // namespace nnadapter
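
The int8_t overload is only declared here; its definition is not in this hunk. A plausible sketch (an assumption, not the commit's actual code) dequantizes to float, reuses the template above, and requantizes with the output scale:

// Assumed implementation sketch; the real one lives in the matching .cc file
// and may use the dequantize()/quantize() helpers included above instead.
int log_softmax(const int8_t* input_data,
                const std::vector<int32_t>& input_shape,
                float input_scale,
                int axis,
                int8_t* output_data,
                float output_scale) {
  int64_t count = shape_production(input_shape);
  std::vector<float> x(count), y(count);
  for (int64_t i = 0; i < count; i++) x[i] = input_data[i] * input_scale;
  int status = log_softmax(x.data(), input_shape, axis, y.data());
  if (status != 0) return status;
  for (int64_t i = 0; i < count; i++) {
    // Round to the nearest representable int8 value and saturate.
    int v = static_cast<int>(std::round(y[i] / output_scale));
    output_data[i] = static_cast<int8_t>(std::min(127, std::max(-128, v)));
  }
  return 0;
}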

lite/backends/nnadapter/nnadapter/include/nnadapter/operation/split.h

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ namespace operation {
   } else {                                                              \
     NNADAPTER_VLOG(5) << "axis: " << OperandToString(axis_operand);     \
   }                                                                     \
+  NNADAPTER_CHECK_LT(axis, input_operand->type.dimensions.count);       \
   /* Split */                                                           \
   auto split_operand = input_operands[2];                               \
   std::vector<int> split;                                               \

lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/converter/all.h

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ REGISTER_CONVERTER(LEAKY_RELU, ConvertLeakyRelu)
 REGISTER_CONVERTER(LESS, ConvertComparisons)
 REGISTER_CONVERTER(LESS_EQUAL, ConvertComparisons)
 REGISTER_CONVERTER(LOG, ConvertUnaryActivations)
+REGISTER_CONVERTER(LOG_SOFTMAX, ConvertLogSoftmax)
 REGISTER_CONVERTER(LP_NORMALIZATION, ConvertLpNormalization)
 REGISTER_CONVERTER(MAT_MUL, ConvertMatMul)
 REGISTER_CONVERTER(MAX, ConvertElementwise)
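
Registries like this all.h follow the X-macro pattern: the header carries no definition of REGISTER_CONVERTER itself, so each including file defines it first and undefines it after. A hedged sketch of a typical consumer (the exact definitions in the driver sources may differ):

// Declare one converter function per registered operation (illustrative).
#define REGISTER_CONVERTER(__op_type__, __func_name__) \
  int __func_name__(Converter* converter, core::Operation* operation);
#include "driver/huawei_ascend_npu/converter/all.h"  // NOLINT
#undef REGISTER_CONVERTER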
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "operation/log_softmax.h"
+#include "driver/huawei_ascend_npu/converter/converter.h"
+#include "utility/debug.h"
+#include "utility/logging.h"
+
+namespace nnadapter {
+namespace huawei_ascend_npu {
+
+int ConvertLogSoftmax(Converter* converter, core::Operation* operation) {
+  LOG_SOFTMAX_OPERATION_EXTRACT_INPUTS_OUTPUTS
+
+  // Convert to GE operators
+  auto input_operator = converter->GetMappedOperator(input_operand);
+  if (!input_operator) {
+    input_operator = converter->ConvertOperand(input_operand);
+  }
+  auto log_softmax_op =
+      converter->AddOperator<ge::op::LogSoftmaxV2>(output_operand);
+  log_softmax_op->set_attr_axes({axis});
+  SET_INPUT(log_softmax_op, logits, input_operator);
+  MAP_OUTPUT(log_softmax_op, logsoftmax, output_operand);
+  return NNADAPTER_NO_ERROR;
+}
+
+}  // namespace huawei_ascend_npu
+}  // namespace nnadapter

lite/backends/nnadapter/nnadapter/src/driver/nvidia_tensorrt/calibrator.cc

Lines changed: 5 additions & 4 deletions
@@ -54,9 +54,9 @@ Int8EntropyCalibrator::Int8EntropyCalibrator(int batch_size,
 
 bool Int8EntropyCalibrator::getBatch(void* bindings[],
                                      const char* names[],
-                                     int nbBindings) {
+                                     int nb_bindings) TRT_NOEXCEPT {
   // TODO(zhupengyang): support multi inputs
-  NNADAPTER_CHECK_EQ(nbBindings, 1);
+  NNADAPTER_CHECK_EQ(nb_bindings, 1);
   if (static_cast<size_t>(index_) >= input_file_names_.at(0).size()) {
     return false;
   }
@@ -89,7 +89,8 @@ bool Int8EntropyCalibrator::getBatch(void* bindings[],
   return true;
 }
 
-const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) {
+const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length)
+    TRT_NOEXCEPT {
   if (table_path_.empty()) {
     NNADAPTER_LOG(WARNING) << "No calibration table file is set. New "
                               "calibration table will be generated.";
@@ -106,7 +107,7 @@ const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) {
 }
 
 void Int8EntropyCalibrator::writeCalibrationCache(const void* cache,
-                                                  size_t length) {
+                                                  size_t length) TRT_NOEXCEPT {
   if (table_path_.empty()) {
     NNADAPTER_LOG(WARNING) << "No calibration table will be saved because "
                               "table_path is not found.";

lite/backends/nnadapter/nnadapter/src/driver/nvidia_tensorrt/calibrator.h

Lines changed: 8 additions & 5 deletions
@@ -27,12 +27,15 @@ class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
   Int8EntropyCalibrator(int batch_size,
                         std::string dataset_path,
                         std::string table_path);
-  virtual ~Int8EntropyCalibrator() {}
+  virtual ~Int8EntropyCalibrator() TRT_NOEXCEPT {}
 
-  int getBatchSize() const override { return batch_size_; }
-  bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
-  const void* readCalibrationCache(size_t& length) override;
-  void writeCalibrationCache(const void* cache, size_t length) override;
+  int getBatchSize() const TRT_NOEXCEPT override { return batch_size_; }
+  bool getBatch(void* bindings[],
+                const char* names[],
+                int nb_bindings) TRT_NOEXCEPT override;
+  const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override;
+  void writeCalibrationCache(const void* cache,
+                             size_t length) TRT_NOEXCEPT override;
 
  private:
   int batch_size_{1};
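
For context, a calibrator like this is handed to the builder config when INT8 mode is enabled, and TensorRT then pulls batches through getBatch() while building the engine. A minimal usage sketch (the paths, the batch size, and the pre-existing `builder` pointer are made-up assumptions):

// Sketch: wiring the calibrator into a TensorRT builder config.
Int8EntropyCalibrator calibrator(1, "./calib_dataset", "./calib.table");
auto* config = builder->createBuilderConfig();
config->setFlag(nvinfer1::BuilderFlag::kINT8);
config->setInt8Calibrator(&calibrator);
// The calibrator must stay alive until engine building completes.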

lite/backends/nnadapter/nnadapter/src/driver/nvidia_tensorrt/converter/all.h

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,7 @@ REGISTER_CONVERTER(CAST, ConvertCast)
 REGISTER_CONVERTER(CLIP, ConvertClip)
 REGISTER_CONVERTER(CONCAT, ConvertConcat)
 REGISTER_CONVERTER(CONV_2D, ConvertConv2D)
+REGISTER_CONVERTER(CONV_2D_TRANSPOSE, ConvertConv2DTranspose)
 REGISTER_CONVERTER(DIV, ConvertElementwise)
 REGISTER_CONVERTER(EQUAL, ConvertComparisons)
 REGISTER_CONVERTER(EXP, ConvertUnaryOperations)
@@ -33,6 +34,7 @@ REGISTER_CONVERTER(FULLY_CONNECTED, ConvertFullyConnected)
 REGISTER_CONVERTER(HARD_SWISH, ConvertHardSwish)
 REGISTER_CONVERTER(LEAKY_RELU, ConvertLeakyRelu)
 REGISTER_CONVERTER(LOG, ConvertUnaryOperations)
+REGISTER_CONVERTER(LOG_SOFTMAX, ConvertLogSoftmax)
 REGISTER_CONVERTER(MAT_MUL, ConvertMatMul)
 REGISTER_CONVERTER(MAX_POOL_2D, ConvertPool2D)
 REGISTER_CONVERTER(MUL, ConvertElementwise)
@@ -49,6 +51,7 @@ REGISTER_CONVERTER(SIGMOID, ConvertActivations)
 REGISTER_CONVERTER(SLICE, ConvertSlice)
 REGISTER_CONVERTER(SOFTMAX, ConvertSoftmax)
 REGISTER_CONVERTER(SQUEEZE, ConvertSqueeze)
+REGISTER_CONVERTER(SPLIT, ConvertSplit)
 REGISTER_CONVERTER(STACK, ConvertStack)
 REGISTER_CONVERTER(SUB, ConvertElementwise)
 REGISTER_CONVERTER(SWISH, ConvertSwish)

lite/backends/nnadapter/nnadapter/src/driver/nvidia_tensorrt/converter/batch_normalization.cc

Lines changed: 44 additions & 11 deletions
@@ -39,17 +39,37 @@ int ConvertBatchNormalization(Converter* converter,
   NNADAPTER_CHECK(bias_ptr);
   NNADAPTER_CHECK(mean_ptr);
   NNADAPTER_CHECK(var_ptr);
-  // prepare data
-  auto x_dim = input_operand->type.dimensions;
-  NNADAPTER_CHECK_EQ(scale_operand->type.dimensions.data[0], x_dim.data[1]);
-  NNADAPTER_CHECK_EQ(bias_operand->type.dimensions.data[0], x_dim.data[1]);
-  NNADAPTER_CHECK_EQ(mean_operand->type.dimensions.data[0], x_dim.data[1]);
-  NNADAPTER_CHECK_EQ(variance_operand->type.dimensions.data[0], x_dim.data[1]);
-  std::vector<float> fuse_scale(x_dim.data[1], 0);
-  std::vector<float> fuse_bias(x_dim.data[1], 0);
+  auto input_tensor_dim = input_tensor->getDimensions();
+  // Add shuffle operator to reshape data into 3 dimensions
+  if (input_tensor_dim.nbDims < 3) {
+    nvinfer1::Dims unsqueeze_shape;
+    unsqueeze_shape.nbDims = 3;
+    for (int i = 0; i < 3; i++) {
+      if (i < input_tensor_dim.nbDims) {
+        unsqueeze_shape.d[i] =
+            input_tensor_dim.d[i] < 0 ? 0 : input_tensor_dim.d[i];
+      } else {
+        unsqueeze_shape.d[i] = 1;
+      }
+    }
+    auto unsqueeze_layer = converter->network()->addShuffle(*input_tensor);
+    unsqueeze_layer->setReshapeDimensions(unsqueeze_shape);
+    input_tensor = unsqueeze_layer->getOutput(0);
+  }
+  // Add batch_normalization op using ScaleNd operator
+  NNADAPTER_CHECK_EQ(scale_operand->type.dimensions.data[0],
+                     input_tensor_dim.d[0]);
+  NNADAPTER_CHECK_EQ(bias_operand->type.dimensions.data[0],
+                     input_tensor_dim.d[0]);
+  NNADAPTER_CHECK_EQ(mean_operand->type.dimensions.data[0],
+                     input_tensor_dim.d[0]);
+  NNADAPTER_CHECK_EQ(variance_operand->type.dimensions.data[0],
+                     input_tensor_dim.d[0]);
+  std::vector<float> fuse_scale(input_tensor_dim.d[0], 0);
+  std::vector<float> fuse_bias(input_tensor_dim.d[0], 0);
   auto fuse_scale_ptr = fuse_scale.data();
   auto fuse_bias_ptr = fuse_bias.data();
-  for (int i = 0; i < x_dim.data[1]; i++) {
+  for (int i = 0; i < input_tensor_dim.d[0]; i++) {
     fuse_scale_ptr[i] = scale_ptr[i] / sqrtf(var_ptr[i] + epsilon);
     fuse_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * fuse_scale_ptr[i];
   }
@@ -58,9 +78,9 @@ int ConvertBatchNormalization(Converter* converter,
   const float* power_ptr = nullptr;
   // add scale op
   nvinfer1::Weights scale_w =
-      converter->AddWeights(fuse_scale_ptr_const, x_dim.data[1]);
+      converter->AddWeights(fuse_scale_ptr_const, input_tensor_dim.d[0]);
   nvinfer1::Weights shift_w =
-      converter->AddWeights(fuse_bias_ptr_const, x_dim.data[1]);
+      converter->AddWeights(fuse_bias_ptr_const, input_tensor_dim.d[0]);
   nvinfer1::Weights power_w = converter->AddWeights(power_ptr, 0);
   auto layer = converter->network()->addScaleNd(*input_tensor,
                                                 nvinfer1::ScaleMode::kCHANNEL,
@@ -69,6 +89,19 @@ int ConvertBatchNormalization(Converter* converter,
                                                 power_w,
                                                 0);
   auto output_tensor = layer->getOutput(0);
+  // Add shuffle operator to recover shape
+  if (input_tensor_dim.nbDims < 3) {
+    nvinfer1::Dims squeeze_shape;
+    squeeze_shape.nbDims = input_tensor_dim.nbDims;
+    for (int i = 0; i < squeeze_shape.nbDims; i++) {
+      squeeze_shape.d[i] =
+          input_tensor_dim.d[i] < 0 ? 0 : input_tensor_dim.d[i];
+    }
+    auto squeeze_layer =
+        converter->network()->addShuffle(*(layer->getOutput(0)));
+    squeeze_layer->setReshapeDimensions(squeeze_shape);
+    output_tensor = squeeze_layer->getOutput(0);
+  }
   converter->UpdateTensorMap(output_operand, output_tensor);
   return NNADAPTER_NO_ERROR;
 }
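
The per-channel fusion computed above follows from rewriting batch normalization as a single affine transform:

    BN(x) = scale * (x - mean) / sqrt(var + epsilon) + bias
          = fuse_scale * x + fuse_bias
    where fuse_scale = scale / sqrt(var + epsilon)
          fuse_bias  = bias - mean * fuse_scale

which is exactly what fuse_scale_ptr[i] and fuse_bias_ptr[i] hold, so a single ScaleNd layer in kCHANNEL mode (scale = fuse_scale, shift = fuse_bias, power unused) implements the whole operation. The shuffle layers exist because, per the diff's own comments, inputs with fewer than 3 dimensions must be unsqueezed before the scale layer and squeezed back afterwards.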
