Skip to content

Commit 9a344f6

Browse files
authored
fix sampling_id, fix xpu python whl, fix quant_dequant pass (#9636) (#9648)
1 parent 18f668d commit 9a344f6

File tree

5 files changed

+40
-20
lines changed

5 files changed

+40
-20
lines changed

lite/api/python/setup.py.in

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,12 @@ if '${WITH_MKL}' == 'ON' and '${WITH_STATIC_MKL}' == 'ON' and os.name != 'nt':
6262
shutil.copy('${MKLML_SHARED_IOMP_LIB}', LIB_PATH)
6363
PACKAGE_DATA['paddlelite.libs'] += ['libiomp5.so']
6464

65+
if '${LITE_WITH_XPU}' == 'ON':
66+
shutil.copy('${XPU_INSTALL_DIR}/xpu/xdnn/so/libxpuapi.so', LIB_PATH)
67+
PACKAGE_DATA['paddlelite.libs'] += ['libxpuapi.so']
68+
shutil.copy('${XPU_INSTALL_DIR}/xpu/xre/so/libxpurt.so', LIB_PATH)
69+
PACKAGE_DATA['paddlelite.libs'] += ['libxpurt.so']
70+
6571
if '${LITE_WITH_NNADAPTER}' == 'ON':
6672
shutil.copy('${PADDLE_BINARY_DIR}/lite/backends/nnadapter/nnadapter/src/libnnadapter.so', LIB_PATH)
6773
PACKAGE_DATA['paddlelite.libs'] += ['libnnadapter.so']

lite/core/optimizer/mir/fusion/quant_dequant_op_fuser.cc

Lines changed: 25 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,12 @@ static float FindAbsMax(const float* input, int size) {
5656
template <typename T>
5757
void QuantizeTensorInPlace(Tensor* input, float scale) {
5858
if (input->precision() != PRECISION(kFloat)) {
59-
LOG(FATAL) << "Error: the precision of input should be float. actual is "
60-
<< PrecisionToStr(input->precision());
59+
LOG(WARNING)
60+
<< "Warning: the precision of input should be float, but actual is "
61+
<< PrecisionToStr(input->precision())
62+
<< ". There may be several ops share the same weight and the weight "
63+
"has already been transed to int8.";
64+
return;
6165
}
6266
Tensor temp_tensor;
6367
temp_tensor.CopyDataFrom(*input);
@@ -76,24 +80,22 @@ void QuantizeTensorInPlace(Tensor* input,
7680
const std::vector<float>& scales,
7781
int quant_axis) {
7882
if (input->precision() != PRECISION(kFloat)) {
79-
LOG(FATAL) << "Error: the precision of input should be float. actual is "
80-
<< PrecisionToStr(input->precision());
81-
}
82-
if (quant_axis != 0 && quant_axis != 1) {
83-
LOG(FATAL) << "Input error: quant_axis should be 0 or 1.";
83+
LOG(WARNING)
84+
<< "Warning: the precision of input should be float, but actual is "
85+
<< PrecisionToStr(input->precision())
86+
<< ". There may be several ops share the same weight and the weight "
87+
"has already been transed to int8.";
88+
return;
8489
}
90+
8591
Tensor origin_tensor;
8692
origin_tensor.CopyDataFrom(*input);
8793
input->clear();
8894

8995
auto dims = origin_tensor.dims();
9096
const int64_t channel = dims[quant_axis];
91-
if (dims.size() < 2) {
92-
LOG(FATAL) << "Error: the rank of input tensor should at least be 2.";
93-
}
94-
if (scales.size() != channel) {
95-
LOG(FATAL) << "Params Error: scale size should be equal to channel.";
96-
}
97+
CHECK_GE(dims.size(), 2);
98+
CHECK_EQ(scales.size(), channel);
9799
float* origin_data = origin_tensor.mutable_data<float>();
98100
T* quantized_data = input->mutable_data<T>();
99101

@@ -122,15 +124,23 @@ void QuantizeTensorInPlace(Tensor* input,
122124
});
123125
}
124126
}
127+
} else {
128+
LOG(FATAL)
129+
<< "Only support quant_axis is 0 or 1, but received quant_axis is "
130+
<< quant_axis;
125131
}
126132
}
127133

128134
// Per-layer cast tensor
129135
template <typename T>
130136
static void TensorCaster(Tensor* input) {
131137
if (input->precision() != PRECISION(kFloat)) {
132-
LOG(FATAL) << "Error: the precision of input should be float. actual is "
133-
<< PrecisionToStr(input->precision());
138+
LOG(WARNING)
139+
<< "Warning: the precision of input should be float, but actual is "
140+
<< PrecisionToStr(input->precision())
141+
<< ". There may be several ops share the same weight and the weight "
142+
"has already been transed to int8.";
143+
return;
134144
}
135145
Tensor temp_tensor;
136146
temp_tensor.CopyDataFrom(*input);

lite/kernels/arm/concat_compute.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ void ConcatCompute::Run() {
8080
if (type == PRECISION(kUnk)) {
8181
type = tensor->precision();
8282
} else {
83-
VLOG(4) << "type: " << PrecisionRepr(type)
83+
VLOG(7) << "type: " << PrecisionRepr(type)
8484
<< ", tensor: " << PrecisionRepr(tensor->precision());
8585
#ifdef ENABLE_ARM_FP16
8686
if (type != tensor->precision()) {

lite/kernels/host/sampling_id_compute.cc

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,20 +19,20 @@ namespace lite {
1919
namespace kernels {
2020
namespace host {
2121

22-
template <class T>
22+
template <typename T>
2323
void SamplingIdCompute<T>::PrepareForRun() {
2424
auto& param = this->template Param<param_t>();
2525
int seed = param.seed;
2626

27-
auto engine_ = std::make_shared<std::mt19937_64>();
27+
engine_ = std::make_shared<std::mt19937_64>();
2828
if (seed == 0) {
2929
std::random_device rd;
3030
seed = ((((uint64_t)rd()) << 32) + rd()) & 0x1FFFFFFFFFFFFF;
3131
}
3232
engine_->seed(seed);
3333
}
3434

35-
template <class T>
35+
template <typename T>
3636
void SamplingIdCompute<T>::Run() {
3737
auto& param = this->template Param<param_t>();
3838
const lite::Tensor* x = param.x;
@@ -44,6 +44,10 @@ void SamplingIdCompute<T>::Run() {
4444
auto out_data = out->mutable_data<int64_t>();
4545
std::uniform_real_distribution<T> dist(static_cast<T>(param.min),
4646
static_cast<T>(param.max));
47+
int seed = param.seed;
48+
if (seed != 0) {
49+
engine_->seed(seed);
50+
}
4751

4852
for (int64_t i = 0; i < batch_size; ++i) {
4953
T r = dist(*engine_);

lite/kernels/host/sampling_id_compute.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ namespace lite {
2323
namespace kernels {
2424
namespace host {
2525

26-
template <class T>
26+
template <typename T>
2727
class SamplingIdCompute
2828
: public KernelLite<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)> {
2929
public:

0 commit comments

Comments (0)