Skip to content

Commit 574883b

Browse files
authored
[cherry-pick][XPU] add xpu gru kernel (#5687) (#5730)
1 parent b4088cc commit 574883b

File tree

3 files changed

+112
-0
lines changed

3 files changed

+112
-0
lines changed

lite/kernels/xpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
add_kernel(scale_compute_xpu XPU basic SRCS scale_compute.cc DEPS ${lite_kernel_deps})
add_kernel(dropout_compute_xpu XPU basic SRCS dropout_compute.cc DEPS ${lite_kernel_deps})
add_kernel(matmul_compute_xpu XPU basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps})
# GRU sequence kernel for XPU (implemented via xdnn::gru_unit_int16).
add_kernel(gru_compute_xpu XPU basic SRCS gru_compute.cc DEPS ${lite_kernel_deps})
add_kernel(stack_compute_xpu XPU basic SRCS stack_compute.cc DEPS ${lite_kernel_deps})
add_kernel(slice_compute_xpu XPU basic SRCS slice_compute.cc DEPS ${lite_kernel_deps})
add_kernel(cast_compute_xpu XPU basic SRCS cast_compute.cc DEPS ${lite_kernel_deps})

lite/kernels/xpu/gru_compute.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "lite/kernels/xpu/gru_compute.h"

// Register the float/NCHW GRU kernel for the XPU target under the
// variant name "def". All bound tensors are declared to live in XPU
// device memory (TARGET(kXPU)), so the framework inserts host<->device
// copies as needed around this kernel.
REGISTER_LITE_KERNEL(
    gru, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::GRUCompute, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    // H0 is bound for op-signature completeness; the kernel itself
    // requires it to be null (see GRUCompute::Run).
    .BindInput("H0", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Weight", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    // BatchGate/BatchResetHiddenPrev/BatchHidden are bound but their
    // LoD is not populated by this kernel (only Hidden's is).
    .BindOutput("BatchGate", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("BatchResetHiddenPrev", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("BatchHidden", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Hidden", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();

lite/kernels/xpu/gru_compute.h

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12+
// or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
#pragma once
17+
18+
#include "lite/backends/xpu/xpu_header_sitter.h"
19+
#include "lite/core/kernel.h"
20+
#include "lite/core/op_registry.h"
21+
22+
namespace paddle {
23+
namespace lite {
24+
namespace kernels {
25+
namespace xpu {
26+
27+
// GRU kernel for the XPU target (float precision).
//
// Processes a LoD-batched input by running each sequence through XPU's
// fused int16 GRU primitive (xdnn::gru_unit_int16), one sequence at a
// time, advancing the input/output pointers by each sequence's length.
class GRUCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
  // Quantization abs-max scales for the two weight sub-matrices
  // ([D, 2D] update/reset gates and [D, D] candidate). -1 signals
  // "unknown", letting xdnn derive them itself on first use.
  float weight_s1_abs_max = -1;
  float weight_s2_abs_max = -1;

 public:
  void Run() override {
    auto& ctx = this->ctx_->As<XPUContext>();
    auto& param = *param_.get_mutable<operators::GRUParam>();

    bool origin_mode = param.origin_mode;
    bool is_reverse = param.is_reverse;

    auto* input = param.input;
    const float* input_data = input->data<float>();
    auto* h0 = param.h0;
    // This implementation always starts from a zero initial hidden
    // state; an explicit H0 tensor is not supported on XPU.
    // (Was CHECK_EQ with C-style (void*) casts — same failure behavior.)
    CHECK(h0 == nullptr) << "h0 should be nullptr for XPU";

    auto* weight = param.weight;
    const float* weight_data = weight->data<float>();
    auto* bias = param.bias;
    const float* bias_data = bias->data<float>();

    auto* hidden = param.hidden;
    float* hidden_ptr = hidden->mutable_data<float>(TARGET(kXPU));
    const auto& hidden_dims = hidden->dims();
    int frame_size = hidden_dims[1];  // hidden width D

    // Level-0 LoD delimits the packed sequences: sequence i occupies
    // rows [input_lod[i], input_lod[i + 1]).
    auto& input_lod = input->lod()[0];
    int batch_size = input_lod.size() - 1;
    for (int i = 0; i < batch_size; i++) {
      int cur_seq_len = input_lod[i + 1] - input_lod[i];
      // const_cast is required because the xdnn C API takes non-const
      // float*; the buffers are not modified conceptually as inputs.
      int ret = xdnn::gru_unit_int16(
          ctx.GetRawContext(),                // Context *ctx,
          cur_seq_len,                        // int seq_len,
          frame_size,                         // int frame_size,
          is_reverse,                         // bool is_reverse,
          origin_mode,                        // bool origin_mode,
          const_cast<float*>(input_data),     // float *input, [seq_len, 3D]
          const_cast<float*>(weight_data),    // float *weight, [D, 3D]
          weight_s1_abs_max,                  // float& weight_s1_abs_max
          weight_s2_abs_max,                  // float& weight_s2_abs_max
          const_cast<float*>(bias_data),      // float *bias, [1, 3D]
          hidden_ptr);                        // float *hidden, [seq_len, D]
      CHECK_EQ(ret, 0) << "call xdnn::gru_unit_int16 failed!";
      // Advance past this sequence: input carries 3 gates per frame.
      input_data += cur_seq_len * 3 * frame_size;
      hidden_ptr += cur_seq_len * frame_size;
    }
    // batch_gate, batch_reset_hidden_prev lod not set
    hidden->set_lod(input->lod());
  }

  virtual ~GRUCompute() = default;
};
80+
81+
} // namespace xpu
82+
} // namespace kernels
83+
} // namespace lite
84+
} // namespace paddle

0 commit comments

Comments
 (0)