PaddlePaddle
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/paddle/fluid/operators/fusion_lstm_op.cc b/paddle/fluid/operators/fusion_lstm_op.cc
Lines changed: 102 additions & 261 deletions
diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
Lines changed: 4 additions & 2 deletions
diff --git a/paddle/fluid/operators/math/cpu_lstm_compute.cc b/paddle/fluid/operators/math/cpu_lstm_compute.cc
Lines changed: 0 additions & 43 deletions
diff --git a/paddle/fluid/operators/math/cpu_lstm_compute.h b/paddle/fluid/operators/math/cpu_lstm_compute.h
Lines changed: 0 additions & 64 deletions
diff --git a/paddle/fluid/operators/math/cpu_vec.h b/paddle/fluid/operators/math/cpu_vec.h
Lines changed: 16 additions & 19 deletions
diff --git a/paddle/fluid/operators/math/cpu_vec_test.cc b/paddle/fluid/operators/math/cpu_vec_test.cc
Lines changed: 6 additions & 10 deletions
diff --git a/paddle/fluid/operators/math/jit_kernel.cc b/paddle/fluid/operators/math/jit_kernel.cc
Lines changed: 41 additions & 0 deletions
@@ -300,7 +300,7 @@ op_library(flatten_op DEPS reshape_op)
 op_library(sequence_pad_op DEPS sequence_padding)
 op_library(unstack_op DEPS stack_op)
 op_library(fake_quantize_op DEPS memory)
-op_library(fusion_lstm_op DEPS cpu_lstm_compute)
+op_library(fusion_lstm_op DEPS jit_kernel)
 if (WITH_GPU)
     op_library(conv_op DEPS vol2col depthwise_conv im2col)
     op_library(layer_norm_op DEPS cub)
 
@@ -45,8 +45,6 @@ math_library(im2col)
 if (NOT WIN32) # windows do not support avx functions yet.
 math_library(gru_compute DEPS activation_functions math_function)
 math_library(lstm_compute DEPS activation_functions)
-# TODO(TJ): ugly workaround, clean me
-cc_library(cpu_lstm_compute SRCS cpu_lstm_compute.cc DEPS activation_functions cblas cpu_info)
 endif (NOT WIN32)
 
 cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context)
@@ -76,3 +74,7 @@ if(WITH_GPU)
 endif()
 cc_test(concat_test SRCS concat_test.cc DEPS concat)
 cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
+cc_library(jit_kernel 
+    SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_lstm.cc
+    DEPS cpu_info cblas activation_functions)
+cc_test(jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel)
@@ -125,10 +125,8 @@ inline void vec_scal<float, platform::jit::avx2>(const int n, const float a,
 }
 
 template <>
-inline void vec_scal<float, platform::jit::avx512_common>(const int n,
-                                                          const float a,
-                                                          const float* x,
-                                                          float* y) {
+inline void vec_scal<float, platform::jit::avx512f>(const int n, const float a,
+                                                    const float* x, float* y) {
   // TODO(TJ): enable me
   vec_scal<float, platform::jit::avx2>(n, a, x, y);
 }
@@ -181,10 +179,10 @@ inline void vec_bias_sub<float, platform::jit::avx2>(const int n, const float a,
 }
 
 template <>
-inline void vec_bias_sub<float, platform::jit::avx512_common>(const int n,
-                                                              const float a,
-                                                              const float* x,
-                                                              float* y) {
+inline void vec_bias_sub<float, platform::jit::avx512f>(const int n,
+                                                        const float a,
+                                                        const float* x,
+                                                        float* y) {
   // TODO(TJ): enable me
   vec_bias_sub<float, platform::jit::avx2>(n, a, x, y);
 }
@@ -242,7 +240,7 @@ inline void vec_cross<float, platform::jit::avx2>(const int n, const float* x,
 }
 
 template <>
-inline void vec_cross<float, platform::jit::avx512_common>(
+inline void vec_cross<float, platform::jit::avx512f>(
     const int n, const float* x, const float* y, const float* z, float* out) {
   // TODO(TJ): enable me
   vec_cross<float, platform::jit::avx>(n, x, y, z, out);
@@ -296,10 +294,10 @@ inline void vec_add_bias<float, platform::jit::avx2>(const int n, const float a,
 }
 
 template <>
-inline void vec_add_bias<float, platform::jit::avx512_common>(const int n,
-                                                              const float a,
-                                                              const float* x,
-                                                              float* y) {
+inline void vec_add_bias<float, platform::jit::avx512f>(const int n,
+                                                        const float a,
+                                                        const float* x,
+                                                        float* y) {
   // TODO(TJ): enable me
   vec_add_bias<float, platform::jit::avx2>(n, a, x, y);
 }
@@ -390,9 +388,9 @@ inline void vec_sigmoid<float, platform::jit::avx2>(const int n, const float* x,
 }
 
 template <>
-inline void vec_sigmoid<float, platform::jit::avx512_common>(const int n,
-                                                             const float* x,
-                                                             float* y) {
+inline void vec_sigmoid<float, platform::jit::avx512f>(const int n,
+                                                       const float* x,
+                                                       float* y) {
   // TODO(TJ): enable me
   vec_sigmoid<float, platform::jit::avx2>(n, x, y);
 }
@@ -454,9 +452,8 @@ inline void vec_relu<float, platform::jit::avx2>(const int n, const float* x,
 }
 
 template <>
-inline void vec_relu<float, platform::jit::avx512_common>(const int n,
-                                                          const float* x,
-                                                          float* y) {
+inline void vec_relu<float, platform::jit::avx512f>(const int n, const float* x,
+                                                    float* y) {
   // TODO(TJ): enable me
   vec_relu<float, platform::jit::avx2>(n, x, y);
 }
 
@@ -110,7 +110,7 @@ TEST(CpuVecTest, sigmoid) {
     TestAndBench<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
     TestAndBench<float>(sz, vec_sigmoid<float, jit::avx>, ref_sigmoid<float>);
     TestAndBench<float>(sz, vec_sigmoid<float, jit::avx2>, ref_sigmoid<float>);
-    TestAndBench<float>(sz, vec_sigmoid<float, jit::avx512_common>,
+    TestAndBench<float>(sz, vec_sigmoid<float, jit::avx512f>,
                         ref_sigmoid<float>);
   }
   TestAndBench<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
@@ -123,8 +123,7 @@ TEST(CpuVecTest, tanh) {
     TestAndBench<float>(sz, vec_tanh<float>, ref_tanh<float>);
     TestAndBench<float>(sz, vec_tanh<float, jit::avx>, ref_tanh<float>);
     TestAndBench<float>(sz, vec_tanh<float, jit::avx2>, ref_tanh<float>);
-    TestAndBench<float>(sz, vec_tanh<float, jit::avx512_common>,
-                        ref_tanh<float>);
+    TestAndBench<float>(sz, vec_tanh<float, jit::avx512f>, ref_tanh<float>);
   }
   TestAndBench<double>(30, vec_tanh<double>, ref_tanh<double>);
 }
@@ -136,8 +135,7 @@ TEST(CpuVecTest, relu) {
     TestAndBench<float>(sz, vec_relu<float>, ref_relu<float>);
     TestAndBench<float>(sz, vec_relu<float, jit::avx>, ref_relu<float>);
     TestAndBench<float>(sz, vec_relu<float, jit::avx2>, ref_relu<float>);
-    TestAndBench<float>(sz, vec_relu<float, jit::avx512_common>,
-                        ref_relu<float>);
+    TestAndBench<float>(sz, vec_relu<float, jit::avx512f>, ref_relu<float>);
   }
   TestAndBench<double>(30, vec_relu<double>, ref_relu<double>);
 }
@@ -170,7 +168,7 @@ TEST(CpuVecTest, inplace_sigmoid) {
     TestInplace<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
     TestInplace<float>(sz, vec_sigmoid<float, jit::avx>, ref_sigmoid<float>);
     TestInplace<float>(sz, vec_sigmoid<float, jit::avx2>, ref_sigmoid<float>);
-    TestInplace<float>(sz, vec_sigmoid<float, jit::avx512_common>,
+    TestInplace<float>(sz, vec_sigmoid<float, jit::avx512f>,
                        ref_sigmoid<float>);
   }
   TestInplace<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
@@ -183,8 +181,7 @@ TEST(CpuVecTest, inplace_tanh) {
     TestInplace<float>(sz, vec_tanh<float>, ref_tanh<float>);
     TestInplace<float>(sz, vec_tanh<float, jit::avx>, ref_tanh<float>);
     TestInplace<float>(sz, vec_tanh<float, jit::avx2>, ref_tanh<float>);
-    TestInplace<float>(sz, vec_tanh<float, jit::avx512_common>,
-                       ref_tanh<float>);
+    TestInplace<float>(sz, vec_tanh<float, jit::avx512f>, ref_tanh<float>);
   }
   TestInplace<double>(30, vec_tanh<double>, ref_tanh<double>);
 }
@@ -196,8 +193,7 @@ TEST(CpuVecTest, inplace_relu) {
     TestInplace<float>(sz, vec_relu<float>, ref_relu<float>);
     TestInplace<float>(sz, vec_relu<float, jit::avx>, ref_relu<float>);
     TestInplace<float>(sz, vec_relu<float, jit::avx2>, ref_relu<float>);
-    TestInplace<float>(sz, vec_relu<float, jit::avx512_common>,
-                       ref_relu<float>);
+    TestInplace<float>(sz, vec_relu<float, jit::avx512f>, ref_relu<float>);
   }
   TestInplace<double>(30, vec_relu<double>, ref_relu<double>);
 }
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/math/jit_kernel.h"
+#include <iostream>
+#include <string>
+
+namespace paddle {
+namespace operators {
+namespace math {
+namespace jitkernel {
+
+namespace jit = platform::jit;
+
+KernelPool& KernelPool::Instance() {
+  thread_local KernelPool pool;  // one pool per thread; implies static here
+  return pool;
+}
+
+// Returns the kernel stored under `key`, or nullptr when no entry exists.
+std::shared_ptr<const Kernel> KernelPool::Get(const std::string& key) const {
+  const auto it = kers_.find(key);  // one lookup instead of find() + at()
+  if (it == kers_.end()) return nullptr;
+  return it->second;
+}
+
+}  // namespace jitkernel
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
Original file line number	Diff line number	Diff line change
`@@ -110,7 +110,7 @@ TEST(CpuVecTest, sigmoid) {`
`110`	`110`	`TestAndBench<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);`
`111`	`111`	`TestAndBench<float>(sz, vec_sigmoid<float, jit::avx>, ref_sigmoid<float>);`
`112`	`112`	`TestAndBench<float>(sz, vec_sigmoid<float, jit::avx2>, ref_sigmoid<float>);`
`113`		`- TestAndBench<float>(sz, vec_sigmoid<float, jit::avx512_common>,`
	`113`	`+ TestAndBench<float>(sz, vec_sigmoid<float, jit::avx512f>,`
`114`	`114`	`ref_sigmoid<float>);`
`115`	`115`	`}`
`116`	`116`	`TestAndBench<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);`
`@@ -123,8 +123,7 @@ TEST(CpuVecTest, tanh) {`
`123`	`123`	`TestAndBench<float>(sz, vec_tanh<float>, ref_tanh<float>);`
`124`	`124`	`TestAndBench<float>(sz, vec_tanh<float, jit::avx>, ref_tanh<float>);`
`125`	`125`	`TestAndBench<float>(sz, vec_tanh<float, jit::avx2>, ref_tanh<float>);`
`126`		`- TestAndBench<float>(sz, vec_tanh<float, jit::avx512_common>,`
`127`		`- ref_tanh<float>);`
	`126`	`+ TestAndBench<float>(sz, vec_tanh<float, jit::avx512f>, ref_tanh<float>);`
`128`	`127`	`}`
`129`	`128`	`TestAndBench<double>(30, vec_tanh<double>, ref_tanh<double>);`
`130`	`129`	`}`
`@@ -136,8 +135,7 @@ TEST(CpuVecTest, relu) {`
`136`	`135`	`TestAndBench<float>(sz, vec_relu<float>, ref_relu<float>);`
`137`	`136`	`TestAndBench<float>(sz, vec_relu<float, jit::avx>, ref_relu<float>);`
`138`	`137`	`TestAndBench<float>(sz, vec_relu<float, jit::avx2>, ref_relu<float>);`
`139`		`- TestAndBench<float>(sz, vec_relu<float, jit::avx512_common>,`
`140`		`- ref_relu<float>);`
	`138`	`+ TestAndBench<float>(sz, vec_relu<float, jit::avx512f>, ref_relu<float>);`
`141`	`139`	`}`
`142`	`140`	`TestAndBench<double>(30, vec_relu<double>, ref_relu<double>);`
`143`	`141`	`}`
`@@ -170,7 +168,7 @@ TEST(CpuVecTest, inplace_sigmoid) {`
`170`	`168`	`TestInplace<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);`
`171`	`169`	`TestInplace<float>(sz, vec_sigmoid<float, jit::avx>, ref_sigmoid<float>);`
`172`	`170`	`TestInplace<float>(sz, vec_sigmoid<float, jit::avx2>, ref_sigmoid<float>);`
`173`		`- TestInplace<float>(sz, vec_sigmoid<float, jit::avx512_common>,`
	`171`	`+ TestInplace<float>(sz, vec_sigmoid<float, jit::avx512f>,`
`174`	`172`	`ref_sigmoid<float>);`
`175`	`173`	`}`
`176`	`174`	`TestInplace<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);`
`@@ -183,8 +181,7 @@ TEST(CpuVecTest, inplace_tanh) {`
`183`	`181`	`TestInplace<float>(sz, vec_tanh<float>, ref_tanh<float>);`
`184`	`182`	`TestInplace<float>(sz, vec_tanh<float, jit::avx>, ref_tanh<float>);`
`185`	`183`	`TestInplace<float>(sz, vec_tanh<float, jit::avx2>, ref_tanh<float>);`
`186`		`- TestInplace<float>(sz, vec_tanh<float, jit::avx512_common>,`
`187`		`- ref_tanh<float>);`
	`184`	`+ TestInplace<float>(sz, vec_tanh<float, jit::avx512f>, ref_tanh<float>);`
`188`	`185`	`}`
`189`	`186`	`TestInplace<double>(30, vec_tanh<double>, ref_tanh<double>);`
`190`	`187`	`}`
`@@ -196,8 +193,7 @@ TEST(CpuVecTest, inplace_relu) {`
`196`	`193`	`TestInplace<float>(sz, vec_relu<float>, ref_relu<float>);`
`197`	`194`	`TestInplace<float>(sz, vec_relu<float, jit::avx>, ref_relu<float>);`
`198`	`195`	`TestInplace<float>(sz, vec_relu<float, jit::avx2>, ref_relu<float>);`
`199`		`- TestInplace<float>(sz, vec_relu<float, jit::avx512_common>,`
`200`		`- ref_relu<float>);`
	`196`	`+ TestInplace<float>(sz, vec_relu<float, jit::avx512f>, ref_relu<float>);`
`201`	`197`	`}`
`202`	`198`	`TestInplace<double>(30, vec_relu<double>, ref_relu<double>);`
`203`	`199`	`}`