Skip to content

Commit 513bb6c

Browse files
committed
Squashing MKL based softmax for inference
test=develop - Added profiling to softmax functors - MKL based softmax inference op - Fix to softmax computation via MKL - cleaning - Cosmetic fixes to softmax MKL - Fix to ON_INFER lack of propagation
1 parent 1722678 commit 513bb6c

File tree

3 files changed

+42
-34
lines changed

3 files changed

+42
-34
lines changed

CMakeLists.txt

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,14 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
302302
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
303303
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
304304

305+
if (ON_INFER)
306+
message(STATUS "On inference mode, will take place some specific optimization.")
307+
add_definitions(-DPADDLE_ON_INFERENCE)
308+
else()
309+
#TODO(luotao), combine this warning with `make inference_lib_dist` command.
310+
message(WARNING "On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only.")
311+
endif()
312+
305313
add_subdirectory(paddle)
306314
if(WITH_PYTHON)
307315
add_subdirectory(python)
@@ -312,10 +320,3 @@ if(WITH_DOC)
312320
find_python_module(recommonmark REQUIRED)
313321
add_subdirectory(doc)
314322
endif()
315-
316-
if (ON_INFER)
317-
message(STATUS "On inference mode, will take place some specific optimization.")
318-
else()
319-
#TODO(luotao), combine this warning with `make inference_lib_dist` command.
320-
message(WARNING "On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only.")
321-
endif()

paddle/fluid/operators/math/softmax_impl.h

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License. */
1616
#include "paddle/fluid/framework/eigen.h"
1717
#include "paddle/fluid/framework/tensor.h"
1818

19+
#include "paddle/fluid/operators/math/blas.h"
1920
namespace paddle {
2021
namespace operators {
2122
namespace math {
@@ -65,36 +66,42 @@ void SoftmaxFunctor<DeviceContext, T, is_test>::operator()(
6566
.broadcast(one_by_class));
6667
}
6768

68-
template <typename DeviceContext, typename T>
69-
class SoftmaxFunctor<DeviceContext, T, true> {
69+
template <typename DeviceContext>
70+
class SoftmaxFunctor<DeviceContext, float, true> {
7071
void operator()(const DeviceContext& context, const framework::Tensor* X,
7172
framework::Tensor* Y) {
72-
auto logits = EigenMatrix<T>::From(*X);
73-
auto softmax = EigenMatrix<T>::From(*Y);
74-
73+
auto in_dims = X->dims();
74+
auto out_dims = Y->dims();
75+
const float* in_data = X->data<float>();
76+
float* out_data = Y->data<float>();
7577
const int kBatchDim = 0;
7678
const int kClassDim = 1;
77-
78-
const int batch_size = logits.dimension(kBatchDim);
79-
const int num_classes = logits.dimension(kClassDim);
80-
81-
Eigen::DSizes<int, 1> along_class(kClassDim);
82-
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
83-
Eigen::DSizes<int, 2> one_by_class(1, num_classes);
84-
85-
auto shifted_logits = (logits -
86-
logits.maximum(along_class)
87-
.eval()
88-
.reshape(batch_by_one)
89-
.broadcast(one_by_class));
90-
91-
softmax.device(*context.eigen_device()) = shifted_logits.exp();
92-
softmax.device(*context.eigen_device()) = (softmax *
93-
softmax.sum(along_class)
94-
.inverse()
95-
.eval()
96-
.reshape(batch_by_one)
97-
.broadcast(one_by_class));
79+
// 2D data. Batch x C
80+
const int batch_size = in_dims[kBatchDim];
81+
const int num_classes = in_dims[kClassDim];
82+
std::vector<float> entities(batch_size);
83+
auto blas = math::GetBlas<DeviceContext, float>(context);
84+
for (int n = 0; n < batch_size; ++n) {
85+
entities[n] = in_data[n * num_classes];
86+
for (int c = 1; c < num_classes; ++c) {
87+
entities[n] = in_data[n * num_classes + c] > entities[n]
88+
? in_data[n * num_classes + c]
89+
: entities[n];
90+
}
91+
for (int c = 0; c < num_classes; ++c) {
92+
out_data[n * num_classes + c] =
93+
in_data[n * num_classes + c] - entities[n];
94+
}
95+
}
96+
97+
blas.VEXP(num_classes * batch_size, out_data, out_data);
98+
for (int n = 0; n < batch_size; ++n) {
99+
entities[n] = out_data[n * num_classes];
100+
for (int c = 1; c < num_classes; ++c) {
101+
entities[n] += out_data[n * num_classes + c];
102+
}
103+
blas.SCAL(num_classes, 1.0f / entities[n], &out_data[n * num_classes]);
104+
}
98105
}
99106
};
100107

paddle/fluid/operators/softmax_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class SoftmaxKernel : public framework::OpKernel<T> {
3535
Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1);
3636
Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
3737

38-
#ifdef ON_INFER
38+
#ifdef PADDLE_ON_INFERENCE
3939
math::SoftmaxFunctor<DeviceContext, T, true>()(
4040
context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
4141
#else

0 commit comments

Comments
 (0)