
Commit a07da94

hedaoyuan authored and reyoung committed
fix floating-point overflow problem of tanh (#355)
1 parent 56b23d1 commit a07da94
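The change itself is small: everywhere Paddle evaluates tanh as 2 / (1 + exp(-2a)) - 1, the argument of exp is first clamped to a new constant EXP_MAX_INPUT (40.0). By the time -2a reaches 40, tanh has already saturated to within float precision, so the clamp cannot change the result; it only removes the FE_OVERFLOW risk. A minimal standalone sketch of the trick (the safe_tanh wrapper is illustrative, not Paddle API; constant and formula are taken from this diff):

#include <cmath>
#include <cstdio>

// Mirrors the constant added to hl_base.h in this commit.
#define EXP_MAX_INPUT 40.0

// Overflow-safe tanh in the form Paddle uses: 2 / (1 + exp(-2a)) - 1.
// Clamping the exp argument cannot change the result: once -2a > 40,
// 2 / (1 + exp(-2a)) < 9e-18, far below 1 ulp around -1.0f.
float safe_tanh(float a) {
  float tmp = -2.0f * a;
  if (tmp > EXP_MAX_INPUT) tmp = EXP_MAX_INPUT;
  return 2.0f / (1.0f + std::exp(tmp)) - 1.0f;
}

int main() {
  // Unclamped, a = -90 would give expf(180), which overflows a 32-bit
  // float (anything above ~88.72 does) and raises FE_OVERFLOW.
  std::printf("safe_tanh(-90) = %f\n", safe_tanh(-90.0f));  // -1.000000
  std::printf("safe_tanh(1)   = %f\n", safe_tanh(1.0f));    // 0.761594
  return 0;
}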

File tree: 10 files changed (+119 lines, -14 lines)


paddle/cuda/include/hl_base.h

Lines changed: 9 additions & 0 deletions
@@ -209,6 +209,15 @@ typedef struct {
 #define HL_FLOAT_MIN 2.2250738585072014e-308
 #endif
 
+
+/**
+ * The maximum input value for exp, used to avoid overflow problem.
+ *
+ * Currently only used for tanh function.
+ */
+#define EXP_MAX_INPUT 40.0
+
+
 /**
  * @brief DIVUP(x, y) is similar to ceil(x / y).
  * @note For CUDA, DIVUP will be used to specify
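Why 40 is a safe cap (a back-of-the-envelope check, not stated in the commit): exp(40) ≈ 2.35e17 sits far below FLT_MAX ≈ 3.4e38, so the clamped exponential can never itself overflow, even in float builds, while the term it contributes to the quotient is below float resolution:

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  // The clamped argument can never overflow: exp(40) << FLT_MAX.
  std::printf("exp(40) = %g, FLT_MAX = %g\n",
              std::exp(40.0), (double)FLT_MAX);
  // And the clamp is invisible in the result: the residual term is
  // ~8.5e-18, while 1 ulp around 1.0f is ~1.19e-7 (FLT_EPSILON).
  std::printf("2/(1+exp(40)) = %g, FLT_EPSILON = %g\n",
              2.0 / (1.0 + std::exp(40.0)), (double)FLT_EPSILON);
  return 0;
}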

paddle/cuda/src/hl_avx_functions.cc

Lines changed: 2 additions & 0 deletions
@@ -38,7 +38,9 @@ namespace hppl {
 }
 
 __m256 tanh(const __m256 a) {
+  __m256 max = _mm256_set1_ps(EXP_MAX_INPUT);
   __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), a);
+  tmp = _mm256_min_ps(tmp, max);
   tmp = exp(tmp);
   return _mm256_sub_ps(
       _mm256_div_ps(_mm256_set1_ps(2.0f),
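In the AVX path the same clamp happens eight lanes at a time: _mm256_min_ps takes the per-lane minimum of tmp and the broadcast EXP_MAX_INPUT, and the exp(tmp) that follows is hppl's own vectorized exp (it takes a __m256, so it is not a standard intrinsic). A standalone sketch of the clamp, with scalar std::exp standing in for the vector exp (compile with -mavx; illustrative only):

#include <immintrin.h>
#include <cmath>
#include <cstdio>

int main() {
  alignas(32) float a[8] = {-90.f, -44.f, -1.f, 0.f, 0.5f, 1.f, 2.f, 90.f};
  __m256 va  = _mm256_load_ps(a);
  __m256 max = _mm256_set1_ps(40.0f);                     // EXP_MAX_INPUT
  __m256 tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), va);  // tmp = -2a
  tmp = _mm256_min_ps(tmp, max);                          // per-lane clamp

  alignas(32) float t[8];
  _mm256_store_ps(t, tmp);
  for (int i = 0; i < 8; ++i) {  // scalar exp stands in for hppl's AVX exp
    std::printf("tanh(%g) ~ %g\n", a[i],
                2.0f / (1.0f + std::exp(t[i])) - 1.0f);
  }
  return 0;
}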

paddle/cuda/src/hl_cpu_functions.cc

Lines changed: 3 additions & 1 deletion
@@ -30,7 +30,9 @@ namespace hppl {
 }
 
 real tanh(const real a) {
-  return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;
+  real tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  return (2.0 / (1.0 + exp(tmp))) - 1.0;
 }
 
 real linear(const real a) {
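For scale (assuming real is a 32-bit float, Paddle's default unless a double build is configured): expf overflows once its argument exceeds ln(FLT_MAX) ≈ 88.72, so the old unclamped form already raised FE_OVERFLOW for any a < -44.36. The ±90 inputs used by the new test below sit well past that threshold, while a double build would not overflow until a < ~-354.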

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 1 addition & 1 deletion
@@ -995,7 +995,7 @@ TEST(Layer, LstmLayer) {
   TestConfig config;
   config.layerConfig.set_type("lstmemory");
   config.layerConfig.set_size(4);
-  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_type("tanh");
   config.layerConfig.set_active_state_type("sigmoid");
   config.layerConfig.set_active_gate_type("sigmoid");
   config.biasSize = 28;

paddle/gserver/tests/test_RecurrentLayer.cpp

Lines changed: 1 addition & 1 deletion
@@ -369,7 +369,7 @@ TEST(Layer, LstmLayer) {
   LayerConfig layerConfig;
   layerConfig.set_type("lstmemory");
   layerConfig.set_active_type("relu");
-  layerConfig.set_active_state_type("sigmoid");
+  layerConfig.set_active_state_type("tanh");
   layerConfig.set_active_gate_type("sigmoid");
 
   layerConfig.add_inputs();
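The two test tweaks above appear to be coverage changes rather than part of the fix: switching the LSTM activations from sigmoid to tanh routes the existing lstmemory tests through the newly clamped tanh code paths.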

paddle/math/BaseMatrix.cu

Lines changed: 4 additions & 1 deletion
@@ -625,7 +625,10 @@ void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
   applyBinary(binary::SquareDerivative<T>(), b);
 }
 
-DEFINE_MATRIX_BINARY_OP(Tanh, b = 2.0 / (1.0 + exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(Tanh,
+                        T tmp = -2.0 * a;
+                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<>
 void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
   applyBinary(binary::Tanh<real>(), b);
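DEFINE_MATRIX_BINARY_OP packs the statements in its second argument into the body of an elementwise functor, which applyBinary then maps over the matrix. Roughly what binary::Tanh<T> computes per element after this change (a hand-written sketch, not the actual macro expansion):

#include <cmath>

#define EXP_MAX_INPUT 40.0  // as defined in hl_base.h

// Illustrative stand-in for the functor the macro generates: writes
// tanh(a) into b, with the exp argument clamped to avoid FE_OVERFLOW.
template <class T>
struct TanhOp {
  void operator()(const T& a, T& b) const {
    T tmp = -2.0 * a;
    tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
    b = 2.0 / (1.0 + std::exp(tmp)) - 1.0;
  }
};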

paddle/math/MathFunctions.cpp

Lines changed: 4 additions & 1 deletion
@@ -200,7 +200,10 @@ void vLog1p(const int n, const T* a, T* r) {
       binary::vLog1p<T>(), const_cast<T*>(a), r, 1, n, n, n);
 }
 
-DEFINE_MATRIX_BINARY_OP(vTanh, b = 2.0 / (1.0 + std::exp(-2 * a)) - 1.0);
+DEFINE_MATRIX_BINARY_OP(vTanh,
+                        T tmp = -2.0 * a;
+                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
 template<class T>
 void vTanh(const int n, const T* a, T* r) {
   hl_cpu_apply_binary_op<T, binary::vTanh<T>, 0, 0>(
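binary::vTanh is the op behind the vectorized vTanh() used by CpuMatrix; with the clamp now inside the op itself, overflow is prevented at the source, which is what lets the errno bookkeeping in Matrix.cpp below be deleted.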

paddle/math/Matrix.cpp

Lines changed: 0 additions & 9 deletions
@@ -3471,9 +3471,7 @@ void CpuMatrix::tanh(Matrix& output) {
   size_t dim = getWidth();
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(output.getWidth(), dim);
-  errno = 0;
   vTanh(numSamples * dim, getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vTanh error";
 }
 
 void CpuMatrix::tanhDerivative(Matrix& output) {

@@ -3495,10 +3493,8 @@ void CpuMatrix::softrelu(Matrix& output) {
       out[j] = x;
     }
   }
-  errno = 0;
   vExp(numSamples * dim, output.getData(), output.getData());
   vLog1p(numSamples * dim, output.getData(), output.getData());
-  CHECK_EQ(errno, 0) << "vExp+vLog1p error";
 }
 
 void CpuMatrix::softreluDerivative(Matrix& output) {

@@ -3513,9 +3509,7 @@ void CpuMatrix::softreluDerivative(Matrix& output) {
   MatrixPtr tmpMat = Matrix::create(numSamples, dim);
   real* tmp = tmpMat->getData();
 
-  errno = 0;
   vExp(size, output.getData(), tmpMat->getData());
-  CHECK_EQ(errno, 0) << "vExp error";
 
   for (size_t i = 0; i < size; ++i) {
     grad[i] *= (1.0 - 1.0 / tmp[i]);

@@ -3538,10 +3532,7 @@ void CpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
     out[i] = p2 * in[i];
   }
 
-  // out = tanh(out)
-  errno = 0;
   vTanh(numSamples * dim, out, out);
-  CHECK_EQ(errno, 0) << "vTanh error";
 
   // out = p1 * out
   for (size_t i = 0; i < numSamples * dim; ++i) {
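The deleted errno = 0 / CHECK_EQ(errno, 0) pairs were the previous overflow detector: libm sets errno to ERANGE when exp overflows. That check becomes unnecessary once the inputs are clamped, and the new test below takes the opposite approach, trapping any remaining FP exception in hardware via feenableexcept.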

paddle/math/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -13,3 +13,4 @@ add_simple_unittest(test_sparseMatrixCompare)
 add_simple_unittest(test_perturbation)
 add_simple_unittest(test_CpuGpuVector)
 add_simple_unittest(test_Allocator)
+add_simple_unittest(test_FPException)
paddle/math/tests/test_FPException.cpp

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+
+/**
+ * This test is about floating point calculation exception.
+ * Paddle catches FE_INVALID, FE_DIVBYZERO and FE_OVERFLOW exceptions.
+ *
+ * Some exceptions occur in the middle of a set of formulas,
+ * that can be circumvented by some tricks.
+ * For example,
+ * calculate tanh
+ * b = 2.0 / (1.0 + exp(-2 * a)) - 1.0
+ *
+ * If the result of (-2 * a) is too large,
+ * a FE_OVERFLOW exception occurs when calculating exp.
+ * But the result of tanh is no overflow problem,
+ * so we can add some tricks to prevent exp calculate an excessive value.
+ *
+ */
+#include <fenv.h>
+#include <gtest/gtest.h>
+#include "paddle/math/Matrix.h"
+#include "paddle/utils/Excepts.h"
+
+using namespace paddle;  // NOLINT
+
+void SetTensorValue(Matrix& matrix, real value) {
+  int height = matrix.getHeight();
+  int width = matrix.getWidth();
+  int stride = matrix.getStride();
+  real* data = matrix.getData();
+  for (int i = 0; i < height; i++) {
+    int j = rand() % width;  // NOLINT
+    if (typeid(matrix) == typeid(CpuMatrix)) {
+      data[i * stride + j] = value;
+    } else if (typeid(matrix) == typeid(GpuMatrix)) {
+      hl_memcpy(&data[i * stride + j], &value, sizeof(real));
+    } else {
+      LOG(FATAL) << "should not reach here";
+    }
+  }
+}
+
+template<typename Matrix>
+void testTanh(real illegal) {
+  MatrixPtr A = std::make_shared<Matrix>(10, 10);
+  MatrixPtr B = std::make_shared<Matrix>(10, 10);
+  A->randomizeUniform();
+  B->randomizeUniform();
+
+  SetTensorValue(*A, illegal);
+
+  A->tanh(*B);
+}
+
+template<typename Matrix>
+void testSigmoid(real illegal) {
+  MatrixPtr A = std::make_shared<Matrix>(10, 10);
+  MatrixPtr B = std::make_shared<Matrix>(10, 10);
+  A->randomizeUniform();
+  B->randomizeUniform();
+
+  SetTensorValue(*A, illegal);
+
+  A->sigmoid(*B);
+}
+
+TEST(fp, overflow) {
+  for (auto illegal : {-90.0, 90.0}) {
+    LOG(INFO) << " illegal=" << illegal;
+    testTanh<CpuMatrix>(illegal);
+    testSigmoid<CpuMatrix>(illegal);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+
+  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
+  return RUN_ALL_TESTS();
+}
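feenableexcept is the crux of the test: it is a glibc extension that unmasks the given IEEE-754 exceptions, so any subsequent FE_INVALID, FE_DIVBYZERO, or FE_OVERFLOW delivers SIGFPE and aborts the run. The test therefore passes only if the clamped tanh and sigmoid raise nothing at all. A minimal demonstration of the mechanism (Linux/glibc only; illustrative, not from the commit):

#include <fenv.h>   // feenableexcept: glibc extension, not standard C++
#include <cmath>
#include <cstdio>

int main() {
  feenableexcept(FE_OVERFLOW);  // unmasked: overflow now raises SIGFPE
  std::printf("exp(40) = %g\n", std::exp(40.0));  // ~2.35e17, no trap
  volatile float arg = 180.0f;          // volatile blocks constant folding
  volatile float x = std::exp(arg);     // float overflow -> SIGFPE, aborts
  std::printf("never printed: %g\n", (double)x);
  return 0;
}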
