
Commit 98ffde4

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
2 parents: 08d8a62 + ac8208b

41 files changed: +1026 -94 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -25,5 +25,6 @@ third_party/
 bazel-*
 third_party/
 
+build_*
 # clion workspace.
 cmake-build-*

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
 option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
 option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
 option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
-option(WITH_FAST_MATH "Make use of fast math library" OFF)
+option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
 
 # PY_VERSION
 if(NOT PY_VERSION)
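Note that this hunk flips the default from OFF to ON, trading some floating-point precision for speed (as the new description says). Builds that need strict precision can presumably opt out at configure time, e.g. `cmake -DWITH_FAST_MATH=OFF ..`.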

paddle/fluid/API.spec

Lines changed: 3 additions & 0 deletions
@@ -198,6 +198,9 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
 paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
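These three new layers are the Python-level surface of the finiteness checks added in this commit: each takes a single tensor `x` and appears to return a flag tensor computed by the new TensorContainsInf/TensorContainsNAN/TensorIsfinite utilities in tensor_util below.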

paddle/fluid/framework/data_type.h

Lines changed: 0 additions & 1 deletion
@@ -17,7 +17,6 @@ limitations under the License. */
 #include <typeindex>
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/platform/enforce.h"
-
 #include "paddle/fluid/platform/float16.h"
 
 namespace paddle {

paddle/fluid/framework/op_desc.cc

Lines changed: 21 additions & 0 deletions
@@ -50,6 +50,27 @@ class CompileTimeInferShapeContext : public InferShapeContext {
   const std::vector<std::string> &Outputs(
       const std::string &name) const override;
 
+  void ShareDim(const std::string &in, const std::string &out, size_t i = 0,
+                size_t j = 0) override {
+    PADDLE_ENFORCE_LT(i, Inputs(in).size());
+    PADDLE_ENFORCE_LT(j, Outputs(out).size());
+    const std::string &input_n = Inputs(in)[i];
+    const std::string &output_n = Outputs(out)[j];
+
+    PADDLE_ENFORCE(input_n != framework::kEmptyVarName, "The %s[%d] is @EMPTY@",
+                   in, i);
+    PADDLE_ENFORCE(output_n != framework::kEmptyVarName,
+                   "The %s[%d] is @EMPTY@", out, j);
+
+    auto *in_var = block_.FindVarRecursive(input_n);
+    auto *out_var = block_.FindVarRecursive(output_n);
+
+    PADDLE_ENFORCE(in_var->GetType() == out_var->GetType(),
+                   "The type of %s and %s is not the same.", input_n, output_n);
+
+    SetDim(output_n, GetDim(input_n));
+  }
+
   void ShareLoD(const std::string &in, const std::string &out, size_t i = 0,
                 size_t j = 0) const override {
     PADDLE_ENFORCE_LT(i, Inputs(in).size());
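This is the compile-time half of the new hook: dims are propagated through the var descs of the enclosing block, so only the dimensions and the var type can be checked here. The runtime half in operator.cc below must additionally copy SelectedRows bookkeeping; a usage sketch follows the shape_inference.h hunk.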

paddle/fluid/framework/operator.cc

Lines changed: 30 additions & 0 deletions
@@ -542,6 +542,36 @@ class RuntimeInferShapeContext : public InferShapeContext {
     return op_.Outputs(name);
   }
 
+  void ShareDim(const std::string& in, const std::string& out, size_t i = 0,
+                size_t j = 0) override {
+    PADDLE_ENFORCE_LT(i, Inputs(in).size());
+    PADDLE_ENFORCE_LT(j, Outputs(out).size());
+    const std::string& input_n = Inputs(in)[i];
+    const std::string& output_n = Outputs(out)[j];
+
+    Variable* in_var = scope_.FindVar(input_n);
+    Variable* out_var = scope_.FindVar(output_n);
+    PADDLE_ENFORCE(in_var->Type() == out_var->Type(),
+                   "The type of %s and %s is not the same.", output_n,
+                   GetDim(input_n));
+
+    if (in_var->IsType<framework::SelectedRows>()) {
+      auto& in_sele_rows = in_var->Get<framework::SelectedRows>();
+      auto out_sele_rows = out_var->GetMutable<framework::SelectedRows>();
+      out_sele_rows->mutable_value()->Resize(in_sele_rows.value().dims());
+      out_sele_rows->set_rows(in_sele_rows.rows());
+      out_sele_rows->set_height(in_sele_rows.height());
+    } else if (in_var->IsType<framework::LoDTensor>()) {
+      auto& in_lod_tensor = in_var->Get<framework::LoDTensor>();
+      auto* out_lod_tensor = out_var->GetMutable<framework::LoDTensor>();
+      out_lod_tensor->Resize(in_lod_tensor.dims());
+    } else {
+      PADDLE_THROW(
+          "Currently, the input type of ShareDim only can be LoDTensor "
+          "or SelectedRows.");
+    }
+  }
+
   void ShareLoD(const std::string& in, const std::string& out, size_t i = 0,
                 size_t j = 0) const override {
     const std::vector<std::string>& inputs = Inputs(in);
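For context, the SelectedRows branch copies more than the value dims because a SelectedRows variable also carries a sparse row mapping and a logical height. A minimal sketch, using only the accessors that appear in the hunk above (the helper name is illustrative, not part of this commit):

#include "paddle/fluid/framework/selected_rows.h"

// Mirroring a SelectedRows input into an output takes the same three calls
// as the branch above: dense-value dims, the row index map, and the height.
void MirrorSelectedRows(const paddle::framework::SelectedRows& src,
                        paddle::framework::SelectedRows* dst) {
  dst->mutable_value()->Resize(src.value().dims());  // dims of dense storage
  dst->set_rows(src.rows());      // which logical rows are materialized
  dst->set_height(src.height());  // logical row count of the full tensor
}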

paddle/fluid/framework/shape_inference.h

Lines changed: 3 additions & 0 deletions
@@ -56,6 +56,9 @@ class InferShapeContext {
   virtual const std::vector<std::string> &Outputs(
       const std::string &name) const = 0;
 
+  virtual void ShareDim(const std::string &in, const std::string &out,
+                        size_t i = 0, size_t j = 0) = 0;
+
   virtual void ShareLoD(const std::string &in, const std::string &out,
                         size_t i = 0, size_t j = 0) const = 0;
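With the pure virtual declared on the base class, an operator's InferShape can replace the manual SetDim(GetDim(...)) pattern with a single call. A minimal sketch of a hypothetical unary op (the op itself is not part of this commit; only ShareDim/ShareLoD come from it):

class UnaryLikeOp : public paddle::framework::OperatorWithKernel {
 public:
  using paddle::framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(paddle::framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
    // One call checks that the var types match and propagates the dims of
    // Input("X")[0] to Output("Out")[0]; at runtime it also handles
    // SelectedRows, which plain SetDim(GetDim(...)) would not.
    ctx->ShareDim("X", "Out");
    ctx->ShareLoD("X", "Out");
  }
};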

paddle/fluid/framework/tensor_util.cc

Lines changed: 103 additions & 1 deletion
@@ -165,10 +165,12 @@ inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
 }
 
 template <typename Predicate>
-struct AnyVisitor : public boost::static_visitor<bool> {
+class AnyVisitor : public boost::static_visitor<bool> {
+ private:
   const framework::Tensor& tensor_;
   Predicate predicate_;
 
+ public:
   AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
       : tensor_(tensor), predicate_(std::move(predicate)) {}
 
@@ -206,13 +208,42 @@ struct AnyVisitor : public boost::static_visitor<bool> {
   }
 };
 
+template <typename Predicate>
+class AnyOutVisitor : public boost::static_visitor<> {
+ private:
+  const framework::Tensor& tensor_;
+  mutable framework::Tensor* out_;
+  Predicate predicate_;
+
+ public:
+  AnyOutVisitor(const framework::Tensor& tensor, Predicate predicate,
+                framework::Tensor* out)
+      : tensor_(tensor), out_(out), predicate_(std::move(predicate)) {}
+
+  template <typename Place>
+  void operator()(const Place& place) const {
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
+    out_->Resize({1});
+    out_->mutable_data<bool>(place);
+    AnyImpl(predicate_, tensor_, *ctx, out_);
+  }
+};
+
 template <typename Predicate>
 inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
   AnyVisitor<Predicate> visitor(tensor, predicate);
   auto place = tensor.place();
   return platform::VisitPlace(place, visitor);
 }
 
+template <typename Predicate>
+inline void Any(const framework::Tensor& tensor, Predicate predicate,
+                framework::Tensor* out) {
+  AnyOutVisitor<Predicate> visitor(tensor, predicate, out);
+  auto place = tensor.place();
+  platform::VisitPlace(place, visitor);
+}
+
 struct ContainsNANPredicate {
   template <typename T>
   auto operator()(const T& eigen_vec) const
@@ -227,6 +258,12 @@ bool TensorContainsNAN(const framework::Tensor& tensor) {
   return Any(tensor, predicate);
 }
 
+void TensorContainsNAN(const framework::Tensor& tensor,
+                       framework::Tensor* out) {
+  ContainsNANPredicate predicate;
+  Any(tensor, predicate, out);
+}
+
 struct ContainsInfPredicate {
   template <typename T>
   auto operator()(const T& eigen_vec) const
@@ -241,6 +278,71 @@ bool TensorContainsInf(const framework::Tensor& tensor) {
   return Any(tensor, predicate);
 }
 
+void TensorContainsInf(const framework::Tensor& tensor,
+                       framework::Tensor* out) {
+  ContainsInfPredicate predicate;
+  Any(tensor, predicate, out);
+}
+
+// NOTE(dzhwinter):
+// Isfinite need a AllVisitor to loop through all the elements.
+// We choose two cuda call instead of one allvisitor. The AllVisitor
+// should be implemented if the performance hurts.
+bool TensorIsfinite(const framework::Tensor& tensor) {
+  ContainsInfPredicate pred_inf;
+  ContainsNANPredicate pred_nan;
+  return !Any(tensor, pred_inf) && !Any(tensor, pred_nan);
+}
+
+#ifdef PADDLE_WITH_CUDA
+template <typename T>
+static inline void __global__ BothFalse(const T* cmp, T* out) {
+  out[0] = (!cmp[0]) && (!out[0]);
+}
+#endif
+
+struct BothFalseVisitor : public boost::static_visitor<> {
+  const framework::Tensor& in_;
+  mutable framework::Tensor* out_;
+  BothFalseVisitor(const framework::Tensor& in, framework::Tensor* out)
+      : in_(in), out_(out) {}
+
+  template <typename Place>
+  void operator()(const Place& place) const {
+    VisitorImpl(place);
+  }
+
+  void VisitorImpl(const platform::CUDAPlace& gpu) const {
+#ifdef PADDLE_WITH_CUDA
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu);
+    BothFalse<bool><<<1, 1, 0, ctx->stream()>>>(in_.data<bool>(),
+                                                out_->mutable_data<bool>(gpu));
+#endif
+  }
+
+  void VisitorImpl(const platform::CPUPlace& cpu) const {
+    bool lhs = !in_.data<bool>()[0];
+    bool rhs = !out_->mutable_data<bool>(cpu)[0];
+    out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
+  }
+
+  void VisitorImpl(
+      const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const {
+    bool lhs = !in_.data<bool>()[0];
+    bool rhs = !out_->mutable_data<bool>(cpu)[0];
+    out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
+  }
+};
+
+void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) {
+  framework::Tensor tmp;
+  TensorContainsInf(tensor, &tmp);
+  TensorContainsNAN(tensor, out);
+  BothFalseVisitor visitor(tmp, out);
+  auto place = tensor.place();
+  platform::VisitPlace(place, visitor);
+}
+
 void TensorToStream(std::ostream& os, const Tensor& tensor,
                     const platform::DeviceContext& dev_ctx) {
   {  // the 1st field, uint32_t version
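The in-place composition at the end of this hunk is easy to misread: TensorContainsInf writes the has-inf flag into tmp, TensorContainsNAN writes the has-nan flag into out, and BothFalseVisitor then overwrites out with "neither flag is set", which is exactly the isfinite result. A scalar model of that last step (illustrative only, not from the commit):

#include <cassert>

// What the BothFalse kernel/visitor computes, with cmp = has_inf (tmp) and
// the prior value of out = has_nan: out becomes (!cmp) && (!out).
static bool BothFalseModel(bool has_inf, bool has_nan) {
  return (!has_inf) && (!has_nan);  // true iff the tensor is finite
}

int main() {
  assert(BothFalseModel(false, false) == true);   // no inf, no nan -> finite
  assert(BothFalseModel(true, false) == false);   // contains inf
  assert(BothFalseModel(false, true) == false);   // contains nan
  return 0;
}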

paddle/fluid/framework/tensor_util.h

Lines changed: 7 additions & 0 deletions
@@ -57,8 +57,15 @@ void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx,
 template <typename T>
 void TesnorToVector(const Tensor& src, std::vector<T>* dst);
 
+// copy the result bool to cpu
 bool TensorContainsNAN(const framework::Tensor& tensor);
 bool TensorContainsInf(const framework::Tensor& tensor);
+bool TensorIsfinite(const framework::Tensor& tensor);
+
+// store the result bool in gpu tensor, async operation. Faster than above ones.
+void TensorContainsNAN(const framework::Tensor& tensor, framework::Tensor* out);
+void TensorContainsInf(const framework::Tensor& tensor, framework::Tensor* out);
+void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out);
 
 void TensorToStream(std::ostream& os, const Tensor& tensor,
                     const platform::DeviceContext& dev_ctx);
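A minimal usage sketch of the two flavors (the gradient-checking scenario and names are hypothetical; only the three function signatures come from this header):

#include "paddle/fluid/framework/tensor_util.h"

void CheckGrad(const paddle::framework::Tensor& grad) {
  // Synchronous flavor: blocks until the flag is available on the host.
  if (!paddle::framework::TensorIsfinite(grad)) {
    // e.g. skip this optimizer step
  }

  // Asynchronous flavor: the flag stays in `out` (on the tensor's device),
  // so a GPU pipeline is not stalled; read it after a stream sync.
  paddle::framework::Tensor flag;
  paddle::framework::TensorIsfinite(grad, &flag);
}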
