Skip to content

Commit 31e67b9

Browse files
committed
test=develop
2 parents f1a08a3 + 7cd2761 commit 31e67b9

File tree

90 files changed

+3344
-906
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+3344
-906
lines changed

doc/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
add_custom_target(paddle_apis ALL
2-
DEPENDS paddle_v2_apis paddle_fluid_apis)
2+
DEPENDS paddle_v2_apis)
33

44
add_custom_target(paddle_docs ALL
55
DEPENDS paddle_v2_docs paddle_v2_docs_cn
6-
paddle_fluid_docs paddle_fluid_docs_cn
76
paddle_mobile_docs paddle_mobile_docs_cn)
87

98
add_subdirectory(v2)
10-
add_subdirectory(fluid)
119
add_subdirectory(mobile)

paddle/fluid/API.spec

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,13 @@ paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn'
153153
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
154154
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
155155
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
156+
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
157+
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False))
158+
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
159+
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
160+
paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,))
161+
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
162+
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
156163
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
157164
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
158165
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@@ -224,13 +231,6 @@ paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwarg
224231
paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
225232
paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
226233
paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
227-
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
228-
paddle.fluid.layers.gaussian_random ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
229-
paddle.fluid.layers.sampling_id ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
230-
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
231-
paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
232-
paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
233-
paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
234234
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
235235
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
236236
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
@@ -269,7 +269,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
269269
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
270270
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
271271
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
272-
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 4095, 1))
272+
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
273273
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
274274
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
275275
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
@@ -298,6 +298,7 @@ paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs
298298
paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
299299
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
300300
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
301+
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
301302
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
302303
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
303304
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/CMakeLists.txt

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,10 @@ else()
150150
endif()
151151

152152
if (NOT WIN32)
153-
cc_library(parallel_executor SRCS parallel_executor.cc DEPS
154-
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
155-
graph graph_viz_pass multi_devices_graph_pass
156-
multi_devices_graph_print_pass multi_devices_graph_check_pass
157-
fast_threaded_ssa_graph_executor fuse_elewise_add_act_pass)
153+
cc_library(parallel_executor SRCS parallel_executor.cc DEPS
154+
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
155+
graph build_strategy
156+
fast_threaded_ssa_graph_executor)
158157
endif() # NOT WIN32
159158

160159
cc_library(prune SRCS prune.cc DEPS framework_proto)

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
5454
# device_context reduce_op_handle )
5555
cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
5656
DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context)
57+
58+
cc_library(build_strategy SRCS build_strategy.cc DEPS
59+
graph_viz_pass multi_devices_graph_pass
60+
multi_devices_graph_print_pass multi_devices_graph_check_pass
61+
fuse_elewise_add_act_pass)
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/framework/details/build_strategy.h"
16+
17+
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
18+
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
19+
#include "paddle/fluid/framework/ir/graph.h"
20+
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
21+
22+
namespace paddle {
23+
namespace framework {
24+
namespace details {
25+
26+
// PassBuilder whose constructor appends, in this order, the passes selected by
// a BuildStrategy:
//   1. graph_viz_pass ("_original_graph"), only if debug_graphviz_path_ is set;
//   2. fuse_elewise_add_act_pass, only if fuse_elewise_add_act_ops_ is true,
//      followed by another graph_viz_pass ("_fused_graph") if
//      debug_graphviz_path_ is set;
//   3. multi_devices_pass (always);
//   4. multi_devices_print_pass, only if debug_graphviz_path_ is set;
//   5. multi_devices_check_pass (always).
class ParallelExecutorPassBuilder : public ir::PassBuilder {
27+
public:
28+
// Copies `strategy` into strategy_ and appends the passes listed above.
explicit ParallelExecutorPassBuilder(const BuildStrategy &strategy)
29+
: ir::PassBuilder(), strategy_(strategy) {
30+
// Add a graph viz pass to record a graph.
31+
if (!strategy_.debug_graphviz_path_.empty()) {
32+
auto viz_pass = AppendPass("graph_viz_pass");
33+
// Output path is the debug path with the suffix "_original_graph".
const std::string graph_path = string::Sprintf(
34+
"%s%s", strategy_.debug_graphviz_path_.c_str(), "_original_graph");
35+
// NOTE(review): a raw `new` is handed to Set(); presumably the pass takes
// ownership of the string -- confirm against ir::Pass::Set.
viz_pass->Set<std::string>("graph_viz_path", new std::string(graph_path));
36+
}
37+
38+
// Add op fusion.
39+
// NOTE(review): this branch reads the constructor argument `strategy`,
// while the other branches read the member copy `strategy_`, which was
// initialized from that same argument.
if (strategy.fuse_elewise_add_act_ops_) {
40+
auto fuse_elewise_add_act_pass = AppendPass("fuse_elewise_add_act_pass");
41+
// Add a graph viz pass to record a graph.
42+
if (!strategy.debug_graphviz_path_.empty()) {
43+
auto viz_pass = AppendPass("graph_viz_pass");
44+
// Output path is the debug path with the suffix "_fused_graph".
const std::string graph_path = string::Sprintf(
45+
"%s%s", strategy.debug_graphviz_path_.c_str(), "_fused_graph");
46+
viz_pass->Set<std::string>("graph_viz_path",
47+
new std::string(graph_path));
48+
}
49+
}
50+
51+
// Convert graph to run on multi-devices.
52+
auto multi_devices_pass = AppendPass("multi_devices_pass");
53+
// The pass receives a pointer to the member copy strategy_ (not owned by
// the pass).
multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy",
54+
&strategy_);
55+
56+
// Add a graph print pass to record a graph with device info.
57+
if (!strategy_.debug_graphviz_path_.empty()) {
58+
auto multi_devices_print_pass = AppendPass("multi_devices_print_pass");
59+
// The path attribute points into strategy_ (not owned by the pass); the
// printer object is newly allocated and passed through Set().
multi_devices_print_pass->SetNotOwned<const std::string>(
60+
"debug_graphviz_path", &strategy_.debug_graphviz_path_);
61+
multi_devices_print_pass->Set<details::GraphvizSSAGraphPrinter>(
62+
"graph_printer", new details::GraphvizSSAGraphPrinter);
63+
}
64+
65+
// Verify that the graph is correct for multi-device executor.
66+
AppendPass("multi_devices_check_pass");
67+
}
68+
69+
private:
70+
// Copy of the strategy taken at construction. multi_devices_pass and
// multi_devices_print_pass are given pointers into this member.
BuildStrategy strategy_;
71+
};
72+
73+
// Replaces pass_builder_ with a new ParallelExecutorPassBuilder constructed
// from *this and returns that shared pointer. Any builder previously held in
// pass_builder_ is released by the reset(). The function is const yet assigns
// pass_builder_, so that member must be declared mutable.
std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy()
74+
const {
75+
pass_builder_.reset(new ParallelExecutorPassBuilder(*this));
76+
return pass_builder_;
77+
}
78+
79+
// Builds an ir::Graph from `main_program`, runs every pass returned by
// pass_builder_->AllPasses() over it in iteration order, and returns the
// resulting graph.
//
// Before a pass whose Type() is "multi_devices_pass" runs, the caller's
// `places`, `loss_var_name`, `param_names` and `local_scopes` are attached to
// it as not-owned attributes (pointers to the arguments themselves). In CUDA
// builds "nccl_ctxs" is attached as well: `nccl_ctxs` when `use_cuda` is true,
// nullptr otherwise. In non-CUDA builds `use_cuda` is not read by this
// function.
std::unique_ptr<ir::Graph> BuildStrategy::Apply(
80+
const ProgramDesc &main_program, const std::vector<platform::Place> &places,
81+
const std::string &loss_var_name,
82+
const std::unordered_set<std::string> &param_names,
83+
const std::vector<Scope *> &local_scopes,
84+
#ifdef PADDLE_WITH_CUDA
85+
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
86+
#else
87+
const bool use_cuda) const {
88+
#endif
89+
// Create a default one if not initialized by user.
90+
if (!pass_builder_) {
91+
CreatePassesFromStrategy();
92+
}
93+
94+
std::unique_ptr<ir::Graph> graph(new ir::Graph(main_program));
95+
96+
for (std::shared_ptr<ir::Pass> &pass : pass_builder_->AllPasses()) {
97+
if (pass->Type() == "multi_devices_pass") {
98+
// Each attribute is erased before being set again.
// NOTE(review): presumably this lets the same pass object be reused
// across Apply() calls without keeping pointers from an earlier call --
// confirm against ir::Pass::Erase / SetNotOwned.
pass->Erase("places");
99+
pass->SetNotOwned<const std::vector<platform::Place>>("places", &places);
100+
pass->Erase("loss_var_name");
101+
pass->SetNotOwned<const std::string>("loss_var_name", &loss_var_name);
102+
pass->Erase("params");
103+
pass->SetNotOwned<const std::unordered_set<std::string>>("params",
104+
&param_names);
105+
pass->Erase("local_scopes");
106+
pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
107+
&local_scopes);
108+
#ifdef PADDLE_WITH_CUDA
109+
// Forward the NCCL context map only when CUDA is in use.
platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
110+
pass->Erase("nccl_ctxs");
111+
pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
112+
#endif
113+
}
114+
// Ownership of the graph moves into the pass; its return value becomes the
// input of the next pass.
graph = pass->Apply(std::move(graph));
115+
}
116+
return graph;
117+
}
118+
} // namespace details
119+
} // namespace framework
120+
} // namespace paddle
121+
122+
USE_PASS(fuse_elewise_add_act_pass);
123+
USE_PASS(graph_viz_pass);
124+
USE_PASS(multi_devices_pass);
125+
USE_PASS(multi_devices_check_pass);
126+
USE_PASS(multi_devices_print_pass);

paddle/fluid/framework/details/build_strategy.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@
1515
#pragma once
1616

1717
#include <string>
18+
#include <vector>
19+
20+
#include "paddle/fluid/framework/ir/pass_builder.h"
21+
#include "paddle/fluid/framework/program_desc.h"
22+
#include "paddle/fluid/framework/scope.h"
23+
#include "paddle/fluid/platform/device_context.h"
24+
#include "paddle/fluid/platform/enforce.h"
25+
26+
#ifdef PADDLE_WITH_CUDA
27+
#include "paddle/fluid/platform/nccl_helper.h"
28+
#endif
1829

1930
namespace paddle {
2031
namespace framework {
@@ -57,6 +68,30 @@ struct BuildStrategy {
5768
bool fuse_elewise_add_act_ops_{false};
5869

5970
bool enable_data_balance_{false};
71+
72+
// Users normally do not need to call this API.
73+
// The PassBuilder allows passes to be inserted and removed in a more
74+
// customized way from the Python side.
75+
// A new PassBuilder is created based on the configs defined above, and
76+
// the passes are owned by the PassBuilder.
77+
std::shared_ptr<ir::PassBuilder> CreatePassesFromStrategy() const;
78+
79+
// Apply the passes held by pass_builder_. The passes are applied to the
80+
// given Program, and the resulting ir::Graph is returned.
81+
std::unique_ptr<ir::Graph> Apply(
82+
const ProgramDesc &main_program,
83+
const std::vector<platform::Place> &places,
84+
const std::string &loss_var_name,
85+
const std::unordered_set<std::string> &param_names,
86+
const std::vector<Scope *> &local_scopes,
87+
#ifdef PADDLE_WITH_CUDA
88+
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
89+
#else
90+
const bool use_cuda) const;
91+
#endif
92+
93+
private:
94+
mutable std::shared_ptr<ir::PassBuilder> pass_builder_;
6095
};
6196

6297
} // namespace details

paddle/fluid/framework/details/cow_ptr.h

Lines changed: 19 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -20,79 +20,37 @@ namespace paddle {
2020
namespace framework {
2121
namespace details {
2222

23-
// Change it to thread safe flags if needed.
24-
class ThreadUnsafeOwnershipFlags {
23+
template <class T>
24+
class COWPtr {
2525
public:
26-
explicit ThreadUnsafeOwnershipFlags(bool flag) : flag_(flag) {}
27-
28-
ThreadUnsafeOwnershipFlags(const ThreadUnsafeOwnershipFlags& other) = delete;
29-
ThreadUnsafeOwnershipFlags& operator=(
30-
const ThreadUnsafeOwnershipFlags& other) = delete;
31-
ThreadUnsafeOwnershipFlags(ThreadUnsafeOwnershipFlags&& other) = default;
32-
33-
void SetOwnership(bool flag) { flag_ = flag; }
34-
35-
// Invoke the callback if it is not owned.
36-
template <typename Callback>
37-
void AcquireOwnershipOnce(Callback acquire) {
38-
if (!flag_) {
39-
acquire();
40-
flag_ = true;
41-
}
42-
}
26+
typedef std::shared_ptr<T> RefPtr;
4327

4428
private:
45-
bool flag_;
46-
};
29+
RefPtr m_sp;
4730

48-
// Copy-On-Write pointer.
49-
// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
50-
//
51-
// The template parameter OwnershipFlags should have:
52-
// * a constructor takes a bool. True if own.
53-
// * SetOwnership(bool flag).
54-
// * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
55-
// owned.
56-
//
57-
// https://en.wikipedia.org/wiki/Copy-on-write
58-
template <typename T, typename OwnershipFlags = ThreadUnsafeOwnershipFlags>
59-
class COWPtr {
6031
public:
61-
// Ctor from raw pointer.
62-
explicit COWPtr(T* ptr) : payload_(ptr), ownership_{true} {}
32+
COWPtr() : m_sp(nullptr) {}
33+
explicit COWPtr(T* t) : m_sp(t) {}
6334

64-
// Move methods. Steal ownership from origin
65-
COWPtr(COWPtr&& other)
66-
: payload_(other.payload_), ownership_{std::move(other.ownership_)} {}
67-
COWPtr& operator=(COWPtr&& origin) = default;
35+
const T& Data() const { return *m_sp; }
6836

69-
// Copy methods. Not own payload
70-
COWPtr(const COWPtr& other) : payload_(other.payload_), ownership_{false} {}
71-
COWPtr& operator=(const COWPtr& other) {
72-
payload_ = other.payload_;
73-
ownership_.SetOwnership(false);
74-
return *this;
75-
}
76-
77-
// Access read only data.
78-
const T& Data() const { return *payload_; }
79-
80-
// Access mutable data. If the data is not owned, the data will be copied
81-
// before.
8237
T* MutableData() {
83-
ownership_.AcquireOwnershipOnce(
84-
[this] { payload_.reset(new T(*payload_)); });
85-
return payload_.get();
38+
DetachIfNotUnique();
39+
return m_sp.get();
8640
}
8741

88-
private:
89-
// Actual data pointer.
90-
std::shared_ptr<T> payload_;
42+
void DetachIfNotUnique() {
43+
T* tmp = m_sp.get();
44+
if (!(tmp == nullptr || m_sp.unique())) {
45+
Detach();
46+
}
47+
}
9148

92-
// Ownership flag.
93-
OwnershipFlags ownership_;
49+
void Detach() {
50+
T* tmp = m_sp.get();
51+
m_sp = RefPtr(new T(*tmp));
52+
}
9453
};
95-
9654
} // namespace details
9755
} // namespace framework
9856
} // namespace paddle

paddle/fluid/framework/details/cow_ptr_test.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
3030
ASSERT_EQ(ptr2.Data(), 10);
3131
}
3232

33+
// Writing through the original COWPtr after it has been copied must not change
// the value observed through the copy: `ptr2` still reads 0 while `ptr` reads
// the newly written 10.
TEST(COWPtr, change_old) {
34+
COWPtr<int> ptr(new int{0});
35+
COWPtr<int> ptr2 = ptr;
36+
// Mutate via the original, not the copy.
*ptr.MutableData() = 10;
37+
ASSERT_EQ(ptr2.Data(), 0);
38+
ASSERT_EQ(ptr.Data(), 10);
39+
}
40+
3341
} // namespace details
3442
} // namespace framework
3543
} // namespace paddle

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass
4141

4242
set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
4343

44+
cc_library(pass_builder SRCS pass_builder.cc DEPS pass)
45+
4446
cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
4547
cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
4648
cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry)

0 commit comments

Comments
 (0)