Commit 60f70b1

test=develop
1 parent: cc02353

14 files changed: +120 -183 lines changed

CMakeLists.txt

Lines changed: 1 addition & 3 deletions
@@ -212,6 +212,7 @@ endif()
 
 
 include(external/threadpool)
+include(flags) # set paddle compile flags
 include(cudnn) # set cudnn libraries, must before configure
 include(configure) # add paddle env configuration
 
@@ -225,9 +226,6 @@ elseif()
   set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in MKL only now." FORCE)
 endif()
 
-include(flags) # set paddle compile flags
-include(cudnn) # set cudnn libraries, must before configure
-include(configure) # add paddle env configuration
 include(generic) # simplify cmake module
 include(package) # set paddle packages
 include(ccache) # set ccache for compilation

paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc

Lines changed: 0 additions & 1 deletion
@@ -135,7 +135,6 @@ void MainThreads(int num_threads, bool use_gpu) {
 }  // namespace paddle
 
 int main(int argc, char** argv) {
-  FLAGS_dirname = "./word2vec.inference.model";
   google::ParseCommandLineFlags(&argc, &argv, true);
   paddle::demo::Main(false /* use_gpu*/);
   paddle::demo::MainThreads(1, false /* use_gpu*/);
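The deleted line had hardcoded the model path before the flags were parsed; with it gone, the demo's model directory comes from the command line. Below is a minimal sketch of that gflags pattern; the flag name is taken from the deleted line, but the DEFINE_string declaration and the printout are illustrative assumptions, not the demo's actual code.

#include <gflags/gflags.h>
#include <iostream>

DEFINE_string(dirname, "", "Directory of the inference model.");

int main(int argc, char** argv) {
  // Without the hardcoded assignment, the value comes solely from the
  // command line, e.g.:
  //   ./simple_on_word2vec --dirname=./word2vec.inference.model
  google::ParseCommandLineFlags(&argc, &argv, true);
  std::cout << "model dir: " << FLAGS_dirname << "\n";
  return 0;
}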

paddle/fluid/inference/api/demo_ci/test.cc

Lines changed: 0 additions & 99 deletions
This file was deleted.

paddle/fluid/inference/api/helper.h

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ static void TensorAssignData(PaddleTensor *tensor,
 }
 
 template <typename T>
-static int ZeroCopyTensorAssignData(paddle::ZeroCopyTensor *tensor,
+static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
                                     const std::vector<std::vector<T>> &data) {
   int size{0};
   auto *ptr = tensor->mutable_data<T>(PaddlePlace::kCPU);

paddle/fluid/operators/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -291,7 +291,7 @@ op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
 op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
 op_library(cos_sim_op DEPS cos_sim_functor)
-op_library(parallel_do_op DEPS executor glog)
+op_library(parallel_do_op DEPS executor)
 op_library(unsqueeze_op DEPS reshape_op)
 op_library(squeeze_op DEPS reshape_op)
 op_library(extract_rows_op DEPS memory)

paddle/fluid/operators/batch_norm_op.cu.cc

Lines changed: 0 additions & 21 deletions
@@ -141,27 +141,6 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
           bias->template data<BatchNormParamType<T>>(),
           est_mean->template data<BatchNormParamType<T>>(),
           est_var->template data<BatchNormParamType<T>>(), epsilon));
-
-      VLOG(3) << "before tensor copy";
-      Tensor mean_, var_, x_, y_;
-      framework::TensorCopy(*est_mean, platform::CPUPlace(), dev_ctx, &mean_);
-      framework::TensorCopy(*est_var, platform::CPUPlace(), dev_ctx, &var_);
-      framework::TensorCopy(*x, platform::CPUPlace(), dev_ctx, &x_);
-      framework::TensorCopy(*y, platform::CPUPlace(), dev_ctx, &y_);
-      VLOG(3) << "after tensor copy";
-      auto check_tensor = [&](const Tensor& check) {
-        float sum = .0;
-        for(size_t i=0; i < check.numel(); ++i) {
-          sum += check.data<float>()[i];
-        }
-        return sum;
-      };
-      VLOG(3) << "BatchNormKernel";
-      VLOG(3) << "mean" << check_tensor(mean_);
-      VLOG(3) << "var" << check_tensor(var_);
-      VLOG(3) << "x" << check_tensor(x_);
-      VLOG(3) << "y" << check_tensor(y_);
-
     } else {
       // Run training mode.
       // obtain running mean and running inv var, and see if we need to

paddle/fluid/operators/conv_cudnn_op.cu.cc

Lines changed: 7 additions & 7 deletions
@@ -43,7 +43,6 @@ template <typename T>
 class CUDNNConvOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    VLOG(3) << "inside cudnn";
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                    "It must use CUDAPlace.");
     auto* input = ctx.Input<Tensor>("Input");
@@ -60,7 +59,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
-    VLOG(3) << "get all inputs";
+
     // ------------------- cudnn descriptors ---------------------
     ScopedTensorDescriptor input_desc;
     ScopedTensorDescriptor output_desc;
@@ -73,7 +72,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
 
     cudnnConvolutionDescriptor_t cudnn_conv_desc =
         conv_desc.descriptor<T>(paddings, strides, dilations);
-    VLOG(3) << "create tensor descriptor";
+
 #if CUDNN_VERSION_MIN(7, 0, 1)
     // cudnn 7 can support groups, no need to do it mannually
     // FIXME(typhoonzero): find a better way to disable groups
@@ -82,7 +81,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
-    VLOG(3) << "before create tensor descriptor";
+
     cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
         layout, framework::vectorize2int(input->dims()), groups);
     cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
@@ -112,7 +111,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
       output_height = output->dims()[2];
       output_width = output->dims()[3];
     }
-    VLOG(3) << "after create tensor descriptor";
+
     int group_offset_in =
         input_channels / groups * input_height * input_width * input_depth;
     int group_offset_out =
@@ -129,7 +128,6 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto handle = dev_ctx.cudnn_handle();
 
-    VLOG(3) << "set cudnn algorithm";
     CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
@@ -150,7 +148,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
     }
 #endif
-    VLOG(3) << "before get workspace";
+
     // get workspace size able to allocate
     CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
@@ -159,6 +157,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     // the limit because the algo is overrided to use tensor core.
     PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
                       "workspace_size to be allocated exceeds the limit");
+
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
     for (int i = 0; i < groups; i++) {
@@ -312,6 +311,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
           cudnn_filter_desc, filter_algo, &tmp_size));
       workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
     }
+
     // ------------------- cudnn conv backward data ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
     if (input_grad) {

paddle/fluid/operators/fetch_op.cc

Lines changed: 0 additions & 2 deletions
@@ -42,8 +42,6 @@ class FetchOp : public framework::OperatorBase {
                    "Cannot find out_var in scope, out_var_name is %s",
                    out_name);
 
-    VLOG(3) << "fetch_var ptr " << fetch_var << " is " << (fetch_var == nullptr);
-    VLOG(3) << "out_var ptr " << out_var << " is " << (out_var == nullptr);
     auto col = static_cast<size_t>(Attr<int>("col"));
 
     auto *fetch_list = out_var->GetMutable<framework::FeedFetchList>();

paddle/fluid/operators/label_smooth_op.cc

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ class LabelSmoothOp : public framework::OperatorWithKernel {
     auto in_dims = ctx->GetInputDim("X");
     if (ctx->HasInput("PriorDist")) {
       auto noise_dims = ctx->GetInputDim("PriorDist");
-      int64_t noise_numel = paddle::framework::product(noise_dims);
+      auto noise_numel = paddle::framework::product(noise_dims);
       PADDLE_ENFORCE(
           in_dims[1] == noise_numel,
           "The number of elements in Input(PriorDist) must be equal to the "

paddle/fluid/operators/load_combine_op.cc

Lines changed: 27 additions & 24 deletions
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <fstream>
-#include <vector>
+#include <memory>
 #include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device_context.h"
@@ -33,10 +33,15 @@ class LoadCombineOp : public framework::OperatorBase {
                const platform::Place &place) const override {
     auto filename = Attr<std::string>("file_path");
     auto load_as_fp16 = Attr<bool>("load_as_fp16");
-
-    std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
-    //std::ifstream fin(filename, std::ios_base::in);
-    PADDLE_ENFORCE(!fin.bad(),
+    auto format = Attr<std::string>("format");
+    std::unique_ptr<std::ifstream> fin;
+    if (format == "windows") {
+      fin.reset(new std::ifstream(filename,
+                                  std::ios_base::in | std::ios_base::binary));
+    } else {
+      fin.reset(new std::ifstream(filename));
+    }
+    PADDLE_ENFORCE(static_cast<bool>(*fin),
                    "Cannot open file %s for load_combine op", filename);
 
     auto out_var_names = Outputs("Out");
@@ -48,32 +53,20 @@ class LoadCombineOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(place);
 
     for (size_t i = 0; i < out_var_names.size(); i++) {
-      VLOG(3) << "load variable " << out_var_names[i];
       auto *out_var = scope.FindVar(out_var_names[i]);
 
       PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
                      out_var_names[i]);
 
       auto *tensor = out_var->GetMutable<framework::LoDTensor>();
-      VLOG(3) << "Get Tensor";
+
       // Error checking
-      PADDLE_ENFORCE(!fin.bad(), "Cannot read more from file %s",
+      PADDLE_ENFORCE(static_cast<bool>(*fin), "Cannot read more from file %s",
                      filename);
-      VLOG(3) << "before deserialization";
+
       // Get data from fin to tensor
-      DeserializeFromStream(fin, tensor, dev_ctx);
-      // VLOG(3) << "after deserialization";
-      // framework::Tensor check;
-      // framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
-      // float sum = .0;
-      // for(size_t i=0; i < check.numel(); ++i) {
-      //   if(std::type_index(check.type()) == std::type_index(typeid(int64_t))) {
-      //     sum += static_cast<float>(check.data<int64_t>()[i]);
-      //   } else {
-      //     sum += check.data<float>()[i];
-      //   }
-      // }
-      // VLOG(3) << "sum result" << sum;
+      DeserializeFromStream(*fin, tensor, dev_ctx);
+
       auto in_dtype = framework::ToDataType(tensor->type());
       auto out_dtype =
           load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
@@ -93,9 +86,7 @@ class LoadCombineOp : public framework::OperatorBase {
         tensor = out_var->GetMutable<framework::LoDTensor>();
         tensor->set_lod(fp16_tensor.lod());
         tensor->ShareDataWith(fp16_tensor);
-
       }
-      VLOG(3) << "load " << out_var_names[i] << " finished";
     }
   }
 };
@@ -119,6 +110,18 @@ class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
                          "LoDTensors will be loaded from \"file_path\".")
         .AddCustomChecker(
             [](const std::string &path) { return !path.empty(); });
+    AddAttr<std::string>("format",
+                         R"DOC((windows|linux)" "saved model file format
+windows and linux file newline symbol is
+different. windows(newline is \n\r) or linux(newline is \r)
+So if you set attribute format to windows, then we saved model file in binary.
+It can be used both linux and windows. If you set format to linux,
+it will save file in normal file, newline symbol is \r. Need to note
+that these two format is not inter-compatible.)DOC")
+        .SetDefault("linux")
+        .AddCustomChecker([](const std::string &s) {
+          return s == "windows" || s == "linux";
+        });
     AddComment(R"DOC(
 LoadCombine Operator.
 
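Two details of this change are easy to miss. First, the open-check moved from !fin.bad() to static_cast<bool>(*fin): a failed open sets the stream's failbit, which bad() ignores (it reports only badbit), so the old check passed even when the file never opened; the stream's operator bool reports both bits. A standalone sketch of the difference, with a hypothetical file name and no Paddle code:

#include <fstream>
#include <iostream>

int main() {
  std::ifstream fin("no_such_model_file");  // hypothetical path; open fails, failbit set
  std::cout << std::boolalpha;
  // bad() reports only badbit (a corrupted stream), so the old check
  // !fin.bad() still passes here and misses the failed open.
  std::cout << "!fin.bad():             " << !fin.bad() << "\n";              // true
  // operator bool (what static_cast<bool> invokes) is false whenever failbit
  // or badbit is set, so it correctly rejects the unopened stream.
  std::cout << "static_cast<bool>(fin): " << static_cast<bool>(fin) << "\n";  // false
  return 0;
}

Second, the binary open mode selected by format == "windows" matters because text-mode streams on Windows translate \r\n to \n on input and treat byte 0x1A as end-of-file, either of which silently corrupts serialized tensor bytes; binary mode reads the file untranslated.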
