- /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
+ // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.

#include <vector>
#include "paddle/fluid/framework/executor.h"
@@ -138,6 +138,10 @@ class WhileGradOp : public framework::OperatorBase {
        auto inside_og_name = inside_og_names[i];
        VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
                << inside_og_name;
+       if (scope.FindVar(outside_og_name) == nullptr) {
+         continue;
+       }
+
        auto &og_outside =
            detail::Ref(scope.FindVar(outside_og_name),
                        "Cannot find Outside Gradient %s", outside_og_name);
@@ -167,20 +171,46 @@ class WhileGradOp : public framework::OperatorBase {
              PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
            }
          }
+       } else {
+         PADDLE_THROW("Currently only support LoDTensor and LoDTensorArray.");
        }
      }
      executor.RunPreparedContext(ctx.get(), *cur_scope_iter, false, true,
                                  true);

-     auto &pg_names = Outputs(kXGRAD);
+     // The Outputs(kXGRAD) contains the names of the gradient of parameters
+     // and inputs.
+     auto &pg_ig_names = Outputs(kXGRAD);
      auto &p_names = Inputs(kX);
-     PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size());
-     for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
-       if (pg_names[param_id] == framework::kEmptyVarName) {
+     PADDLE_ENFORCE_EQ(pg_ig_names.size(), p_names.size());
+     for (size_t param_id = 0; param_id < pg_ig_names.size(); ++param_id) {
+       if (pg_ig_names[param_id] == framework::kEmptyVarName) {
          continue;  // parameter doesn't have gradient
        }
        auto inside_grad_name = framework::GradVarName(p_names[param_id]);

+       // for some grad_op, their input doesn't have gradient,
+       // for example lookup_table_grad_op, the input(Idx) doesn't have
+       // gradient.
+       auto pg_ig_var = cur_scope.FindVar(inside_grad_name);
+       PADDLE_ENFORCE(pg_ig_var != nullptr);
+       if (pg_ig_var->IsType<framework::LoDTensorArray>()) {
+         auto pg_ig_lod_t_arr =
+             pg_ig_var->GetMutable<framework::LoDTensorArray>();
+         bool empty = true;
+         for (auto &each : *pg_ig_lod_t_arr) {
+           if (each.numel() != 0) {
+             empty = false;
+             break;
+           }
+         }
+         if (empty) {
+           LOG(WARNING) << pg_ig_names[param_id]
+                        << " is not found in cur_scope.";
+           continue;
+         }
+       }
+
        // // TODO(tonyyang-svail): Not sure we need the following
        // // If does not compute gradient of that variable inside rnn,
        // just
@@ -194,14 +224,19 @@ class WhileGradOp : public framework::OperatorBase {
        if (cur_scope_iter == step_scopes->rbegin()) {
          auto *var = (*cur_scope_iter)->FindVar(inside_grad_name);
          PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name);
+         PADDLE_ENFORCE(var->IsType<framework::LoDTensorArray>() ||
+                            var->IsType<LoDTensor>(),
+                        "Currently the type of var only can be LoDTensorArray "
+                        "or LoDTensor.");
+
          if (var->IsType<LoDTensor>()) {
            auto &inside_tensor = var->Get<framework::LoDTensor>();
            framework::AttributeMap attrs;
            attrs["dtype"] = framework::ToDataType(inside_tensor.type());
            attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
            attrs["value"] = 0.0f;

-           auto var_name = pg_names[param_id];
+           auto var_name = pg_ig_names[param_id];
            auto zero_op = framework::OpRegistry::CreateOp(
                "fill_constant", framework::VariableNameMap{},
                {{"Out", {var_name}}}, attrs);
@@ -213,8 +248,8 @@ class WhileGradOp : public framework::OperatorBase {
        }
        auto new_inside_name = cur_scope.Rename(inside_grad_name);
        auto sum_op = framework::OpRegistry::CreateOp(
-           "sum", {{"X", {pg_names[param_id], new_inside_name}}},
-           {{"Out", {pg_names[param_id]}}},
+           "sum", {{"X", {pg_ig_names[param_id], new_inside_name}}},
+           {{"Out", {pg_ig_names[param_id]}}},
            framework::AttributeMap{{"use_mkldnn", {false}}});
        sum_op->Run(cur_scope, dev_place);
        cur_scope.Rename(new_inside_name, inside_grad_name);
@@ -281,6 +316,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
           parent_block->FindVarRecursive(input_name) != nullptr)) {
        continue;
      }
+
      output_grads.insert(input_name);
    }
    for (auto &output_name : op->OutputArgumentNames()) {
@@ -309,13 +345,13 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
  void operator()(const framework::OpDesc &op_desc,
                  framework::BlockDesc *block) const override {
    auto p_names = op_desc.Input(kX);
-   auto pg_names = op_desc.Output(framework::GradVarName(kX));
+   auto pg_ig_names = op_desc.Output(framework::GradVarName(kX));

    for (size_t i = 0; i < p_names.size(); ++i) {
      auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
-     auto *g_var = block->FindVarRecursive(pg_names[i]);
+     auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
      if (g_var != nullptr) {  // Gradient could be @EMPTY@
-       VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i]
+       VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
                << " type: " << p_var.GetType();
        g_var->SetType(p_var.GetType());
        g_var->SetDataType(p_var.GetDataType());
@@ -333,21 +369,21 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
    ctx->HasInputs(framework::GradVarName(kOutputs));

    auto p_names = ctx->Inputs(kX);
-   auto pg_names = ctx->Outputs(kXGRAD);
+   auto pg_ig_names = ctx->Outputs(kXGRAD);
    auto var_types = ctx->GetInputsVarType(kX);
    std::vector<std::string> names_to_set;
    std::vector<framework::DDim> dims_to_set;
    for (size_t i = 0; i < p_names.size(); ++i) {
-     if (pg_names[i] == framework::kEmptyVarName) {
+     if (pg_ig_names[i] == framework::kEmptyVarName) {
        continue;
      }
      auto dims = ctx->GetInputsElementDim(kX, i);
      if (var_types[i] == framework::proto::VarType::LOD_TENSOR) {
-       names_to_set.push_back(pg_names[i]);
+       names_to_set.push_back(pg_ig_names[i]);
        dims_to_set.push_back(dims);
      } else if (var_types[i] == framework::proto::VarType::LOD_TENSOR_ARRAY) {
        // not sure how to set the dim of LOD_TENSOR_ARRAY
-       names_to_set.push_back(pg_names[i]);
+       names_to_set.push_back(pg_ig_names[i]);
        dims_to_set.push_back(dims);
      }
    }