Commit abce9eb

Merge pull request #4953 from tensor-tang/merge_grad_gtest
refine the mkldnn logic
2 parents: c91de28 + 5c892db

16 files changed: +612 / -711 lines

paddle/gserver/activations/MKLDNNActivation.cpp

Lines changed: 3 additions & 3 deletions
```diff
@@ -126,7 +126,7 @@ void MKLDNNEltwiseActivation::resetFwd(Argument& act) {
   copyInVal_ = nullptr;
   if (act.grad && algo == algorithm::eltwise_tanh) {
     // tanh need save src input for backward
-    inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
+    inVal_ = MKLDNNMatrix::create(val_->getPrimitiveDesc());
     copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
     CHECK(copyInVal_) << "should not be emptry";
     pipelineFwd_.push_back(*copyInVal_);
@@ -145,7 +145,7 @@ void MKLDNNEltwiseActivation::resetBwd(Argument& act) {
   algorithm algo = getAlgo(this->getName());
   float alpha = getBwdAlpha();
   float beta = getBeta();
-  grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
+  grad_ = MKLDNNMatrix::create(val_->getPrimitiveDesc(), act.grad);
   auto eng = CPUEngine::Instance().getEngine();
   auto bwdDesc = eltwise_bwd::desc(
       algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
@@ -230,7 +230,7 @@ void MKLDNNActivation::resetFwd(Argument& act) {
   int ic = cnt_ / bs / ih / iw;
   CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
   val_ = MKLDNNMatrix::create(
-      act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_);
+      {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_, act.value);
   CHECK(val_);
   val_->downSpatial();
 }
```
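Taken together, these hunks track a signature change in the `MKLDNNMatrix::create` factory: the `Matrix` argument moves to the end of the parameter list and becomes optional, so call sites that previously passed `nullptr` to request an internally allocated buffer can simply omit it. A minimal, self-contained sketch of that refactor pattern; the types below are simplified stand-ins, not Paddle's real classes:

```cpp
// Sketch of the "trailing optional buffer" refactor these hunks apply.
// PrimDesc and DnnMatrix are hypothetical stand-ins for
// mkldnn::memory::primitive_desc and paddle::MKLDNNMatrix.
#include <iostream>
#include <memory>
#include <vector>

struct Matrix { std::vector<float> data; };
using MatrixPtr = std::shared_ptr<Matrix>;

struct PrimDesc { size_t elems; };  // describes the required buffer size

struct DnnMatrix {
  MatrixPtr buf;
  // The matrix argument is now last and defaults to nullptr, mirroring
  // create(val_->getPrimitiveDesc()) vs. create(pd, act.grad) above.
  static std::shared_ptr<DnnMatrix> create(PrimDesc pd,
                                           MatrixPtr m = nullptr) {
    if (!m) {  // no external buffer given: allocate one internally
      m = std::make_shared<Matrix>();
      m->data.resize(pd.elems);
    }
    return std::make_shared<DnnMatrix>(DnnMatrix{m});
  }
};

int main() {
  auto internal = DnnMatrix::create({64});                // self-allocated
  auto wrapped = DnnMatrix::create({64}, internal->buf);  // shares a buffer
  std::cout << (internal->buf == wrapped->buf) << "\n";   // prints 1
}
```

The same reordering explains the third hunk, where `act.value` moves from the first to the last argument of the dims/format overload.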

paddle/gserver/layers/MKLDNNBase.h

Lines changed: 2 additions & 2 deletions
```diff
@@ -21,8 +21,8 @@ namespace paddle {
 typedef enum {
   MKLDNN_BASE = 1,   // basical info of MKLDNN
   MKLDNN_TESTS = 1,  // gtest info of MKLDNN
-  MKLDNN_SIZES = 2,  // size info of MKLDNN
-  MKLDNN_FMTS = 3,   // format info of MKLDNN
+  MKLDNN_FMTS = 2,   // format info of MKLDNN
+  MKLDNN_SIZES = 3,  // size info of MKLDNN
   MKLDNN_ALL = 4,    // show all info of MKLDNN
 } MKLDNN_LOG_LEVEL;
```
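The swap matters because these constants act as glog-style VLOG levels: a message is printed only when its level is at or below the verbosity flag, so giving `MKLDNN_FMTS` the lower number makes format logging visible at a lower verbosity than size logging. A self-contained sketch of that gating behavior, assuming glog semantics; the `MKLDNN_VLOG` macro and `g_verbosity` flag are stand-ins, not Paddle's actual logging API:

```cpp
// Demonstrates how VLOG-style levels gate output. With verbosity 2,
// MKLDNN_FMTS (now 2) messages appear while MKLDNN_SIZES (now 3) are
// suppressed; before this commit the two were reversed.
#include <iostream>

typedef enum {
  MKLDNN_BASE = 1,
  MKLDNN_TESTS = 1,
  MKLDNN_FMTS = 2,
  MKLDNN_SIZES = 3,
  MKLDNN_ALL = 4,
} MKLDNN_LOG_LEVEL;

static int g_verbosity = 2;  // stand-in for glog's --v flag

// Stand-in for VLOG(level): the chained << operands are only evaluated
// when the level is enabled, because they sit inside the if body.
#define MKLDNN_VLOG(level) \
  if ((level) <= g_verbosity) std::cout << "[v" << (level) << "] "

int main() {
  MKLDNN_VLOG(MKLDNN_FMTS) << "weight value format: nchw\n";  // printed
  MKLDNN_VLOG(MKLDNN_SIZES) << "ic=3 ih=32 iw=32\n";          // suppressed
}
```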

paddle/gserver/layers/MKLDNNConvLayer.cpp

Lines changed: 34 additions & 202 deletions
```diff
@@ -116,8 +116,6 @@ void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
   resetFwdBuffers(fwdPD_, in, wgt, bias, out);

   resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
-
-  printValueFormatFlow();
 }

 void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
@@ -135,12 +133,6 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
   resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out);

   resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
-
-  printGradFormatFlow();
-}
-
-void MKLDNNConvLayer::updateInputData() {
-  cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
 }

 void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
@@ -211,11 +203,18 @@ void MKLDNNConvLayer::resetFwdBuffers(
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
   CHECK(pd);
-  resetInValue(pd, in);
+  resetInValue(
+      in, std::make_shared<memory::primitive_desc>(pd->src_primitive_desc()));
+
+  resetOutValue(out, pd->dst_primitive_desc());

-  resetWgtBiasValue(pd, wgt, bias);
+  resetWithMatrix(wgt, weight_->getW(), pd->weights_primitive_desc());

-  resetOutValue(pd, out);
+  if (biases_ && biases_->getW()) {
+    resetWithMatrix(bias, biases_->getW(), pd->bias_primitive_desc());
+  } else {
+    bias = nullptr;
+  }
 }

 void MKLDNNConvLayer::resetFwdPipeline(
@@ -225,104 +224,12 @@ void MKLDNNConvLayer::resetFwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  if (cvtInVal_) {
-    pipeline.push_back(*cvtInVal_);
-  }
-
   if (bias) {
     fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out));
   } else {
     fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out));
   }
   pipeline.push_back(*fwd_);
-
-  if (cvtOutVal_) {
-    pipeline.push_back(*cvtOutVal_);
-  }
-}
-
-void MKLDNNConvLayer::resetInValue(
-    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
-  const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
-  in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
-
-  // create buffer and reorder if input value do not match
-  cpuInVal_ = nullptr;
-  cvtInVal_ = nullptr;
-
-  MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
-  CHECK_EQ(inputIsOnlyMKLDNN(), dnnIn != nullptr);
-  if (dnnIn != nullptr && dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()) {
-    in = dnnIn;
-    return;
-  }
-  if (dnnIn) {
-    if (dnnIn->getFormat() == format::nc) {
-      CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format";
-      // create a new one with nchw format and same data
-      memory::dims inDims = memory::dims{bs_, ic_, 1, 1};
-      dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_);
-    }
-    if (dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()) {
-      in = dnnIn;
-      return;
-    }
-    cpuInVal_ = dnnIn;
-    in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc());
-    cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in);
-    CHECK(cvtInVal_) << "should not be emptry";
-  } else {
-    memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_};
-    cpuInVal_ = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_);
-    if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
-      // create new mkldnn matrix
-      in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc());
-      cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in);
-      CHECK(cvtInVal_) << "should not be emptry";
-    } else {
-      in = cpuInVal_;
-    }
-  }
-}
-
-void MKLDNNConvLayer::resetWgtBiasValue(
-    std::shared_ptr<conv_fwd::primitive_desc>& pd,
-    MKLDNNMatrixPtr& wgt,
-    MKLDNNMatrixPtr& bias) {
-  wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc());
-  VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat();
-
-  bias = (biases_ && biases_->getW())
-             ? MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc())
-             : nullptr;
-}
-
-void MKLDNNConvLayer::resetOutValue(
-    std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& out) {
-  out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc());
-
-  // create reorder if output value has cpu device and pd do not match
-  cpuOutVal_ = nullptr;
-  cvtOutVal_ = nullptr;
-  if (!outputIsOnlyMKLDNN()) {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
-    memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
-    cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
-    if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
-      out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
-      cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
-      CHECK(cvtOutVal_) << "should not be empty";
-    } else {
-      cpuOut->setData(output_.value->getData());
-      cpuOutVal_ = out;
-    }
-    // when output is cpu device, change the mkldnn output value and make them
-    // share the same data. Then if next layer use inputlayer->getOuputValue()
-    // to achieve the input value, it will get the right data.
-    output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
-    return;
-  }
-  output_.value = std::dynamic_pointer_cast<Matrix>(out);
 }

 void MKLDNNConvLayer::resetBwdWgtPD(
@@ -331,8 +238,8 @@ void MKLDNNConvLayer::resetBwdWgtPD(
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);

   // create backward weight using input, output and weight value memory desc
-  CHECK(inVal_) << "Should have input value";
-  CHECK(outVal_) << "Should have output value";
+  CHECK(inVal_) << "Should have internal input value";
+  CHECK(outVal_) << "Should have internal output value";
   CHECK(wgtVal_) << "Should have weight value";
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
@@ -372,8 +279,8 @@ void MKLDNNConvLayer::resetBwdDataPD(

   memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
-  CHECK(inVal_) << "Should have input value";
-  CHECK(outVal_) << "Should have output value";
+  CHECK(inVal_) << "Should have internal input value";
+  CHECK(outVal_) << "Should have internal output value";
   // create backward data using input and output value memory desc
   // but using weight memory desc with any format
   auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
@@ -399,12 +306,27 @@ void MKLDNNConvLayer::resetBwdBuffers(
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
   CHECK(wgtPD);
-  resetOutGrad(wgtPD, out);
+  resetOutGrad(out, wgtPD->diff_dst_primitive_desc());

-  resetWgtBiasGrad(wgtPD, wgt, bias);
+  resetWithMatrix(
+      wgt, weight_->getWGrad(), wgtPD->diff_weights_primitive_desc());
+  CHECK(wgtVal_ != nullptr &&
+        wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
+      << "primitive desc of weight grad and value should be equal";

-  resetInGrad(dataPD, in);
+  bias = nullptr;
+  if (biases_ && biases_->getWGrad()) {
+    resetWithMatrix(
+        bias, biases_->getWGrad(), wgtPD->diff_bias_primitive_desc());
+    CHECK(bias && biasVal_ &&
+          bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
+        << "primitive desc of bias grad should equal the bias value";
+  }

+  if (dataPD == nullptr) {
+    return;
+  }
+  resetInGrad(in, dataPD->diff_src_primitive_desc());
   resetWgtValBwdData(dataPD, wgtValBwdData_);
 }

@@ -416,10 +338,7 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  if (cvtOutGrad_) {
-    pipeline.push_back(*cvtOutGrad_);
-  }
-
+  CHECK(inVal_);
   // add bwdWgt handle
   if (bias) {
     bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
@@ -431,99 +350,13 @@ void MKLDNNConvLayer::resetBwdPipeline(
   if (dataPD == nullptr) {
     return;
   }
-
   if (cvtWgtVal_) {
     pipeline.push_back(*cvtWgtVal_);
   }
-
   // add bwdData handle
   CHECK(wgtValBwdData_) << "Should have weight memory";
   bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in));
   pipeline.push_back(*bwdData_);
-
-  if (cvtInGrad_) {
-    pipeline.push_back(*cvtInGrad_);
-  }
-}
-
-void MKLDNNConvLayer::resetOutGrad(
-    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
-  cpuOutGrad_ = nullptr;
-  cvtOutGrad_ = nullptr;
-  CHECK(outVal_ != nullptr &&
-        outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
-      << "primitive desc of out grad and value should be equal";
-  if (outputIsOnlyMKLDNN()) {
-    MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
-  } else {
-    const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
-    // always share the same grad data of CPU output
-    // then the activation can get the right grad from output_.grad
-    output_.grad->setData(cpuOut->getData());
-    // same PrimitiveDesc with cpuInVal_
-    CHECK(cpuOutVal_);
-    cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
-    // create reorder if primitive desc does not match
-    if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
-      out = MKLDNNMatrix::create(nullptr, outVal_->getPrimitiveDesc());
-      cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
-      CHECK(cvtOutGrad_);
-    } else {
-      out = cpuOutGrad_;
-    }
-  }
-}
-
-void MKLDNNConvLayer::resetWgtBiasGrad(
-    std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
-    MKLDNNMatrixPtr& wgt,
-    MKLDNNMatrixPtr& bias) {
-  wgt = MKLDNNMatrix::create(weight_->getWGrad(),
-                             wgtPD->diff_weights_primitive_desc());
-  CHECK(nullptr != wgtVal_ &&
-        wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc())
-      << "primitive desc of weight grad and value should be equal";
-  VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat();
-
-  bias = nullptr;
-  if (biasVal_ == nullptr) {
-    return;
-  }
-  bias = MKLDNNMatrix::create(biases_->getWGrad(),
-                              wgtPD->diff_bias_primitive_desc());
-  CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc())
-      << "primitive desc of bias grad should equal the bias value";
-}
-
-void MKLDNNConvLayer::resetInGrad(
-    std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
-    MKLDNNMatrixPtr& in) {
-  in = nullptr;
-  cpuInGrad_ = nullptr;
-  cvtInGrad_ = nullptr;
-  if (dataPD == nullptr) {
-    return;
-  }
-
-  if (inputIsOnlyMKLDNN()) {
-    MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
-    CHECK(nullptr != inVal_ &&
-          in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
-        << "primitive desc of input grad and value should be equal";
-  } else {
-    const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
-    // same PrimitiveDesc with cpuInVal_
-    CHECK(cpuInVal_);
-    cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
-    in = cpuInGrad_;
-    // create reorder if PrimitiveDesc does not match
-    if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
-      in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
-                                dataPD->diff_src_primitive_desc());
-      cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
-      CHECK(cvtInGrad_);
-    }
-  }
 }

 void MKLDNNConvLayer::resetWgtValBwdData(
@@ -537,8 +370,7 @@ void MKLDNNConvLayer::resetWgtValBwdData(
   // since the primitive_desc would be different with wgtVal_
   CHECK(wgtVal_) << "should have weight value";
   if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) {
-    wgtValBwdData_ =
-        MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc());
+    wgtValBwdData_ = MKLDNNMatrix::create(dataPD->weights_primitive_desc());
    cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_);
     CHECK(cvtWgtVal_);
   } else {
```
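The bulk of the deletions above (`resetInValue`, `resetOutValue`, `resetWgtBiasValue`, `resetOutGrad`, `resetWgtBiasGrad`, `resetInGrad` and the `cvt*` reorder bookkeeping) is not lost logic: the conv layer now delegates its CPU/MKLDNN buffer setup to shared `MKLDNNLayer` helpers such as `resetInValue`, `resetOutGrad`, and the new `resetWithMatrix`. A sketch of what a `resetWithMatrix` helper like the one called above could look like, inferred from its call sites in this diff rather than copied from Paddle's base class; all types are simplified stand-ins:

```cpp
// Inferred sketch of the shared resetWithMatrix helper: wrap an existing
// parameter matrix with the primitive descriptor chosen by the MKL-DNN
// primitive, or clear the output pointer when there is no matrix (e.g. a
// layer configured without bias). Types are stand-ins, not Paddle's.
#include <iostream>
#include <memory>

struct Matrix {};
using MatrixPtr = std::shared_ptr<Matrix>;

struct PrimDesc {};  // stand-in for mkldnn::memory::primitive_desc

struct DnnMatrix {
  MatrixPtr buf;
  static std::shared_ptr<DnnMatrix> create(PrimDesc, MatrixPtr m) {
    return std::make_shared<DnnMatrix>(DnnMatrix{m});
  }
};
using DnnMatrixPtr = std::shared_ptr<DnnMatrix>;

void resetWithMatrix(DnnMatrixPtr& dnn, const MatrixPtr& mat, PrimDesc pd) {
  dnn = nullptr;  // drop any stale wrapper first
  if (mat == nullptr) {
    return;       // e.g. no bias configured: caller keeps nullptr
  }
  dnn = DnnMatrix::create(pd, mat);
}

int main() {
  DnnMatrixPtr wgt;
  resetWithMatrix(wgt, std::make_shared<Matrix>(), PrimDesc{});
  std::cout << (wgt != nullptr) << "\n";  // prints 1

  DnnMatrixPtr bias;
  resetWithMatrix(bias, nullptr, PrimDesc{});  // no bias configured
  std::cout << (bias != nullptr) << "\n";      // prints 0
}
```

With the conversion and reorder handling centralized this way, `resetFwdPipeline` and `resetBwdPipeline` only push the compute primitives themselves, which is why the `cvtInVal_`/`cvtOutVal_`/`cvtOutGrad_`/`cvtInGrad_` pushes disappear from the pipelines above.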
