Skip to content

Commit 6dfd7e3

Browse files
committed
Merge pull request opencv#10850 from dkurt:dnn_tf_deconv_tests
2 parents cfe84b9 + a6baedd commit 6dfd7e3

File tree

4 files changed

+136
-18
lines changed

4 files changed

+136
-18
lines changed

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,8 +1025,25 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
10251025
int inpH = inputs[0][2];
10261026
int inpW = inputs[0][3];
10271027

1028-
int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
1029-
int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
1028+
int outH = -1, outW = -1;
1029+
if (padMode.empty())
1030+
{
1031+
outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
1032+
outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
1033+
}
1034+
else if (padMode == "VALID")
1035+
{
1036+
outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height;
1037+
outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width;
1038+
}
1039+
else if (padMode == "SAME")
1040+
{
1041+
outH = stride.height * (inpH - 1) + 1 + adjustPad.height;
1042+
outW = stride.width * (inpW - 1) + 1 + adjustPad.width;
1043+
}
1044+
else
1045+
CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
1046+
10301047
int outCn = numOutput;
10311048

10321049
CV_Assert(outCn % blobs[0].size[1] == 0);
@@ -1048,6 +1065,14 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
10481065
return false;
10491066
}
10501067

1068+
void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
1069+
{
1070+
BaseConvolutionLayerImpl::finalize(inputs, outputs);
1071+
getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
1072+
Size(inputs[0]->size[3], inputs[0]->size[2]),
1073+
kernel, stride, padMode, dilation, pad);
1074+
}
1075+
10511076
class MatMulInvoker : public ParallelLoopBody
10521077
{
10531078
public:
@@ -1214,6 +1239,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
12141239
int kernel_h, int kernel_w,
12151240
int pad_h, int pad_w,
12161241
int stride_h, int stride_w,
1242+
int height_col, int width_col,
12171243
float* data_im,
12181244
const float* biasvec,
12191245
bool is1x1)
@@ -1227,8 +1253,8 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
12271253
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
12281254
t.pad_h = pad_h; t.pad_w = pad_w;
12291255
t.stride_h = stride_h; t.stride_w = stride_w;
1230-
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
1231-
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
1256+
t.height_col = height_col;
1257+
t.width_col = width_col;
12321258
t.nstripes = nstripes;
12331259
t.is1x1 = is1x1;
12341260
t.biasvec = biasvec;
@@ -1418,6 +1444,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
14181444
const Mat& inp = *inputs[ii];
14191445
Mat& out = outputs[ii];
14201446
int numImg = inp.size[0];
1447+
int inpH = inp.size[2], inpW = inp.size[3];
14211448
int outH = out.size[2], outW = out.size[3];
14221449

14231450
Mat convBlob = inputs[ii]->reshape(1, numImg*inpCn);
@@ -1440,7 +1467,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
14401467

14411468
Col2ImInvoker::run(colMat.ptr<float>(), outGroupCn, outH, outW,
14421469
kernel.height, kernel.width, pad.height, pad.width,
1443-
stride.height, stride.width, dstMat.ptr<float>(),
1470+
stride.height, stride.width, inpH, inpW, dstMat.ptr<float>(),
14441471
curBiasMat.ptr<float>(), is1x1flag);
14451472
}
14461473
}

modules/dnn/src/layers/mvn_layer.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,14 @@ class MVNLayerImpl : public MVNLayer
267267
int i, newRows = 1;
268268
for( i = 0; i < splitDim; i++ )
269269
newRows *= inpBlob.size[i];
270+
271+
if (inpBlob.total() == newRows)
272+
{
273+
// MVN is applied to single values at every row.
274+
outBlob.setTo(0);
275+
return;
276+
}
277+
270278
Mat inpMat = inpBlob.reshape(1, newRows);
271279
Mat outMat = outBlob.reshape(1, newRows);
272280

modules/dnn/src/tensorflow/tf_importer.cpp

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,8 +1160,35 @@ void TFImporter::populateNet(Net dstNet)
11601160
int id;
11611161
if (scaleMat.total() == 1) // is a scalar.
11621162
{
1163-
layerParams.set("scale", scaleMat.at<float>(0));
1164-
id = dstNet.addLayer(name, "Power", layerParams);
1163+
// Try to match with a LeakyRelu:
1164+
// node {
1165+
// name: "LeakyRelu/mul"
1166+
// op: "Mul"
1167+
// input: "LeakyRelu/alpha"
1168+
// input: "input"
1169+
// }
1170+
// node {
1171+
// name: "LeakyRelu/Maximum"
1172+
// op: "Maximum"
1173+
// input: "LeakyRelu/mul"
1174+
// input: "input"
1175+
// }
1176+
StrIntVector next_layers = getNextLayers(net, name, "Maximum");
1177+
if (!next_layers.empty())
1178+
{
1179+
int maximumLayerIdx = next_layers[0].second;
1180+
ExcludeLayer(net, maximumLayerIdx, 0, false);
1181+
layers_to_ignore.insert(next_layers[0].first);
1182+
1183+
layerParams.set("negative_slope", scaleMat.at<float>(0));
1184+
id = dstNet.addLayer(name, "ReLU", layerParams);
1185+
}
1186+
else
1187+
{
1188+
// Just a multiplication.
1189+
layerParams.set("scale", scaleMat.at<float>(0));
1190+
id = dstNet.addLayer(name, "Power", layerParams);
1191+
}
11651192
}
11661193
else // is a vector
11671194
{
@@ -1241,16 +1268,37 @@ void TFImporter::populateNet(Net dstNet)
12411268
if (layer.input_size() != 5)
12421269
CV_Error(Error::StsNotImplemented,
12431270
"Expected gamma, beta, mean and std");
1271+
Pin inpId = parsePin(layer.input(0));
1272+
1273+
bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
12441274

12451275
layerParams.blobs.resize(4);
1246-
// gamma
1247-
blobFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[2]);
1248-
// beta
1249-
blobFromTensor(getConstBlob(layer, value_id, 2), layerParams.blobs[3]);
1250-
// mean
1251-
blobFromTensor(getConstBlob(layer, value_id, 3), layerParams.blobs[0]);
1252-
// std
1253-
blobFromTensor(getConstBlob(layer, value_id, 4), layerParams.blobs[1]);
1276+
Mat gamma, beta, mean, std;
1277+
blobFromTensor(getConstBlob(layer, value_id, 1), gamma);
1278+
blobFromTensor(getConstBlob(layer, value_id, 2), beta);
1279+
if (isTraining)
1280+
{
1281+
mean = Mat::zeros(1, beta.total(), CV_32F);
1282+
std = Mat::ones(1, beta.total(), CV_32F);
1283+
1284+
// Add an extra layer: Mean-Variance normalization
1285+
LayerParams mvnParams;
1286+
std::string mvnName = name + "/MVN";
1287+
CV_Assert(layer_id.find(mvnName) == layer_id.end());
1288+
int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
1289+
layer_id[mvnName] = mvnId;
1290+
connect(layer_id, dstNet, inpId, mvnId, 0);
1291+
inpId = Pin(mvnName);
1292+
}
1293+
else
1294+
{
1295+
blobFromTensor(getConstBlob(layer, value_id, 3), mean);
1296+
blobFromTensor(getConstBlob(layer, value_id, 4), std);
1297+
}
1298+
layerParams.blobs[0] = mean;
1299+
layerParams.blobs[1] = std;
1300+
layerParams.blobs[2] = gamma;
1301+
layerParams.blobs[3] = beta;
12541302

12551303
if (hasLayerAttr(layer, "epsilon"))
12561304
layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
@@ -1262,7 +1310,7 @@ void TFImporter::populateNet(Net dstNet)
12621310
layer_id[name] = id;
12631311

12641312
// one input only
1265-
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
1313+
connect(layer_id, dstNet, inpId, id, 0);
12661314
}
12671315
else if (type == "Conv2DBackpropInput")
12681316
{
@@ -1293,13 +1341,42 @@ void TFImporter::populateNet(Net dstNet)
12931341
kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
12941342

12951343
const int* kshape = layerParams.blobs[0].size.p;
1296-
layerParams.set("kernel_h", kshape[2]);
1297-
layerParams.set("kernel_w", kshape[3]);
1344+
const int kernelH = kshape[2];
1345+
const int kernelW = kshape[3];
1346+
layerParams.set("kernel_h", kernelH);
1347+
layerParams.set("kernel_w", kernelW);
12981348
layerParams.set("num_output", kshape[1]);
12991349

13001350
setStrides(layerParams, layer);
13011351
setPadding(layerParams, layer);
13021352

1353+
// For convolution layer, output shape computes as
1354+
// o = 1 + (i - k + 2*p) / s
1355+
// i - input size, o - output size, k - kernel size, p - pad, s - stride
1356+
// In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
1357+
// considering that k is odd.
1358+
// SAME: o = 1 + (i - 1) / s
1359+
// VALID: o = 1 + i / s
1360+
// Deconvolution's layer output shape computes as
1361+
// SAME: o = 1 + (i - 1)*s
1362+
// VALID: o = (i - 1)*s
1363+
// If output_shape differs from the formulas above, adjusted padding is applied.
1364+
1365+
const int strideY = layerParams.get<int>("stride_h");
1366+
const int strideX = layerParams.get<int>("stride_w");
1367+
Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
1368+
const int outH = outShape.at<int>(2);
1369+
const int outW = outShape.at<int>(1);
1370+
if (layerParams.get<String>("pad_mode") == "SAME")
1371+
{
1372+
layerParams.set("adj_w", (outW - 1) % strideX);
1373+
layerParams.set("adj_h", (outH - 1) % strideY);
1374+
}
1375+
else if (layerParams.get<String>("pad_mode") == "VALID")
1376+
{
1377+
layerParams.set("adj_w", (outW - kernelW) % strideX);
1378+
layerParams.set("adj_h", (outH - kernelH) % strideY);
1379+
}
13031380
int id = dstNet.addLayer(name, "Deconvolution", layerParams);
13041381
layer_id[name] = id;
13051382

modules/dnn/test/test_tf_importer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ TEST(Test_TensorFlow, batch_norm)
150150
runTensorFlowNet("batch_norm");
151151
runTensorFlowNet("fused_batch_norm");
152152
runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
153+
runTensorFlowNet("mvn_batch_norm");
154+
runTensorFlowNet("mvn_batch_norm_1x1");
153155
}
154156

155157
OCL_TEST(Test_TensorFlow, batch_norm)
@@ -170,6 +172,10 @@ TEST(Test_TensorFlow, pooling)
170172
TEST(Test_TensorFlow, deconvolution)
171173
{
172174
runTensorFlowNet("deconvolution");
175+
runTensorFlowNet("deconvolution_same");
176+
runTensorFlowNet("deconvolution_stride_2_same");
177+
runTensorFlowNet("deconvolution_adj_pad_valid");
178+
runTensorFlowNet("deconvolution_adj_pad_same");
173179
}
174180

175181
OCL_TEST(Test_TensorFlow, deconvolution)

0 commit comments

Comments
 (0)