Skip to content

Commit d513a3b

Browse files
ielizakorobeinikov
andauthored
Adding style transfer functionality into "image_processing_demo" (#3000)
* trying to add style transfer into image processing demo * removing trailing whitespace * remove extra scale * updating readme and adding picture from original IE sample * moving text in readme * removing unnecessary function for type print * fast-neural-style-mosaic-onnx model added to model list * fixing helping message, changing picture for README Co-authored-by: akorobeinikov <[email protected]>
1 parent 6994142 commit d513a3b

File tree

6 files changed

+145
-2
lines changed

6 files changed

+145
-2
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
// Copyright (C) 2021 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
#pragma once

#include <memory>
#include <string>

#include "image_model.h"

/// Model wrapper for neural style transfer networks.
/// Consumes a single image and produces a stylized image resized back to the
/// source resolution (see postprocess in the .cpp).
class StyleTransferModel : public ImageModel {
public:
    /// Constructor
    /// @param modelFileName name of model to load
    StyleTransferModel(const std::string& modelFileName);

    /// Copies the input image into the request's input blob and records the
    /// source resolution for later rescaling.
    std::shared_ptr<InternalModelData> preprocess(
        const InputData& inputData, InferenceEngine::InferRequest::Ptr& request) override;

    /// Converts the network's planar float output into an 8-bit BGR ImageResult.
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

protected:
    /// Validates that the network has exactly one 4-D, 3-channel input and
    /// output, and sets both to FP32 precision.
    void prepareInputsOutputs(InferenceEngine::CNNNetwork& cnnNetwork) override;
};
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
// Copyright (C) 2021 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
#include "models/style_transfer_model.h"
18+
19+
#include "utils/ocv_common.hpp"
20+
#include <utils/slog.hpp>
21+
22+
#include <string>
23+
#include <vector>
24+
#include <memory>
25+
26+
using namespace InferenceEngine;
27+
28+
// Delegates to ImageModel; the second argument is false here — presumably this
// disables the base class's automatic input resizing (confirm against ImageModel).
StyleTransferModel::StyleTransferModel(const std::string& modelFileName)
    : ImageModel(modelFileName, false) {}
31+
32+
void StyleTransferModel::prepareInputsOutputs(InferenceEngine::CNNNetwork& cnnNetwork) {
    // ------------------- Validate and configure the single network input -------------------
    ICNNNetwork::InputShapes inputShapes = cnnNetwork.getInputShapes();
    if (inputShapes.size() != 1) {
        throw std::runtime_error("Demo supports topologies only with 1 input");
    }
    inputsNames.push_back(inputShapes.begin()->first);

    // Expect a 4-D NCHW-like shape with batch 1 and 3 channels.
    const SizeVector& inDims = inputShapes.begin()->second;
    if (inDims.size() != 4 || inDims[0] != 1 || inDims[1] != 3) {
        throw std::runtime_error("3-channel 4-dimensional model's input is expected");
    }
    InputInfo& inputInfo = *cnnNetwork.getInputsInfo().begin()->second;
    inputInfo.setPrecision(Precision::FP32);

    // ------------------- Validate and configure the single network output ------------------
    const OutputsDataMap& outputsInfo = cnnNetwork.getOutputsInfo();
    if (outputsInfo.size() != 1) {
        throw std::runtime_error("Demo supports topologies only with 1 output");
    }
    outputsNames.push_back(outputsInfo.begin()->first);

    Data& outputData = *outputsInfo.begin()->second;
    outputData.setPrecision(Precision::FP32);

    // Output must mirror the input constraints: 4-D, batch 1, 3 channels.
    const SizeVector& outDims = outputData.getTensorDesc().getDims();
    if (outDims.size() != 4 || outDims[0] != 1 || outDims[1] != 3) {
        throw std::runtime_error("3-channel 4-dimensional model's output is expected");
    }
}
59+
60+
std::shared_ptr<InternalModelData> StyleTransferModel::preprocess(const InputData& inputData, InferenceEngine::InferRequest::Ptr& request) {
61+
auto imgData = inputData.asRef<ImageInputData>();
62+
auto& img = imgData.inputImage;
63+
64+
Blob::Ptr minput = request->GetBlob(inputsNames[0]);
65+
matToBlob(img, minput);
66+
return std::make_shared<InternalImageModelData>(img.cols, img.rows);
67+
}
68+
69+
std::unique_ptr<ResultBase> StyleTransferModel::postprocess(InferenceResult& infResult) {
    // Own the result from the start: the original used a raw `new` and only
    // wrapped it in unique_ptr at the return, so any throwing OpenCV call in
    // between (merge/resize can throw cv::Exception) would leak it.
    auto result = std::make_unique<ImageResult>();
    *static_cast<ResultBase*>(result.get()) = static_cast<ResultBase&>(infResult);

    const auto& inputImgSize = infResult.internalModelData->asRef<InternalImageModelData>();

    // Read-only mapping of the output blob; the data is interpreted as floats.
    LockedMemory<const void> outMapped = infResult.getFirstOutputBlob()->rmap();
    const auto outputData = outMapped.as<float*>();

    // Output dims are {1, 3, H, W} — validated in prepareInputsOutputs.
    const SizeVector& outSizeVector = infResult.getFirstOutputBlob()->getTensorDesc().getDims();
    size_t outHeight = outSizeVector[2];
    size_t outWidth = outSizeVector[3];
    size_t numOfPixels = outWidth * outHeight;

    // Wrap the three planar channels without copying. Planes are taken in
    // reverse order (2, 1, 0) — presumably the network emits RGB and this
    // produces OpenCV's BGR layout; confirm against the model's spec.
    std::vector<cv::Mat> imgPlanes{
        cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels * 2])),
        cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[numOfPixels])),
        cv::Mat(outHeight, outWidth, CV_32FC1, &(outputData[0]))};
    cv::Mat resultImg;
    cv::merge(imgPlanes, resultImg);

    // Scale back to the source resolution recorded by preprocess(), then
    // convert float pixels to 8-bit for display/saving.
    cv::resize(resultImg, result->resultImage, cv::Size(inputImgSize.inputImgWidth, inputImgSize.inputImgHeight));
    result->resultImage.convertTo(result->resultImage, CV_8UC3);

    return result;
}

demos/image_processing_demo/cpp/README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ This demo processes the image according to the selected type of processing. The
55
* `super_resolution`
66
* `deblurring`
77
* `jpeg_restoration`
8+
* `style_transfer`
89

910
## Examples
1011

@@ -38,6 +39,10 @@ Super resolution:
3839

3940
For this type of image processing the user can use the flag `-jc`. It allows performing compression before the inference (useful when the user wants to test the model on high-quality JPEG images).
4041

42+
4. Example for style_transfer:
43+
44+
![](./assets/style_transfer.jpg)
45+
4146
## How It Works
4247

4348
Before running the demo, user must choose type of processing and model for this processing.\
@@ -51,6 +56,8 @@ For `deblurring` user can use [deblurgan-v2](../../../models/public/deblurgan-v2
5156

5257
For `jpeg_restoration` user can use [fbcnn](../../../models/public/fbcnn/README.md) - flexible blind convolutional neural network for JPEG artifacts removal.
5358

59+
For `style_transfer` user can use [fast-neural-style-mosaic-onnx](../../../models/public/fast-neural-style-mosaic-onnx/README.md) - one of the style transfer models designed to mix the content of an image with the style of another image.
60+
5461
The demo runs inference and shows results for each image captured from an input. Depending on number of inference requests processing simultaneously (-nireq parameter) the pipeline might minimize the time required to process each single image (for nireq 1) or maximizes utilization of the device and overall processing performance.
5562

5663
> **NOTE**: By default, Open Model Zoo demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model.html#general-conversion-parameters).
@@ -68,6 +75,7 @@ This file can be used as a parameter for [Model Downloader](../../../tools/model
6875
* text-image-super-resolution-0001
6976
* deblurgan-v2
7077
* fbcnn
78+
* fast-neural-style-mosaic-onnx
7179

7280
> **NOTE**: Refer to the tables [Intel's Pre-Trained Models Device Support](../../../models/intel/device_support.md) and [Public Pre-Trained Models Device Support](../../../models/public/device_support.md) for the details on models inference support at different devices.
7381
@@ -82,7 +90,7 @@ image_processing_demo_async [OPTION]
8290
Options:
8391
8492
-h Print a usage message.
85-
-at "<type>" Required. Type of the network, either 'sr' for Super Resolution task, 'deblur' for Deblurring, 'jr' for JPEGRestoration.
93+
-at "<type>" Required. Type of the network, either 'sr' for Super Resolution task, 'deblur' for Deblurring, 'jr' for JPEGRestoration, 'style' for Style Transfer.
8694
-i "<path>" Required. An input to process. The input must be a single image, a folder of images, video file or camera id.
8795
-m "<path>" Required. Path to an .xml file with a trained model.
8896
-o "<path>" Optional. Name of the output file(s) to save.
103 KB
Loading

demos/image_processing_demo/cpp/main.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@
3939
#include <models/super_resolution_model.h>
4040
#include <models/deblurring_model.h>
4141
#include <models/jpeg_restoration_model.h>
42+
#include <models/style_transfer_model.h>
4243
#include <pipelines/metadata.h>
4344
#include "visualizer.hpp"
4445

4546
DEFINE_INPUT_FLAGS
4647
DEFINE_OUTPUT_FLAGS
4748

4849
static const char help_message[] = "Print a usage message.";
49-
static const char at_message[] = "Required. Type of the network, either 'sr' for Super Resolution task, 'deblur' for Deblurring, 'jr' for JPEGRestoration.";
50+
static const char at_message[] = "Required. Type of the network, either 'sr' for Super Resolution task, 'deblur' for Deblurring, 'jr' for JPEGRestoration, 'style' for Style Transfer task.";
5051
static const char model_message[] = "Required. Path to an .xml file with a trained model.";
5152
static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). "
5253
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
@@ -145,6 +146,9 @@ std::unique_ptr<ImageModel> getModel(const cv::Size& frameSize, const std::strin
145146
}
146147
if (type == "jr") {
147148
return std::unique_ptr<ImageModel>(new JPEGRestorationModel(FLAGS_m, frameSize, doCompression));
149+
}
150+
if (type == "style") {
151+
return std::unique_ptr<ImageModel>(new StyleTransferModel(FLAGS_m));
148152
}
149153
throw std::invalid_argument("No model type or invalid model type (-at) provided: " + FLAGS_at);
150154
}

demos/image_processing_demo/cpp/models.lst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ text-image-super-resolution-????
66
deblurgan-v2
77
# For -at jr
88
fbcnn
9+
# For -at style
10+
fast-neural-style-mosaic-onnx

0 commit comments

Comments
 (0)