diff --git a/modules/dnn_superres/README.md b/modules/dnn_superres/README.md index b8a4250ac83..65a6235ee3b 100644 --- a/modules/dnn_superres/README.md +++ b/modules/dnn_superres/README.md @@ -1,9 +1,81 @@ # Super Resolution using Convolutional Neural Networks -This module contains several learning-based algorithms for upscaling an image. +This repository contains an OpenCV module, which uses neural networks to upscale images. +This super-resolution module, originally created as a GSoC 2018 project, contains methods to upscale images using +the following super-resolution algorithms: + +- EDSR, Bee Lim, et al. "Enhanced deep residual networks for single image super-resolution." https://arxiv.org/abs/1707.02921 +- ESPCN, Shi, Wenzhe, et al. "Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network." https://arxiv.org/abs/1609.05158 +- FSRCNN, Dong, Chao, Chen Change Loy, and Xiaoou Tang. "Accelerating the super-resolution convolutional neural network." https://arxiv.org/abs/1608.00367 +- LapSRN, Lai, Wei-Sheng, et al. "Fast and accurate image super-resolution with deep laplacian pyramid networks." https://arxiv.org/abs/1710.01992 +- SRGAN, Ledig, Christian, et al. "Photo-realistic single image super-resolution using a generative adversarial network." https://arxiv.org/abs/1609.04802 +- RDN, Zhang, Yulun, et al. "Residual dense network for image super-resolution." https://arxiv.org/abs/1802.08797 + +## Copyright Notice and Citation + +This module is part of the OpenCV project and is available under the Apache 2.0 license. 
+ +When using the SRGAN and RDN models in academic projects, please cite the appropriate papers: + +For SRGAN: +``` +@InProceedings{Ledig_2017_CVPR, + author = {Ledig, Christian and Theis, Lucas and Huszar, Ferenc and Caballero, Jose and Cunningham, Andrew and Acosta, Alejandro and Aitken, Andrew and Tejani, Alykhan and Totz, Johannes and Wang, Zehan and Shi, Wenzhe}, + title = {Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network}, + booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {July}, + year = {2017} +} +``` + +For RDN: +``` +@inproceedings{zhang2018residual, + title={Residual Dense Network for Image Super-Resolution}, + author={Zhang, Yulun and Tian, Yapeng and Kong, Yu and Zhong, Bineng and Fu, Yun}, + booktitle={CVPR}, + year={2018} +} +``` + +## Patent Information + +To the best of our knowledge, the implemented algorithms (SRGAN and RDN) are not encumbered by patents that would restrict their use in this project. These implementations are based on academic research papers and are provided for academic and research purposes. ## Usage +The dnn_superres module allows for super-resolution using neural networks. The network takes as input an image of a certain size and outputs a larger one. 
Here's a brief example of how to use the provided C++ API: + +```c++ +using namespace std; +using namespace cv; +using namespace dnn_superres; + +int main(int argc, char* argv[]) +{ + cv::Mat img = cv::imread("img.png"); + cv::Mat img_new; + + // Create the Dnn Superres object + cv::dnn_superres::DnnSuperResImpl sr; + + // Read the desired model + std::string path = "models/FSRCNN_x4.pb"; + sr.readModel(path); + + // Set the desired model and scale to get correct pre- and post-processing + sr.setModel("fsrcnn", 4); + + // Upscale + sr.upsample(img, img_new); + + // Save the result + cv::imwrite("./img_upscaled.png", img_new); + + return 0; +} +``` + Run the following command to build this module: ```make diff --git a/modules/dnn_superres/include/opencv2/dnn_superres.hpp b/modules/dnn_superres/include/opencv2/dnn_superres.hpp index 3d3fb757616..839c6d6d7f4 100644 --- a/modules/dnn_superres/include/opencv2/dnn_superres.hpp +++ b/modules/dnn_superres/include/opencv2/dnn_superres.hpp @@ -8,12 +8,14 @@ /** @defgroup dnn_superres DNN used for super resolution This module contains functionality for upscaling an image via convolutional neural networks. -The following four models are implemented: +The following models are implemented: - EDSR - ESPCN - FSRCNN - LapSRN +- SRGAN +- RDN */ @@ -29,12 +31,14 @@ namespace dnn_superres //! @{ /** @brief A class to upscale images via convolutional neural networks. 
-The following four models are implemented: +The following models are implemented: - edsr - espcn - fsrcnn - lapsrn +- srgan +- rdn */ class CV_EXPORTS_W DnnSuperResImpl @@ -59,8 +63,8 @@ class CV_EXPORTS_W DnnSuperResImpl */ CV_WRAP static Ptr create(); - // /** @brief Empty constructor - // */ + /** @brief Empty constructor + */ DnnSuperResImpl(); /** @brief Constructor which immediately sets the desired model @@ -69,6 +73,8 @@ class CV_EXPORTS_W DnnSuperResImpl - __espcn__ - __fsrcnn__ - __lapsrn__ + - __srgan__ + - __rdn__ @param scale Integer specifying the upscale factor */ DnnSuperResImpl(const String& algo, int scale); @@ -90,6 +96,8 @@ class CV_EXPORTS_W DnnSuperResImpl - __espcn__ - __fsrcnn__ - __lapsrn__ + - __srgan__ + - __rdn__ @param scale Integer specifying the upscale factor */ CV_WRAP void setModel(const String& algo, int scale); diff --git a/modules/dnn_superres/misc/python/test/test_dnn_superres.py b/modules/dnn_superres/misc/python/test/test_dnn_superres.py index 48b7bcd29b6..a3a3bda2a30 100644 --- a/modules/dnn_superres/misc/python/test/test_dnn_superres.py +++ b/modules/dnn_superres/misc/python/test/test_dnn_superres.py @@ -1,4 +1,8 @@ #!/usr/bin/env python +# This file is part of OpenCV project. +# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html. 
+ import os import cv2 as cv @@ -44,5 +48,81 @@ def test_single_output(self): self.assertEqual(sr.getScale(), 2) self.assertEqual(sr.getAlgorithm(), "espcn") + @unittest.skipIf('OPENCV_TEST_DATA_PATH' not in os.environ, + "OPENCV_TEST_DATA_PATH is not defined") + def test_srgan(self): + # Get test data paths + dnn_superres_test_path = os.environ['OPENCV_TEST_DATA_PATH'] + "/cv/dnn_superres/" + img_path = dnn_superres_test_path + "butterfly.png" + srgan_path = dnn_superres_test_path + "SRGAN_x4.pb" + + # Create an SR object + sr = cv.dnn_superres.DnnSuperResImpl_create() + + # Read image + image = cv.imread(img_path) + inp_h, inp_w, inp_c = image.shape + + # Read the desired model + sr.readModel(srgan_path) + + # Set the desired model and scale to get correct pre- and post-processing + sr.setModel("srgan", 4) + + # Upscale the image + result = sr.upsample(image) + out_h, out_w, out_c = result.shape + + # CHECK... + # if result is not empty + self.assertFalse(result is None) + + # upsampled image is correct size + self.assertEqual(out_h, inp_h*4) + self.assertEqual(out_w, inp_w*4) + self.assertEqual(out_c, inp_c) + + # get functions work + self.assertEqual(sr.getScale(), 4) + self.assertEqual(sr.getAlgorithm(), "srgan") + + @unittest.skipIf('OPENCV_TEST_DATA_PATH' not in os.environ, + "OPENCV_TEST_DATA_PATH is not defined") + def test_rdn(self): + # Get test data paths + dnn_superres_test_path = os.environ['OPENCV_TEST_DATA_PATH'] + "/cv/dnn_superres/" + img_path = dnn_superres_test_path + "butterfly.png" + rdn_path = dnn_superres_test_path + "RDN_x3.pb" + + # Create an SR object + sr = cv.dnn_superres.DnnSuperResImpl_create() + + # Read image + image = cv.imread(img_path) + inp_h, inp_w, inp_c = image.shape + + # Read the desired model + sr.readModel(rdn_path) + + # Set the desired model and scale to get correct pre- and post-processing + sr.setModel("rdn", 3) + + # Upscale the image + result = sr.upsample(image) + out_h, out_w, out_c = result.shape + + # CHECK... 
+ # if result is not empty + self.assertFalse(result is None) + + # upsampled image is correct size + self.assertEqual(out_h, inp_h*3) + self.assertEqual(out_w, inp_w*3) + self.assertEqual(out_c, inp_c) + + # get functions work + self.assertEqual(sr.getScale(), 3) + self.assertEqual(sr.getAlgorithm(), "rdn") + if __name__ == '__main__': - NewOpenCVTests.bootstrap() \ No newline at end of file + NewOpenCVTests.bootstrap() diff --git a/modules/dnn_superres/samples/CMakeLists.txt b/modules/dnn_superres/samples/CMakeLists.txt new file mode 100644 index 00000000000..c7ed6c1d73f --- /dev/null +++ b/modules/dnn_superres/samples/CMakeLists.txt @@ -0,0 +1,36 @@ +# This file is part of OpenCV project. +# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html. + + +set(OPENCV_SUPERRES_SAMPLE_REQUIRED_DEPS + opencv_core + opencv_imgproc + opencv_dnn_superres + opencv_highgui + opencv_imgcodecs +) + +ocv_add_executable(dnn_superres + dnn_superres.cpp +) + +ocv_add_executable(dnn_superres_benchmark + dnn_superres_benchmark.cpp +) + +ocv_add_executable(dnn_superres_srgan_rdn_demo + dnn_superres_srgan_rdn_demo.cpp +) + +ocv_target_link_libraries(dnn_superres + ${OPENCV_SUPERRES_SAMPLE_REQUIRED_DEPS} +) + +ocv_target_link_libraries(dnn_superres_benchmark + ${OPENCV_SUPERRES_SAMPLE_REQUIRED_DEPS} +) + +ocv_target_link_libraries(dnn_superres_srgan_rdn_demo + ${OPENCV_SUPERRES_SAMPLE_REQUIRED_DEPS} +) diff --git a/modules/dnn_superres/samples/dnn_superres_srgan_rdn_demo.cpp b/modules/dnn_superres/samples/dnn_superres_srgan_rdn_demo.cpp new file mode 100644 index 00000000000..a84deea5995 --- /dev/null +++ b/modules/dnn_superres/samples/dnn_superres_srgan_rdn_demo.cpp @@ -0,0 +1,137 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include +#include +#include +#include + +using namespace std; +using namespace cv; +using namespace dnn_superres; + +/** + * @brief Demonstrate the use of SRGAN and RDN super resolution models + * @author Contributed by Akalp + */ + +const char* keys = +{ + "{ help h | | Print help message. }" + "{ input i | | Path to input image. }" + "{ model m | | Path to model weights. }" + "{ scale s | 4 | Scale factor (2, 3, 4). }" + "{ model_type t| srgan | Model type (srgan or rdn). }" + "{ output o | sr_result.png | Path to output image. }" + "{ cuda c | false | Use CUDA for GPU acceleration. }" +}; + +int main(int argc, char* argv[]) +{ + // Parse command line arguments + CommandLineParser parser(argc, argv, keys); + parser.about("Super Resolution using SRGAN and RDN models"); + + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + // Get the required parameters + String input_path = parser.get("input"); + String model_path = parser.get("model"); + String model_type = parser.get("model_type"); + String output_path = parser.get("output"); + int scale = parser.get("scale"); + bool use_cuda = parser.get("cuda"); + + if (!parser.check()) + { + parser.printErrors(); + return -1; + } + + if (input_path.empty() || model_path.empty()) + { + cerr << "Input image and model are required!" << endl; + return -1; + } + + // Convert model type to lowercase for comparison + transform(model_type.begin(), model_type.end(), model_type.begin(), ::tolower); + + // Check if model type is valid + if (model_type != "srgan" && model_type != "rdn") + { + cerr << "Invalid model type. 
Supported types: srgan, rdn" << endl; + return -1; + } + + try + { + // Load the image + Mat img = imread(input_path); + if (img.empty()) + { + cerr << "Could not load the image: " << input_path << endl; + return -1; + } + + // Create the super resolution object + DnnSuperResImpl sr; + + // Read the model + sr.readModel(model_path); + + // Set the model and scale + sr.setModel(model_type, scale); + + // Set GPU if requested + if (use_cuda) + { +#ifdef HAVE_CUDA + sr.setPreferableBackend(dnn::DNN_BACKEND_CUDA); + sr.setPreferableTarget(dnn::DNN_TARGET_CUDA); + cout << "Using CUDA backend" << endl; +#else + cerr << "CUDA is not available in this build. Using CPU." << endl; +#endif + } + + cout << "Processing image with " << model_type << " model..." << endl; + cout << "Original resolution: " << img.cols << "x" << img.rows << endl; + + // Create a window for the original image + namedWindow("Original Image", WINDOW_NORMAL); + imshow("Original Image", img); + + // Upscale the image + Mat result; + + // Measure processing time + double t = (double)getTickCount(); + sr.upsample(img, result); + t = ((double)getTickCount() - t) / getTickFrequency(); + + cout << "Done in " << t << " seconds" << endl; + cout << "Upscaled resolution: " << result.cols << "x" << result.rows << endl; + + // Create a window for the super resolution result + namedWindow("Super Resolution Result", WINDOW_NORMAL); + imshow("Super Resolution Result", result); + + // Save the result + imwrite(output_path, result); + cout << "Result saved to: " << output_path << endl; + + waitKey(0); + return 0; + } + catch (const cv::Exception& e) + { + cerr << "Error: " << e.what() << endl; + return -1; + } +} diff --git a/modules/dnn_superres/src/dnn_superres.cpp b/modules/dnn_superres/src/dnn_superres.cpp index 12c93a68d03..91740efbbd0 100644 --- a/modules/dnn_superres/src/dnn_superres.cpp +++ b/modules/dnn_superres/src/dnn_superres.cpp @@ -85,10 +85,17 @@ void DnnSuperResImpl::readModel(const String& weights, 
const String& definition) } } -void DnnSuperResImpl::setModel(const String& algo, int scale) +void DnnSuperResImpl::setModel(const String& algorithm, int scale) { + // algorithm: one of edsr, espcn, fsrcnn, lapsrn, srgan, rdn; scale: positive upscaling factor. + // Validate both arguments before storing anything, so a rejected call leaves the previous configuration untouched. + if (algorithm != "edsr" && algorithm != "espcn" && algorithm != "fsrcnn" && algorithm != "lapsrn" && algorithm != "srgan" && algorithm != "rdn") + CV_Error(cv::Error::StsNotImplemented, cv::format("Unknown/unsupported superres algorithm: %s", algorithm.c_str())); + if (scale <= 0) + CV_Error(Error::StsBadArg, "Upscaling ratio must be positive"); + + this->alg = algorithm; this->sc = scale; - this->alg = algo; } void DnnSuperResImpl::setPreferableBackend(int backendId) @@ -114,6 +121,9 @@ void DnnSuperResImpl::upsample(InputArray img, OutputArray result) if (net.empty()) CV_Error(Error::StsError, "Model not specified. Please set model via setModel()."); + if (this->sc <= 0) + CV_Error(Error::StsBadArg, "Upscaling ratio must be positive"); + if (this->alg == "espcn" || this->alg == "lapsrn" || this->alg == "fsrcnn") { //Preprocess the image: convert to YCrCb float image and normalize @@ -167,6 +177,63 @@ void DnnSuperResImpl::upsample(InputArray img, OutputArray result) //Post-process: add mean. Mat(model_outs[0] + mean).convertTo(result, CV_8U); } + else if (this->alg == "srgan") + { + // SRGAN path: subtract a per-channel mean, scale by 1/127.5 and pass swapRB=true to blobFromImage. + // NOTE(review): unlike the rdn branch this swaps channels and rescales; confirm against the exported graph. + + // Declared as a BGR mean. NOTE(review): with swapRB=true blobFromImage documents the mean as (R, G, B); confirm the order. + Scalar mean = Scalar(103.939, 116.779, 123.68); // Common ImageNet mean + + // Convert to float + Mat float_img; + img.getMat().convertTo(float_img, CV_32F, 1.0); + + // Create blob from image + cv::Mat blob; + dnn::blobFromImage(float_img, blob, 1/127.5, Size(), mean, true); // mean is subtracted first, then scaled; only approximately [-1,1] + + // Get the HR output + this->net.setInput(blob); + Mat blob_output = this->net.forward(); + + // Convert from blob + std::vector model_outs; + dnn::imagesFromBlob(blob_output, model_outs); + + // Denormalize assuming the network output is in [-1,1]: add 1, then multiply by 127.5. + // NOTE(review): no channel swap is applied on the way out; confirm the result is in BGR order. + Mat normalized; + cv::add(model_outs[0], 1.0, normalized); // Convert from [-1,1] to [0,2] + cv::multiply(normalized, 127.5, normalized); // Scale to [0,255] + normalized.convertTo(result, CV_8U); + } + else if 
(this->alg == "rdn") + { + // RDN (Residual Dense Network) typically follows EDSR-like preprocessing + + // BGR mean of the training dataset (usually DIV2K) + Scalar mean = Scalar(103.1545782, 111.561547, 114.35629928); + + // Convert to float + Mat float_img; + img.getMat().convertTo(float_img, CV_32F, 1.0); + + // Create blob from image + cv::Mat blob; + dnn::blobFromImage(float_img, blob, 1.0, Size(), mean); + + // Get the HR output + this->net.setInput(blob); + Mat blob_output = this->net.forward(); + + // Convert from blob + std::vector model_outs; + dnn::imagesFromBlob(blob_output, model_outs); + + // Post-process: add mean + Mat(model_outs[0] + mean).convertTo(result, CV_8U); + } else { CV_Error(cv::Error::StsNotImplemented, String("Unknown/unsupported superres algorithm: ") + this->alg); diff --git a/modules/dnn_superres/test/test_dnn_superres.cpp b/modules/dnn_superres/test/test_dnn_superres.cpp index 6645040bddc..41e299e6628 100644 --- a/modules/dnn_superres/test/test_dnn_superres.cpp +++ b/modules/dnn_superres/test/test_dnn_superres.cpp @@ -44,6 +44,38 @@ void runSingleModel(std::string algorithm, int scale, std::string model_filename ASSERT_EQ(new_rows, result.rows); } +// Test model parameter validation +TEST(CV_DnnSuperResParameterValidationTest, validate_parameters) +{ + Ptr dnn_sr = makePtr(); + + // Test invalid algorithm + try { + dnn_sr->setModel("invalid_algorithm", 2); + FAIL() << "Expected exception for invalid algorithm not thrown"; + } catch (const cv::Exception& e) { + EXPECT_TRUE(std::string(e.what()).find("Unknown/unsupported superres algorithm") != std::string::npos); + } + + // Test invalid scale (0 or negative) + try { + dnn_sr->setModel("espcn", 0); + FAIL() << "Expected exception for invalid scale not thrown"; + } catch (const cv::Exception& e) { + EXPECT_TRUE(std::string(e.what()).find("Upscaling ratio must be positive") != std::string::npos); + } + + // Test empty model + Mat img = Mat::zeros(100, 100, CV_8UC3); + Mat result; + try 
{ + dnn_sr->upsample(img, result); + FAIL() << "Expected exception for empty model not thrown"; + } catch (const cv::Exception& e) { + EXPECT_TRUE(std::string(e.what()).find("Model not specified") != std::string::npos); + } +} + TEST(CV_DnnSuperResSingleOutputTest, accuracy_espcn_2) { runSingleModel("espcn", 2, "ESPCN_x2.pb"); @@ -59,6 +91,79 @@ TEST(CV_DnnSuperResSingleOutputTest, accuracy_fsrcnn_3) runSingleModel("fsrcnn", 3, "FSRCNN_x3.pb"); } +TEST(CV_DnnSuperResSingleOutputTest, accuracy_srgan_4) +{ + runSingleModel("srgan", 4, "SRGAN_x4.pb"); +} + +TEST(CV_DnnSuperResSingleOutputTest, accuracy_rdn_3) +{ + runSingleModel("rdn", 3, "RDN_x3.pb"); +} + +// Extended tests for SRGAN +TEST(CV_DnnSuperResSRGANTest, various_input_sizes) +{ + SCOPED_TRACE("srgan"); + + Ptr dnn_sr = makePtr(); + std::string path = cvtest::findDataFile(DNN_SUPERRES_DIR + "/" + IMAGE_FILENAME); + Mat img = imread(path); + ASSERT_FALSE(img.empty()) << "Test image can't be loaded: " << path; + + std::string pb_path = cvtest::findDataFile(DNN_SUPERRES_DIR + "/SRGAN_x4.pb"); + dnn_sr->readModel(pb_path); + dnn_sr->setModel("srgan", 4); + + // Test with different input sizes + std::vector sizes = {Size(32, 32), Size(64, 64), Size(128, 96)}; + + for (const auto& size : sizes) { + Mat resized; + resize(img, resized, size); + + Mat result; + dnn_sr->upsample(resized, result); + + ASSERT_FALSE(result.empty()) << "Could not perform upsampling for input size " << size; + ASSERT_EQ(size.width * 4, result.cols); + ASSERT_EQ(size.height * 4, result.rows); + } +} + +// Extended tests for RDN +TEST(CV_DnnSuperResRDNTest, different_input_channels) +{ + SCOPED_TRACE("rdn"); + + Ptr dnn_sr = makePtr(); + std::string path = cvtest::findDataFile(DNN_SUPERRES_DIR + "/" + IMAGE_FILENAME); + Mat img = imread(path); + ASSERT_FALSE(img.empty()) << "Test image can't be loaded: " << path; + + std::string pb_path = cvtest::findDataFile(DNN_SUPERRES_DIR + "/RDN_x3.pb"); + dnn_sr->readModel(pb_path); + 
dnn_sr->setModel("rdn", 3); + + // Test with color image + Mat color_result; + dnn_sr->upsample(img, color_result); + ASSERT_FALSE(color_result.empty()) << "Could not perform upsampling for color image"; + ASSERT_EQ(img.cols * 3, color_result.cols); + ASSERT_EQ(img.rows * 3, color_result.rows); + ASSERT_EQ(img.channels(), color_result.channels()); + + // Test with grayscale image + Mat gray; + cvtColor(img, gray, COLOR_BGR2GRAY); + + Mat gray_result; + dnn_sr->upsample(gray, gray_result); + ASSERT_FALSE(gray_result.empty()) << "Could not perform upsampling for grayscale image"; + ASSERT_EQ(gray.cols * 3, gray_result.cols); + ASSERT_EQ(gray.rows * 3, gray_result.rows); + ASSERT_EQ(gray.channels(), gray_result.channels()); +} /****************************************************************************************\ * Test multi output models *