Add ImageFolderDataset, train convnet tutorial on imagenette dataset (#95)

mfl28 · web-flow · commit a336cfb90b5c · 2022-02-03T00:21:39.000+05:30
* Train convolutional neural network tutorial on Imagenette dataset instead of MNIST.

* Remove macos CI build using Xcode 10.3.
diff --git a/.github/workflows/build_macos.yml b/.github/workflows/build_macos.yml
@@ -22,7 +22,7 @@ jobs:
     runs-on: macos-10.15
     strategy:
       matrix:
-        xcode: [10.3, 11.5, 12.4]
+        xcode: [11.5, 12.4]
     steps:
     - name: Checkout
       uses: actions/checkout@v2.0.0
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -59,6 +59,9 @@ if(DOWNLOAD_DATASETS)
     add_custom_target(flickr8k COMMAND ${CMAKE_COMMAND}
         -D DATA_DIR=${DATA_DIR}
         -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/fetch_flickr8k.cmake)
+    add_custom_target(imagenette COMMAND ${CMAKE_COMMAND}
+        -D DATA_DIR=${DATA_DIR}
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/fetch_imagenette.cmake)
 endif()
 
 # Add tutorial sub-projects:
diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@
 
 | OS (Compiler)\\LibTorch |                                                  1.10.1                                                |
 | :--------------------- | :--------------------------------------------------------------------------------------------------- |
-|    macOS (clang 10.0, 11.0, 12.0)    | [![Status](https://github.com/prabhuomkar/pytorch-cpp/actions/workflows/build_macos.yml/badge.svg?branch=master)](https://github.com/prabhuomkar/pytorch-cpp/actions?query=workflow%3Aci-build-macos) |
+|    macOS (clang 11.0, 12.0)    | [![Status](https://github.com/prabhuomkar/pytorch-cpp/actions/workflows/build_macos.yml/badge.svg?branch=master)](https://github.com/prabhuomkar/pytorch-cpp/actions?query=workflow%3Aci-build-macos) |
 |      Linux (gcc 8, 9, 10, 11)      | [![Status](https://github.com/prabhuomkar/pytorch-cpp/actions/workflows/build_ubuntu.yml/badge.svg?branch=master)](https://github.com/prabhuomkar/pytorch-cpp/actions?query=workflow%3Aci-build-ubuntu) |
 |    Windows (msvc 2017, 2019)  | [![Status](https://github.com/prabhuomkar/pytorch-cpp/actions/workflows/build_windows.yml/badge.svg?branch=master)](https://github.com/prabhuomkar/pytorch-cpp/actions?query=workflow%3Aci-build-windows) |
 
diff --git a/cmake/fetch_imagenette.cmake b/cmake/fetch_imagenette.cmake
@@ -0,0 +1,33 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+# Downloads the imagenette2-160 archive and unpacks its train/val folders into DATA_DIR.
+function(fetch_imagenette DATA_DIR)
+    set(IMAGENETTE_DIR "${DATA_DIR}/imagenette2-160")
+    set(IMAGENETTE_DOWNLOAD_DIR "${DATA_DIR}/imagenette_download")
+    set(IMAGENETTE_ARCHIVE "${IMAGENETTE_DOWNLOAD_DIR}/imagenette2-160.tgz")
+    set(IMAGENETTE_DATA_URL "https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz")
+
+    if(NOT EXISTS "${IMAGENETTE_DIR}")
+        message(STATUS "Fetching Imagenette dataset...")
+        file(DOWNLOAD ${IMAGENETTE_DATA_URL} "${IMAGENETTE_ARCHIVE}"
+            EXPECTED_MD5 "e793b78cc4c9e9a4ccc0c1155377a412"
+            SHOW_PROGRESS)
+
+        execute_process(
+            COMMAND ${CMAKE_COMMAND} -E tar xf "${IMAGENETTE_ARCHIVE}"
+                    "imagenette2-160/train" "imagenette2-160/val"
+            WORKING_DIRECTORY "${DATA_DIR}"
+            RESULT_VARIABLE IMAGENETTE_EXTRACT_RESULT)
+
+        file(REMOVE_RECURSE "${IMAGENETTE_DOWNLOAD_DIR}")
+        if(NOT IMAGENETTE_EXTRACT_RESULT EQUAL 0)
+            # Drop partial output so the EXISTS guard above does not skip the next attempt.
+            file(REMOVE_RECURSE "${IMAGENETTE_DIR}")
+            message(FATAL_ERROR "Extracting Imagenette dataset failed: ${IMAGENETTE_EXTRACT_RESULT}")
+        endif()
+
+        message(STATUS "Fetching Imagenette dataset - done")
+    endif()
+endfunction()
+
+fetch_imagenette(${DATA_DIR})
diff --git a/tutorials/intermediate/convolutional_neural_network/CMakeLists.txt b/tutorials/intermediate/convolutional_neural_network/CMakeLists.txt
@@ -7,20 +7,22 @@ set(EXECUTABLE_NAME convolutional-neural-network)
 add_executable(${EXECUTABLE_NAME})
 target_sources(${EXECUTABLE_NAME} PRIVATE src/main.cpp
                                           src/convnet.cpp
+                                          src/imagefolder_dataset.cpp
                                           include/convnet.h
+                                          include/imagefolder_dataset.h
 )
 
 target_include_directories(${EXECUTABLE_NAME} PRIVATE include)
 
-target_link_libraries(${EXECUTABLE_NAME} ${TORCH_LIBRARIES})
+target_link_libraries(${EXECUTABLE_NAME} ${TORCH_LIBRARIES} image-io)
 
 set_target_properties(${EXECUTABLE_NAME} PROPERTIES
   CXX_STANDARD 17
   CXX_STANDARD_REQUIRED YES
 )
 
 if(DOWNLOAD_DATASETS)
-    add_dependencies(${EXECUTABLE_NAME} mnist)
+    add_dependencies(${EXECUTABLE_NAME} imagenette)
 endif()
 
 if(MSVC)
diff --git a/tutorials/intermediate/convolutional_neural_network/include/convnet.h b/tutorials/intermediate/convolutional_neural_network/include/convnet.h
@@ -10,19 +10,27 @@ class ConvNetImpl : public torch::nn::Module {
 
  private:
     torch::nn::Sequential layer1{
-        torch::nn::Conv2d(torch::nn::Conv2dOptions(1, 16, 5).stride(1).padding(2)),
+        torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 16, 3).stride(1)),  // 3 -> 16 channels, kernel 3, no padding set
         torch::nn::BatchNorm2d(16),
         torch::nn::ReLU(),
         torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(2).stride(2))
     };
 
     torch::nn::Sequential layer2{
-        torch::nn::Conv2d(torch::nn::Conv2dOptions(16, 32, 5).stride(1).padding(2)),
+        torch::nn::Conv2d(torch::nn::Conv2dOptions(16, 32, 3).stride(1)),  // 16 -> 32 channels, kernel 3, no padding set
         torch::nn::BatchNorm2d(32),
         torch::nn::ReLU(),
         torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(2).stride(2))
     };
 
+    torch::nn::Sequential layer3{  // third conv stage; unlike layer1/layer2 it has no MaxPool2d
+        torch::nn::Conv2d(torch::nn::Conv2dOptions(32, 64, 3).stride(1)),  // 32 -> 64 channels, kernel 3
+        torch::nn::BatchNorm2d(64),
+        torch::nn::ReLU(),
+    };
+
+    torch::nn::AdaptiveAvgPool2d pool{torch::nn::AdaptiveAvgPool2dOptions({4, 4})};  // pools to a 4x4 output size
+
     torch::nn::Linear fc;
 };
 
diff --git a/tutorials/intermediate/convolutional_neural_network/include/imagefolder_dataset.h b/tutorials/intermediate/convolutional_neural_network/include/imagefolder_dataset.h
@@ -0,0 +1,37 @@
+// Copyright 2020-present pytorch-cpp Authors
+#pragma once
+
+#include <torch/data/datasets/base.h>
+#include <torch/data/example.h>
+#include <torch/types.h>
+#include <string>
+#include <vector>
+#include <unordered_map>
+
+namespace dataset {
+/**
+ * Dataset of image-label samples, read from the "train" or "val" sub-folder of a root directory.
+ */
+class ImageFolderDataset : public torch::data::datasets::Dataset<ImageFolderDataset> {
+ public:
+    enum class Mode {  // selects which sub-folder of the root directory is read
+       TRAIN,
+       VAL
+    };
+    // root: dataset directory; mode: sub-folder to read; image_load_size: passed on when loading images.
+    explicit ImageFolderDataset(const std::string &root, Mode mode = Mode::TRAIN,
+                                torch::IntArrayRef image_load_size = {});
+    // Returns the sample at `index` as an example of (image, class-index tensor).
+    torch::data::Example<> get(size_t index) override;
+    // Returns the number of samples.
+    torch::optional<size_t> size() const override;
+
+ private:
+    Mode mode_;  // mode given to the constructor
+    std::vector<int64_t> image_load_size_;  // owned copy of the constructor's image_load_size
+    std::string mode_dir_;  // directory of the selected mode
+    std::vector<std::string> classes_;  // class names
+    std::unordered_map<std::string, int> class_to_index_;  // class name -> class index
+    std::vector<std::pair<std::string, int>> samples_;  // (image path, class index) pairs
+};
+}  // namespace dataset
diff --git a/tutorials/intermediate/convolutional_neural_network/src/convnet.cpp b/tutorials/intermediate/convolutional_neural_network/src/convnet.cpp
@@ -3,15 +3,19 @@
 #include <torch/torch.h>
 
 ConvNetImpl::ConvNetImpl(int64_t num_classes)
-    : fc(7 * 7 * 32, num_classes) {
+    : fc(64 * 4 * 4, num_classes) {  // same 64 * 4 * 4 feature count that forward() flattens to
     register_module("layer1", layer1);
     register_module("layer2", layer2);
+    register_module("layer3", layer3);
+    register_module("pool", pool);  // was terminated by ',' which chained it to the next call
     register_module("fc", fc);
 }
 
 torch::Tensor ConvNetImpl::forward(torch::Tensor x) {
     x = layer1->forward(x);
     x = layer2->forward(x);
-    x = x.view({-1, 7 * 7 * 32});
+    x = pool->forward(layer3->forward(x));
+    // Flatten each sample to 64 * 4 * 4 values, the input size the linear layer was built with.
+    x = x.view({-1, 64 * 4 * 4});
     return fc->forward(x);
 }
diff --git a/tutorials/intermediate/convolutional_neural_network/src/imagefolder_dataset.cpp b/tutorials/intermediate/convolutional_neural_network/src/imagefolder_dataset.cpp
@@ -0,0 +1,76 @@
+// Copyright 2020-present pytorch-cpp Authors
+#include <imagefolder_dataset.h>
+#include <torch/torch.h>
+#include <vector>
+#include <algorithm>
+#include <filesystem>
+#include <unordered_map>
+#include "image_io.h"
+
+namespace fs = std::filesystem;
+
+using image_io::load_image;
+
+namespace dataset {
+namespace {
+std::vector<std::string> parse_classes(const std::string &directory) {
+    // Every immediate sub-directory of `directory` contributes its folder name.
+    std::vector<std::string> class_names;
+    for (const auto &entry : fs::directory_iterator(directory)) {
+        if (!entry.is_directory()) {
+            continue;
+        }
+        class_names.emplace_back(entry.path().filename().string());
+    }
+    // Sort so the result does not depend on the iteration order of the directory.
+    std::sort(class_names.begin(), class_names.end());
+    return class_names;
+}
+
+std::unordered_map<std::string, int> create_class_to_index_map(const std::vector<std::string> &classes) {
+    // Each name is mapped to its position in `classes` (0, 1, 2, ...).
+    std::unordered_map<std::string, int> lookup;
+    lookup.reserve(classes.size());
+
+    for (size_t position = 0; position < classes.size(); ++position) {
+        lookup[classes[position]] = static_cast<int>(position);
+    }
+
+    return lookup;
+}
+
+// Lists (image path, class index) for every regular file directly inside each class folder.
+std::vector<std::pair<std::string, int>> create_samples(
+    const std::string &directory, const std::unordered_map<std::string, int> &class_to_index) {
+    std::vector<std::pair<std::string, int>> samples;
+    for (const auto &[class_name, class_index] : class_to_index) {
+        for (const auto &p : fs::directory_iterator(directory + "/" + class_name)) {
+            if (p.is_regular_file()) {
+                samples.emplace_back(p.path().string(), class_index);
+            }
+        }
+    }
+    // Iteration order above is unspecified; sort so an index always maps to the same sample.
+    std::sort(samples.begin(), samples.end());
+    return samples;
+}
+}  // namespace
+
+ImageFolderDataset::ImageFolderDataset(const std::string &root, Mode mode, torch::IntArrayRef image_load_size)
+    : mode_(mode),
+      image_load_size_(image_load_size.begin(), image_load_size.end()),  // copies the elements into an owned vector
+      mode_dir_(root + "/" + (mode == Mode::TRAIN ? "train" : "val")),  // "<root>/train" or "<root>/val"
+      classes_(parse_classes(mode_dir_)),  // from here on each initializer reads a previously initialized member,
+      class_to_index_(create_class_to_index_map(classes_)),  // so this relies on the members' declaration order
+      samples_(create_samples(mode_dir_, class_to_index_)) {}
+
+torch::optional<size_t> ImageFolderDataset::size() const {
+    return torch::optional<size_t>(samples_.size());  // always holds a value: the number of collected samples
+}
+
+torch::data::Example<> ImageFolderDataset::get(size_t index) {
+    // Loads the sample's image; at() makes a bad index throw std::out_of_range rather than be UB.
+    const auto &[image_path, class_index] = samples_.at(index);
+    return {load_image(image_path, image_load_size_), torch::tensor(class_index)};
+}
+}  // namespace dataset
diff --git a/tutorials/intermediate/convolutional_neural_network/src/main.cpp b/tutorials/intermediate/convolutional_neural_network/src/main.cpp
@@ -3,6 +3,9 @@
 #include <iostream>
 #include <iomanip>
 #include "convnet.h"
+#include "imagefolder_dataset.h"
+
+using dataset::ImageFolderDataset;
 
 int main() {
     std::cout << "Convolutional Neural Network\n\n";
@@ -14,22 +17,23 @@ int main() {
 
     // Hyper parameters
     const int64_t num_classes = 10;
-    const int64_t batch_size = 100;
-    const size_t num_epochs = 5;
-    const double learning_rate = 0.001;
+    const int64_t batch_size = 8;
+    const size_t num_epochs = 10;
+    const double learning_rate = 1e-3;
+    const double weight_decay = 1e-3;
 
-    const std::string MNIST_data_path = "../../../../data/mnist/";
+    const std::string imagenette_data_path = "../../../../data/imagenette2-160";
 
-    // MNIST dataset
-    auto train_dataset = torch::data::datasets::MNIST(MNIST_data_path)
-        .map(torch::data::transforms::Normalize<>(0.1307, 0.3081))
+    // Imagenette dataset
+    auto train_dataset = ImageFolderDataset(imagenette_data_path, ImageFolderDataset::Mode::TRAIN, {160, 160})
+        .map(torch::data::transforms::Normalize<>({0.485, 0.456, 0.406}, {0.229, 0.224, 0.225}))
         .map(torch::data::transforms::Stack<>());
 
     // Number of samples in the training set
     auto num_train_samples = train_dataset.size().value();
 
-    auto test_dataset = torch::data::datasets::MNIST(MNIST_data_path, torch::data::datasets::MNIST::Mode::kTest)
-        .map(torch::data::transforms::Normalize<>(0.1307, 0.3081))
+    auto test_dataset = ImageFolderDataset(imagenette_data_path, ImageFolderDataset::Mode::VAL, {160, 160})
+        .map(torch::data::transforms::Normalize<>({0.485, 0.456, 0.406}, {0.229, 0.224, 0.225}))
         .map(torch::data::transforms::Stack<>());
 
     // Number of samples in the testset
@@ -47,7 +51,8 @@ int main() {
     model->to(device);
 
     // Optimizer
-    torch::optim::Adam optimizer(model->parameters(), torch::optim::AdamOptions(learning_rate));
+    torch::optim::Adam optimizer(
+        model->parameters(), torch::optim::AdamOptions(learning_rate).weight_decay(weight_decay));
 
     // Set floating point output precision
     std::cout << std::fixed << std::setprecision(4);
@@ -98,7 +103,7 @@ int main() {
 
     // Test the model
     model->eval();
-    torch::NoGradGuard no_grad;
+    torch::InferenceMode no_grad;
 
     double running_loss = 0.0;
     size_t num_correct = 0;