Skip to content

Commit 3db448f

Browse files
authored
Implement and use thread pool to run models concurrently (#178)
* Implement and use thread pool to run models concurrently
* Add comments
1 parent 0447574 commit 3db448f

File tree

6 files changed

+117
-7
lines changed

6 files changed

+117
-7
lines changed

Testing/WinMLRunnerTest/WinMLRunnerTest.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,36 @@ namespace WinMLRunnerTest
658658
}
659659
};
660660

661+
TEST_CLASS(ConcurrencyTest)
{
public:
    TEST_CLASS_INITIALIZE(SetupClass)
    {
        // Create the folder that the concurrency tests load models from.
        std::string mkFolderCommand = "mkdir " + std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
        system(mkFolderCommand.c_str());

        const std::vector<std::string> models = { "SqueezeNet.onnx", "keras_Add_ImageNet_small.onnx" };

        // Stage each model file into the input folder so -folder can pick it up.
        for (const auto& model : models)
        {
            std::string copyCommand = "Copy ";
            copyCommand += model;
            copyCommand += ' ' + std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
            system(copyCommand.c_str());
        }
    }

    TEST_METHOD(RunFolder)
    {
        // Exercise concurrent model loading over the staged folder with 5 threads.
        const std::wstring command = BuildCommand({
            EXE_PATH, L"-folder", INPUT_FOLDER_PATH, L"-ConcurrentLoad", L"-NumThreads", L"5"
        });
        Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
    }
};
690+
661691
TEST_CLASS(OtherTests)
662692
{
663693
public:

Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
</ItemGroup>
2121
<ItemGroup>
2222
<ClInclude Include="src/Scenarios.h" />
23+
<ClInclude Include="src\ThreadPool.h" />
2324
</ItemGroup>
2425
<ItemGroup>
2526
<ClCompile Include="src/Concurrency.cpp" />
27+
<ClCompile Include="src\ThreadPool.cpp" />
2628
</ItemGroup>
2729
<PropertyGroup Label="Globals">
2830
<VCProjectVersion>15.0</VCProjectVersion>

Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,16 @@
1818
<ClInclude Include="src/Scenarios.h">
1919
<Filter>Header Files</Filter>
2020
</ClInclude>
21+
<ClInclude Include="src\ThreadPool.h">
22+
<Filter>Header Files</Filter>
23+
</ClInclude>
2124
</ItemGroup>
2225
<ItemGroup>
2326
<ClCompile Include="src/Concurrency.cpp">
2427
<Filter>Source Files</Filter>
2528
</ClCompile>
29+
<ClCompile Include="src\ThreadPool.cpp">
30+
<Filter>Source Files</Filter>
31+
</ClCompile>
2632
</ItemGroup>
2733
</Project>

Tools/WinMLRunner/src/Concurrency.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
#include "Windows.h"
2-
#include "common.h"
31
#include <iostream>
42
#include <thread>
53
#include <regex>
64

5+
#include "Windows.h"
6+
#include "common.h"
7+
#include "ThreadPool.h"
8+
79
using namespace winrt;
810
using namespace winrt::Windows::AI::MachineLearning;
911

@@ -27,12 +29,16 @@ void load_model(const std::wstring &path, bool print_info)
2729
void ConcurrentLoadModel(const std::vector<std::wstring> &paths, unsigned num_threads,
2830
unsigned interval_milliseconds, bool print_info)
2931
{
30-
std::vector<std::thread> threads;
31-
unsigned threads_size = paths.size() > num_threads ? paths.size() : num_threads;
32-
for (unsigned i = 0; i < threads_size; i++)
32+
33+
ThreadPool pool(num_threads);
34+
// Creating enough threads to load all the models specified
35+
// If there is more than enough threads, some threads will concurrently load same models
36+
size_t threads_size = paths.size() > num_threads ? paths.size() : num_threads;
37+
std::vector<std::future<void>> output_futures;
38+
for (size_t i = 0; i < threads_size; i++)
3339
{
34-
threads.emplace_back(std::thread(load_model, std::ref(paths[i % paths.size()]), print_info));
3540
Sleep(interval_milliseconds);
41+
output_futures.push_back(pool.SubmitWork(load_model, std::ref(paths[i % paths.size()]), true));
3642
}
37-
std::for_each(threads.begin(), threads.end(), [](std::thread &th) { th.join(); });
43+
// TODO: read output values from load_model
3844
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#include "ThreadPool.h"
2+
#include <ctime>
3+
4+
// Spin up a fixed number of worker threads; each one sleeps on the condition
// variable until either a task is queued or the pool is being destroyed.
ThreadPool::ThreadPool(unsigned int initial_pool_size): m_threads(), m_destruct_pool(false) {
    for (unsigned int i = 0; i < initial_pool_size; i++) {
        m_threads.emplace_back([this]() {
            for (;;) {
                std::unique_lock<std::mutex> lock(m_mutex);
                // Sleep until there is work to run or the pool is shutting down.
                m_cond_var.wait(lock, [this] { return m_destruct_pool || !m_work_queue.empty(); });
                if (m_work_queue.empty()) {
                    // Woken with an empty queue: destruction was requested.
                    return;
                }
                auto work = std::move(m_work_queue.front());
                m_work_queue.pop();
                lock.unlock(); // run the task without holding the queue lock
                work();
            }
        });
    }
}
26+
27+
// Drain the pool: request shutdown, wake every worker, and join them.
// Queued work still runs to completion before the workers exit, because the
// worker loop only breaks when the queue is empty.
ThreadPool::~ThreadPool() {
    {
        // Bug fix: set the flag while holding the mutex. Writing it unlocked
        // raced with workers reading it in the wait predicate — a worker
        // could evaluate the predicate, see "no work, not destructing", and
        // then miss this notification entirely, deadlocking the join below.
        std::lock_guard<std::mutex> lock(m_mutex);
        m_destruct_pool = true;
    }
    m_cond_var.notify_all(); // notify destruction to threads
    for (auto &thread : m_threads) {
        thread.join();
    }
}

Tools/WinMLRunner/src/ThreadPool.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#pragma once

#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>
8+
9+
// Fixed-size pool of worker threads that execute submitted callables
// asynchronously. Workers are created in the constructor and joined in the
// destructor; queued work is drained before shutdown completes.
class ThreadPool {
private:
    std::condition_variable m_cond_var;             // wakes workers when work arrives or on shutdown
    bool m_destruct_pool = false;                   // shutdown flag; read/written under m_mutex
    std::mutex m_mutex;                             // guards m_work_queue and m_destruct_pool
    std::vector<std::thread> m_threads;             // worker threads, joined in the destructor
    std::queue<std::function<void()>> m_work_queue; // pending tasks, type-erased to void()

public:
    // Starts initial_pool_size worker threads that block until work arrives.
    explicit ThreadPool(unsigned int initial_pool_size);
    // Wakes all workers, lets queued work drain, and joins every thread.
    ~ThreadPool();

    // The pool owns threads and a mutex, so copying it has no sensible meaning.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    // Enqueues f(args...) for execution on a worker thread. Returns a future
    // that yields f's result — or rethrows its exception — once it has run.
    template <typename F, typename...Args>
    inline auto SubmitWork(F &&f, Args&&... args) -> std::future<decltype(f(args...))> {
        auto func = std::bind(std::forward<F>(f), std::forward<Args>(args)...);
        // packaged_task is move-only; hold it in a shared_ptr so the copyable
        // void() lambda below can be stored in the std::function queue.
        auto task = std::make_shared<std::packaged_task<decltype(f(args...))()>>(std::move(func));
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_work_queue.push([task]() { (*task)(); });
        }

        m_cond_var.notify_one(); // unblocks one of the waiting threads
        return task->get_future();
    }
};

0 commit comments

Comments
 (0)