Merge pull request #734 from kirilg/1.5

kirilg · web-flow · commit 406bc866036e · 2018-01-22T12:05:55.000-08:00
1.5
diff --git a/tensorflow b/tensorflow
@@ -1 +1 @@
-Subproject commit 4cb0c13c7779da536cac6c682180c5757611b384
+Subproject commit b1f157f4d2d871f7a6d8eeb21fddf97b5216608a
diff --git a/tensorflow_serving/core/aspired_versions_manager.cc b/tensorflow_serving/core/aspired_versions_manager.cc
@@ -156,8 +156,6 @@ Status AspiredVersionsManager::Create(
   basic_manager_options.max_num_load_retries = options.max_num_load_retries;
   basic_manager_options.load_retry_interval_micros =
       options.load_retry_interval_micros;
-  basic_manager_options.flush_filesystem_caches =
-      options.flush_filesystem_caches;
   basic_manager_options.servable_event_bus = options.servable_event_bus;
   basic_manager_options.pre_load_hook = std::move(options.pre_load_hook);
   std::unique_ptr<BasicManager> basic_manager;
diff --git a/tensorflow_serving/core/aspired_versions_manager.h b/tensorflow_serving/core/aspired_versions_manager.h
@@ -123,12 +123,6 @@ class AspiredVersionsManager : public Manager,
     /// Default: 1 minute.
     int64 load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
 
-    // If true, and there are not multiple load threads, filesystem caches will
-    // be flushed after each servable is loaded. (Cache flush is skipped when
-    // multiple load threads are active, in order to avoid setting back a
-    // concurrent load on another thread.)
-    bool flush_filesystem_caches = false;
-
     /// The environment to use for starting threads in the thread-pool or for
     /// sleeping.
     Env* env = Env::Default();
diff --git a/tensorflow_serving/core/basic_manager.cc b/tensorflow_serving/core/basic_manager.cc
@@ -205,23 +205,21 @@ Status BasicManager::Create(Options options,
   manager->reset(new BasicManager(
       options.env, options.num_load_threads, options.num_unload_threads,
       options.max_num_load_retries, options.load_retry_interval_micros,
-      options.flush_filesystem_caches, std::move(options.resource_tracker),
-      options.servable_event_bus, std::move(options.pre_load_hook)));
+      std::move(options.resource_tracker), options.servable_event_bus,
+      std::move(options.pre_load_hook)));
   return Status::OK();
 }
 
 BasicManager::BasicManager(Env* const env, const uint32 num_load_threads,
                            const uint32 num_unload_threads,
                            uint32 max_num_load_retries,
                            int64 load_retry_interval_micros,
-                           bool flush_filesystem_caches,
                            std::unique_ptr<ResourceTracker> resource_tracker,
                            EventBus<ServableState>* servable_event_bus,
                            std::function<void(const ServableId&)> pre_load_hook)
     : servable_event_bus_(servable_event_bus),
       env_(env),
       num_load_threads_(num_load_threads),
-      flush_filesystem_caches_(flush_filesystem_caches),
       pre_load_hook_(std::move(pre_load_hook)) {
   harness_options_.max_num_load_retries = max_num_load_retries;
   harness_options_.load_retry_interval_micros = load_retry_interval_micros;
@@ -231,7 +229,7 @@ BasicManager::BasicManager(Env* const env, const uint32 num_load_threads,
   };
 
   {
-    mutex_lock l(load_executor_mu_);
+    mutex_lock l(num_load_threads_mu_);
     load_executor_ =
         CreateExecutor(env_, num_load_threads, "BasicManager_Load_ThreadPool");
   }
@@ -243,7 +241,7 @@ BasicManager::BasicManager(Env* const env, const uint32 num_load_threads,
 BasicManager::~BasicManager() {
   // Reset the executors first to finish all pending loads/unloads.
   {
-    mutex_lock l(load_executor_mu_);
+    mutex_lock l(num_load_threads_mu_);
     load_executor_.reset();
   }
   unload_executor_.reset();
@@ -464,18 +462,7 @@ Status BasicManager::ExecuteLoad(LoaderHarness* harness) {
   }
 
   // We don't hold the lock while calling Load() as it may block.
-  const Status status = harness->Load();
-
-  // Whether the load succeeded or failed, flush filesystem caches if there is
-  // only one load thread.
-  if (flush_filesystem_caches_ && num_load_threads() <= 1) {
-    const Status flush_status = Env::Default()->FlushFileSystemCaches();
-    if (!flush_status.ok()) {
-      LOG(WARNING) << "flushing filesystem caches failed: " << flush_status;
-    }
-  }
-
-  TF_RETURN_IF_ERROR(status);
+  TF_RETURN_IF_ERROR(harness->Load());
 
   {
     mutex_lock l(mu_);
@@ -559,16 +546,18 @@ Status BasicManager::ExecuteLoadOrUnload(const LoadOrUnloadRequest& request,
 }
 
 void BasicManager::SetNumLoadThreads(const uint32 num_load_threads) {
-  mutex_lock l(load_executor_mu_);
+  mutex_lock l(num_load_threads_mu_);
 
   load_executor_.reset();
-  num_load_threads_.store(num_load_threads);
+  num_load_threads_ = num_load_threads;
   load_executor_ =
-      CreateExecutor(env_, num_load_threads, "BasicManager_Load_ThreadPool");
+      CreateExecutor(env_, num_load_threads_, "BasicManager_Load_ThreadPool");
 }
 
 uint32 BasicManager::num_load_threads() const {
-  return num_load_threads_.load();
+  mutex_lock l(num_load_threads_mu_);
+
+  return num_load_threads_;
 }
 
 void BasicManager::LoadOrUnloadServable(const LoadOrUnloadRequest& request,
@@ -596,7 +585,7 @@ void BasicManager::LoadOrUnloadServable(const LoadOrUnloadRequest& request,
 
   switch (request.kind) {
     case LoadOrUnloadRequest::Kind::kLoad: {
-      mutex_lock l(load_executor_mu_);
+      mutex_lock l(num_load_threads_mu_);
       load_executor_->Schedule([this, request, done_callback]() {
         HandleLoadOrUnloadRequest(request, done_callback);
       });
diff --git a/tensorflow_serving/core/basic_manager.h b/tensorflow_serving/core/basic_manager.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_
 #define TENSORFLOW_SERVING_CORE_BASIC_MANAGER_H_
 
-#include <atomic>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -140,12 +139,6 @@ class BasicManager : public Manager {
     // Default: 1 minute.
     int64 load_retry_interval_micros = 1LL * 60 * 1000 * 1000;
 
-    // If true, and there are not multiple load threads, filesystem caches will
-    // be flushed after each servable is loaded. (Cache flush is skipped when
-    // multiple load threads are active, in order to avoid setting back a
-    // concurrent load on another thread.)
-    bool flush_filesystem_caches = false;
-
     // The environment to use for starting threads in the thread-pool.
     Env* env = Env::Default();
 
@@ -270,7 +263,6 @@ class BasicManager : public Manager {
 
   BasicManager(Env* env, uint32 num_load_threads, uint32 num_unload_threads,
                uint32 max_num_load_retries, int64 load_retry_interval_micros,
-               bool flush_filesystem_caches,
                std::unique_ptr<ResourceTracker> resource_tracker,
                EventBus<ServableState>* servable_event_bus,
                PreLoadHook pre_load_hook);
@@ -388,8 +380,8 @@ class BasicManager : public Manager {
   // the old thread pool blocks until all threads are done, so it could block
   // for a long time.
   void SetNumLoadThreads(uint32 num_load_threads)
-      LOCKS_EXCLUDED(load_executor_mu_);
-  uint32 num_load_threads() const;
+      LOCKS_EXCLUDED(num_load_threads_mu_);
+  uint32 num_load_threads() const LOCKS_EXCLUDED(num_load_threads_mu_);
 
   // Keys are the servable names.
   // Values are the harnesses for each servable version. The values when
@@ -482,14 +474,12 @@ class BasicManager : public Manager {
 
   Env* const env_;
 
-  // The number of load threads. Can be changed after instantiation of the
-  // manager via SetNumLoadThreads().
-  std::atomic<uint32> num_load_threads_;
-  // Whether to flush filesystem caches (if num_load_threads_ == 1)
-  const bool flush_filesystem_caches_ = false;
-  // The executor (and associated mutex) used for executing loads of servables.
-  mutable mutex load_executor_mu_;
-  std::unique_ptr<Executor> load_executor_ GUARDED_BY(load_executor_mu_);
+  // The number of load threads and the associated executor. They can be changed
+  // after instantiation of the manager via SetNumLoadThreads().
+  mutable mutex num_load_threads_mu_;
+  uint32 num_load_threads_ GUARDED_BY(num_load_threads_mu_);
+  // The executor used for executing loads of servables.
+  std::unique_ptr<Executor> load_executor_ GUARDED_BY(num_load_threads_mu_);
 
   // The executor used for executing unloads of servables. (Unlike for loads,
   // the unload executor is fixed for the lifetime of the manager.)
diff --git a/tensorflow_serving/core/basic_manager_test.cc b/tensorflow_serving/core/basic_manager_test.cc
@@ -938,73 +938,6 @@ TEST_F(SetNumLoadThreadsBasicManagerTest, FastLoad) {
   }
 }
 
-// This filesystem detects a call to FlushCaches(), which is triggered by the
-// BasicManager's call to Env::Default()->FlushFileSystemCaches() after loading
-// a servable.
-class FlushDetectingFileSystem : public NullFileSystem {
- public:
-  void FlushCaches() override { flushed = true; }
-  static std::atomic<bool> flushed;
-};
-
-std::atomic<bool> FlushDetectingFileSystem::flushed;
-
-REGISTER_FILE_SYSTEM("flush", FlushDetectingFileSystem);
-
-// This test loads servables with BasicManager::Options::flush_filesystem_caches
-// true or false, and verifies that filesystem caches were flushed (or not
-// flushed) as expected.
-class FlushFileSystemCachesTest : public ::testing::TestWithParam<bool> {
- protected:
-  FlushFileSystemCachesTest() : flush_filesystem_caches_(GetParam()) {
-    BasicManager::Options options;
-    options.flush_filesystem_caches = flush_filesystem_caches_;
-    TF_CHECK_OK(BasicManager::Create(std::move(options), &basic_manager_));
-  }
-
-  std::unique_ptr<BasicManager> basic_manager_;
-  bool flush_filesystem_caches_;
-};
-
-TEST_P(FlushFileSystemCachesTest, Load) {
-  test_util::BasicManagerTestAccess manager_test_access(basic_manager_.get());
-  // The number of load threads is initially zero, so filesystems should be
-  // flushed if flush_filesystem_caches_ is true.
-  FlushDetectingFileSystem::flushed.store(false);
-  const ServableId id0 = {kServableName3, 0};
-  TF_CHECK_OK(basic_manager_->ManageServable(CreateServable(id0)));
-  basic_manager_->LoadServable(id0, [&](const Status& status) {
-    TF_ASSERT_OK(status);
-    EXPECT_EQ(flush_filesystem_caches_,
-              FlushDetectingFileSystem::flushed.load());
-  });
-  // Load another servable with two load threads. Filesystem caches should not
-  // be flushed.
-  manager_test_access.SetNumLoadThreads(2);
-  FlushDetectingFileSystem::flushed.store(false);
-  const ServableId id1 = {kServableName3, 1};
-  TF_CHECK_OK(basic_manager_->ManageServable(CreateServable(id1)));
-  basic_manager_->LoadServable(id1, [&](const Status& status) {
-    TF_ASSERT_OK(status);
-    EXPECT_FALSE(FlushDetectingFileSystem::flushed.load());
-  });
-  // Now move to a single load thread and load a third servable. Filesystem
-  // caches should once again be flushed if flush_filesystem_caches_ is true.
-  manager_test_access.SetNumLoadThreads(1);
-  FlushDetectingFileSystem::flushed.store(false);
-  const ServableId id2 = {kServableName3, 2};
-  TF_CHECK_OK(basic_manager_->ManageServable(CreateServable(id2)));
-  basic_manager_->LoadServable(id2, [&](const Status& status) {
-    TF_ASSERT_OK(status);
-    EXPECT_EQ(flush_filesystem_caches_,
-              FlushDetectingFileSystem::flushed.load());
-  });
-  basic_manager_.reset();
-}
-
-INSTANTIATE_TEST_CASE_P(WithOrWithoutFlush, FlushFileSystemCachesTest,
-                        ::testing::Bool());
-
 TEST_P(BasicManagerTest, ConcurrentLoadsOnlyOneSucceeds) {
   const ServableId id = {kServableName3, 0};
   mutex status_mu;
diff --git a/tensorflow_serving/model_servers/main.cc b/tensorflow_serving/model_servers/main.cc
@@ -308,7 +308,6 @@ int main(int argc, char** argv) {
   tensorflow::string batching_parameters_file;
   tensorflow::string model_name = "default";
   tensorflow::int32 file_system_poll_wait_seconds = 1;
-  bool flush_filesystem_caches = true;
   tensorflow::string model_base_path;
   const bool use_saved_model = true;
   // Tensorflow session parallelism of zero means that both inter and intra op
@@ -340,14 +339,6 @@ int main(int argc, char** argv) {
                        &file_system_poll_wait_seconds,
                        "interval in seconds between each poll of the file "
                        "system for new model version"),
-      tensorflow::Flag("flush_filesystem_caches", &flush_filesystem_caches,
-                       "If true (the default), filesystem caches will be "
-                       "flushed after the initial load of all servables, and "
-                       "after each subsequent individual servable reload (if "
-                       "the number of load threads is 1). This reduces memory "
-                       "consumption of the model server, at the potential cost "
-                       "of cache misses if model files are accessed after "
-                       "servables are loaded."),
       tensorflow::Flag("tensorflow_session_parallelism",
                        &tensorflow_session_parallelism,
                        "Number of threads to use for running a "
@@ -427,7 +418,6 @@ int main(int argc, char** argv) {
   options.aspired_version_policy =
       std::unique_ptr<AspiredVersionPolicy>(new AvailabilityPreservingPolicy);
   options.file_system_poll_wait_seconds = file_system_poll_wait_seconds;
-  options.flush_filesystem_caches = flush_filesystem_caches;
 
   std::unique_ptr<ServerCore> core;
   TF_CHECK_OK(ServerCore::Create(std::move(options), &core));
diff --git a/tensorflow_serving/model_servers/server_core.cc b/tensorflow_serving/model_servers/server_core.cc
@@ -459,10 +459,6 @@ Status ServerCore::ReloadConfig(const ModelServerConfig& new_config) {
   }
   TF_RETURN_IF_ERROR(MaybeUpdateServerRequestLogger());
 
-  if (options_.flush_filesystem_caches) {
-    return Env::Default()->FlushFileSystemCaches();
-  }
-
   return Status::OK();
 }
 
@@ -636,7 +632,6 @@ Status ServerCore::CreateAspiredVersionsManager(
   manager_options.num_unload_threads = options_.num_unload_threads;
   manager_options.max_num_load_retries = options_.max_num_load_retries;
   manager_options.pre_load_hook = std::move(options_.pre_load_hook);
-  manager_options.flush_filesystem_caches = options_.flush_filesystem_caches;
   const tensorflow::Status status =
       AspiredVersionsManager::Create(std::move(manager_options), manager);
   if (!status.ok()) {
diff --git a/tensorflow_serving/model_servers/server_core.h b/tensorflow_serving/model_servers/server_core.h
@@ -117,17 +117,6 @@ class ServerCore : public Manager {
     // Time interval between file-system polls, in seconds.
     int32 file_system_poll_wait_seconds = 30;
 
-    // If true, filesystem caches are flushed in the following cases:
-    //
-    // 1) After the initial models are loaded.
-    // 2) After a new config is supplied and a changed set of models are loaded.
-    // 3) After each new model version is loaded, if num_load_threads == 1.
-    //
-    // In the common scenario where the number of load threads is set to 1 after
-    // the initial load, this will take care of flushing the cache once after
-    // the initial load, and after every subsequent load of every model version.
-    bool flush_filesystem_caches = false;
-
     // Configuration for the supported platforms.
     PlatformConfigMap platform_config_map;