Skip to content

Commit 60d515b

Browse files
authored
[Runtime] Add flag to disable per_session_host_allocator. (#688)
1 parent: dcdc32e; commit: 60d515b

File tree

3 files changed

+24
-9
lines changed

3 files changed

+24
-9
lines changed

tensorflow/core/common_runtime/direct_session.cc

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -489,14 +489,18 @@ class DirectSessionFactory : public SessionFactory {
489489

490490
ResourceMgr* gpu_shared_rmgr = nullptr;
491491
#if GOOGLE_CUDA
492+
bool use_per_session_host_allocator = false;
493+
TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("PER_SESSION_HOSTALLOC",
494+
/*default_val=*/false,
495+
&use_per_session_host_allocator));
492496
if (use_multi_stream) {
493497
// Create shared resource for gpu devices
494498
gpu_shared_rmgr = new ResourceMgr("localhost");
495499
std::string gpu_dev_prefix("/job:localhost/replica:0/task:0/device:GPU:");
496500
for (int i = 0; i < session_num; ++i) {
497501
dev_rmgr_map.device_rmgr_map[gpu_dev_prefix+std::to_string(base_index+i)] =
498502
gpu_shared_rmgr;
499-
if (i > 0) {
503+
if (use_per_session_host_allocator && i > 0) {
500504
dev_rmgr_map.device_rmgr_map[dev_prefix+"/device:CPU:"+std::to_string(i)] = shared_rmgr;
501505
dev_rmgr_map.device_rmgr_map[dev_prefix+"/device:cpu:"+std::to_string(i)] = shared_rmgr;
502506
dev_rmgr_map.device_rmgr_map["/device:CPU:"+std::to_string(i)] = shared_rmgr;
@@ -571,8 +575,13 @@ class DirectSessionFactory : public SessionFactory {
571575
follower_options.config.add_per_session_devices(
572576
"/job:localhost/replica:0/task:0/device:GPU:" +
573577
std::to_string(base_index+i));
574-
follower_options.config.add_per_session_devices(
575-
"/job:localhost/replica:0/task:0/device:CPU:"+std::to_string(i));
578+
if (use_per_session_host_allocator) {
579+
follower_options.config.add_per_session_devices(
580+
"/job:localhost/replica:0/task:0/device:CPU:"+std::to_string(i));
581+
} else {
582+
follower_options.config.add_per_session_devices(
583+
"/job:localhost/replica:0/task:0/device:CPU:0");
584+
}
576585
}
577586
#endif // GOOGLE_CUDA
578587

tensorflow/core/common_runtime/gpu/gpu_device_factory.cc

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -199,8 +199,12 @@ class GPUCompatibleCPUDeviceFactory : public DeviceFactory {
199199
int num_numa_nodes = options.config.experimental().use_numa_affinity()
200200
? port::NUMANumNodes()
201201
: 1;
202+
bool use_per_session_host_allocator = false;
203+
TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("PER_SESSION_HOSTALLOC",
204+
/*default_val=*/false,
205+
&use_per_session_host_allocator));
202206
int sess_num = 1;
203-
if (dev_rmgr_map) {
207+
if (use_per_session_host_allocator && dev_rmgr_map) {
204208
for (auto& item : dev_rmgr_map->device_rmgr_map) {
205209
int sess_idx = std::stoi(item.first.substr(item.first.rfind(":")+1));
206210
if (sess_idx >= sess_num) {

tensorflow/core/common_runtime/graph_execution_state.cc

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -737,11 +737,13 @@ Status GraphExecutionState::InitBaseGraph(std::unique_ptr<Graph>&& new_graph) {
737737
break;
738738
}
739739
}
740-
const auto& dname1 = session_options_->config.per_session_devices(1);
741-
for (auto& d : device_set_->devices()) {
742-
if (d->name() == dname1) {
743-
devices.AddDevice(d);
744-
break;
740+
if (session_options_->config.per_session_devices_size() > 1) {
741+
const auto& dname1 = session_options_->config.per_session_devices(1);
742+
for (auto& d : device_set_->devices()) {
743+
if (d->name() == dname1) {
744+
devices.AddDevice(d);
745+
break;
746+
}
745747
}
746748
}
747749
}

0 commit comments

Comments
 (0)