Skip to content

Commit 0e8127a

Browse files
authored
[Distribute] Add elastic-grpc server. (#936)
Signed-off-by: JunqiHu <[email protected]>
1 parent be62ec3 commit 0e8127a

File tree

14 files changed

+849
-8
lines changed

14 files changed

+849
-8
lines changed

configure.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,6 +1433,9 @@ def main():
14331433
set_build_var(environ_cp, 'TF_NEED_STAR', 'STAR', 'with_star_support',
14341434
True, 'star')
14351435

1436+
set_build_var(environ_cp, 'TF_NEED_ELASTIC', 'ELASTIC TRAINING', 'with_elastic_support',
1437+
True, 'elastic')
1438+
14361439
set_build_var(environ_cp, 'TF_ENABLE_PMEM', 'PMEM', 'with_pmem_support',
14371440
False, 'pmem')
14381441

tensorflow/BUILD

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,12 @@ config_setting(
434434
visibility = ["//visibility:public"],
435435
)
436436

437+
# Build-configuration flag for elastic support: this setting matches when the
# build is invoked with `--define with_elastic_support=true`. It is publicly
# visible so that any package can key a select() on it.
config_setting(
438+
name = "with_elastic_support",
439+
values = {"define": "with_elastic_support=true"},
440+
visibility = ["//visibility:public"],
441+
)
442+
437443
config_setting(
438444
name = "with_pmem_support",
439445
values = {"define": "with_pmem_support=true"},
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
licenses(["notice"]) # Apache 2.0
2+
3+
exports_files(["LICENSE"])
4+
5+
# Targets in this package are visible only to //tensorflow:internal unless a
# rule sets its own `visibility`.
package(default_visibility = [
6+
"//tensorflow:internal",
7+
])
8+
9+
load(
10+
"//tensorflow:tensorflow.bzl", "tf_cc_test",
11+
)
12+
13+
# C++ library target for the elastic gRPC server.
#
# srcs is conditional: elastic_service.cc and elastic_grpc_server_lib.cc are
# compiled only when //tensorflow:with_elastic_support matches; in every other
# configuration srcs is empty. hdrs and deps are unconditional, so the headers
# stay available to dependents either way.
#
# NOTE(review): with the setting off, anything that needs symbols defined in
# the two .cc files will presumably fail to link -- confirm whether dependents
# of this target should be guarded by the same select().
#
# alwayslink = 1 asks the linker to keep every object file of this library in
# a dependent binary even when nothing references its symbols.
cc_library(
14+
name = "elastic_grpc_server_lib",
15+
srcs = select({"//tensorflow:with_elastic_support": ["elastic_service.cc",
16+
"elastic_grpc_server_lib.cc"],
17+
"//conditions:default": []}),
18+
hdrs = ["elastic_service.h",
19+
"elastic_grpc_server_lib.h"],
20+
linkstatic = 1, # Seems to be needed since alwayslink is broken in bazel
21+
deps = [
22+
"//tensorflow/core:elastic_service_proto_cc",
23+
"//tensorflow/core/distributed_runtime/rpc:grpc_server_lib",
24+
"//tensorflow/core/distributed_runtime/rpc:async_service_interface",
25+
"//tensorflow/core/distributed_runtime/rpc:grpc_channel",
26+
"//tensorflow/core/distributed_runtime/rpc:grpc_master_service",
27+
"//tensorflow/core/distributed_runtime/rpc:grpc_worker_cache",
28+
"//tensorflow/core/distributed_runtime/rpc:grpc_worker_service",
29+
"//tensorflow/core/distributed_runtime/rpc:rpc_rendezvous_mgr",
30+
"//tensorflow:grpc",
31+
"//tensorflow:grpc++",
32+
"//tensorflow/core:core_cpu",
33+
"//tensorflow/core:core_cpu_internal",
34+
"//tensorflow/core:framework",
35+
"//tensorflow/core:framework_internal",
36+
"//tensorflow/core:lib",
37+
"//tensorflow/core/common_runtime/eager:context",
38+
"//tensorflow/core/distributed_runtime:collective_param_resolver_distributed",
39+
"//tensorflow/core/distributed_runtime:device_resolver_distributed",
40+
"//tensorflow/core/distributed_runtime:graph_mgr",
41+
"//tensorflow/core/distributed_runtime:local_master",
42+
"//tensorflow/core/distributed_runtime:master",
43+
"//tensorflow/core/distributed_runtime:master_env",
44+
"//tensorflow/core/distributed_runtime:master_session",
45+
"//tensorflow/core/distributed_runtime:rpc_collective_executor_mgr",
46+
"//tensorflow/core/distributed_runtime:server_lib",
47+
"//tensorflow/core/distributed_runtime:session_mgr",
48+
"//tensorflow/core/distributed_runtime:worker_cache_wrapper",
49+
"//tensorflow/core/distributed_runtime:worker_env",
50+
"//tensorflow/core/distributed_runtime:worker_resource",
51+
"//tensorflow/core/distributed_runtime/rpc/eager:grpc_eager_service_impl",
52+
],
53+
alwayslink = 1,
54+
)
55+
56+
# Small-size C++ test built from elastic_grpc_server_lib_test.cc with the
# tf_cc_test macro. It links statically against :elastic_grpc_server_lib and
# the TensorFlow test harness (//tensorflow/core:test and :test_main).
# Note that the target name ("elastic_grpc_test") differs from the source
# file's base name.
tf_cc_test(
57+
name = "elastic_grpc_test",
58+
size = "small",
59+
srcs = ["elastic_grpc_server_lib_test.cc"],
60+
deps = [
61+
":elastic_grpc_server_lib",
62+
"//tensorflow/core/distributed_runtime/rpc:grpc_util",
63+
"//tensorflow:grpc",
64+
"//tensorflow:grpc++",
65+
"//tensorflow/core:test",
66+
"//tensorflow/core:test_main",
67+
"//tensorflow/core:worker_proto_cc",
68+
],
69+
linkstatic = 1,
70+
)

0 commit comments

Comments
 (0)