Skip to content

Commit b8717f3

Browse files
authored
[DeepRec] Support to collect timeline in Serving. (#16)
1 parent 5017b0f commit b8717f3

File tree

22 files changed

+3416
-3
lines changed

22 files changed

+3416
-3
lines changed

WORKSPACE

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,65 @@ http_archive(
4242
urls = ["https://github.com/nelhage/rules_boost/archive/9f9fb8b2f0213989247c9d5c0e814a8451d18d7f.tar.gz"],
4343
)
4444

45+
# Aliyun OSS C SDK, release 3.7.0, built with the project-supplied BUILD file
# //third_party/oss_c_sdk:oss_c_sdk.BUILD. Exposed as @aliyun_oss_c_sdk, which
# //tensorflow_serving/util:tracer lists in its deps.
# NOTE(review): the archive is fetched over plain http from a mirror; integrity
# relies on the pinned sha256 below.
http_archive(
46+
name = "aliyun_oss_c_sdk",
47+
build_file = "//third_party/oss_c_sdk:oss_c_sdk.BUILD",
48+
sha256 = "6450d3970578c794b23e9e1645440c6f42f63be3f82383097660db5cf2fba685",
49+
strip_prefix = "aliyun-oss-c-sdk-3.7.0",
50+
urls = [
51+
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/aliyun/aliyun-oss-c-sdk/archive/3.7.0.tar.gz",
52+
],
53+
)
54+
55+
# libexpat R_2_2_6; only the expat/ subdirectory of the archive is used
# (see strip_prefix), built with //third_party/expat:libexpat.BUILD.
# NOTE(review): presumably a transitive dependency of the OSS C SDK stack —
# confirm against the third_party BUILD files.
http_archive(
56+
name = "libexpat",
57+
build_file = "//third_party/expat:libexpat.BUILD",
58+
sha256 = "574499cba22a599393e28d99ecfa1e7fc85be7d6651d543045244d5b561cb7ff",
59+
strip_prefix = "libexpat-R_2_2_6/expat",
60+
urls = [
61+
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/libexpat/libexpat/archive/R_2_2_6.tar.gz",
62+
],
63+
)
64+
65+
# Apache Portable Runtime 1.6.5, built with //third_party/apr1:libapr1.BUILD
# after applying the local patch //third_party/apr1:libapr1.patch.
# NOTE(review): presumably required by the OSS C SDK — confirm against
# oss_c_sdk.BUILD.
http_archive(
66+
name = "libapr1",
67+
build_file = "//third_party/apr1:libapr1.BUILD",
68+
sha256 = "1a0909a1146a214a6ab9de28902045461901baab4e0ee43797539ec05b6dbae0",
69+
strip_prefix = "apr-1.6.5",
70+
patches = [
71+
"//third_party/apr1:libapr1.patch",
72+
],
73+
urls = [
74+
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/apache/apr/archive/1.6.5.tar.gz",
75+
],
76+
)
77+
78+
# Apache APR-util 1.6.1, built with //third_party/aprutil1:libaprutil1.BUILD
# after applying the local patch //third_party/aprutil1:libaprutil1.patch.
# NOTE(review): presumably required by the OSS C SDK — confirm against
# oss_c_sdk.BUILD.
http_archive(
79+
name = "libaprutil1",
80+
build_file = "//third_party/aprutil1:libaprutil1.BUILD",
81+
sha256 = "4c9ae319cedc16890fc2776920e7d529672dda9c3a9a9abd53bd80c2071b39af",
82+
strip_prefix = "apr-util-1.6.1",
83+
patches = [
84+
"//third_party/aprutil1:libaprutil1.patch",
85+
],
86+
urls = [
87+
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/apache/apr-util/archive/1.6.1.tar.gz",
88+
],
89+
)
90+
91+
# Mini-XML (mxml) v2.12, built with //third_party/mxml:mxml.BUILD after
# applying the local patch //third_party/mxml:mxml.patch.
# NOTE(review): presumably required by the OSS C SDK — confirm against
# oss_c_sdk.BUILD.
http_archive(
92+
name = "mxml",
93+
build_file = "//third_party/mxml:mxml.BUILD",
94+
sha256 = "4d850d15cdd4fdb9e82817eb069050d7575059a9a2729c82b23440e4445da199",
95+
strip_prefix = "mxml-2.12",
96+
patches = [
97+
"//third_party/mxml:mxml.patch",
98+
],
99+
urls = [
100+
"http://pythonrun.oss-cn-zhangjiakou.aliyuncs.com/tensorflow_io/github.com/michaelrsweet/mxml/archive/v2.12.tar.gz",
101+
],
102+
)
103+
45104
load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
46105
boost_deps()
47106

tensorflow_serving/model_servers/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ cc_library(
337337
"//tensorflow_serving/config:platform_config_proto",
338338
"//tensorflow_serving/core:availability_preserving_policy",
339339
"//tensorflow_serving/servables/tensorflow:session_bundle_config_proto",
340+
"//tensorflow_serving/util:tracer",
340341
] + TENSORFLOW_DEPS + SUPPORTED_TENSORFLOW_OPS,
341342
)
342343

tensorflow_serving/model_servers/main.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,20 @@ int main(int argc, char** argv) {
207207
"TensorFlow Lite model from `model.tflite` file in "
208208
"SavedModel directory instead of the TensorFlow model "
209209
"from `saved_model.pb` file."),
210+
tensorflow::Flag("timeline_start_step", &options.timeline_start_step,
211+
"timeline_start_step"),
212+
tensorflow::Flag("timeline_interval_step", &options.timeline_interval_step,
213+
"timeline_interval_step"),
214+
tensorflow::Flag("timeline_trace_count", &options.timeline_trace_count,
215+
"timeline_trace_count"),
216+
tensorflow::Flag("timeline_path", &options.timeline_path,
217+
"timeline_path"),
218+
tensorflow::Flag("oss_endpoint", &options.oss_endpoint,
219+
"oss_endpoint"),
220+
tensorflow::Flag("oss_access_id", &options.oss_access_id,
221+
"oss_access_id"),
222+
tensorflow::Flag("oss_access_key", &options.oss_access_key,
223+
"oss_access_key"),
210224
tensorflow::Flag("use_multi_stream", &options.use_multi_stream,
211225
"Use multi-stream or not in session_group")};
212226

tensorflow_serving/model_servers/server.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ limitations under the License.
1414
==============================================================================*/
1515

1616
#include "tensorflow_serving/model_servers/server.h"
17+
#include "tensorflow_serving/util/tracer.h"
1718

1819
#include <unistd.h>
1920

@@ -179,6 +180,35 @@ void Server::PollFilesystemAndReloadConfig(const string& config_file_path) {
179180
}
180181

181182
namespace {
183+
// Forwards the timeline-related server options to the process-wide Tracer.
//
// Nothing is configured unless all of the following hold:
//   timeline_start_step >= 0, timeline_interval_step > 0,
//   timeline_trace_count > 0 and timeline_path is non-empty.
//
// The destination is chosen from the shape of timeline_path:
//   * a path starting with '/' is passed to the path-only SetParams overload;
//   * a path starting with "oss://" is passed to the overload that also takes
//     oss_endpoint, oss_access_id and oss_access_key, all of which must be
//     non-empty, otherwise an error is logged and tracing stays disabled;
//   * any other path logs an error and leaves tracing disabled.
void ParseTimelineConfig(const Server::Options& options) {
  const auto start_step = options.timeline_start_step;
  const auto interval_step = options.timeline_interval_step;
  const auto trace_count = options.timeline_trace_count;
  const auto& path = options.timeline_path;

  // Timeline collection is opt-in: bail out unless every knob is valid.
  if (start_step < 0 || interval_step <= 0 || trace_count <= 0 ||
      path.empty()) {
    return;
  }

  // save timeline to local
  if (path[0] == '/') {
    Tracer::GetTracer()->SetParams(start_step, interval_step, trace_count, path);
    return;
  }

  // The scheme must be a prefix of the path. Searching for "oss://" anywhere
  // in the string would wrongly accept relative paths such as "logs/oss://x".
  constexpr char kOssScheme[] = "oss://";
  const bool is_oss_path =
      path.compare(0, sizeof(kOssScheme) - 1, kOssScheme) == 0;
  if (!is_oss_path) {
    LOG(ERROR) << "ERROR: Only support to save timeline to local or oss now."
               << " We will not collect timeline.";
    return;
  }

  // save timeline to oss
  if (options.oss_endpoint.empty() ||
      options.oss_access_id.empty() ||
      options.oss_access_key.empty()) {
    LOG(ERROR) << "ERROR: Timeline require oss_endpoint, oss_access_id, and oss_access_key."
               << " We will not collect timeline.";
    return;
  }
  Tracer::GetTracer()->SetParams(start_step,
      interval_step, trace_count, options.oss_endpoint,
      options.oss_access_id, options.oss_access_key, path);
}
211+
182212
Status CreatePlatformConfigMap(const Server::Options& server_options,
183213
ServerCore::Options& options) {
184214
const bool use_saved_model = true;
@@ -201,6 +231,8 @@ Status CreatePlatformConfigMap(const Server::Options& server_options,
201231
"server_options.enable_batching to true.");
202232
}
203233

234+
ParseTimelineConfig(server_options);
235+
204236
session_bundle_config.mutable_session_config()
205237
->mutable_gpu_options()
206238
->set_per_process_gpu_memory_fraction(
@@ -260,6 +292,8 @@ Status CreatePlatformConfigMapV2(const Server::Options& server_options,
260292
auto model_session_config =
261293
session_bundle_config.add_model_session_config();
262294

295+
ParseTimelineConfig(server_options);
296+
263297
// session num
264298
model_session_config->set_session_num(
265299
server_options.session_num_per_group);

tensorflow_serving/model_servers/server.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,15 @@ class Server {
9191
tensorflow::string gpu_ids_list = "";
9292
bool use_multi_stream = false;
9393

94+
// Timeline
95+
tensorflow::int64 timeline_start_step = -1;
96+
tensorflow::int64 timeline_interval_step = -1;
97+
tensorflow::int64 timeline_trace_count = -1;
98+
tensorflow::string timeline_path = "";
99+
tensorflow::string oss_endpoint = "";
100+
tensorflow::string oss_access_id = "";
101+
tensorflow::string oss_access_key = "";
102+
94103
Options();
95104
};
96105

tensorflow_serving/servables/tensorflow/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ cc_library(
489489
":util",
490490
"//tensorflow_serving/apis:predict_proto",
491491
"//tensorflow_serving/util:optional",
492+
"//tensorflow_serving/util:tracer",
492493
"@com_google_absl//absl/strings",
493494
"@org_tensorflow//tensorflow/cc/saved_model:signature_constants",
494495
"@org_tensorflow//tensorflow/contrib/session_bundle",

tensorflow_serving/servables/tensorflow/predict_util.cc

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ limitations under the License.
1414
==============================================================================*/
1515

1616
#include "tensorflow_serving/servables/tensorflow/predict_util.h"
17+
#include "tensorflow_serving/util/tracer.h"
1718

1819
#include <map>
1920
#include <memory>
@@ -32,6 +33,9 @@ limitations under the License.
3233

3334
namespace tensorflow {
3435
namespace serving {
36+
37+
#define likely(x) __builtin_expect(!!(x), 1)
38+
3539
namespace {
3640

3741
Status VerifySignature(const SignatureDef& signature) {
@@ -205,11 +209,21 @@ Status RunPredict(
205209
TF_RETURN_IF_ERROR(PreProcessPrediction(signature, request, &input_tensors,
206210
&output_tensor_names,
207211
&output_tensor_aliases));
212+
bool trace_timeline = Tracer::GetTracer()->NeedTracing();
208213
std::vector<Tensor> outputs;
209214
RunMetadata run_metadata;
210-
TF_RETURN_IF_ERROR(session->Run(run_options, input_tensors,
211-
output_tensor_names, {}, &outputs,
212-
&run_metadata));
215+
if (likely(!trace_timeline)) {
216+
TF_RETURN_IF_ERROR(session->Run(run_options, input_tensors,
217+
output_tensor_names, {}, &outputs,
218+
&run_metadata));
219+
} else {
220+
RunOptions tmp_run_opt = run_options;
221+
tmp_run_opt.set_trace_level(tensorflow::RunOptions::FULL_TRACE);
222+
TF_RETURN_IF_ERROR(session->Run(tmp_run_opt, input_tensors,
223+
output_tensor_names, {}, &outputs,
224+
&run_metadata));
225+
Tracer::GetTracer()->GenTimeline(run_metadata);
226+
}
213227

214228
return PostProcessPredictionResult(output_tensor_aliases, outputs, option,
215229
response);

tensorflow_serving/util/BUILD

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ cc_library(
6262
],
6363
)
6464

65+
# Header-only target (no srcs) exposing tracer.h. It depends on the Aliyun OSS
# C SDK and on TensorFlow's protos/framework; model_servers and
# servables/tensorflow list it in their deps.
cc_library(
66+
name = "tracer",
67+
hdrs = ["tracer.h"],
68+
deps = [
69+
"@aliyun_oss_c_sdk",
70+
"@org_tensorflow//tensorflow/core:protos_all_cc",
71+
"@org_tensorflow//tensorflow/core:framework",
72+
],
73+
)
74+
6575
cc_library(
6676
name = "prometheus_exporter",
6777
srcs = ["prometheus_exporter.cc"],

0 commit comments

Comments
 (0)