Skip to content

Commit 932364b

Browse files
authored
Add benchmark upload util to Bigquery. (#3776)
* Add benchmark upload util to bigquery. Also update the benchmark logger and bigquery schema for the errors found during the integration test. * Fix lint error. * Update test to clear all the env vars during test. This was causing error since the Kokoro test has TF_PKG=tf-nightly injected during test. * Update lintrc to ignore google related package. * Another attempt to fix lint import error. * Address the review comment. * Fix lint error. * Another fix for lint. * Update test comment for env var clean up.
1 parent 03781c7 commit 932364b

File tree

9 files changed

+237
-32
lines changed

9 files changed

+237
-32
lines changed

official/benchmark/datastore/schema/benchmark_run.json

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"description": "The date when the test of the model is started",
1616
"mode": "REQUIRED",
1717
"name": "run_date",
18-
"type": "DATETIME"
18+
"type": "TIMESTAMP"
1919
},
2020
{
2121
"description": "The tensorflow version information.",
@@ -58,7 +58,7 @@
5858
"type": "RECORD"
5959
},
6060
{
61-
"description": "Enviornment variables when the benchmark run is executed.",
61+
"description": "Environment variables when the benchmark run is executed.",
6262
"fields": [
6363
{
6464
"description": "The name of the variable.",
@@ -74,7 +74,27 @@
7474
}
7575
],
7676
"mode": "REPEATED",
77-
"name": "enviornment_variable",
77+
"name": "environment_variable",
78+
"type": "RECORD"
79+
},
80+
{
81+
"description": "TF Environment variables when the benchmark run is executed.",
82+
"fields": [
83+
{
84+
"description": "The name of the variable.",
85+
"mode": "REQUIRED",
86+
"name": "name",
87+
"type": "STRING"
88+
},
89+
{
90+
"description": "The value of the variable.",
91+
"mode": "NULLABLE",
92+
"name": "value",
93+
"type": "STRING"
94+
}
95+
],
96+
"mode": "REPEATED",
97+
"name": "tensorflow_environment_variables",
7898
"type": "RECORD"
7999
},
80100
{

official/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
psutil>=5.4.3
2-
py-cpuinfo>=3.3.0
2+
py-cpuinfo>=3.3.0
3+
google-cloud-bigquery>=0.31.0

official/resnet/resnet_run_loop.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,12 @@ def resnet_main(flags, model_function, input_function):
348348
'version': flags.version,
349349
})
350350

351+
if flags.benchmark_log_dir is not None:
352+
benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
353+
benchmark_logger.log_run_info("resnet")
354+
else:
355+
benchmark_logger = None
356+
351357
for _ in range(flags.train_epochs // flags.epochs_between_evals):
352358
train_hooks = hooks_helper.get_train_hooks(
353359
flags.hooks,
@@ -380,8 +386,7 @@ def input_fn_eval():
380386
steps=flags.max_train_steps)
381387
print(eval_results)
382388

383-
if flags.benchmark_log_dir is not None:
384-
benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
389+
if benchmark_logger:
385390
benchmark_logger.log_estimator_evaluation_result(eval_results)
386391

387392

official/utils/arg_parsers/parsers.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,37 @@ class BenchmarkParser(argparse.ArgumentParser):
234234
benchmark_log_dir: Create a flag to specify location for benchmark logging.
235235
"""
236236

237-
def __init__(self, add_help=False, benchmark_log_dir=True):
237+
def __init__(self, add_help=False, benchmark_log_dir=True,
238+
bigquery_uploader=True):
238239
super(BenchmarkParser, self).__init__(add_help=add_help)
239240
if benchmark_log_dir:
240241
self.add_argument(
241242
"--benchmark_log_dir", "-bld", default=None,
242243
help="[default: %(default)s] The location of the benchmark logging.",
243244
metavar="<BLD>"
244245
)
246+
if bigquery_uploader:
247+
self.add_argument(
248+
"--gcp_project", "-gp", default=None,
249+
help="[default: %(default)s] The GCP project name where the benchmark"
250+
" will be uploaded.",
251+
metavar="<GP>"
252+
)
253+
self.add_argument(
254+
"--bigquery_data_set", "-bds", default="test_benchmark",
255+
help="[default: %(default)s] The Bigquery dataset name where the"
256+
" benchmark will be uploaded.",
257+
metavar="<BDS>"
258+
)
259+
self.add_argument(
260+
"--bigquery_run_table", "-brt", default="benchmark_run",
261+
help="[default: %(default)s] The Bigquery table name where the"
262+
" benchmark run information will be uploaded.",
263+
metavar="<BRT>"
264+
)
265+
self.add_argument(
266+
"--bigquery_metric_table", "-bmt", default="benchmark_metric",
267+
help="[default: %(default)s] The Bigquery table name where the"
268+
" benchmark metric information will be uploaded.",
269+
metavar="<BMT>"
270+
)

official/utils/arg_parsers/parsers_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __init__(self):
2929
parsers.PerformanceParser(num_parallel_calls=True, inter_op=True,
3030
intra_op=True, use_synthetic_data=True),
3131
parsers.ImageModelParser(data_format=True),
32-
parsers.BenchmarkParser(benchmark_log_dir=True)
32+
parsers.BenchmarkParser(benchmark_log_dir=True, bigquery_uploader=True)
3333
])
3434

3535

@@ -62,7 +62,8 @@ def test_default_setting(self):
6262
def test_benchmark_setting(self):
6363
defaults = dict(
6464
hooks=["LoggingMetricHook"],
65-
benchmark_log_dir="/tmp/12345"
65+
benchmark_log_dir="/tmp/12345",
66+
gcp_project="project_abc",
6667
)
6768

6869
parser = TestParser()
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
16+
"""Library to upload benchmark generated by BenchmarkLogger to remote repo.
17+
18+
This library require google cloud bigquery lib as dependency, which can be
19+
installed with:
20+
> pip install --upgrade google-cloud-bigquery
21+
"""
22+
23+
from __future__ import absolute_import
24+
from __future__ import division
25+
from __future__ import print_function
26+
27+
import json
28+
import os
29+
import sys
30+
import uuid
31+
32+
from google.cloud import bigquery
33+
34+
import tensorflow as tf # pylint: disable=g-bad-import-order
35+
36+
from official.utils.arg_parsers import parsers
37+
from official.utils.logging import logger
38+
39+
40+
class BigQueryUploader(object):
  """Upload the benchmark and metric info to BigQuery.

  Reads the JSON log files produced by logger.BenchmarkLogger from a local
  (or GFile-accessible) logging directory and streams their rows into
  BigQuery tables via the google-cloud-bigquery client.
  """

  def __init__(self, logging_dir, gcp_project=None, credentials=None):
    """Initialized BigQueryUploader with proper setting.

    Args:
      logging_dir: string, logging directory that contains the benchmark log.
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credential from
        local file for the case that the test is run out side of GCP.
    """
    self._logging_dir = logging_dir
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)

  def upload_benchmark_run(self, dataset_name, table_name, run_id):
    """Upload benchmark run information to Bigquery.

    Args:
      dataset_name: string, the name of bigquery dataset where the data will
        be uploaded.
      table_name: string, the name of bigquery table under the dataset where
        the data will be uploaded.
      run_id: string, a unique ID that will be attached to the data, usually
        this is a UUID4 format.
    """
    expected_file = os.path.join(
        self._logging_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)
    with tf.gfile.GFile(expected_file) as f:
      benchmark_json = json.load(f)
      # NOTE(review): the unique run identifier is stored under "model_id"
      # here but under "run_id" in upload_metric — confirm against the
      # benchmark_run BigQuery schema that "model_id" is the intended key.
      benchmark_json["model_id"] = run_id
      table_ref = self._bq_client.dataset(dataset_name).table(table_name)
      # insert_rows_json returns a list of per-row errors; empty means OK.
      errors = self._bq_client.insert_rows_json(table_ref, [benchmark_json])
      if errors:
        tf.logging.error(
            "Failed to upload benchmark info to bigquery: {}".format(errors))

  def upload_metric(self, dataset_name, table_name, run_id):
    """Upload metric information to Bigquery.

    Args:
      dataset_name: string, the name of bigquery dataset where the data will
        be uploaded.
      table_name: string, the name of bigquery table under the dataset where
        the metric data will be uploaded. This is different from the
        benchmark_run table.
      run_id: string, a unique ID that will be attached to the data, usually
        this is a UUID4 format. This should be the same as the benchmark
        run_id.
    """
    expected_file = os.path.join(
        self._logging_dir, logger.METRIC_LOG_FILE_NAME)
    with tf.gfile.GFile(expected_file) as f:
      lines = f.readlines()
      # The metric log is newline-delimited JSON; skip blank lines.
      metrics = []
      for line in filter(lambda l: l.strip(), lines):
        metric = json.loads(line)
        metric["run_id"] = run_id
        metrics.append(metric)
      # Guard: nothing to upload when the metric log had no rows; avoids a
      # pointless (and potentially erroring) empty insert request.
      if not metrics:
        tf.logging.info("No metric rows found in %s; skipping upload.",
                        expected_file)
        return
      table_ref = self._bq_client.dataset(dataset_name).table(table_name)
      errors = self._bq_client.insert_rows_json(table_ref, metrics)
      if errors:
        # Fixed copy-paste: this path uploads metric info, not benchmark
        # run info.
        tf.logging.error(
            "Failed to upload metric info to bigquery: {}".format(errors))
109+
110+
111+
def main(argv):
  """Parse flags and upload the benchmark run and metric logs to BigQuery."""
  arg_parser = parsers.BenchmarkParser()
  parsed_flags = arg_parser.parse_args(args=argv[1:])

  # The logging directory is mandatory: it is where BenchmarkLogger wrote
  # the JSON files that will be uploaded.
  if not parsed_flags.benchmark_log_dir:
    print("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir")
    sys.exit(1)

  bq_uploader = BigQueryUploader(
      parsed_flags.benchmark_log_dir,
      gcp_project=parsed_flags.gcp_project)

  # A single UUID ties the run row and its metric rows together.
  upload_id = str(uuid.uuid4())
  bq_uploader.upload_benchmark_run(
      parsed_flags.bigquery_data_set, parsed_flags.bigquery_run_table,
      upload_id)
  bq_uploader.upload_metric(
      parsed_flags.bigquery_data_set, parsed_flags.bigquery_metric_table,
      upload_id)


if __name__ == "__main__":
  main(argv=sys.argv)

official/utils/logging/logger.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
import tensorflow as tf
3232
from tensorflow.python.client import device_lib
3333

34-
_METRIC_LOG_FILE_NAME = "metric.log"
35-
_BENCHMARK_RUN_LOG_FILE_NAME = "benchmark_run.log"
34+
METRIC_LOG_FILE_NAME = "metric.log"
35+
BENCHMARK_RUN_LOG_FILE_NAME = "benchmark_run.log"
3636
_DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ"
3737

3838

@@ -81,9 +81,12 @@ def log_metric(self, name, value, unit=None, global_step=None, extras=None):
8181
tf.logging.warning(
8282
"Metric value to log should be a number. Got %s", type(value))
8383
return
84-
84+
if extras:
85+
extras = [{"name": k, "value": v} for k, v in sorted(extras.items())]
86+
else:
87+
extras = []
8588
with tf.gfile.GFile(
86-
os.path.join(self._logging_dir, _METRIC_LOG_FILE_NAME), "a") as f:
89+
os.path.join(self._logging_dir, METRIC_LOG_FILE_NAME), "a") as f:
8790
metric = {
8891
"name": name,
8992
"value": float(value),
@@ -107,15 +110,18 @@ def log_run_info(self, model_name):
107110
Args:
108111
model_name: string, the name of the model.
109112
"""
110-
run_info = {"model_name": model_name}
113+
run_info = {
114+
"model_name": model_name,
115+
"machine_config": {},
116+
"run_date": datetime.datetime.now().strftime(_DATE_TIME_FORMAT_PATTERN)}
111117
_collect_tensorflow_info(run_info)
112118
_collect_tensorflow_environment_variables(run_info)
113119
_collect_cpu_info(run_info)
114120
_collect_gpu_info(run_info)
115121
_collect_memory_info(run_info)
116122

117123
with tf.gfile.GFile(os.path.join(
118-
self._logging_dir, _BENCHMARK_RUN_LOG_FILE_NAME), "w") as f:
124+
self._logging_dir, BENCHMARK_RUN_LOG_FILE_NAME), "w") as f:
119125
try:
120126
json.dump(run_info, f)
121127
f.write("\n")
@@ -130,8 +136,9 @@ def _collect_tensorflow_info(run_info):
130136

131137

132138
def _collect_tensorflow_environment_variables(run_info):
133-
run_info["tensorflow_environment_variables"] = {
134-
k: v for k, v in os.environ.items() if k.startswith("TF_")}
139+
run_info["tensorflow_environment_variables"] = [
140+
{"name": k, "value": v}
141+
for k, v in sorted(os.environ.items()) if k.startswith("TF_")]
135142

136143

137144
# The following code is mirrored from tensorflow/tools/test/system_info_lib
@@ -150,7 +157,7 @@ def _collect_cpu_info(run_info):
150157
cpu_info["cpu_info"] = info["brand"]
151158
cpu_info["mhz_per_cpu"] = info["hz_advertised_raw"][0] / 1.0e6
152159

153-
run_info["cpu_info"] = cpu_info
160+
run_info["machine_config"]["cpu_info"] = cpu_info
154161

155162

156163
def _collect_gpu_info(run_info):
@@ -168,16 +175,16 @@ def _collect_gpu_info(run_info):
168175
gpu_info["model"] = _parse_gpu_model(d.physical_device_desc)
169176
# Assume all the GPU connected are same model
170177
break
171-
run_info["gpu_info"] = gpu_info
178+
run_info["machine_config"]["gpu_info"] = gpu_info
172179

173180

174181
def _collect_memory_info(run_info):
175182
# Note: psutil is not installed in the TensorFlow OSS tree.
176183
# It is installable via pip.
177184
import psutil # pylint: disable=g-import-not-at-top
178185
vmem = psutil.virtual_memory()
179-
run_info["memory_total"] = vmem.total
180-
run_info["memory_available"] = vmem.available
186+
run_info["machine_config"]["memory_total"] = vmem.total
187+
run_info["machine_config"]["memory_available"] = vmem.available
181188

182189

183190
def _parse_gpu_model(physical_device_desc):

0 commit comments

Comments
 (0)