Merge pull request #3619 from qlzh727/model_test

qlzh727 · web-flow · commit 875fcb3b417c · 2018-03-19T13:55:56.000-07:00
Add benchmark utility functions for metric logging
diff --git a/official/utils/logging/logger.py b/official/utils/logging/logger.py
@@ -0,0 +1,75 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Logging utilities for benchmark."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import datetime
+import json
+import numbers
+import os
+
+import tensorflow as tf
+
+_METRIC_LOG_FILE_NAME = "metric.log"  # Name of the file inside the logging dir.
+_DATE_TIME_FORMAT_PATTERN = "%Y-%m-%dT%H:%M:%S.%fZ"  # Passed to strftime().
+
+
+class BenchmarkLogger(object):
+  """Class to log the benchmark information to local disk."""
+
+  def __init__(self, logging_dir):
+    self._logging_dir = logging_dir
+    if not tf.gfile.IsDirectory(self._logging_dir):
+      tf.gfile.MakeDirs(self._logging_dir)
+
+  def log_metric(self, name, value, unit=None, global_step=None, extras=None):
+    """Log the benchmark metric information to local file.
+
+    Currently the logging is done in a synchronized way. This should be updated
+    to log asynchronously.
+
+    Args:
+      name: string, the name of the metric to log.
+      value: number, the value of the metric. The value will not be logged if it
+        is not a number type.
+      unit: string, the unit of the metric, E.g "image per second".
+      global_step: int, the global_step when the metric is logged.
+      extras: map of string:string, the extra information about the metric.
+    """
+    if not isinstance(value, numbers.Number):
+      tf.logging.warning(
+        "Metric value to log should be a number. Got %s", type(value))
+      return
+
+    with tf.gfile.GFile(
+        os.path.join(self._logging_dir, _METRIC_LOG_FILE_NAME), "a") as f:
+      metric = {
+          "name": name,
+          "value": float(value),
+          "unit": unit,
+          "global_step": global_step,
+          "timestamp": datetime.datetime.now().strftime(
+              _DATE_TIME_FORMAT_PATTERN),
+          "extras": extras}
+      try:
+        json.dump(metric, f)
+        f.write("\n")
+      except (TypeError, ValueError) as e:
+        tf.logging.warning("Failed to dump metric to log file: name %s, value %s, error %s",
+                           name, value, e)
+
diff --git a/official/utils/logging/logger_test.py b/official/utils/logging/logger_test.py
@@ -0,0 +1,92 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for benchmark logger."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import json
+import os
+import tempfile
+
+
+from official.utils.logging import logger
+import tensorflow as tf
+
+
+class BenchmarkLoggerTest(tf.test.TestCase):
+
+  def tearDown(self):
+    super(BenchmarkLoggerTest, self).tearDown()
+    tf.gfile.DeleteRecursively(self.get_temp_dir())
+
+  def test_create_logging_dir(self):
+    non_exist_temp_dir = os.path.join(self.get_temp_dir(), "unknown_dir")
+    self.assertFalse(tf.gfile.IsDirectory(non_exist_temp_dir))
+
+    logger.BenchmarkLogger(non_exist_temp_dir)
+    self.assertTrue(tf.gfile.IsDirectory(non_exist_temp_dir))
+
+  def test_log_metric(self):
+    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    log = logger.BenchmarkLogger(log_dir)
+    log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
+
+    metric_log = os.path.join(log_dir, "metric.log")
+    self.assertTrue(tf.gfile.Exists(metric_log))
+    with tf.gfile.GFile(metric_log) as f:
+      metric = json.loads(f.readline())
+      self.assertEqual(metric["name"], "accuracy")
+      self.assertEqual(metric["value"], 0.999)
+      self.assertEqual(metric["unit"], None)
+      self.assertEqual(metric["global_step"], 1e4)
+      self.assertEqual(metric["extras"], {"name": "value"})
+
+  def test_log_multiple_metrics(self):
+    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    log = logger.BenchmarkLogger(log_dir)
+    log.log_metric("accuracy", 0.999, global_step=1e4, extras={"name": "value"})
+    log.log_metric("loss", 0.02, global_step=1e4)
+
+    metric_log = os.path.join(log_dir, "metric.log")
+    self.assertTrue(tf.gfile.Exists(metric_log))
+    with tf.gfile.GFile(metric_log) as f:
+      accuracy = json.loads(f.readline())
+      self.assertEqual(accuracy["name"], "accuracy")
+      self.assertEqual(accuracy["value"], 0.999)
+      self.assertEqual(accuracy["unit"], None)
+      self.assertEqual(accuracy["global_step"], 1e4)
+      self.assertEqual(accuracy["extras"], {"name": "value"})
+
+      loss = json.loads(f.readline())
+      self.assertEqual(loss["name"], "loss")
+      self.assertEqual(loss["value"], 0.02)
+      self.assertEqual(loss["unit"], None)
+      self.assertEqual(loss["global_step"], 1e4)
+
+  def test_log_non_nubmer_value(self):
+    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    log = logger.BenchmarkLogger(log_dir)
+    const = tf.constant(1)
+    log.log_metric("accuracy", const)
+
+    metric_log = os.path.join(log_dir, "metric.log")
+    self.assertFalse(tf.gfile.Exists(metric_log))
+
+if __name__ == "__main__":
+  tf.test.main()  # Invoke the TensorFlow test runner when run as a script.