Remove unnecessary Tensorboard metrics (facebook#2487)

mpolson64 · facebook-github-bot · commit 128cfdd28bd4 · 2024-07-31T14:08:16.000-07:00
Summary: Pull Request resolved: facebook#2487 Reviewed By: mgarrard Differential Revision: D57920811 fbshipit-source-id: e1861073cfcb5874930a0720bc46c0e966eea36c
diff --git a/ax/metrics/tensorboard.py b/ax/metrics/tensorboard.py
@@ -11,15 +11,14 @@
 import logging
 
 from logging import Logger
-from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, Union
+from typing import Any, Dict, List, Optional
 
 import pandas as pd
 from ax.core.base_trial import BaseTrial
 from ax.core.map_data import MapData, MapKeyInfo
 from ax.core.map_metric import MapMetric
 from ax.core.metric import Metric, MetricFetchE, MetricFetchResult
 from ax.core.trial import Trial
-from ax.metrics.curve import AbstractCurveMetric
 from ax.utils.common.logger import get_logger
 from ax.utils.common.result import Err, Ok
 from pyre_extensions import assert_is_instance
@@ -33,7 +32,6 @@
     from tensorboard.backend.event_processing import (
         plugin_event_multiplexer as event_multiplexer,
     )
-    from tensorboard.compat.proto import types_pb2
 
     logging.getLogger("tensorboard").setLevel(logging.CRITICAL)
 
@@ -218,120 +216,9 @@ def _get_event_multiplexer_for_trial(
 
             return mul
 
-    class TensorboardCurveMetric(AbstractCurveMetric):
-        """A `CurveMetric` for getting Tensorboard curves."""
-
-        map_key_info: MapKeyInfo[float] = MapKeyInfo(key="steps", default_value=0.0)
-
-        def get_curves_from_ids(
-            self,
-            ids: Iterable[Union[int, str]],
-            names: Optional[Set[str]] = None,
-        ) -> Dict[Union[int, str], Dict[str, pd.Series]]:
-            """Get curve data from tensorboard logs.
-
-            NOTE: If the ids are not simple paths/posix locations, subclass this metric
-            and replace this method with an appropriate one that retrieves the log
-            results.
-
-            Args:
-                ids: A list of string paths to tensorboard log directories.
-                names: The names of the tags for which to fetch the curves.
-                    If omitted, all tags are returned.
-
-            Returns:
-                A nested dictionary mapping ids (first level) and metric names (second
-                level) to pandas Series of data.
-            """
-            return {idx: get_tb_from_posix(path=str(idx), tags=names) for idx in ids}
-
-    def get_tb_from_posix(
-        path: str,
-        tags: Optional[Set[str]] = None,
-    ) -> Dict[str, pd.Series]:
-        r"""Get Tensorboard data from a posix path.
-
-        Args:
-            path: The posix path for the directory that contains the tensorboard logs.
-            tags: The names of the tags for which to fetch the curves. If omitted,
-                all tags are returned.
-        Returns:
-            A dictionary mapping tag names to pandas Series of data.
-        """
-        logger.debug(f"Reading TB logs from {path}.")
-        mul = event_multiplexer.EventMultiplexer(max_reload_threads=20)
-        mul.AddRunsFromDirectory(path, None)
-        mul.Reload()
-        scalar_dict = mul.PluginRunToTagToContent("scalars")
-
-        raw_result = [
-            {"tag": tag, "event": mul.Tensors(run, tag)}
-            for run, run_dict in scalar_dict.items()
-            for tag in run_dict
-            if tags is None or tag in tags
-        ]
-        tb_run_data = {}
-        for item in raw_result:
-            latest_start_time = _get_latest_start_time(item["event"])
-            steps = [e.step for e in item["event"] if e.wall_time >= latest_start_time]
-            vals = [
-                _get_event_value(e)
-                for e in item["event"]
-                if e.wall_time >= latest_start_time
-            ]
-            key = item["tag"]
-            series = pd.Series(index=steps, data=vals).dropna()
-            if key in tb_run_data:
-                tb_run_data[key] = pd.concat(objs=[tb_run_data[key], series])
-            else:
-                tb_run_data[key] = series
-        for key, series in tb_run_data.items():
-            if any(series.index.duplicated()):
-                # take average of repeated observations of the same "step"
-                series = series.groupby(series.index).mean()
-                logger.debug(
-                    f"Found duplicate steps for tag {key}. "
-                    "Removing duplicates by averaging."
-                )
-                tb_run_data[key] = series
-        return tb_run_data
-
-    # pyre-fixme[24]: Generic type `list` expects 1 type parameter, use
-    #  `typing.List` to avoid runtime subscripting errors.
-    def _get_latest_start_time(events: List) -> float:
-        """In each directory, there may be previous training runs due to restarting
-        training jobs.
-
-        Args:
-            events: A list of TensorEvents.
-
-        Returns:
-            The start time of the latest training run.
-        """
-        events.sort(key=lambda e: e.wall_time)
-        start_time = events[0].wall_time
-        for i in range(1, len(events)):
-            # detect points in time where restarts occurred
-            if events[i].step < events[i - 1].step:
-                start_time = events[i].wall_time
-        return start_time
-
-    def _get_event_value(e: NamedTuple) -> float:
-        r"""Helper function to check the dtype and then get the value
-        stored in a TensorEvent."""
-        tensor = e.tensor_proto  # pyre-ignore[16]
-        if tensor.dtype == types_pb2.DT_FLOAT:
-            return tensor.float_val[0]
-        elif tensor.dtype == types_pb2.DT_DOUBLE:
-            return tensor.double_val[0]
-        elif tensor.dtype == types_pb2.DT_INT32:
-            return tensor.int_val[0]
-        else:
-            raise ValueError(f"Tensorboard dtype {tensor.dtype} not supported.")
-
 except ImportError:
     logger.warning(
         "tensorboard package not found. If you would like to use "
-        "TensorboardCurveMetric, please install tensorboard."
+        "TensorboardMetric, please install tensorboard."
     )
     pass
diff --git a/ax/metrics/tests/test_tensorboard.py b/ax/metrics/tests/test_tensorboard.py
@@ -18,7 +18,7 @@
 from ax.core.map_data import MapData
 from ax.core.objective import Objective
 from ax.core.optimization_config import OptimizationConfig
-from ax.metrics.tensorboard import TensorboardCurveMetric, TensorboardMetric
+from ax.metrics.tensorboard import TensorboardMetric
 from ax.runners.synthetic import SyntheticRunner
 from ax.utils.common.testutils import TestCase
 from ax.utils.testing.core_stubs import get_branin_search_space, get_trial
@@ -165,176 +165,3 @@ def test_cumulative_best(self) -> None:
             )
 
             self.assertTrue(df.equals(expected_df))
-
-
-class TensorboardCurveMetricTest(TestCase):
-    def test_tensorboard_curve_metric(self) -> None:
-        def mock_get_tb_from_posix(
-            path: str, tags: Optional[List[str]] = None
-        ) -> Dict[str, pd.Series]:
-            data = np.array([10, 3, 5, 2, 7, 1])
-            return {"test_curve": pd.Series((int(path) + 1) * data)}
-
-        mock_path = "ax.metrics.tensorboard.get_tb_from_posix"
-
-        class FakeTensorboardCurveMetric(TensorboardCurveMetric):
-            @classmethod
-            def get_ids_from_trials(
-                cls, trials: Iterable[BaseTrial]
-            ) -> Dict[int, Union[int, str]]:
-                result = {}
-                for trial in trials:
-                    result[trial.index] = trial.index
-                return result
-
-        with mock.patch(mock_path, side_effect=mock_get_tb_from_posix):
-            # test simple
-            experiment = Experiment(
-                name="dummy_experiment",
-                search_space=get_branin_search_space(),
-                optimization_config=OptimizationConfig(
-                    objective=Objective(
-                        metric=FakeTensorboardCurveMetric(
-                            name="test_metric",
-                            curve_name="test_curve",
-                            lower_is_better=True,
-                            cumulative_best=False,
-                        ),
-                        minimize=True,
-                    )
-                ),
-                runner=SyntheticRunner(),
-            )
-            for param in range(0, 2):
-                trial = experiment.new_trial()
-                trial.add_arm(Arm(parameters={"x1": float(param), "x2": 0.0}))
-                trial.run()
-
-            self.assertTrue(
-                np.allclose(
-                    # pyre-fixme[16]: `Data` has no attribute `map_df`.
-                    experiment.fetch_data().map_df["mean"].to_numpy(),
-                    np.array(
-                        [10.0, 3.0, 5.0, 2.0, 7.0, 1.0, 20.0, 6.0, 10.0, 4.0, 14.0, 2.0]
-                    ),
-                )
-            )
-
-            # test cumulative best
-            experiment = Experiment(
-                name="dummy_experiment",
-                search_space=get_branin_search_space(),
-                optimization_config=OptimizationConfig(
-                    objective=Objective(
-                        metric=FakeTensorboardCurveMetric(
-                            name="test_metric",
-                            curve_name="test_curve",
-                            lower_is_better=True,
-                            cumulative_best=True,
-                        ),
-                        minimize=True,
-                    )
-                ),
-                runner=SyntheticRunner(),
-            )
-            for param in range(0, 2):
-                trial = experiment.new_trial()
-                trial.add_arm(Arm(parameters={"x1": float(param), "x2": 0.0}))
-                trial.run()
-
-            self.assertTrue(
-                np.allclose(
-                    experiment.fetch_data().map_df["mean"].to_numpy(),
-                    np.array(
-                        [10.0, 3.0, 3.0, 2.0, 2.0, 1.0, 20.0, 6.0, 6.0, 4.0, 4.0, 2.0]
-                    ),
-                )
-            )
-
-            # test cumulative best (lower is worse)
-            experiment = Experiment(
-                name="dummy_experiment",
-                search_space=get_branin_search_space(),
-                optimization_config=OptimizationConfig(
-                    objective=Objective(
-                        metric=FakeTensorboardCurveMetric(
-                            name="test_metric",
-                            curve_name="test_curve",
-                            lower_is_better=False,
-                            cumulative_best=True,
-                        ),
-                        minimize=False,
-                    )
-                ),
-                runner=SyntheticRunner(),
-            )
-            for param in range(0, 2):
-                trial = experiment.new_trial()
-                trial.add_arm(Arm(parameters={"x1": float(param), "x2": 0.0}))
-                trial.run()
-
-            self.assertTrue(
-                np.allclose(
-                    experiment.fetch_data().map_df["mean"].to_numpy(),
-                    np.array(
-                        [
-                            10.0,
-                            10.0,
-                            10.0,
-                            10.0,
-                            10.0,
-                            10.0,
-                            20.0,
-                            20.0,
-                            20.0,
-                            20.0,
-                            20.0,
-                            20.0,
-                        ]
-                    ),
-                ),
-            )
-
-            # test smoothing
-            experiment = Experiment(
-                name="dummy_experiment",
-                search_space=get_branin_search_space(),
-                optimization_config=OptimizationConfig(
-                    objective=Objective(
-                        metric=FakeTensorboardCurveMetric(
-                            name="test_metric",
-                            curve_name="test_curve",
-                            lower_is_better=True,
-                            cumulative_best=False,
-                            smoothing_window=3,
-                        ),
-                        minimize=True,
-                    )
-                ),
-                runner=SyntheticRunner(),
-            )
-            for param in range(0, 2):
-                trial = experiment.new_trial()
-                trial.add_arm(Arm(parameters={"x1": float(param), "x2": 0.0}))
-                trial.run()
-            self.assertTrue(
-                np.allclose(
-                    experiment.fetch_data().map_df["mean"].to_numpy(),
-                    np.array(
-                        [
-                            6.00000000,
-                            6.00000000,
-                            6.00000000,
-                            3.33333333,
-                            4.66666667,
-                            3.33333333,
-                            12.0,
-                            12.0,
-                            12.0,
-                            6.66666667,
-                            9.33333333,
-                            6.66666667,
-                        ]
-                    ),
-                )
-            )