beam/sdks/python/apache_beam/metrics/execution.py at 4c54c1e1819b2a87de7c37287ae37fd0939e3a8b · liferoad/beam · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
This module is for internal use only; no backwards-compatibility guarantees.

The classes in this file keep shared state, and organize metrics information.

Available classes:

- MetricKey - Internal key for a metric.
- MetricResult - Current status of a metric's updates/commits.
- _MetricsEnvironment - Keeps track of MetricsContainer and other metrics
    information for every single execution working thread.
- MetricsContainer - Holds the metrics of a single step and a single
    unit-of-commit (bundle).
"""

# pytype: skip-file

import threading
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
from typing import FrozenSet
from typing import Optional
from typing import Type
from typing import Union
from typing import cast

from apache_beam.metrics import monitoring_infos
from apache_beam.metrics.cells import BoundedTrieCell
from apache_beam.metrics.cells import CounterCell
from apache_beam.metrics.cells import DistributionCell
from apache_beam.metrics.cells import GaugeCell
from apache_beam.metrics.cells import MetricCellFactory
from apache_beam.metrics.cells import StringSetCell
from apache_beam.metrics.cells import StringSetData
from apache_beam.runners.worker import statesampler
from apache_beam.runners.worker.statesampler import get_current_tracker

if TYPE_CHECKING:
  from apache_beam.metrics.cells import BoundedTrieData
  from apache_beam.metrics.cells import GaugeData
  from apache_beam.metrics.cells import DistributionData
  from apache_beam.metrics.cells import MetricCell
  from apache_beam.metrics.metricbase import MetricName
  from apache_beam.portability.api import metrics_pb2


class MetricKey(object):
  """Key used to identify instance of metric cell.

  Metrics are internally keyed by the name of the step they're associated with,
  the name and namespace (if it is a user defined metric) of the metric,
  and any extra label metadata added by the runner specific metric collection
  service.
  """
  def __init__(self, step, metric, labels=None):
    """Initializes ``MetricKey``.

    Args:
      step: A string with the step this metric cell is part of.
      metric: A ``MetricName`` namespace+name that identifies a metric.
      labels: An arbitrary set of labels that also identifies the metric.
    """
    self.step = step
    self.metric = metric
    self.labels = labels if labels else {}

  def __eq__(self, other):
    return (
        self.step == other.step and self.metric == other.metric and
        self.labels == other.labels)

  def __hash__(self):
    return hash((self.step, self.metric, frozenset(self.labels)))

  def __repr__(self):
    return 'MetricKey(step={}, metric={}, labels={})'.format(
        self.step, self.metric, self.labels)


class MetricResult(object):
  """Keeps track of the status of a metric within a single bundle.

  It contains the physical and logical updates to the metric. Physical updates
  are updates that have not necessarily been committed, but that have been made
  during pipeline execution. Logical updates are updates that have been
  committed.

  Attributes:
    key: A ``MetricKey`` that identifies the metric and bundle of this result.
    committed: The committed updates of the metric. This attribute's type is
      of metric type result (e.g. int, DistributionResult, GaugeResult).
    attempted: The logical updates of the metric. This attribute's type is that
      of metric type result (e.g. int, DistributionResult, GaugeResult).
  """
  def __init__(self, key, committed, attempted):
    """Initializes ``MetricResult``.
    Args:
      key: A ``MetricKey`` object.
      committed: Metric data that has been committed (e.g. logical updates)
      attempted: Metric data that has been attempted (e.g. physical updates)
    """
    self.key = key
    self.committed = committed
    self.attempted = attempted

  def __eq__(self, other):
    return (
        self.key == other.key and self.committed == other.committed and
        self.attempted == other.attempted)

  def __hash__(self):
    return hash((self.key, self.committed, self.attempted))

  def __repr__(self):
    return 'MetricResult(key={}, committed={}, attempted={})'.format(
        self.key, str(self.committed), str(self.attempted))

  def __str__(self):
    return repr(self)

  @property
  def result(self):
    """Short-hand for falling back to attempted metrics if it seems that
    committed was not populated (e.g. due to not being supported on a given
    runner"""
    return self.committed if self.committed else self.attempted


class _MetricsEnvironment(object):
  """Holds the MetricsContainer for every thread and other metric information.

  This class is not meant to be instantiated, instead being used to keep
  track of global state.
  """
  def current_container(self):
    """Returns the current MetricsContainer."""
    sampler = statesampler.get_current_tracker()
    if sampler is None:
      return None
    return sampler.current_state().metrics_container

  def process_wide_container(self):
    """Returns the MetricsContainer for process wide metrics, e.g. memory."""
    return PROCESS_WIDE_METRICS_CONTAINER


MetricsEnvironment = _MetricsEnvironment()


class _TypedMetricName(object):
  """Like MetricName, but also stores the cell type of the metric."""
  def __init__(
      self,
      cell_type,  # type: Union[Type[MetricCell], MetricCellFactory]
      metric_name  # type: Union[str, MetricName]
  ):
    # type: (...) -> None
    self.cell_type = cell_type
    self.metric_name = metric_name
    if isinstance(metric_name, str):
      self.fast_name = metric_name
    else:
      self.fast_name = metric_name.fast_name()
    # Cached for speed, as this is used as a key for every counter update.
    self._hash = hash((cell_type, self.fast_name))

  def __eq__(self, other):
    return self is other or (
        self.cell_type == other.cell_type and self.fast_name == other.fast_name)

  def __hash__(self):
    return self._hash

  def __str__(self):
    return '%s %s' % (self.cell_type, self.metric_name)

  def __reduce__(self):
    return _TypedMetricName, (self.cell_type, self.metric_name)


_DEFAULT = None  # type: Any


class MetricUpdater(object):
  """A callable that updates the metric as quickly as possible."""
  def __init__(
      self,
      cell_type,  # type: Union[Type[MetricCell], MetricCellFactory]
      metric_name,  # type: Union[str, MetricName]
      default_value=None,
      process_wide=False):
    self.process_wide = process_wide
    self.typed_metric_name = _TypedMetricName(cell_type, metric_name)
    self.default_value = default_value

  def __call__(self, value=_DEFAULT):
    # type: (Any) -> None
    if value is _DEFAULT:
      if self.default_value is _DEFAULT:
        raise ValueError(
            'Missing value for update of %s' % self.typed_metric_name.fast_name)
      value = self.default_value
    if self.process_wide:
      MetricsEnvironment.process_wide_container().get_metric_cell(
          self.typed_metric_name).update(value)
    else:
      tracker = get_current_tracker()
      if tracker is not None:
        tracker.update_metric(self.typed_metric_name, value)

  def __reduce__(self):
    return MetricUpdater, (
        self.typed_metric_name.cell_type,
        self.typed_metric_name.metric_name,
        self.default_value)


class MetricsContainer(object):
  """Holds the metrics of a single step and a single bundle.

  Or the metrics associated with the process/SDK harness. I.e. memory usage.
  """
  def __init__(self, step_name):
    self.step_name = step_name
    self.lock = threading.Lock()
    self.metrics = {}  # type: Dict[_TypedMetricName, MetricCell]

  def get_counter(self, metric_name):
    # type: (MetricName) -> CounterCell
    return cast(
        CounterCell,
        self.get_metric_cell(_TypedMetricName(CounterCell, metric_name)))

  def get_distribution(self, metric_name):
    # type: (MetricName) -> DistributionCell
    return cast(
        DistributionCell,
        self.get_metric_cell(_TypedMetricName(DistributionCell, metric_name)))

  def get_gauge(self, metric_name):
    # type: (MetricName) -> GaugeCell
    return cast(
        GaugeCell,
        self.get_metric_cell(_TypedMetricName(GaugeCell, metric_name)))

  def get_string_set(self, metric_name):
    # type: (MetricName) -> StringSetCell
    return cast(
        StringSetCell,
        self.get_metric_cell(_TypedMetricName(StringSetCell, metric_name)))

  def get_bounded_trie(self, metric_name):
    return self.get_metric_cell(_TypedMetricName(BoundedTrieCell, metric_name))

  def get_metric_cell(self, typed_metric_name):
    # type: (_TypedMetricName) -> MetricCell
    # First check without a lock.
    cell = self.metrics.get(typed_metric_name, None)
    if cell is None:
      # If not found, acquire lock and check again.
      # This is to prevent duplicate cell creation in concurrent scenarios.
      with self.lock:
        cell = self.metrics.get(typed_metric_name, None)
        if cell is None:
          if isinstance(typed_metric_name.cell_type, MetricCellFactory):
            # If it's a factory, call it without container_lock,
            # as the factory's __call__ should handle cell creation.
            cell = self.metrics[
                typed_metric_name] = typed_metric_name.cell_type()
          else:
            # Otherwise, assume it's a MetricCell class and pass container_lock.
            cell = self.metrics[
                typed_metric_name] = typed_metric_name.cell_type(
                    container_lock=self.lock)
    return cell

  def get_cumulative(self):
    # type: () -> MetricUpdates

    """Return MetricUpdates with cumulative values of all metrics in container.

    This returns all the cumulative values for all metrics.
    """
    counters = {
        MetricKey(self.step_name, k.metric_name): v.get_cumulative()
        for k, v in self.metrics.items() if k.cell_type == CounterCell
    }

    distributions = {
        MetricKey(self.step_name, k.metric_name): v.get_cumulative()
        for k, v in self.metrics.items() if k.cell_type == DistributionCell
    }

    gauges = {
        MetricKey(self.step_name, k.metric_name): v.get_cumulative()
        for k, v in self.metrics.items() if k.cell_type == GaugeCell
    }

    string_sets = {
        MetricKey(self.step_name, k.metric_name): v.get_cumulative()
        for k, v in self.metrics.items() if k.cell_type == StringSetCell
    }

    bounded_tries = {
        MetricKey(self.step_name, k.metric_name): v.get_cumulative()
        for k, v in self.metrics.items() if k.cell_type == BoundedTrieCell
    }

    return MetricUpdates(
        counters, distributions, gauges, string_sets, bounded_tries)

  def to_runner_api(self):
    return [
        cell.to_runner_api_user_metric(key.metric_name)
        for key, cell in self.metrics.items()
    ]

  def to_runner_api_monitoring_infos(self, transform_id):
    # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]

    """Returns a list of MonitoringInfos for the metrics in this container."""
    with self.lock:
      items = list(self.metrics.items())
      all_metrics = [
          cell.to_runner_api_monitoring_info(key.metric_name, transform_id)
          for key, cell in items
      ]
      return {
          monitoring_infos.to_key(mi): mi
          for mi in all_metrics if mi is not None
      }

  def reset(self):
    # type: () -> None

    """Reset all metrics in the MetricsContainer. This does not delete added
    metrics.
    """

    for metric in self.metrics.values():
      metric.reset()

  def __reduce__(self):
    raise NotImplementedError


PROCESS_WIDE_METRICS_CONTAINER = MetricsContainer(None)


class MetricUpdates(object):
  """Contains updates for several metrics.

  A metric update is an object containing information to update a metric.
  For Distribution metrics, it is DistributionData, and for Counter metrics,
  it's an int.
  """
  def __init__(
      self,
      counters=None,  # type: Optional[Dict[MetricKey, int]]
      distributions=None,  # type: Optional[Dict[MetricKey, DistributionData]]
      gauges=None,  # type: Optional[Dict[MetricKey, GaugeData]]
      string_sets=None,  # type: Optional[Dict[MetricKey, StringSetData]]
      bounded_tries=None,  # type: Optional[Dict[MetricKey, BoundedTrieData]]
  ):
    # type: (...) -> None

    """Create a MetricUpdates object.

    Args:
      counters: Dictionary of MetricKey:MetricUpdate updates.
      distributions: Dictionary of MetricKey:MetricUpdate objects.
      gauges: Dictionary of MetricKey:MetricUpdate objects.
      string_sets: Dictionary of MetricKey:MetricUpdate objects.
      bounded_tries: Dictionary of MetricKey:MetricUpdate objects.
    """
    self.counters = counters or {}
    self.distributions = distributions or {}
    self.gauges = gauges or {}
    self.string_sets = string_sets or {}
    self.bounded_tries = bounded_tries or {}