Skip to content

Commit 4c51a1f

Browse files
committed
Rewrite (Group)Resampler class
The `GroupResampler` and `Resampler` classes are not very reusable, or at least require quite a bit of work from users to use them and they need to take care of handling timers and calling methods at the right time for them to be useful. The new `Resampler` class has its own task(s) to handle all the resampling, users only need to subscribe to new timeseries by passing a timeseries `Source` and `Sink`. The `Sink` will be called each time a new sample is produced. A new `frequenz.sdk.timeseries.resampling` module is added because there are more classes only used by the `Resampler` that might not be needed by anyone just wanting to use `frequenz.sdk.timeseries`. This commit also introduces the use of `async-solipsism` for testing, so tests using timers can be mocked in a way that they don't really need to wait for timers and time comparisons can be done precisely. Signed-off-by: Leandro Lucarella <[email protected]>
1 parent b37e189 commit 4c51a1f

File tree

8 files changed

+1061
-448
lines changed

8 files changed

+1061
-448
lines changed

noxfile.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,14 @@
1313

1414
FMT_DEPS = ["black", "isort"]
1515
DOCSTRING_DEPS = ["pydocstyle", "darglint"]
16-
PYTEST_DEPS = ["pytest", "pytest-cov", "pytest-mock", "pytest-asyncio", "time-machine"]
16+
PYTEST_DEPS = [
17+
"pytest",
18+
"pytest-cov",
19+
"pytest-mock",
20+
"pytest-asyncio",
21+
"time-machine",
22+
"async-solipsism",
23+
]
1724
MYPY_DEPS = ["mypy", "pandas-stubs", "grpc-stubs"]
1825

1926

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,7 @@ module = [
106106
"grpc.aio.*"
107107
]
108108
ignore_missing_imports = true
109+
110+
[[tool.mypy.overrides]]
111+
module = "async_solipsism"
112+
ignore_missing_imports = true

src/frequenz/sdk/actor/_resampling.py

Lines changed: 91 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@
66
import asyncio
77
import dataclasses
88
import logging
9-
import math
10-
from typing import Dict, Sequence
9+
from typing import Sequence, Set
1110

1211
from frequenz.channels import Receiver, Sender
13-
from frequenz.channels.util import MergeNamed, Select, Timer
1412

15-
from ..timeseries import GroupResampler, ResamplingFunction, Sample
13+
from frequenz.sdk.util.asyncio import cancel_and_await
14+
15+
from ..timeseries import Sample
16+
from ..timeseries.resampling import Resampler, ResamplingError, ResamplingFunction
1617
from ._channel_registry import ChannelRegistry
1718
from ._data_sourcing import ComponentMetricRequest
1819
from ._decorator import actor
@@ -141,97 +142,111 @@ async def run() -> None:
141142
self._resampling_period_s = resampling_period_s
142143
self._max_data_age_in_periods: float = max_data_age_in_periods
143144
self._resampling_function: ResamplingFunction = resampling_function
144-
145-
self._resampler = GroupResampler(
145+
self._resampler = Resampler(
146146
resampling_period_s=resampling_period_s,
147147
max_data_age_in_periods=max_data_age_in_periods,
148-
initial_resampling_function=resampling_function,
148+
resampling_function=resampling_function,
149149
)
150150

151-
self._input_receivers: Dict[str, Receiver[Sample]] = {}
152-
self._output_senders: Dict[str, Sender[Sample]] = {}
153-
self._resampling_timer = Timer(interval=self._resampling_period_s)
154-
155151
async def _subscribe(self, request: ComponentMetricRequest) -> None:
156152
"""Subscribe for data for a specific time series.
157153
158154
Args:
159155
request: subscription request for a specific component metric
160156
"""
161-
channel_name = request.get_channel_name()
162-
163-
data_source_request = dataclasses.replace(request, **dict(namespace="Source"))
157+
data_source_request = dataclasses.replace(
158+
request, namespace=request.namespace + ":Source"
159+
)
164160
data_source_channel_name = data_source_request.get_channel_name()
165-
if channel_name not in self._input_receivers:
166-
await self._subscription_sender.send(data_source_request)
167-
receiver: Receiver[Sample] = self._channel_registry.new_receiver(
168-
data_source_channel_name
169-
)
170-
self._input_receivers[data_source_channel_name] = receiver
171-
self._resampler.add_time_series(time_series_id=data_source_channel_name)
161+
await self._subscription_sender.send(data_source_request)
162+
receiver = self._channel_registry.new_receiver(data_source_channel_name)
172163

173-
if channel_name not in self._output_senders:
174-
sender: Sender[Sample] = self._channel_registry.new_sender(channel_name)
175-
# This means that the `sender` will be sending samples to the channel with
176-
# name `channel_name` based on samples collected from the channel named
177-
# `data_source_channel_name`
178-
self._output_senders[data_source_channel_name] = sender
164+
# This is a temporary hack until the Sender implementation uses
165+
# exceptions to report errors.
166+
sender = self._channel_registry.new_sender(request.get_channel_name())
179167

180-
def _is_sample_valid(self, sample: Sample) -> bool:
181-
"""Check if the provided sample is valid.
168+
async def sink_adapter(sample: Sample) -> None:
169+
if not await sender.send(sample):
170+
raise Exception(f"Error while sending with sender {sender}", sender)
182171

183-
Args:
184-
sample: sample to be validated
172+
self._resampler.add_timeseries(receiver, sink_adapter)
185173

186-
Returns:
187-
True if the sample is valid, False otherwise
188-
"""
189-
if sample.value is None or math.isnan(sample.value):
190-
return False
191-
return True
174+
async def _process_resampling_requests(self) -> None:
175+
"""Process resampling data requests."""
176+
async for request in self._subscription_receiver:
177+
await self._subscribe(request)
192178

193179
async def run(self) -> None:
194-
"""Run the actor.
180+
"""Resample known component metrics and process resampling requests.
181+
182+
If there is a resampling error while resampling some component metric,
183+
then that metric will be discarded and not resampled any more. Any
184+
other error will be propagated (most likely ending in the actor being
185+
restarted).
195186
196187
Raises:
197-
ConnectionError: When the provider of the subscription channel closes the
198-
connection
188+
RuntimeError: If there is some unexpected error while resampling or
189+
handling requests.
190+
191+
# noqa: DAR401 error
199192
"""
200-
while True:
201-
select = Select(
202-
resampling_timer=self._resampling_timer,
203-
subscription_receiver=self._subscription_receiver,
204-
component_data_receiver=MergeNamed(**self._input_receivers),
193+
tasks_to_cancel: Set[asyncio.Task] = set()
194+
try:
195+
subscriptions_task = asyncio.create_task(
196+
self._process_resampling_requests()
205197
)
206-
while await select.ready():
207-
if msg := select.resampling_timer:
208-
assert msg.inner is not None, "The timer should never be 'closed'"
209-
timestamp = msg.inner
210-
awaitables = [
211-
self._output_senders[channel_name].send(sample)
212-
for channel_name, sample in self._resampler.resample(timestamp)
213-
]
214-
await asyncio.gather(*awaitables)
215-
if msg := select.component_data_receiver:
216-
if msg.inner is None:
217-
# When this happens, then DataSourcingActor has closed the channel
218-
# for sending data for a specific `ComponentMetricRequest`,
219-
# which may need to be handled properly here, e.g. unsubscribe
220-
continue
221-
channel_name, sample = msg.inner
222-
if self._is_sample_valid(sample=sample):
223-
self._resampler.add_sample(
224-
time_series_id=channel_name,
225-
sample=sample,
226-
)
227-
if msg := select.subscription_receiver:
228-
if msg.inner is None:
229-
raise ConnectionError(
230-
"Subscription channel connection has been closed!"
198+
tasks_to_cancel.add(subscriptions_task)
199+
200+
while True:
201+
resampling_task = asyncio.create_task(self._resampler.resample())
202+
tasks_to_cancel.add(resampling_task)
203+
done, _ = await asyncio.wait(
204+
[resampling_task, subscriptions_task],
205+
return_when=asyncio.FIRST_COMPLETED,
206+
)
207+
208+
if subscriptions_task in done:
209+
tasks_to_cancel.remove(subscriptions_task)
210+
raise RuntimeError(
211+
"There was a problem with the subscriptions channel."
212+
)
213+
214+
if resampling_task in done:
215+
tasks_to_cancel.remove(resampling_task)
216+
# The resampler shouldn't end without an exception
217+
error = resampling_task.exception()
218+
assert (
219+
error is not None
220+
), "The resample() function shouldn't exit normally."
221+
222+
# We don't know what to do with something other than
223+
# ResamplingError, so propagate the exception if that is the
224+
# case.
225+
if not isinstance(error, ResamplingError):
226+
raise error
227+
for source, source_error in error.exceptions.items():
228+
logger.error(
229+
"Error resampling source %s, removing source...", source
231230
)
232-
await self._subscribe(request=msg.inner)
233-
# Breaking out from the loop is required to regenerate
234-
# component_data_receivers to be able to fulfil this
235-
# subscription (later can be optimized by checking if
236-
# an output channel already existed in the `subscribe()` method)
237-
break
231+
removed = self._resampler.remove_timeseries(source)
232+
if not removed:
233+
logger.warning(
234+
"Got an exception from an unknown source: "
235+
"source=%r, exception=%r",
236+
source,
237+
source_error,
238+
)
239+
# The resampling_task will be re-created if we reached this point
240+
finally:
241+
await asyncio.gather(*[cancel_and_await(t) for t in tasks_to_cancel])
242+
243+
# XXX: Here we should probably do a: pylint: disable=fixme
244+
# await self._resampler.stop()
245+
# But since the actor will be restarted, the internal state would
246+
# be broken if we stop the resampler.
247+
#
248+
# We have an even bigger problem with this naive restarting
249+
# approach, as restarting this actor without really resetting its
250+
# state would be mostly the same as not really leaving the run()
251+
# method and just swallow any exception, which doesn't look super
252+
# smart.

src/frequenz/sdk/timeseries/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,23 @@
1111
[`Source`][frequenz.sdk.timeseries.Source]s and new timeseries can be
1212
generated by sending samples to
1313
a [`Sink`][frequenz.sdk.timeseries.Sink]
14+
15+
In general timeseries `Source`s don't necessarily come at periodic
16+
intervals. To normalize timeseries to produce `Sample`s at regular
17+
periodic intervals,
18+
a [`Resampler`][frequenz.sdk.timeseries.resampling.Resampler] can be used.
19+
20+
A `Resampler` uses
21+
a [`ResamplingFunction`][frequenz.sdk.timeseries.resampling.ResamplingFunction]
22+
to produce a new sample from samples received in the past. If there are no
23+
samples coming to a resampled timeseries for a while, eventually the
24+
`Resampler` will produce `Sample`s with `None` as value, meaning there is no
25+
way to produce meaningful samples with the available data.
1426
"""
1527

1628
from ._base_types import Sample, Sink, Source
17-
from ._resampler import GroupResampler, Resampler, ResamplingFunction
1829

1930
__all__ = [
20-
"GroupResampler",
21-
"Resampler",
22-
"ResamplingFunction",
2331
"Sample",
2432
"Sink",
2533
"Source",

0 commit comments

Comments
 (0)