Skip to content

Commit b9bc44d

Browse files
committed
Add sync version of the logging.
Refactor tests and code to reuse code.
1 parent 81577e8 commit b9bc44d

File tree

4 files changed

+280
-74
lines changed

4 files changed

+280
-74
lines changed

src/apify_client/clients/resource_clients/actor.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,19 @@ def call(
332332
timeout_secs=timeout_secs,
333333
webhooks=webhooks,
334334
)
335+
if not logger:
336+
return self.root_client.run(started_run['id']).wait_for_finish(wait_secs=wait_secs)
337+
338+
run_client = self.root_client.run(run_id=started_run['id'])
339+
if logger == 'default':
340+
actor_data = self.get()
341+
actor_name = actor_data.get('name', '') if actor_data else ''
342+
log_context = run_client.get_streamed_log(actor_name=actor_name)
343+
else:
344+
log_context = run_client.get_streamed_log(to_logger=logger)
335345

336-
return self.root_client.run(started_run['id']).wait_for_finish(wait_secs=wait_secs)
346+
with log_context:
347+
return self.root_client.run(started_run['id']).wait_for_finish(wait_secs=wait_secs)
337348

338349
def build(
339350
self,

src/apify_client/clients/resource_clients/log.py

Lines changed: 108 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
import asyncio
44
import logging
55
import re
6+
import threading
67
from asyncio import Task
78
from contextlib import asynccontextmanager, contextmanager
9+
from threading import Thread
810
from typing import TYPE_CHECKING, Any, cast
911

1012
from apify_shared.utils import ignore_docs
@@ -201,67 +203,33 @@ async def stream(self, *, raw: bool = False) -> AsyncIterator[httpx.Response | N
201203
await response.aclose()
202204

203205

204-
class StreamedLogSync:
205-
"""Utility class for streaming logs from another actor."""
206-
207-
208-
class StreamedLogAsync:
206+
class StreamedLog:
209207
"""Utility class for streaming logs from another actor."""
210208

211209
# Test related flag to enable propagation of logs to the `caplog` fixture during tests.
212210
_force_propagate = False
213211

214-
def __init__(self, log_client: LogClientAsync, to_logger: logging.Logger) -> None:
215-
self._log_client = log_client
212+
def __init__(self, to_logger: logging.Logger) -> None:
216213
self._to_logger = to_logger
217-
self._streaming_task: Task | None = None
218214
if self._force_propagate:
219215
to_logger.propagate = True
220216
self._stream_buffer = list[str]()
221217
# Redirected logs are forwarded to logger as soon as there are at least two split markers present in the buffer.
222218
# For example, 2025-05-12T15:35:59.429Z
223219
self._split_marker = re.compile(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)')
224220

225-
def __call__(self) -> Task:
226-
"""Start the streaming task. The caller has to handle any cleanup."""
227-
return asyncio.create_task(self._stream_log())
228-
229-
async def __aenter__(self) -> Self:
230-
"""Start the streaming task within the context. Exiting the context will cancel the streaming task."""
231-
if self._streaming_task:
232-
raise RuntimeError('Streaming task already active')
233-
self._streaming_task = self()
234-
235-
return self
236-
237-
async def __aexit__(
238-
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
239-
) -> None:
240-
"""Cancel the streaming task."""
241-
if not self._streaming_task:
242-
raise RuntimeError('Streaming task is not active')
243-
244-
self._streaming_task.cancel()
245-
self._streaming_task = None
246-
247-
async def _stream_log(self) -> None:
248-
async with self._log_client.stream(raw=True) as log_stream:
249-
if not log_stream:
250-
return
251-
async for data in log_stream.aiter_bytes():
252-
new_chunk = data.decode('utf-8')
253-
self._stream_buffer.append(new_chunk)
254-
if re.findall(self._split_marker, new_chunk):
255-
# If complete split marker was found in new chunk, then process the buffer.
256-
self._log_buffer_content(include_last_part=False)
257-
258-
# If the stream is finished, then the last part will be also processed.
259-
self._log_buffer_content(include_last_part=True)
221+
def _process_new_data(self, data: bytes) -> None:
222+
new_chunk = data.decode('utf-8')
223+
self._stream_buffer.append(new_chunk)
224+
if re.findall(self._split_marker, new_chunk):
225+
# If complete split marker was found in new chunk, then process the buffer.
226+
self._log_buffer_content(include_last_part=False)
260227

261228
def _log_buffer_content(self, *, include_last_part: bool = False) -> None:
262-
"""Merge the whole buffer and plit it into parts based on the marker.
229+
"""Merge the whole buffer and split it into parts based on the marker.
263230
264-
The last part could be incomplete, and so it can be left unprocessed and in the buffer.
231+
Log the messages created from the split parts and remove them from buffer.
232+
The last part could be incomplete, and so it can be left unprocessed and in the buffer until later.
265233
"""
266234
all_parts = re.split(self._split_marker, ''.join(self._stream_buffer))
267235
# First split is empty string
@@ -290,3 +258,98 @@ def _guess_log_level_from_message(message: str) -> int:
290258
return cast('int', logging.getLevelName(level))
291259
# Unknown log level. Fall back to the default.
292260
return logging.INFO
261+
262+
263+
class StreamedLogSync(StreamedLog):
    """Sync variant of `StreamedLog` that is logging in threads."""

    def __init__(self, log_client: LogClient, to_logger: logging.Logger) -> None:
        """Initialize the streamed log.

        Args:
            log_client: Client used to open the raw log stream.
            to_logger: `Logger` that receives the redirected log messages.
        """
        super().__init__(to_logger=to_logger)
        self._log_client = log_client
        # Background thread consuming the log stream; None when not streaming.
        self._streaming_thread: Thread | None = None
        # Cooperative stop flag checked by the streaming thread between chunks.
        self._stop_logging = False

    def __call__(self) -> Thread:
        """Start the streaming thread. The caller has to handle any cleanup."""
        if self._streaming_thread:
            raise RuntimeError('Streaming thread already active')
        self._stop_logging = False
        self._streaming_thread = threading.Thread(target=self._stream_log)
        self._streaming_thread.start()
        return self._streaming_thread

    def __enter__(self) -> Self:
        """Start the streaming thread within the context. Exiting the context will finish the streaming thread."""
        self()
        return self

    def __exit__(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        """Stop the streaming thread."""
        if not self._streaming_thread:
            raise RuntimeError('Streaming thread is not active')

        # Signal the thread to stop logging and wait for it to finish.
        self._stop_logging = True
        self._streaming_thread.join()
        self._streaming_thread = None
        self._stop_logging = False

    def _stream_log(self) -> None:
        """Consume the raw log stream and forward it to the target logger.

        Runs in the background thread started by `__call__`. Reuses the shared
        chunk-buffering logic from `StreamedLog._process_new_data` (same as the
        async variant) instead of duplicating it inline.
        """
        with self._log_client.stream(raw=True) as log_stream:
            if not log_stream:
                return
            for data in log_stream.iter_bytes():
                self._process_new_data(data)
                # Stop between chunks when the context manager requested it.
                if self._stop_logging:
                    break

            # If the stream is finished, then the last part will be also processed.
            self._log_buffer_content(include_last_part=True)
315+
316+
317+
class StreamedLogAsync(StreamedLog):
    """Async variant of `StreamedLog` that is logging in tasks."""

    def __init__(self, log_client: LogClientAsync, to_logger: logging.Logger) -> None:
        """Initialize the streamed log.

        Args:
            log_client: Client used to open the raw log stream.
            to_logger: `Logger` that receives the redirected log messages.
        """
        super().__init__(to_logger=to_logger)
        self._log_client = log_client
        # Background task consuming the log stream; None when not streaming.
        self._streaming_task: Task | None = None

    def __call__(self) -> Task:
        """Start the streaming task. The caller has to handle any cleanup."""
        if self._streaming_task is not None:
            raise RuntimeError('Streaming task already active')
        task = asyncio.create_task(self._stream_log())
        self._streaming_task = task
        return task

    async def __aenter__(self) -> Self:
        """Start the streaming task within the context. Exiting the context will cancel the streaming task."""
        self()
        return self

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        """Cancel the streaming task."""
        if self._streaming_task is None:
            raise RuntimeError('Streaming task is not active')

        self._streaming_task.cancel()
        self._streaming_task = None

    async def _stream_log(self) -> None:
        """Consume the raw log stream and forward it to the target logger."""
        async with self._log_client.stream(raw=True) as log_stream:
            if not log_stream:
                return
            # Feed each raw chunk into the shared buffering logic on the base class.
            async for chunk in log_stream.aiter_bytes():
                self._process_new_data(chunk)

            # If the stream is finished, then the last part will be also processed.
            self._log_buffer_content(include_last_part=True)

src/apify_client/clients/resource_clients/run.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@
1313
from apify_client.clients.base import ActorJobBaseClient, ActorJobBaseClientAsync
1414
from apify_client.clients.resource_clients.dataset import DatasetClient, DatasetClientAsync
1515
from apify_client.clients.resource_clients.key_value_store import KeyValueStoreClient, KeyValueStoreClientAsync
16-
from apify_client.clients.resource_clients.log import LogClient, LogClientAsync, StreamedLogAsync
16+
from apify_client.clients.resource_clients.log import (
17+
LogClient,
18+
LogClientAsync,
19+
StreamedLogAsync,
20+
StreamedLogSync,
21+
)
1722
from apify_client.clients.resource_clients.request_queue import RequestQueueClient, RequestQueueClientAsync
1823

1924
if TYPE_CHECKING:
@@ -250,6 +255,27 @@ def log(self) -> LogClient:
250255
**self._sub_resource_init_options(resource_path='log'),
251256
)
252257

258+
def get_streamed_log(self, to_logger: logging.Logger | None = None, actor_name: str = '') -> StreamedLogSync:
    """Get `StreamedLog` instance that can be used to redirect logs.

    `StreamedLog` can be directly called or used as a context manager.

    Args:
        to_logger: `Logger` used for logging the redirected messages. If not provided, a new logger is created
        actor_name: Optional component of default logger name.

    Returns:
        `StreamedLog` instance for redirected logs.
    """
    run_data = self.get()
    # Fall back to an empty id when the run is not found.
    run_id = (run_data or {}).get('id', '')

    if to_logger is None:
        # Build a default logger named after the actor and run, skipping empty parts.
        logger_name = '-'.join(part for part in (actor_name, run_id) if part)
        to_logger = create_redirect_logger(f'apify.{logger_name}')

    return StreamedLogSync(log_client=self.log(), to_logger=to_logger)
278+
253279
def charge(
254280
self,
255281
event_name: str,
@@ -518,16 +544,16 @@ def log(self) -> LogClientAsync:
518544
)
519545

520546
async def get_streamed_log(self, to_logger: logging.Logger | None = None, actor_name: str = '') -> StreamedLogAsync:
521-
"""Get `StreamedLogAsync` instance that can be used to redirect logs.
547+
"""Get `StreamedLog` instance that can be used to redirect logs.
522548
523-
`StreamedLogAsync` can be directly called or used as a context manager.
549+
`StreamedLog` can be directly called or used as a context manager.
524550
525551
Args:
526552
to_logger: `Logger` used for logging the redirected messages. If not provided, a new logger is created
527553
actor_name: Optional component of default logger name.
528554
529555
Returns:
530-
`StreamedLogAsync` instance for redirected logs.
556+
`StreamedLog` instance for redirected logs.
531557
"""
532558
run_data = await self.get()
533559
run_id = run_data.get('id', '') if run_data else ''
@@ -536,7 +562,7 @@ async def get_streamed_log(self, to_logger: logging.Logger | None = None, actor_
536562
name = '-'.join(part for part in (actor_name, run_id) if part)
537563
to_logger = create_redirect_logger(f'apify.{name}')
538564

539-
return StreamedLogAsync(self.log(), to_logger)
565+
return StreamedLogAsync(log_client=self.log(), to_logger=to_logger)
540566

541567
async def charge(
542568
self,

0 commit comments

Comments
 (0)