Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changes
=======

0.33.0 (unreleased)
-------------------

- **Backward-incompatible change:** The ``/pools/<pool>`` part in session
stats is now stripped by default. To enable per-pool stats, set the new
:setting:`ZYTE_API_SESSION_STATS_PER_POOL` setting to ``True``.

0.32.0 (2026-01-20)
-------------------

Expand Down
11 changes: 11 additions & 0 deletions docs/reference/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,17 @@ queue.
See :setting:`ZYTE_API_SESSION_QUEUE_MAX_ATTEMPTS` for details.


.. setting:: ZYTE_API_SESSION_STATS_PER_POOL

ZYTE_API_SESSION_STATS_PER_POOL
===============================

Default: ``False``

Whether to split :ref:`session stats <session-stats>` by pool (``True``) or
aggregate them across pools (``False``, default).


.. setting:: ZYTE_API_SKIP_HEADERS

ZYTE_API_SKIP_HEADERS
Expand Down
10 changes: 9 additions & 1 deletion docs/usage/session.rst
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,15 @@ implementation may also close your spider with a custom reason by raising a
Session stats
=============

The following stats exist for scrapy-zyte-api session management:
Plugin-managed sessions generate stats that help you understand how well
sessions are working.

By default, stats are aggregated across session pools. Set
:setting:`ZYTE_API_SESSION_STATS_PER_POOL` to ``True`` to enable per-pool
stats.

Tracked stats are as follows (``pools/{pool}/`` is only present if per-pool
stats are enabled):

``scrapy-zyte-api/sessions/pools/{pool}/init/check-error``
Number of times that a session for pool ``{pool}`` triggered an unexpected
Expand Down
46 changes: 17 additions & 29 deletions scrapy_zyte_api/_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,14 @@ def __init__(self, crawler: Crawler):

self._fatal_error_handler = FatalErrorHandler(crawler)

self._stats_per_pool: bool = settings.getbool("ZYTE_API_SESSION_STATS_PER_POOL")

def _inc_stat(self, key: str, pool: str):
pool = f"pools/{pool}/" if self._stats_per_pool else ""
key = f"scrapy-zyte-api/sessions/{pool}{key}"
assert self._crawler.stats
self._crawler.stats.inc_value(key)

async def _handle_engine_start(self):
assert self._crawler.engine
self._download_async = getattr(self._crawler.engine, "download_async", None)
Expand Down Expand Up @@ -668,7 +676,6 @@ def get_pool(self, request):

async def _init_session(self, session_id: str, request: Request, pool: str) -> bool:
assert self._crawler.engine
assert self._crawler.stats
session_config = self._get_session_config(request)
if meta_params := request.meta.get("zyte_api_session_params", None):
session_params = meta_params
Expand All @@ -681,9 +688,7 @@ async def _init_session(self, session_id: str, request: Request, pool: str) -> b
try:
session_params = session_config.params(request)
except Exception:
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/init/param-error"
)
self._inc_stat("init/param-error", pool)
logger.exception(
f"Unexpected exception raised while obtaining session "
f"initialization parameters for request {request}."
Expand Down Expand Up @@ -721,28 +726,22 @@ async def _init_session(self, session_id: str, request: Request, pool: str) -> b
try:
response = await download
except Exception:
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/init/failed"
)
self._inc_stat("init/failed", pool)
return False
else:
try:
result = session_config.check(response, session_init_request)
except CloseSpider:
raise
except Exception:
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/init/check-error"
)
self._inc_stat("init/check-error", pool)
logger.exception(
f"Unexpected exception raised while checking session "
f"validity on response {response}."
)
return False
outcome = "passed" if result else "failed"
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/init/check-{outcome}"
)
self._inc_stat(f"init/check-{outcome}", pool)
return result

async def _create_session(self, request: Request, pool: str) -> str:
Expand Down Expand Up @@ -850,7 +849,6 @@ async def check(self, response: Response, request: Request) -> bool:
"""Check the response for signs of session expiration, update the
internal session pool accordingly, and return ``False`` if the session
has expired or ``True`` if the session passed validation."""
assert self._crawler.stats
async with self._fatal_error_handler:
if self.is_init_request(request):
return True
Expand All @@ -863,18 +861,14 @@ async def check(self, response: Response, request: Request) -> bool:
except CloseSpider:
raise
except Exception:
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/use/check-error"
)
self._inc_stat("use/check-error", pool)
logger.exception(
f"Unexpected exception raised while checking session "
f"validity on response {response}."
)
else:
outcome = "passed" if passed else "failed"
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/use/check-{outcome}"
)
self._inc_stat(f"use/check-{outcome}", pool)
if passed:
return True
session_id = get_request_session_id(request)
Expand All @@ -892,14 +886,14 @@ async def assign(self, request: Request) -> Optional[Request]:
request in place, return that new request, to replace the received
request.
"""
assert self._crawler.stats
async with self._fatal_error_handler:
if self.is_init_request(request) or request.meta.get(
"_zyte_api_session_assigned", False
):
return None
session_config = self._get_session_config(request)
if not session_config.enabled(request):
assert self._crawler.stats
self._crawler.stats.inc_value("scrapy-zyte-api/sessions/use/disabled")
return None
session_id = await self._next(request)
Expand Down Expand Up @@ -937,12 +931,9 @@ def is_enabled(self, request: Request) -> bool:
return session_config.enabled(request)

async def handle_error(self, request: Request):
assert self._crawler.stats
async with self._fatal_error_handler:
pool = self.get_pool(request)
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/use/failed"
)
self._inc_stat("use/failed", pool)
session_id = get_request_session_id(request)
if session_id is not None:
self._errors[session_id] += 1
Expand All @@ -951,12 +942,9 @@ async def handle_error(self, request: Request):
self._start_request_session_refresh(request, pool)

async def handle_expiration(self, request: Request):
assert self._crawler.stats
async with self._fatal_error_handler:
pool = self.get_pool(request)
self._crawler.stats.inc_value(
f"scrapy-zyte-api/sessions/pools/{pool}/use/expired"
)
self._inc_stat("use/expired", pool)
self._start_request_session_refresh(request, pool)


Expand Down
Loading
Loading