Skip to content

Commit beaf8ca

Browse files
authored
Implement SessionConfig.enabled (#206)
1 parent 748be07 commit beaf8ca

File tree

3 files changed

+81
-14
lines changed

3 files changed

+81
-14
lines changed

docs/usage/session.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ Enabling session management
6060
To enable session management for all requests, set
6161
:setting:`ZYTE_API_SESSION_ENABLED` to ``True``. You can also toggle session
6262
management on or off for specific requests using the
63-
:reqmeta:`zyte_api_session_enabled` request metadata key.
63+
:reqmeta:`zyte_api_session_enabled` request metadata key, or by overriding the
64+
:meth:`~scrapy_zyte_api.SessionConfig.enabled` method in a :ref:`session config
65+
override <session-configs>`.
6466

6567
By default, scrapy-zyte-api will maintain up to 8 sessions per domain, each
6668
initialized with a :ref:`browser request <zyte-api-browser>` targeting the URL
@@ -360,3 +362,6 @@ The following stats exist for scrapy-zyte-api session management:
360362
``scrapy-zyte-api/sessions/pools/{pool}/use/failed``
361363
Number of times that a request that used a session from pool ``{pool}``
362364
got an :ref:`unsuccessful response <zyte-api-unsuccessful-responses>`.
365+
366+
``scrapy-zyte-api/sessions/use/disabled``
367+
Number of processed requests for which session management was disabled.

scrapy_zyte_api/_session.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,17 @@ def __init__(self, crawler):
192192
else:
193193
self._checker = None
194194

195+
self._enabled = crawler.settings.getbool("ZYTE_API_SESSION_ENABLED", False)
196+
197+
def enabled(self, request: Request) -> bool:
198+
"""Return ``True`` if the request should use sessions from
199+
:ref:`session management <session>` or ``False`` otherwise.
200+
201+
The default implementation is based on settings and request metadata
202+
keys as described in :ref:`enable-sessions`.
203+
"""
204+
return request.meta.get("zyte_api_session_enabled", self._enabled)
205+
195206
def pool(self, request: Request) -> str:
196207
"""Return the ID of the session pool to use for *request*.
197208
@@ -202,7 +213,9 @@ def pool(self, request: Request) -> str:
202213
https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html.
203214
204215
scrapy-zyte-api can maintain multiple session pools, each pool with up
205-
to :setting:`ZYTE_API_SESSION_POOL_SIZE` sessions.
216+
to :setting:`ZYTE_API_SESSION_POOL_SIZE` sessions
217+
(:setting:`ZYTE_API_SESSION_POOL_SIZES` allows configuring
218+
pool-specific sizes).
206219
"""
207220
return urlparse_cached(request).netloc
208221

@@ -655,7 +668,11 @@ async def check(self, response: Response, request: Request) -> bool:
655668
"""Check the response for signs of session expiration, update the
656669
internal session pool accordingly, and return ``False`` if the session
657670
has expired or ``True`` if the session passed validation."""
671+
if self.is_init_request(request):
672+
return True
658673
session_config = self._get_session_config(request)
674+
if not session_config.enabled(request):
675+
return True
659676
pool = self._get_pool(request)
660677
try:
661678
passed = session_config.check(response, request)
@@ -681,6 +698,12 @@ async def check(self, response: Response, request: Request) -> bool:
681698

682699
async def assign(self, request: Request):
683700
"""Assign a working session to *request*."""
701+
if self.is_init_request(request):
702+
return
703+
session_config = self._get_session_config(request)
704+
if not session_config.enabled(request):
705+
self._crawler.stats.inc_value("scrapy-zyte-api/sessions/use/disabled")
706+
return
684707
session_id = await self._next(request)
685708
# Note: If there is a session set already (e.g. a request being
686709
# retried), it is overridden.
@@ -702,6 +725,10 @@ async def assign(self, request: Request):
702725
request.meta[meta_key]["session"] = {"id": session_id}
703726
request.meta.setdefault("dont_merge_cookies", True)
704727

728+
def is_enabled(self, request: Request) -> bool:
729+
session_config = self._get_session_config(request)
730+
return session_config.enabled(request)
731+
705732
def handle_error(self, request: Request):
706733
pool = self._get_pool(request)
707734
self._crawler.stats.inc_value(
@@ -755,27 +782,18 @@ def from_crawler(cls, crawler: Crawler):
755782
return cls(crawler)
756783

757784
def __init__(self, crawler: Crawler):
758-
self._enabled = crawler.settings.getbool("ZYTE_API_SESSION_ENABLED", False)
759785
self._crawler = crawler
760786
self._sessions = _SessionManager(crawler)
761787
self._fatal_error_handler = FatalErrorHandler(crawler)
762788

763789
async def process_request(self, request: Request, spider: Spider) -> None:
764-
if not request.meta.get(
765-
"zyte_api_session_enabled", self._enabled
766-
) or self._sessions.is_init_request(request):
767-
return
768790
async with self._fatal_error_handler:
769791
await self._sessions.assign(request)
770792

771793
async def process_response(
772794
self, request: Request, response: Response, spider: Spider
773795
) -> Union[Request, Response, None]:
774-
if (
775-
isinstance(response, DummyResponse)
776-
or not request.meta.get("zyte_api_session_enabled", self._enabled)
777-
or self._sessions.is_init_request(request)
778-
):
796+
if isinstance(response, DummyResponse):
779797
return response
780798
async with self._fatal_error_handler:
781799
passed = await self._sessions.check(response, request)
@@ -795,8 +813,8 @@ async def process_exception(
795813
) -> Union[Request, None]:
796814
if (
797815
not isinstance(exception, RequestError)
798-
or not request.meta.get("zyte_api_session_enabled", self._enabled)
799816
or self._sessions.is_init_request(request)
817+
or not self._sessions.is_enabled(request)
800818
):
801819
return None
802820

tests/test_sessions.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@ def parse(self, response):
7373
"scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 1,
7474
}
7575
else:
76-
assert session_stats == {}
76+
assert session_stats == {
77+
"scrapy-zyte-api/sessions/use/disabled": 1,
78+
}
7779

7880

7981
@pytest.mark.parametrize(
@@ -1205,6 +1207,47 @@ def parse(self, response):
12051207
}
12061208

12071209

1210+
@ensureDeferred
1211+
async def test_session_config_enabled(mockserver):
1212+
pytest.importorskip("web_poet")
1213+
1214+
@session_config(["enabled.example", "disabled.example"])
1215+
class CustomSessionConfig(SessionConfig):
1216+
1217+
def enabled(self, request: Request):
1218+
return "enabled" in urlparse_cached(request).netloc
1219+
1220+
settings = {
1221+
"RETRY_TIMES": 0,
1222+
"ZYTE_API_URL": mockserver.urljoin("/"),
1223+
"ZYTE_API_SESSION_MAX_BAD_INITS": 1,
1224+
}
1225+
1226+
class TestSpider(Spider):
1227+
name = "test"
1228+
start_urls = ["https://enabled.example", "https://disabled.example"]
1229+
1230+
def parse(self, response):
1231+
pass
1232+
1233+
crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False)
1234+
await crawler.crawl()
1235+
1236+
session_stats = {
1237+
k: v
1238+
for k, v in crawler.stats.get_stats().items()
1239+
if k.startswith("scrapy-zyte-api/sessions")
1240+
}
1241+
assert session_stats == {
1242+
"scrapy-zyte-api/sessions/use/disabled": 1,
1243+
"scrapy-zyte-api/sessions/pools/enabled.example/init/check-passed": 1,
1244+
"scrapy-zyte-api/sessions/pools/enabled.example/use/check-passed": 1,
1245+
}
1246+
1247+
# Clean up the session config registry.
1248+
session_config_registry.__init__() # type: ignore[misc]
1249+
1250+
12081251
@ensureDeferred
12091252
async def test_session_config_location(mockserver):
12101253
"""A custom session config can be used to customize the params for
@@ -1739,6 +1782,7 @@ def parse4(self, response):
17391782
assert session_stats == {
17401783
"scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 2,
17411784
"scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 2,
1785+
"scrapy-zyte-api/sessions/use/disabled": 2,
17421786
}
17431787

17441788
assert tracker.cookies == [

0 commit comments

Comments
 (0)