Skip to content

Commit 748be07

Browse files
authored
Copy known zyte_api_session-prefixed meta into session initialization requests (#205)
1 parent 7151e73 commit 748be07

File tree

4 files changed

+92
-1
lines changed

4 files changed

+92
-1
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ repos:
88
hooks:
99
- id: black
1010
- repo: https://github.com/pycqa/flake8
11-
rev: 7.0.0
11+
rev: 7.1.0
1212
hooks:
1313
- id: flake8
1414
additional_dependencies:

docs/usage/session.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ define a separate :ref:`session config override <session-configs>` for each
145145
website, each with its own implementation of
146146
:meth:`~scrapy_zyte_api.SessionConfig.check`.
147147

148+
The :reqmeta:`zyte_api_session_location` and :reqmeta:`zyte_api_session_params`
149+
request metadata keys, if present in a request that triggers a session
150+
initialization request, will be copied into the session initialization request,
151+
so that they are available when :setting:`ZYTE_API_SESSION_CHECKER` or
152+
:meth:`~scrapy_zyte_api.SessionConfig.check` is called for a session
153+
initialization request.
154+
148155
If your session checking implementation relies on the response body (e.g. it
149156
uses CSS or XPath expressions), you should make sure that you are getting one,
150157
which might not be the case if you are mostly using :ref:`Zyte API automatic

scrapy_zyte_api/_session.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,11 @@ async def _init_session(self, session_id: str, request: Request, pool: str) -> b
505505
SESSION_INIT_META_KEY: True,
506506
"dont_merge_cookies": True,
507507
"zyte_api": {**session_params, "session": {"id": session_id}},
508+
**{
509+
k: v
510+
for k, v in request.meta.items()
511+
if k in {"zyte_api_session_location", "zyte_api_session_params"}
512+
},
508513
},
509514
callback=NO_CALLBACK,
510515
)

tests/test_sessions.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,85 @@ def parse(self, response):
13481348
session_config_registry.__init__() # type: ignore[misc]
13491349

13501350

1351+
@ensureDeferred
1352+
async def test_session_config_check_meta(mockserver):
1353+
"""When initializing a session, known zyte_api_session-prefixed params
1354+
should be included in the session initialization request, so that they can
1355+
be used from check methods validating those requests.
1356+
1357+
For example, when validating a location, access to
1358+
zyte_api_session_location may be necessary.
1359+
"""
1360+
pytest.importorskip("web_poet")
1361+
1362+
params = {
1363+
"actions": [
1364+
{
1365+
"action": "setLocation",
1366+
"address": {"postalCode": "10001"},
1367+
}
1368+
]
1369+
}
1370+
1371+
@session_config(["example.com"])
1372+
class CustomSessionConfig(SessionConfig):
1373+
1374+
def check(self, response, request):
1375+
return (
1376+
bool(self.location(request))
1377+
and response.meta["zyte_api_session_params"] == params
1378+
and (
1379+
(
1380+
response.meta.get("_is_session_init_request", False)
1381+
and "zyte_api_session_foo" not in response.meta
1382+
)
1383+
or response.meta["zyte_api_session_foo"] == "bar"
1384+
)
1385+
)
1386+
1387+
settings = {
1388+
"RETRY_TIMES": 0,
1389+
"ZYTE_API_URL": mockserver.urljoin("/"),
1390+
"ZYTE_API_SESSION_ENABLED": True,
1391+
"ZYTE_API_SESSION_MAX_BAD_INITS": 1,
1392+
}
1393+
1394+
class TestSpider(Spider):
1395+
name = "test"
1396+
start_urls = ["https://example.com"]
1397+
1398+
def start_requests(self):
1399+
for url in self.start_urls:
1400+
yield Request(
1401+
url,
1402+
meta={
1403+
"zyte_api_automap": params,
1404+
"zyte_api_session_params": params,
1405+
"zyte_api_session_location": {"postalCode": "10001"},
1406+
"zyte_api_session_foo": "bar",
1407+
},
1408+
)
1409+
1410+
def parse(self, response):
1411+
pass
1412+
1413+
crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False)
1414+
await crawler.crawl()
1415+
1416+
session_stats = {
1417+
k: v
1418+
for k, v in crawler.stats.get_stats().items()
1419+
if k.startswith("scrapy-zyte-api/sessions")
1420+
}
1421+
assert session_stats == {
1422+
"scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1,
1423+
"scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 1,
1424+
}
1425+
1426+
# Clean up the session config registry.
1427+
session_config_registry.__init__() # type: ignore[misc]
1428+
1429+
13511430
@ensureDeferred
13521431
async def test_session_config_param_error(mockserver):
13531432
pytest.importorskip("web_poet")

0 commit comments

Comments
 (0)