
Commit 1e37e47

Scrapy integration test is working
1 parent 5fc57b5 commit 1e37e47

5 files changed: 16 additions, 12 deletions


docs/02_guides/code/_scrapy_project/src/__main__.py

Lines changed: 1 addition & 2 deletions
@@ -12,11 +12,10 @@
 # ruff: noqa: E402, I001
 
 from __future__ import annotations
-import asyncio
 from twisted.internet import asyncioreactor
 
 # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
-asyncioreactor.install(asyncio.get_event_loop())  # type: ignore[no-untyped-call]
+asyncioreactor.install()  # type: ignore[no-untyped-call]
 
 import os
 from apify.scrapy import initialize_logging, run_scrapy_actor
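
For context (not part of the commit): `asyncioreactor.install()` works without an explicit event loop because Twisted's `AsyncioSelectorReactor` obtains or creates one itself; the part that matters is that the call happens before anything else imports the global reactor. A minimal illustrative sketch of that ordering, using only Twisted:

    from twisted.internet import asyncioreactor

    # Install the asyncio-backed reactor before any other import touches
    # twisted.internet.reactor; once the default reactor has been imported
    # it can no longer be replaced.
    asyncioreactor.install()

    # Only now is it safe to import modules that use the global reactor.
    from twisted.internet import reactor
    from twisted.internet.asyncioreactor import AsyncioSelectorReactor

    assert isinstance(reactor, AsyncioSelectorReactor)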

docs/02_guides/code/_scrapy_project/src/spiders/title.py

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,9 @@ class TitleSpider(Spider):
 
     name = 'title_spider'
 
+    # Limit the number of pages to scrape.
+    custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
+
     def __init__(
         self,
         start_urls: list[str],
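
For reference, `CLOSESPIDER_PAGECOUNT` is handled by Scrapy's built-in CloseSpider extension: the crawl is closed once roughly that many responses have been processed, which keeps the integration test bounded. A minimal, self-contained spider using the same cap (the spider name and start URL below are illustrative, not taken from the project):

    import scrapy


    class CappedSpider(scrapy.Spider):
        name = 'capped_spider'
        start_urls = ['https://example.com']

        # Handled by the CloseSpider extension: stop the crawl after
        # roughly this many responses.
        custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}

        def parse(self, response):
            yield {'url': response.url, 'title': response.css('title::text').get()}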

tests/integration/README.md

Lines changed: 0 additions & 1 deletion
@@ -94,7 +94,6 @@ async def test_something(
     output_record = await actor.last_run().key_value_store().get_record('OUTPUT')
     assert output_record is not None
     assert output_record['value'] == expected_output
-
 ```
 
 Or you can pass multiple source files with the `source_files` argument, if you need something really complex:
Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
 # The test fixture will put the Apify SDK wheel path on the next line
 APIFY_SDK_WHEEL_PLACEHOLDER
+scrapy~=2.12.0

tests/integration/test_actor_scrapy.py

Lines changed: 11 additions & 9 deletions
@@ -9,14 +9,11 @@
 
 
 @pytest.mark.only
-async def test_actor_scrapy_title_spider(
+async def test_actor_scrapy_title_spider_v2(
     make_actor: MakeActorFunction,
     run_actor: RunActorFunction,
 ) -> None:
     actor_source_files = {
-        'requirements.txt': """
-            scrapy ~= 2.12
-        """,
         'src/spiders/title.py': """
            from __future__ import annotations
            from typing import TYPE_CHECKING, Any
@@ -32,6 +29,9 @@ async def test_actor_scrapy_title_spider(
            class TitleSpider(Spider):
                name = 'title_spider'
 
+               # Limit the number of pages to scrape.
+               custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
+
                def __init__(
                    self,
                    start_urls: list[str],
@@ -61,7 +61,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
            import scrapy
 
            class TitleItem(scrapy.Item):
-               url = scrapy.Field
+               url = scrapy.Field()
                title = scrapy.Field()
        """,
         'src/settings.py': """
@@ -107,11 +107,10 @@ async def main() -> None:
        """,
         'src/__main__.py': """
            from __future__ import annotations
-           import asyncio
            from twisted.internet import asyncioreactor
 
            # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
-           asyncioreactor.install(asyncio.get_event_loop())
+           asyncioreactor.install()
 
            import os
            from apify.scrapy import initialize_logging, run_scrapy_actor
@@ -133,5 +132,8 @@ async def main() -> None:
 
     items = await actor.last_run().dataset().list_items()
 
-    assert items.count == 48
-    assert items.items == {'blah'}
+    assert items.count >= 10
+
+    for item in items.items:
+        assert 'url' in item
+        assert 'title' in item
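
One detail worth calling out from this diff is the `url = scrapy.Field()` fix. Scrapy's `Item` metaclass only registers class attributes that are `Field` instances, so assigning the `Field` class itself silently drops the field and later assignments to `item['url']` fail. A small illustrative snippet (the item names here are made up, not from the commit):

    import scrapy


    class BrokenItem(scrapy.Item):
        url = scrapy.Field      # assigns the Field class itself, so 'url' is not registered
        title = scrapy.Field()


    class FixedItem(scrapy.Item):
        url = scrapy.Field()
        title = scrapy.Field()


    print(sorted(BrokenItem.fields))  # ['title']  ('url' is missing)
    print(sorted(FixedItem.fields))   # ['title', 'url']

That registration behavior is also why the updated assertions can expect every scraped item to carry both 'url' and 'title' keys.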
