Commit c33b83f

Scrapy integration test is working

1 parent 5fc57b5 commit c33b83f

5 files changed: +15 -14 lines changed

docs/02_guides/code/_scrapy_project/src/__main__.py

Lines changed: 1 addition & 2 deletions

@@ -12,11 +12,10 @@
 # ruff: noqa: E402, I001

 from __future__ import annotations
-import asyncio
 from twisted.internet import asyncioreactor

 # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
-asyncioreactor.install(asyncio.get_event_loop())  # type: ignore[no-untyped-call]
+asyncioreactor.install()  # type: ignore[no-untyped-call]

 import os
 from apify.scrapy import initialize_logging, run_scrapy_actor
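For background on this change: `asyncio.get_event_loop()` is deprecated when no event loop is running, while `asyncioreactor.install()` with no argument lets Twisted obtain or create the loop itself. A minimal sketch of the bootstrap order this entry point relies on (standalone, outside the Actor context; the final `print` is only an illustrative check):

```python
# Minimal sketch of the reactor bootstrap order assumed here: the asyncio
# reactor must be installed before anything imports twisted.internet.reactor.
from __future__ import annotations

from twisted.internet import asyncioreactor

# With no argument, Twisted selects (or creates) the asyncio event loop itself,
# so the entry point no longer needs asyncio.get_event_loop(), which is
# deprecated outside a running loop on newer Python versions.
asyncioreactor.install()

# Only now is it safe to import modules that touch the global reactor.
from twisted.internet import reactor  # noqa: E402

print(type(reactor).__name__)  # AsyncioSelectorReactor
```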

docs/02_guides/code/_scrapy_project/src/spiders/title.py

Lines changed: 3 additions & 0 deletions

@@ -24,6 +24,9 @@ class TitleSpider(Spider):

     name = 'title_spider'

+    # Limit the number of pages to scrape.
+    custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
+
     def __init__(
         self,
         start_urls: list[str],
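`CLOSESPIDER_PAGECOUNT` is handled by Scrapy's built-in CloseSpider extension: once roughly that many responses have been crawled, the spider is closed, so it is a soft cap rather than an exact count. A small illustrative spider (hypothetical name and start URL) using the same per-spider override:

```python
# Illustrative spider capped via Scrapy's CloseSpider extension;
# custom_settings overrides project settings for this spider only.
from __future__ import annotations

from scrapy import Spider


class CappedSpider(Spider):  # hypothetical name and start URL
    name = 'capped_spider'
    start_urls = ['https://example.com']

    # Close the spider after (roughly) 10 crawled responses.
    custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}

    def parse(self, response):
        yield {'url': response.url, 'title': response.css('title::text').get()}
```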

tests/integration/README.md

Lines changed: 0 additions & 1 deletion

@@ -94,7 +94,6 @@ async def test_something(
     output_record = await actor.last_run().key_value_store().get_record('OUTPUT')
     assert output_record is not None
     assert output_record['value'] == expected_output
-
 ```

 Or you can pass multiple source files with the `source_files` argument, if you need something really complex:
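The README's own multi-file example is not part of this diff, but the pattern looks roughly like the sketch below. The fixture types come from `.conftest` (`MakeActorFunction`, `RunActorFunction`); the exact call signatures, the actor name, and the `'SUCCEEDED'` status check are assumptions for illustration only:

```python
# Hypothetical sketch of the multi-file variant mentioned above. The mapping of
# relative paths to file contents mirrors the actor_source_files dict used in
# the Scrapy test below; argument names and the status check are assumptions.
actor_source_files = {
    'src/spiders/title.py': '# spider module source goes here',
    'src/__main__.py': '# actor entry point source goes here',
}


async def test_something_complex(make_actor, run_actor):
    actor = await make_actor('scrapy-title-spider', source_files=actor_source_files)
    run_result = await run_actor(actor)

    assert run_result.status == 'SUCCEEDED'
```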
Lines changed: 1 addition & 0 deletions

@@ -1,2 +1,3 @@
 # The test fixture will put the Apify SDK wheel path on the next line
 APIFY_SDK_WHEEL_PLACEHOLDER
+scrapy~=2.12.0
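Two notes on this requirements template (whose file path is not shown in this view): `scrapy~=2.12.0` is a compatible-release pin, equivalent to `>=2.12.0, ==2.12.*`, and the placeholder line is, per its own comment, rewritten by the test fixture with the path to the locally built Apify SDK wheel. A rough, hypothetical sketch of that substitution:

```python
# Hypothetical helper (not the actual fixture code) showing the substitution the
# template comment describes: the placeholder line becomes the wheel path, and
# the scrapy pin is left untouched.
from pathlib import Path


def render_requirements(template: Path, sdk_wheel: Path) -> str:
    content = template.read_text()
    return content.replace('APIFY_SDK_WHEEL_PLACEHOLDER', str(sdk_wheel))
```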

tests/integration/test_actor_scrapy.py

Lines changed: 10 additions & 11 deletions

@@ -2,21 +2,15 @@

 from typing import TYPE_CHECKING

-import pytest
-
 if TYPE_CHECKING:
     from .conftest import MakeActorFunction, RunActorFunction


-@pytest.mark.only
 async def test_actor_scrapy_title_spider(
     make_actor: MakeActorFunction,
     run_actor: RunActorFunction,
 ) -> None:
     actor_source_files = {
-        'requirements.txt': """
-            scrapy ~= 2.12
-        """,
         'src/spiders/title.py': """
             from __future__ import annotations
             from typing import TYPE_CHECKING, Any
@@ -32,6 +26,9 @@ async def test_actor_scrapy_title_spider(
             class TitleSpider(Spider):
                 name = 'title_spider'

+                # Limit the number of pages to scrape.
+                custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
+
                 def __init__(
                     self,
                     start_urls: list[str],
@@ -61,7 +58,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
             import scrapy

             class TitleItem(scrapy.Item):
-                url = scrapy.Field
+                url = scrapy.Field()
                 title = scrapy.Field()
         """,
         'src/settings.py': """
@@ -107,11 +104,10 @@ async def main() -> None:
         """,
         'src/__main__.py': """
             from __future__ import annotations
-            import asyncio
             from twisted.internet import asyncioreactor

             # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
-            asyncioreactor.install(asyncio.get_event_loop())
+            asyncioreactor.install()

             import os
             from apify.scrapy import initialize_logging, run_scrapy_actor
@@ -133,5 +129,8 @@ async def main() -> None:

     items = await actor.last_run().dataset().list_items()

-    assert items.count == 48
-    assert items.items == {'blah'}
+    assert items.count >= 10
+
+    for item in items.items:
+        assert 'url' in item
+        assert 'title' in item
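Two of the fixes above deserve a note. `url = scrapy.Field` (without parentheses) assigns the `Field` class itself, and Scrapy's `Item` metaclass only registers attributes that are `Field` instances, so the `url` field was never declared and assigning it raises a `KeyError`; `scrapy.Field()` corrects that. And because `CLOSESPIDER_PAGECOUNT` is a soft limit, the assertions now check a lower bound on the item count and the presence of the expected fields rather than exact values. A short sketch of the `Field()` distinction:

```python
# Sketch of why the scrapy.Field() fix matters: the Item metaclass registers
# only Field *instances* as declared fields.
import scrapy


class TitleItem(scrapy.Item):
    url = scrapy.Field()    # declared field
    title = scrapy.Field()  # declared field


item = TitleItem(url='https://example.com', title='Example')
print(dict(item))  # {'url': 'https://example.com', 'title': 'Example'}

# With `url = scrapy.Field` (no call), TitleItem(url=...) raises
# KeyError: 'TitleItem does not support field: url'.
```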
