9
9
10
10
11
11
@pytest .mark .only
12
- async def test_actor_scrapy_title_spider (
12
+ async def test_actor_scrapy_title_spider_v2 (
13
13
make_actor : MakeActorFunction ,
14
14
run_actor : RunActorFunction ,
15
15
) -> None :
16
16
actor_source_files = {
17
- 'requirements.txt' : """
18
- scrapy ~= 2.12
19
- """ ,
20
17
'src/spiders/title.py' : """
21
18
from __future__ import annotations
22
19
from typing import TYPE_CHECKING, Any
@@ -32,6 +29,9 @@ async def test_actor_scrapy_title_spider(
32
29
class TitleSpider(Spider):
33
30
name = 'title_spider'
34
31
32
+ # Limit the number of pages to scrape.
33
+ custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
34
+
35
35
def __init__(
36
36
self,
37
37
start_urls: list[str],
@@ -61,7 +61,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
61
61
import scrapy
62
62
63
63
class TitleItem(scrapy.Item):
64
- url = scrapy.Field
64
+ url = scrapy.Field()
65
65
title = scrapy.Field()
66
66
""" ,
67
67
'src/settings.py' : """
@@ -107,11 +107,10 @@ async def main() -> None:
107
107
""" ,
108
108
'src/__main__.py' : """
109
109
from __future__ import annotations
110
- import asyncio
111
110
from twisted.internet import asyncioreactor
112
111
113
112
# Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
114
- asyncioreactor.install(asyncio.get_event_loop() )
113
+ asyncioreactor.install()
115
114
116
115
import os
117
116
from apify.scrapy import initialize_logging, run_scrapy_actor
@@ -133,5 +132,8 @@ async def main() -> None:
133
132
134
133
items = await actor .last_run ().dataset ().list_items ()
135
134
136
- assert items .count == 48
137
- assert items .items == {'blah' }
135
+ assert items .count >= 10
136
+
137
+ for item in items .items :
138
+ assert 'url' in item
139
+ assert 'title' in item
0 commit comments