2
2
3
3
from typing import TYPE_CHECKING
4
4
5
- import pytest
6
-
7
5
if TYPE_CHECKING :
8
6
from .conftest import MakeActorFunction , RunActorFunction
9
7
10
8
11
- @pytest .mark .only
12
9
async def test_actor_scrapy_title_spider (
13
10
make_actor : MakeActorFunction ,
14
11
run_actor : RunActorFunction ,
15
12
) -> None :
16
13
actor_source_files = {
17
- 'requirements.txt' : """
18
- scrapy ~= 2.12
19
- """ ,
20
14
'src/spiders/title.py' : """
21
15
from __future__ import annotations
22
16
from typing import TYPE_CHECKING, Any
@@ -32,6 +26,9 @@ async def test_actor_scrapy_title_spider(
32
26
class TitleSpider(Spider):
33
27
name = 'title_spider'
34
28
29
+ # Limit the number of pages to scrape.
30
+ custom_settings = {'CLOSESPIDER_PAGECOUNT': 10}
31
+
35
32
def __init__(
36
33
self,
37
34
start_urls: list[str],
@@ -61,7 +58,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
61
58
import scrapy
62
59
63
60
class TitleItem(scrapy.Item):
64
- url = scrapy.Field
61
+ url = scrapy.Field()
65
62
title = scrapy.Field()
66
63
""" ,
67
64
'src/settings.py' : """
@@ -107,11 +104,10 @@ async def main() -> None:
107
104
""" ,
108
105
'src/__main__.py' : """
109
106
from __future__ import annotations
110
- import asyncio
111
107
from twisted.internet import asyncioreactor
112
108
113
109
# Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
114
- asyncioreactor.install(asyncio.get_event_loop() )
110
+ asyncioreactor.install()
115
111
116
112
import os
117
113
from apify.scrapy import initialize_logging, run_scrapy_actor
@@ -133,5 +129,8 @@ async def main() -> None:
133
129
134
130
items = await actor .last_run ().dataset ().list_items ()
135
131
136
- assert items .count == 48
137
- assert items .items == {'blah' }
132
+ assert items .count >= 10
133
+
134
+ for item in items .items :
135
+ assert 'url' in item
136
+ assert 'title' in item
0 commit comments