
Commit f1a7fd7

address feedback

1 parent 1321f82

15 files changed: +53, -143 lines

.github/workflows/run_code_checks.yaml

Lines changed: 3 additions & 3 deletions

@@ -19,9 +19,9 @@ jobs:
     name: Unit tests
     uses: apify/workflows/.github/workflows/python_unit_tests.yaml@main
 
-  # docs_check:
-  #   name: Docs check
-  #   uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
+  docs_check:
+    name: Docs check
+    uses: apify/workflows/.github/workflows/python_docs_check.yaml@main
 
   integration_tests:
     name: Integration tests

docs/02_guides/05_scrapy.mdx

Lines changed: 4 additions & 4 deletions

@@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
 import UnderscoreMainExample from '!!raw-loader!./code/scrapy_project/src/__main__.py';
 import MainExample from '!!raw-loader!./code/scrapy_project/src/main.py';
 import ItemsExample from '!!raw-loader!./code/scrapy_project/src/items.py';
-import TitleSpiderExample from '!!raw-loader!./code/scrapy_project/src/spiders/title.py';
+import SpidersExample from '!!raw-loader!./code/scrapy_project/src/spiders.py';
 import SettingsExample from '!!raw-loader!./code/scrapy_project/src/settings.py';
 
 [Scrapy](https://scrapy.org/) is an open-source web scraping framework for Python. It provides tools for defining scrapers, extracting data from web pages, following links, and handling pagination. With the Apify SDK, Scrapy projects can be converted into Apify [Actors](https://docs.apify.com/platform/actors), integrated with Apify [storages](https://docs.apify.com/platform/storage), and executed on the Apify [platform](https://docs.apify.com/platform).
@@ -48,7 +48,7 @@ Additional helper functions in the [`apify.scrapy`](https://github.com/apify/api
 
 ## Create a new Apify-Scrapy project
 
-The simplest way to start using Scrapy in Apify Actors is to use the [Scrapy Actor template](https://apify.com/templates/categories/python). The template provides a pre-configured project structure and setup that includes all necessary components to run Scrapy spiders as Actors and store their output in Apify datasets. If you prefer manual setup, refer to the example Actor section below for configuration details.
+The simplest way to start using Scrapy in Apify Actors is to use the [Scrapy Actor template](https://apify.com/templates/python-scrapy). The template provides a pre-configured project structure and setup that includes all necessary components to run Scrapy spiders as Actors and store their output in Apify datasets. If you prefer manual setup, refer to the example Actor section below for configuration details.
 
 ## Wrapping an existing Scrapy project
 
@@ -80,9 +80,9 @@ The following example demonstrates a Scrapy Actor that scrapes page titles and e
         {ItemsExample}
     </CodeBlock>
 </TabItem>
-<TabItem value="spiders/title.py" label="spiders/title.py">
+<TabItem value="spiders.py" label="spiders.py">
     <CodeBlock className="language-python">
-        {TitleSpiderExample}
+        {SpidersExample}
     </CodeBlock>
 </TabItem>
 <TabItem value="settings.py" label="settings.py">

docs/02_guides/code/scrapy_project/src/__main__.py

Lines changed: 4 additions & 2 deletions

@@ -1,13 +1,15 @@
-# ruff: noqa: E402, I001
-
 from __future__ import annotations
+
 from twisted.internet import asyncioreactor
 
 # Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
 asyncioreactor.install()  # type: ignore[no-untyped-call]
 
 import os
+
 from apify.scrapy import initialize_logging, run_scrapy_actor
+
+# Import your main Actor coroutine here.
 from .main import main
 
 # Ensure the location to the Scrapy settings module is defined.
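For orientation, here is a minimal sketch of the whole src/__main__.py after this change. The environment-variable assignment value and the __main__ guard are assumptions inferred from the surrounding context; only the lines shown in the diff above are confirmed by this commit.

from __future__ import annotations

from twisted.internet import asyncioreactor

# Install Twisted's asyncio reactor before importing any other Twisted or Scrapy components.
asyncioreactor.install()  # type: ignore[no-untyped-call]

import os

from apify.scrapy import initialize_logging, run_scrapy_actor

# Import your main Actor coroutine here.
from .main import main

# Ensure the location to the Scrapy settings module is defined.
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'src.settings')  # value is an assumption

if __name__ == '__main__':
    # Route Scrapy/Twisted logging through the Apify logger, then run the Actor coroutine.
    initialize_logging()
    run_scrapy_actor(main())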

docs/02_guides/code/scrapy_project/src/items.py

Lines changed: 2 additions & 0 deletions

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from scrapy import Field, Item
 
 
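A short sketch of what src/items.py likely contains after this change. The TitleItem class name comes from the spider code elsewhere in this commit; the url and title fields are assumptions, as the diff shows only the imports.

from __future__ import annotations

from scrapy import Field, Item


class TitleItem(Item):
    """A single scraped page, represented by its URL and page title (fields are assumed)."""

    url = Field()
    title = Field()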
docs/02_guides/code/scrapy_project/src/main.py

Lines changed: 1 addition & 3 deletions

@@ -1,5 +1,3 @@
-# ruff: noqa: I001
-
 from __future__ import annotations
 
 from scrapy.crawler import CrawlerRunner
@@ -9,7 +7,7 @@
 from apify.scrapy import apply_apify_settings
 
 # Import your Scrapy spider here.
-from .spiders.title import TitleSpider as Spider
+from .spiders import TitleSpider as Spider
 
 
 async def main() -> None:
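A hedged sketch of how such a main() coroutine is typically wired up, assuming the usual apply_apify_settings plus CrawlerRunner pattern: the Actor context manager and the deferred_to_future bridge are assumptions, not shown in this diff.

from __future__ import annotations

from scrapy.crawler import CrawlerRunner
from scrapy.utils.defer import deferred_to_future

from apify import Actor
from apify.scrapy import apply_apify_settings

# Import your Scrapy spider here.
from .spiders import TitleSpider as Spider


async def main() -> None:
    async with Actor:
        # Overlay Apify-specific settings (storages, logging) on top of the project settings.
        settings = apply_apify_settings()

        # Run the spider on Twisted's asyncio reactor and wait for it to finish.
        runner = CrawlerRunner(settings)
        await deferred_to_future(runner.crawl(Spider))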

docs/02_guides/code/scrapy_project/src/settings.py

Lines changed: 0 additions & 10 deletions

@@ -6,13 +6,3 @@
 SPIDER_MODULES = ['src.spiders']
 TELNETCONSOLE_ENABLED = False
 TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
-
-ITEM_PIPELINES = {
-    'src.pipelines.TitleItemPipeline': 123,
-}
-SPIDER_MIDDLEWARES = {
-    'src.middlewares.TitleSpiderMiddleware': 543,
-}
-DOWNLOADER_MIDDLEWARES = {
-    'src.middlewares.TitleDownloaderMiddleware': 543,
-}
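After this deletion, only the project-level options survive; the tail of src/settings.py then reads roughly as follows (lines 1-5 of the file are not shown in the diff).

# ... options from lines 1-5 of the file (not shown in this diff) ...

SPIDER_MODULES = ['src.spiders']
TELNETCONSOLE_ENABLED = False
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'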

docs/02_guides/code/scrapy_project/src/spiders/title.py renamed to docs/02_guides/code/scrapy_project/src/spiders.py

Lines changed: 2 additions & 4 deletions

@@ -1,13 +1,11 @@
-# ruff: noqa: TID252, RUF012
-
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any
 from urllib.parse import urljoin
 
 from scrapy import Request, Spider
 
-from ..items import TitleItem
+from .items import TitleItem
 
 if TYPE_CHECKING:
     from collections.abc import Generator
@@ -53,7 +51,7 @@ def parse(self, response: Response) -> Generator[TitleItem | Request, None, None
             response: The web page response.
 
         Yields:
-            Yields scraped TitleItem and Requests for links.
+            Yields scraped `TitleItem` and new `Request` objects for links.
         """
         self.logger.info('TitleSpider is parsing %s...', response)
 
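For orientation, a hedged sketch of the renamed src/spiders.py. Only the imports, the docstring fragment, and the logging line appear in this diff; the spider name, start_urls, and the CSS selectors below are assumptions.

from __future__ import annotations

from typing import TYPE_CHECKING
from urllib.parse import urljoin

from scrapy import Request, Spider

from .items import TitleItem

if TYPE_CHECKING:
    from collections.abc import Generator

    from scrapy.http import Response


class TitleSpider(Spider):
    """A spider that scrapes page titles and follows links (name and start_urls are assumed)."""

    name = 'title_spider'
    start_urls = ['https://example.com']  # placeholder

    def parse(self, response: Response) -> Generator[TitleItem | Request, None, None]:
        """Parse the web page response.

        Args:
            response: The web page response.

        Yields:
            Yields scraped `TitleItem` and new `Request` objects for links.
        """
        self.logger.info('TitleSpider is parsing %s...', response)

        # Extract the page title and yield it as an item (selector is an assumption).
        title = response.css('title::text').get()
        yield TitleItem(url=response.url, title=title)

        # Follow every absolute HTTP(S) link found on the page (filtering is an assumption).
        for href in response.css('a::attr(href)').getall():
            link_url = urljoin(response.url, href)
            if link_url.startswith(('http://', 'https://')):
                yield Request(link_url)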
docs/02_guides/code/scrapy_project/src/spiders/__init__.py

Whitespace-only changes.

docs/02_guides/code/scrapy_project/src/spiders/py.typed

Whitespace-only changes.

pyproject.toml

Lines changed: 10 additions & 0 deletions

@@ -136,6 +136,16 @@ indent-style = "space"
     "TRY301", # Abstract `raise` to an inner function
     "PLW0603", # Using the global statement to update `{name}` is discouraged
 ]
+"**/docs/**/scrapy_project/**/__main__.py" = [
+    # Because of asyncioreactor.install() call.
+    "E402", # Module level import not at top of file
+]
+"**/docs/**/scrapy_project/**" = [
+    # Local imports are mixed up with the Apify SDK.
+    "I001", # Import block is un-sorted or un-formatted
+    # Class variables are common in Scrapy projects.
+    "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
+]
 
 [tool.ruff.lint.flake8-quotes]
 docstring-quotes = "double"
