Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.6
rev: v0.13.1
hooks:
- id: ruff
- id: ruff-check
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# -- Project information -----------------------------------------------------

project = "python-zyte-api"
copyright = "2021, Zyte Group Ltd"
project_copyright = "2021, Zyte Group Ltd"
author = "Zyte Group Ltd"

# The short X.Y version
Expand Down
47 changes: 39 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,44 @@ filename = "zyte_api/__version__.py"

[tool.coverage.run]
branch = true
patch = [
"subprocess",
]

[tool.coverage.report]
exclude_also = [
"if TYPE_CHECKING:",
]

[tool.mypy]
allow_untyped_defs = false
implicit_reexport = false

[[tool.mypy.overrides]]
module = "runstats"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "tests.*"
allow_untyped_defs = true

[tool.pytest.ini_options]
filterwarnings = [
"ignore:The zyte_api\\.aio module is deprecated:DeprecationWarning"
]

[tool.ruff.lint]
extend-select = [
# flake8-builtins
"A",
# flake8-async
"ASYNC",
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# flake8-commas
"COM",
# pydocstyle
"D",
# flake8-future-annotations
Expand Down Expand Up @@ -84,6 +105,8 @@ extend-select = [
"T10",
# flake8-type-checking
"TC",
# flake8-tidy-imports
"TID",
# pyupgrade
"UP",
# pycodestyle warnings
Expand All @@ -92,6 +115,8 @@ extend-select = [
"YTT",
]
ignore = [
# Trailing comma missing
"COM812",
# Missing docstring in public module
"D100",
# Missing docstring in public class
Expand Down Expand Up @@ -144,21 +169,27 @@ ignore = [
"S101",
]

[tool.ruff.lint.flake8-pytest-style]
parametrize-values-type = "tuple"

[tool.ruff.lint.flake8-tidy-imports]
banned-module-level-imports = ["twisted.internet.reactor"]

[tool.ruff.lint.flake8-type-checking]
runtime-evaluated-decorators = ["attr.s"]

[tool.ruff.lint.isort]
split-on-trailing-comma = false

[tool.ruff.lint.per-file-ignores]
"zyte_api/__init__.py" = ["F401"]
"zyte_api/aio/errors.py" = ["F401"]
"zyte_api/aio/retry.py" = ["F401"]
"tests/*" = ["S"]
"docs/**" = ["B006"]
# Skip PEP 604 suggestions for files with attr classes
"zyte_api/errors.py" = ["UP007"]
"zyte_api/stats.py" = ["UP007"]

[tool.ruff.lint.flake8-pytest-style]
parametrize-values-type = "tuple"

[tool.ruff.lint.flake8-type-checking]
runtime-evaluated-decorators = ["attr.s"]
"zyte_api/errors.py" = ["UP007", "UP045"]
"zyte_api/stats.py" = ["UP007", "UP045"]

[tool.ruff.lint.pydocstyle]
convention = "pep257"
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
author_email="[email protected]",
url="https://github.com/zytedata/python-zyte-api",
packages=find_packages(exclude=["tests", "examples"]),
package_data={
"zyte_api": ["py.typed"],
},
include_package_data=True,
entry_points={
"console_scripts": ["zyte-api=zyte_api.__main__:_main"],
},
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

@pytest.fixture(scope="session")
def mockserver():
from .mockserver import MockServer
from .mockserver import MockServer # noqa: PLC0415

with MockServer() as server:
yield server
16 changes: 10 additions & 6 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import Any
from urllib.parse import urlparse

from twisted.internet import reactor
from twisted.internet.task import deferLater
from twisted.web.resource import Resource
from twisted.web.server import NOT_DONE_YET, Site
Expand All @@ -22,11 +21,11 @@


# https://github.com/scrapy/scrapy/blob/02b97f98e74a994ad3e4d74e7ed55207e508a576/tests/mockserver.py#L27C1-L33C19
def getarg(request, name, default=None, type=None):
def getarg(request, name, default=None, type_=None):
if name in request.args:
value = request.args[name][0]
if type is not None:
value = type(value)
if type_ is not None:
value = type_(value)
return value
return default

Expand All @@ -41,6 +40,8 @@ class DropResource(Resource):
isLeaf = True

def deferRequest(self, request, delay, f, *a, **kw):
from twisted.internet import reactor

def _cancelrequest(_):
# silence CancelledError
d.addErrback(lambda _: None)
Expand All @@ -56,7 +57,7 @@ def render_POST(self, request):
return NOT_DONE_YET

def _delayedRender(self, request):
abort = getarg(request, b"abort", 0, type=int)
abort = getarg(request, b"abort", 0, type_=int)
request.write(b"this connection will be dropped\n")
tr = request.channel.transport
try:
Expand Down Expand Up @@ -107,6 +108,7 @@ def render_POST(self, request):
)

request_data = json.loads(request.content.read())
response_data: dict[str, Any]

url = request_data["url"]
domain = urlparse(url).netloc
Expand Down Expand Up @@ -214,7 +216,7 @@ def render_POST(self, request):
}
return json.dumps(response_data).encode()

response_data: dict[str, Any] = {
response_data = {
"url": url,
}

Expand Down Expand Up @@ -269,6 +271,8 @@ def urljoin(self, path):


def main():
from twisted.internet import reactor

parser = argparse.ArgumentParser()
parser.add_argument("resource")
parser.add_argument("--port", type=int)
Expand Down
20 changes: 15 additions & 5 deletions tests/test_async.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from __future__ import annotations

import asyncio
from typing import TYPE_CHECKING, Any
from unittest.mock import AsyncMock

import pytest
Expand All @@ -9,6 +12,9 @@
from zyte_api.errors import ParsedError
from zyte_api.utils import USER_AGENT

if TYPE_CHECKING:
from tests.mockserver import MockServer


@pytest.mark.parametrize(
"client_cls",
Expand Down Expand Up @@ -218,7 +224,7 @@ async def test_semaphore(client_cls, get_method, iter_method, mockserver):


@pytest.mark.asyncio
async def test_session_context_manager(mockserver):
async def test_session_context_manager(mockserver: MockServer) -> None:
client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/"))
queries = [
{"url": "https://a.example", "httpResponseBody": True},
Expand All @@ -236,11 +242,13 @@ async def test_session_context_manager(mockserver):
"httpResponseBody": "PGh0bWw+PGJvZHk+SGVsbG88aDE+V29ybGQhPC9oMT48L2JvZHk+PC9odG1sPg==",
},
]
actual_results = []
actual_results: list[dict[str, Any] | Exception] = []
async with client.session() as session:
assert session._session.connector is not None
assert session._session.connector.limit == client.n_conn
actual_results.append(await session.get(queries[0]))
for future in session.iter(queries[1:]):
result: dict[str, Any] | Exception
try:
result = await future
except Exception as e:
Expand All @@ -266,7 +274,7 @@ async def test_session_context_manager(mockserver):


@pytest.mark.asyncio
async def test_session_no_context_manager(mockserver):
async def test_session_no_context_manager(mockserver: MockServer) -> None:
client = AsyncZyteAPI(api_key="a", api_url=mockserver.urljoin("/"))
queries = [
{"url": "https://a.example", "httpResponseBody": True},
Expand All @@ -284,8 +292,10 @@ async def test_session_no_context_manager(mockserver):
"httpResponseBody": "PGh0bWw+PGJvZHk+SGVsbG88aDE+V29ybGQhPC9oMT48L2JvZHk+PC9odG1sPg==",
},
]
actual_results = []
actual_results: list[dict[str, Any] | Exception] = []
result: dict[str, Any] | Exception
session = client.session()
assert session._session.connector is not None
assert session._session.connector.limit == client.n_conn
actual_results.append(await session.get(queries[0]))
for future in session.iter(queries[1:]):
Expand Down Expand Up @@ -318,4 +328,4 @@ def test_retrying_class():
"""A descriptive exception is raised when creating a client with an
AsyncRetrying subclass or similar instead of an instance of it."""
with pytest.raises(ValueError, match="must be an instance of AsyncRetrying"):
AsyncZyteAPI(api_key="foo", retrying=AggressiveRetryFactory)
AsyncZyteAPI(api_key="foo", retrying=AggressiveRetryFactory) # type: ignore[arg-type]
37 changes: 25 additions & 12 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
from __future__ import annotations

import json
import subprocess
from json import JSONDecodeError
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any
from unittest.mock import AsyncMock, Mock, patch

import pytest

from zyte_api import RequestError
from zyte_api.__main__ import run
from zyte_api.aio.errors import RequestError

if TYPE_CHECKING:
from collections.abc import Iterable

from tests.mockserver import MockServer


class MockRequestError(RequestError):
Expand Down Expand Up @@ -41,7 +49,7 @@ def get_json_content(file_object):
pass


def forbidden_domain_response():
def forbidden_domain_response() -> dict[str, Any]:
return {
"type": "/download/temporary-error",
"title": "Temporary Downloading Error",
Expand Down Expand Up @@ -95,7 +103,7 @@ async def fake_exception(value=True):
@pytest.mark.asyncio
async def test_run(queries, expected_response, store_errors, exception):
tmp_path = Path("temporary_file.jsonl")
temporary_file = tmp_path.open("w")
temporary_file = tmp_path.open("w") # noqa: ASYNC230
n_conn = 5
api_url = "https://example.com"
api_key = "fake_key"
Expand Down Expand Up @@ -177,10 +185,12 @@ async def test_run_stop_on_errors_true(mockserver):
assert exc_info.value.query == query


def _run(*, input, mockserver, cli_params=None):
def _run(
*, input_: str, mockserver: MockServer, cli_params: Iterable[str] | None = None
) -> subprocess.CompletedProcess[bytes]:
cli_params = cli_params or ()
with NamedTemporaryFile("w") as url_list:
url_list.write(input)
url_list.write(input_)
url_list.flush()
# Note: Using “python -m zyte_api” instead of “zyte-api” enables
# coverage tracking to work.
Expand All @@ -202,14 +212,14 @@ def _run(*, input, mockserver, cli_params=None):


def test_empty_input(mockserver):
result = _run(input="", mockserver=mockserver)
result = _run(input_="", mockserver=mockserver)
assert result.returncode
assert result.stdout == b""
assert result.stderr == b"No input queries found. Is the input file empty?\n"


def test_intype_txt_implicit(mockserver):
result = _run(input="https://a.example", mockserver=mockserver)
result = _run(input_="https://a.example", mockserver=mockserver)
assert not result.returncode
assert (
result.stdout
Expand All @@ -219,7 +229,9 @@ def test_intype_txt_implicit(mockserver):

def test_intype_txt_explicit(mockserver):
result = _run(
input="https://a.example", mockserver=mockserver, cli_params=["--intype", "txt"]
input_="https://a.example",
mockserver=mockserver,
cli_params=["--intype", "txt"],
)
assert not result.returncode
assert (
Expand All @@ -230,7 +242,8 @@ def test_intype_txt_explicit(mockserver):

def test_intype_jsonl_implicit(mockserver):
result = _run(
input='{"url": "https://a.example", "browserHtml": true}', mockserver=mockserver
input_='{"url": "https://a.example", "browserHtml": true}',
mockserver=mockserver,
)
assert not result.returncode
assert (
Expand All @@ -241,7 +254,7 @@ def test_intype_jsonl_implicit(mockserver):

def test_intype_jsonl_explicit(mockserver):
result = _run(
input='{"url": "https://a.example", "browserHtml": true}',
input_='{"url": "https://a.example", "browserHtml": true}',
mockserver=mockserver,
cli_params=["--intype", "jl"],
)
Expand All @@ -255,7 +268,7 @@ def test_intype_jsonl_explicit(mockserver):
@pytest.mark.flaky(reruns=16)
def test_limit_and_shuffle(mockserver):
result = _run(
input="https://a.example\nhttps://b.example",
input_="https://a.example\nhttps://b.example",
mockserver=mockserver,
cli_params=["--limit", "1", "--shuffle"],
)
Expand All @@ -268,7 +281,7 @@ def test_limit_and_shuffle(mockserver):

def test_run_non_json_response(mockserver):
result = _run(
input="https://nonjson.example",
input_="https://nonjson.example",
mockserver=mockserver,
)
assert not result.returncode
Expand Down
Loading