Skip to content

Commit f9ca6c7

Browse files
authored
Merge pull request #237 from lonvia/retry-on-500-errors
Fix handling of HTTP errors for replication handler
2 parents 0459fb4 + 795dc0d commit f9ca6c7

File tree

7 files changed

+406
-333
lines changed

7 files changed

+406
-333
lines changed

.github/actions/run-tests/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ runs:
44
using: "composite"
55
steps:
66
- name: Install test requirements
7-
run: pip install pytest shapely
7+
run: pip install pytest pytest-httpserver shapely
88
shell: bash
99

1010
- name: Run tests

.github/workflows/build_wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
env:
4141
CIBW_ARCHS: native
4242
CIBW_SKIP: "pp* *musllinux*"
43-
CIBW_TEST_REQUIRES: pytest shapely
43+
CIBW_TEST_REQUIRES: pytest pytest-httpserver shapely
4444
CIBW_TEST_COMMAND: pytest {project}/test
4545
CIBW_BUILD_FRONTEND: build
4646
CIBW_BEFORE_BUILD_LINUX: yum install -y sparsehash-devel expat-devel boost-devel zlib-devel bzip2-devel lz4-devel

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ jobs:
182182
- name: Install prerequisites
183183
run: |
184184
python -m pip install --upgrade pip
185-
pip install pytest shapely setuptools requests
185+
pip install pytest pytest-httpserver shapely setuptools requests
186186
shell: bash
187187

188188
- name: Build package

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,15 @@ They are mostly ports of the examples in Libosmium and osmium-contrib.
7474

7575
## Testing
7676

77-
There is a small test suite in the test directory. This provides regression
77+
There is a small test suite in the test directory. This provides unit
7878
test for the python bindings, it is not meant to be a test suite for Libosmium.
7979

80-
You'll need the Python `pytest` module. On Debian/Ubuntu install the package
81-
`python3-pytest`.
80+
Testing requires `pytest` and `pytest-httpserver`. On Debian/Ubuntu install
81+
the dependencies with:
8282

83-
The suite can be run with:
83+
sudo apt-get install python3-pytest python3-pytest-httpserver
84+
85+
The test suite can be run with:
8486

8587
pytest test
8688

src/osmium/replication/server.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@
77
""" Helper functions to communicate with replication servers.
88
"""
99
from typing import NamedTuple, Optional, Any, Iterator, cast, Dict, Mapping, Tuple
10-
import requests
1110
import urllib.request as urlrequest
1211
from urllib.error import URLError
1312
import datetime as dt
1413
from collections import namedtuple
1514
from contextlib import contextmanager
1615
from math import ceil
1716

17+
import requests
18+
from requests.adapters import HTTPAdapter
19+
from urllib3.util import Retry
20+
1821
from osmium import MergeInputReader, BaseHandler
1922
from osmium import io as oio
2023
from osmium import version
@@ -52,6 +55,8 @@ def __init__(self, url: str, diff_type: str = 'osc.gz') -> None:
5255
self.diff_type = diff_type
5356
self.extra_request_params: dict[str, Any] = dict(timeout=60, stream=True)
5457
self.session: Optional[requests.Session] = None
58+
self.retry = Retry(total=3, backoff_factor=0.5, allowed_methods={'GET'},
59+
status_forcelist=[408, 429, 500, 502, 503, 504])
5560

5661
def close(self) -> None:
5762
""" Close any open connection to the replication server.
@@ -62,6 +67,8 @@ def close(self) -> None:
6267

6368
def __enter__(self) -> 'ReplicationServer':
6469
self.session = requests.Session()
70+
self.session.mount('http://', HTTPAdapter(max_retries=self.retry))
71+
self.session.mount('https://', HTTPAdapter(max_retries=self.retry))
6572
return self
6673

6774
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
@@ -97,6 +104,8 @@ def open_url(self, url: urlrequest.Request) -> Any:
97104
@contextmanager
98105
def _get_url_with_session() -> Iterator[requests.Response]:
99106
with requests.Session() as session:
107+
session.mount('http://', HTTPAdapter(max_retries=self.retry))
108+
session.mount('https://', HTTPAdapter(max_retries=self.retry))
100109
request = session.get(url.get_full_url(), **get_params)
101110
yield request
102111

@@ -133,7 +142,7 @@ def collect_diffs(self, start_id: int, max_size: int = 1024) -> Optional[Downloa
133142
try:
134143
diffdata = self.get_diff_block(current_id)
135144
except:
136-
LOG.debug("Error during diff download. Bailing out.")
145+
LOG.error("Error during diff download. Bailing out.")
137146
diffdata = ''
138147
if len(diffdata) == 0:
139148
if start_id == current_id:
@@ -348,6 +357,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
348357
with self.open_url(self.make_request(self.get_state_url(seq))) as response:
349358
if hasattr(response, 'iter_lines'):
350359
# generated by requests
360+
response.raise_for_status()
351361
lines = response.iter_lines()
352362
else:
353363
lines = response
@@ -372,7 +382,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
372382
ts = ts.replace(tzinfo=dt.timezone.utc)
373383

374384
except (URLError, IOError) as err:
375-
LOG.debug("Loading state info %s failed with: %s", seq, str(err))
385+
LOG.debug("Loading state info failed with: %s", str(err))
376386
return None
377387

378388
if ts is not None and next_seq is not None:
@@ -382,12 +392,13 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
382392

383393
def get_diff_block(self, seq: int) -> str:
384394
""" Downloads the diff with the given sequence number and returns
385-
it as a byte sequence. Throws a :code:`urllib.error.HTTPError`
395+
it as a byte sequence. Throws an :code:`requests.HTTPError`
386396
if the file cannot be downloaded.
387397
"""
388398
with self.open_url(self.make_request(self.get_diff_url(seq))) as resp:
389399
if hasattr(resp, 'content'):
390400
# generated by requests
401+
resp.raise_for_status()
391402
return cast(str, resp.content)
392403

393404
# generated by urllib.request

test/test_pyosmium_get_changes.py

Lines changed: 23 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
#
33
# This file is part of Pyosmium.
44
#
5-
# Copyright (C) 2022 Sarah Hoffmann.
5+
# Copyright (C) 2023 Sarah Hoffmann.
66
""" Tests for the pyosmium-get-changes script.
77
"""
8-
from io import BytesIO
98
from pathlib import Path
109
from textwrap import dedent
1110

@@ -18,101 +17,76 @@
1817
import cookielib as cookiejarlib
1918

2019

21-
class RequestsResponses(BytesIO):
22-
23-
def __init__(self, bytes):
24-
super(RequestsResponses, self).__init__(bytes)
25-
self.content = bytes
26-
27-
def iter_lines(self):
28-
return self.readlines()
29-
30-
3120
class TestPyosmiumGetChanges:
3221

3322
@pytest.fixture(autouse=True)
34-
def setUp(self, monkeypatch):
23+
def setup(self):
3524
self.script = dict()
3625

3726
filename = (Path(__file__) / ".." / ".." / "tools"/ "pyosmium-get-changes").resolve()
3827
with filename.open("rb") as f:
3928
exec(compile(f.read(), str(filename), 'exec'), self.script)
4029

41-
self.urls = dict()
42-
43-
44-
@pytest.fixture
45-
def mock_requests(self, monkeypatch):
46-
def mock_get(session, url, **kwargs):
47-
return RequestsResponses(self.urls[url])
48-
monkeypatch.setattr(osmium.replication.server.requests.Session, "get", mock_get)
49-
50-
51-
def url(self, url, result):
52-
self.urls[url] = dedent(result).encode()
5330

54-
def main(self, *args):
55-
return self.script['main'](args)
31+
def main(self, httpserver, *args):
32+
return self.script['main'](['--server', httpserver.url_for('')] + list(args))
5633

5734

58-
def test_init_id(self, capsys):
59-
assert 0 == self.main('-I', '453')
35+
def test_init_id(self, capsys, httpserver):
36+
assert 0 == self.main(httpserver, '-I', '453')
6037

6138
output = capsys.readouterr().out.strip()
6239

6340
assert output == '453'
6441

6542

66-
def test_init_date(self, capsys, mock_requests):
67-
self.url('https://planet.osm.org/replication/minute//state.txt',
68-
"""\
43+
def test_init_date(self, capsys, httpserver):
44+
httpserver.expect_request('/state.txt').respond_with_data(dedent("""\
6945
sequenceNumber=100
7046
timestamp=2017-08-26T11\\:04\\:02Z
71-
""")
72-
self.url('https://planet.osm.org/replication/minute//000/000/000.state.txt',
73-
"""\
47+
"""))
48+
httpserver.expect_request('/000/000/000.state.txt').respond_with_data(dedent("""\
7449
sequenceNumber=0
7550
timestamp=2016-08-26T11\\:04\\:02Z
76-
""")
77-
assert 0 == self.main('-D', '2015-12-24T08:08:08Z')
51+
"""))
52+
assert 0 == self.main(httpserver, '-D', '2015-12-24T08:08:08Z')
7853

7954
output = capsys.readouterr().out.strip()
8055

8156
assert output == '-1'
8257

8358

84-
def test_init_to_file(self, tmp_path):
59+
def test_init_to_file(self, tmp_path, httpserver):
8560
fname = tmp_path / 'db.seq'
8661

87-
assert 0 == self.main('-I', '453', '-f', str(fname))
62+
assert 0 == self.main(httpserver, '-I', '453', '-f', str(fname))
8863
assert fname.read_text() == '453'
8964

9065

91-
def test_init_from_seq_file(self, tmp_path):
66+
def test_init_from_seq_file(self, tmp_path, httpserver):
9267
fname = tmp_path / 'db.seq'
9368
fname.write_text('453')
9469

95-
assert 0 == self.main('-f', str(fname))
70+
assert 0 == self.main(httpserver, '-f', str(fname))
9671
assert fname.read_text() == '453'
9772

9873

99-
def test_init_date_with_cookie(self, capsys, tmp_path, mock_requests):
100-
self.url('https://planet.osm.org/replication/minute//state.txt',
101-
"""\
74+
def test_init_date_with_cookie(self, capsys, tmp_path, httpserver):
75+
httpserver.expect_request('/state.txt').respond_with_data(dedent("""\
10276
sequenceNumber=100
10377
timestamp=2017-08-26T11\\:04\\:02Z
104-
""")
105-
self.url('https://planet.osm.org/replication/minute//000/000/000.state.txt',
106-
"""\
78+
"""))
79+
httpserver.expect_request('/000/000/000.state.txt').respond_with_data(dedent("""\
10780
sequenceNumber=0
10881
timestamp=2016-08-26T11\\:04\\:02Z
109-
""")
82+
"""))
11083

11184
fname = tmp_path / 'my.cookie'
11285
cookie_jar = cookiejarlib.MozillaCookieJar(str(fname))
11386
cookie_jar.save()
11487

115-
assert 0 == self.main('--cookie', str(fname), '-D', '2015-12-24T08:08:08Z')
88+
assert 0 == self.main(httpserver, '--cookie', str(fname),
89+
'-D', '2015-12-24T08:08:08Z')
11690

11791
output = capsys.readouterr().out.strip()
11892

0 commit comments

Comments
 (0)