Skip to content

Commit 0fe27b0

Browse files
committed
raise exceptions on non-200 HTTP returns and retry implicitly
Requests does not throw on HTTP errors, so pyosmium was just consuming the error message. Fix that. Also add a retry mechanism for transient errors. The list of transient status codes is the same as the one that curl uses.
1 parent df5fb81 commit 0fe27b0

File tree

2 files changed

+134
-4
lines changed

2 files changed

+134
-4
lines changed

src/osmium/replication/server.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@
77
""" Helper functions to communicate with replication servers.
88
"""
99
from typing import NamedTuple, Optional, Any, Iterator, cast, Dict, Mapping, Tuple
10-
import requests
1110
import urllib.request as urlrequest
1211
from urllib.error import URLError
1312
import datetime as dt
1413
from collections import namedtuple
1514
from contextlib import contextmanager
1615
from math import ceil
1716

17+
import requests
18+
from requests.adapters import HTTPAdapter
19+
from urllib3.util import Retry
20+
1821
from osmium import MergeInputReader, BaseHandler
1922
from osmium import io as oio
2023
from osmium import version
@@ -52,6 +55,8 @@ def __init__(self, url: str, diff_type: str = 'osc.gz') -> None:
5255
self.diff_type = diff_type
5356
self.extra_request_params: dict[str, Any] = dict(timeout=60, stream=True)
5457
self.session: Optional[requests.Session] = None
58+
self.retry = Retry(total=3, backoff_factor=0.5, allowed_methods={'GET'},
59+
status_forcelist=[408, 429, 500, 502, 503, 504])
5560

5661
def close(self) -> None:
5762
""" Close any open connection to the replication server.
@@ -62,6 +67,8 @@ def close(self) -> None:
6267

6368
def __enter__(self) -> 'ReplicationServer':
6469
self.session = requests.Session()
70+
self.session.mount('http://', HTTPAdapter(max_retries=self.retry))
71+
self.session.mount('https://', HTTPAdapter(max_retries=self.retry))
6572
return self
6673

6774
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
@@ -97,6 +104,8 @@ def open_url(self, url: urlrequest.Request) -> Any:
97104
@contextmanager
98105
def _get_url_with_session() -> Iterator[requests.Response]:
99106
with requests.Session() as session:
107+
session.mount('http://', HTTPAdapter(max_retries=self.retry))
108+
session.mount('https://', HTTPAdapter(max_retries=self.retry))
100109
request = session.get(url.get_full_url(), **get_params)
101110
yield request
102111

@@ -133,7 +142,7 @@ def collect_diffs(self, start_id: int, max_size: int = 1024) -> Optional[Downloa
133142
try:
134143
diffdata = self.get_diff_block(current_id)
135144
except:
136-
LOG.debug("Error during diff download. Bailing out.")
145+
LOG.error("Error during diff download. Bailing out.")
137146
diffdata = ''
138147
if len(diffdata) == 0:
139148
if start_id == current_id:
@@ -348,6 +357,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
348357
with self.open_url(self.make_request(self.get_state_url(seq))) as response:
349358
if hasattr(response, 'iter_lines'):
350359
# generated by requests
360+
response.raise_for_status()
351361
lines = response.iter_lines()
352362
else:
353363
lines = response
@@ -372,7 +382,7 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
372382
ts = ts.replace(tzinfo=dt.timezone.utc)
373383

374384
except (URLError, IOError) as err:
375-
LOG.debug("Loading state info %s failed with: %s", seq, str(err))
385+
LOG.debug("Loading state info failed with: %s", str(err))
376386
return None
377387

378388
if ts is not None and next_seq is not None:
@@ -382,12 +392,13 @@ def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optiona
382392

383393
def get_diff_block(self, seq: int) -> str:
384394
""" Downloads the diff with the given sequence number and returns
385-
it as a byte sequence. Throws a :code:`urllib.error.HTTPError`
395+
it as a byte sequence. Throws an :code:`requests.HTTPError`
386396
if the file cannot be downloaded.
387397
"""
388398
with self.open_url(self.make_request(self.get_diff_url(seq))) as resp:
389399
if hasattr(resp, 'content'):
390400
# generated by requests
401+
resp.raise_for_status()
391402
return cast(str, resp.content)
392403

393404
# generated by urllib.request

test/test_replication.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# This file is part of Pyosmium.
44
#
55
# Copyright (C) 2023 Sarah Hoffmann.
6+
import logging
67
import time
78
from textwrap import dedent
89

@@ -124,6 +125,35 @@ def test_get_state_timestamp_cut(httpserver):
124125
assert res.sequence == 2594669
125126

126127

128+
def test_get_state_permanent_error(httpserver, caplog):
129+
httpserver.expect_request('/state.txt').respond_with_data('stuff', status=404)
130+
131+
with caplog.at_level(logging.DEBUG):
132+
res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info()
133+
134+
assert res is None
135+
assert "Loading state info failed" in caplog.text
136+
137+
138+
def test_get_state_transient_error(httpserver):
139+
httpserver.expect_ordered_request('/state.txt').respond_with_data('stuff', status=500)
140+
httpserver.expect_ordered_request('/state.txt').respond_with_data('stuff', status=500)
141+
httpserver.expect_ordered_request('/state.txt').respond_with_data("""\
142+
#Sat Aug 26 11:04:04 UTC 2017
143+
txnMaxQueried=1219304113
144+
sequenceNumber=2594669
145+
timestamp=2017-08-26T11\\:04\\:02Z
146+
txnReadyList=
147+
txnMax=1219304113
148+
txnActiveList=1219303583,1219304054,1219304104""")
149+
150+
res = rserv.ReplicationServer(httpserver.url_for('')).get_state_info()
151+
152+
assert res is not None
153+
assert res.timestamp == mkdate(2017, 8, 26, 11, 4, 2)
154+
assert res.sequence == 2594669
155+
156+
127157
def test_get_state_too_many_retries(httpserver):
128158
httpserver.expect_ordered_request('/state.txt').respond_with_data("""\
129159
#Sat Aug 26 11:04:04 UTC 2017
@@ -321,3 +351,92 @@ def way(self, w):
321351
assert diffs is not None
322352
diffs.reader.apply(h, idx="flex_mem")
323353
assert h.counts == [1, 1, 0, 0]
354+
355+
356+
def test_apply_diffs_permanent_error(httpserver, caplog):
357+
httpserver.expect_ordered_request('/state.txt').respond_with_data("""\
358+
sequenceNumber=100
359+
timestamp=2017-08-26T11\\:04\\:02Z
360+
""")
361+
httpserver.expect_ordered_request('/000/000/100.opl')\
362+
.respond_with_data('not a file', status=404)
363+
364+
with caplog.at_level(logging.ERROR):
365+
with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr:
366+
h = CountingHandler()
367+
assert None == svr.apply_diffs(h, 100, 10000)
368+
assert h.counts == [0, 0, 0, 0]
369+
370+
assert 'Error during diff download' in caplog.text
371+
372+
373+
def test_apply_diffs_permanent_error_later_diff(httpserver, caplog):
374+
httpserver.expect_ordered_request('/state.txt').respond_with_data("""\
375+
sequenceNumber=101
376+
timestamp=2017-08-26T11\\:04\\:02Z
377+
""")
378+
httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\
379+
n1 x10.0 y23.0
380+
w1 Nn1,n2
381+
"""))
382+
httpserver.expect_ordered_request('/000/000/101.opl')\
383+
.respond_with_data('not a file', status=404)
384+
385+
with caplog.at_level(logging.ERROR):
386+
with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr:
387+
h = CountingHandler()
388+
assert 100 == svr.apply_diffs(h, 100, 10000)
389+
assert h.counts == [1, 1, 0, 0]
390+
391+
assert 'Error during diff download' in caplog.text
392+
393+
394+
def test_apply_diffs_transient_error(httpserver, caplog):
395+
httpserver.expect_ordered_request('/state.txt').respond_with_data("""\
396+
sequenceNumber=101
397+
timestamp=2017-08-26T11\\:04\\:02Z
398+
""")
399+
httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\
400+
n1 x10.0 y23.0
401+
w1 Nn1,n2
402+
"""))
403+
httpserver.expect_ordered_request('/000/000/101.opl')\
404+
.respond_with_data('not a file', status=503)
405+
httpserver.expect_ordered_request('/000/000/101.opl').respond_with_data(dedent("""\
406+
n2 x10.0 y23.0
407+
"""))
408+
409+
with caplog.at_level(logging.ERROR):
410+
with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr:
411+
h = CountingHandler()
412+
assert 101 == svr.apply_diffs(h, 100, 10000)
413+
assert h.counts == [2, 1, 0, 0]
414+
415+
assert 'Error during diff download' not in caplog.text
416+
417+
418+
419+
420+
def test_apply_diffs_transient_error_permanent(httpserver, caplog):
421+
httpserver.expect_ordered_request('/state.txt').respond_with_data("""\
422+
sequenceNumber=101
423+
timestamp=2017-08-26T11\\:04\\:02Z
424+
""")
425+
httpserver.expect_ordered_request('/000/000/100.opl').respond_with_data(dedent("""\
426+
n1 x10.0 y23.0
427+
w1 Nn1,n2
428+
"""))
429+
for _ in range(4):
430+
httpserver.expect_ordered_request('/000/000/101.opl')\
431+
.respond_with_data('not a file', status=503)
432+
httpserver.expect_ordered_request('/000/000/101.opl').respond_with_data(dedent("""\
433+
n2 x10.0 y23.0
434+
"""))
435+
436+
with caplog.at_level(logging.ERROR):
437+
with rserv.ReplicationServer(httpserver.url_for(''), "opl") as svr:
438+
h = CountingHandler()
439+
assert 100 == svr.apply_diffs(h, 100, 10000)
440+
assert h.counts == [1, 1, 0, 0]
441+
442+
assert 'Error during diff download' in caplog.text

0 commit comments

Comments
 (0)