11""" Helper functions to communicate with replication servers.
22"""
3-
3+ from typing import NamedTuple , Optional , Any , Iterator , cast , Mapping , Tuple
44import requests
55import urllib .request as urlrequest
66from urllib .error import URLError
99from contextlib import contextmanager
1010from math import ceil
1111
12- from osmium import MergeInputReader
12+ from osmium import MergeInputReader , BaseHandler
1313from osmium import io as oio
1414from osmium import version
1515
# Logger shared by this module, registered under the name 'pyosmium'.
# Only a NullHandler is attached here, so the library installs no output
# handler of its own; applications decide where the messages go.
LOG = logging.getLogger('pyosmium')
LOG.addHandler(logging.NullHandler())
2020
class OsmosisState(NamedTuple):
    """ State information of a replication server for one sequence.

        Behaves like a plain `(sequence, timestamp)` tuple and additionally
        gives access to both values by name.
    """
    # Sequence number the state information belongs to.
    sequence: int
    # Timestamp that goes with that sequence number.
    timestamp: dt.datetime
24+
class DownloadResult(NamedTuple):
    """ Outcome of collecting a batch of diffs from the server.

        Behaves like a plain `(id, reader, newest)` tuple and additionally
        gives access to the three values by name.
    """
    # Sequence id the batch ends with.
    # NOTE(review): presumably the last diff that was merged - confirm
    # against collect_diffs().
    id: int
    # Merge reader holding the downloaded diffs.
    reader: MergeInputReader
    # Sequence number of the newest state the server reported.
    newest: int
2329
2430class ReplicationServer :
2531 """ Represents a connection to a server that publishes replication data.
@@ -30,37 +36,37 @@ class ReplicationServer:
3036 internally keeps a connection to the server making downloads faster.
3137 """
3238
def __init__(self, url: str, diff_type: str = 'osc.gz') -> None:
    """ Remember the server location. No connection is opened here.

        `url` is the base URL that all state and diff URLs are built from.
        `diff_type` is stored as given (presumably the file suffix of the
        diff files on the server).
    """
    # Start without an HTTP session; __enter__() creates one on demand.
    self.session: Optional[requests.Session] = None
    self.diff_type = diff_type
    self.baseurl = url
3743
def close(self) -> None:
    """ Shut down the HTTP session to the replication server, if there
        is one. Calling the function without an open session does nothing.
    """
    session = self.session
    if session is None:
        return

    session.close()
    # Forget the session only after it was closed successfully.
    self.session = None
4450
def __enter__(self) -> 'ReplicationServer':
    """ Enter the context: create a fresh `requests.Session`, keep it in
        `self.session` and hand back the server object itself.
    """
    session = requests.Session()
    self.session = session
    return self
4854
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
    """ Leave the context by closing the connection via close().

        The exception details are ignored and nothing is returned, so an
        exception raised inside the `with` block is not suppressed.
    """
    self.close()
5157
def make_request(self, url: str) -> urlrequest.Request:
    """ Build the request object for `url`.

        The request carries a User-Agent header that identifies pyosmium
        together with its release version.
    """
    user_agent = "pyosmium/{}".format(version.pyosmium_release)
    return urlrequest.Request(url, headers={"User-Agent": user_agent})
5561
56- def open_url (self , url ) :
62+ def open_url (self , url : urlrequest . Request ) -> Any :
5763 """ Download a resource from the given URL and return a byte sequence
5864 of the content.
5965
6066 This method has no support for cookies or any special authentication
6167 methods. If you need these, you have to provide your own custom URL
6268 opener. Overwrite open_url() with a method that receives an
63- urllib .Request object and returns a ByteIO-like object or a
69+ urlrequest .Request object and returns a ByteIO-like object or a
6470 requests.Response.
6571
6672 Example::
@@ -79,14 +85,14 @@ def open_url(self, url):
7985 return self .session .get (url .get_full_url (), headers = headers , stream = True )
8086
8187 @contextmanager
82- def _get_url_with_session ():
88+ def _get_url_with_session () -> Iterator [ requests . Response ] :
8389 with requests .Session () as session :
8490 request = session .get (url .get_full_url (), headers = headers , stream = True )
8591 yield request
8692
8793 return _get_url_with_session ()
8894
89- def collect_diffs (self , start_id , max_size = 1024 ):
95+ def collect_diffs (self , start_id : int , max_size : int = 1024 ) -> Optional [ DownloadResult ] :
9096 """ Create a MergeInputReader and download diffs starting with sequence
9197 id `start_id` into it. `max_size`
9298 restricts the number of diffs that are downloaded. The download
@@ -131,7 +137,9 @@ def collect_diffs(self, start_id, max_size=1024):
131137
132138 return DownloadResult (current_id - 1 , rd , newest .sequence )
133139
134- def apply_diffs (self , handler , start_id , max_size = 1024 , idx = "" , simplify = True ):
140+ def apply_diffs (self , handler : BaseHandler , start_id : int ,
141+ max_size : int = 1024 , idx : str = "" ,
142+ simplify : bool = True ) -> Optional [int ]:
135143 """ Download diffs starting with sequence id `start_id`, merge them
136144 together and then apply them to handler `handler`. `max_size`
137145 restricts the number of diffs that are downloaded. The download
@@ -165,9 +173,11 @@ def apply_diffs(self, handler, start_id, max_size=1024, idx="", simplify=True):
165173
166174 return diffs .id
167175
168- def apply_diffs_to_file (self , infile , outfile , start_id , max_size = 1024 ,
169- set_replication_header = True , extra_headers = None ,
170- outformat = None ):
176+ def apply_diffs_to_file (self , infile : str , outfile : str ,
177+ start_id : int , max_size : int = 1024 ,
178+ set_replication_header : bool = True ,
179+ extra_headers : Optional [Mapping [str , str ]] = None ,
180+ outformat : Optional [str ] = None ) -> Optional [Tuple [int , int ]]:
171181 """ Download diffs starting with sequence id `start_id`, merge them
172182 with the data from the OSM file named `infile` and write the result
173183 into a file with the name `outfile`. The output file must not yet
@@ -230,7 +240,8 @@ def apply_diffs_to_file(self, infile, outfile, start_id, max_size=1024,
230240 return (diffs .id , diffs .newest )
231241
232242
233- def timestamp_to_sequence (self , timestamp , balanced_search = False ):
243+ def timestamp_to_sequence (self , timestamp : dt .datetime ,
244+ balanced_search : bool = False ) -> Optional [int ]:
234245 """ Get the sequence number of the replication file that contains the
235246 given timestamp. The search algorithm is optimised for replication
236247 servers that publish updates in regular intervals. For servers
@@ -312,7 +323,7 @@ def timestamp_to_sequence(self, timestamp, balanced_search=False):
312323 return lower .sequence
313324
314325
315- def get_state_info (self , seq = None , retries = 2 ) :
326+ def get_state_info (self , seq : Optional [ int ] = None , retries : int = 2 ) -> Optional [ OsmosisState ] :
316327 """ Downloads and returns the state information for the given
317328 sequence. If the download is successful, a namedtuple with
318329 `sequence` and `timestamp` is returned, otherwise the function
@@ -359,21 +370,21 @@ def get_state_info(self, seq=None, retries=2):
359370
360371 return None
361372
def get_diff_block(self, seq: int) -> bytes:
    """ Downloads the diff with the given sequence number and returns
        it as a byte sequence.

        The URL comes from get_diff_url(), is wrapped by make_request()
        and fetched through open_url(), so a custom open_url() is
        honoured. Exceptions raised by open_url() are passed on to the
        caller (for example :code:`urllib.error.HTTPError` when the
        download goes through urllib.request).

        NOTE(review): a requests.Response is returned without a status
        check here - confirm whether failed downloads raise on that path.
    """
    request = self.make_request(self.get_diff_url(seq))

    with self.open_url(request) as resp:
        # A requests.Response exposes the raw payload as `content`.
        if hasattr(resp, 'content'):
            return cast(bytes, resp.content)

        # Otherwise this is a urllib.request response: read it fully.
        return cast(bytes, resp.read())
374385
375386
376- def get_state_url (self , seq ) :
387+ def get_state_url (self , seq : Optional [ int ]) -> str :
377388 """ Returns the URL of the state.txt files for a given sequence id.
378389
379390 If seq is `None` the URL for the latest state info is returned,
@@ -387,7 +398,7 @@ def get_state_url(self, seq):
387398 (self .baseurl , seq / 1000000 , (seq % 1000000 ) / 1000 , seq % 1000 )
388399
389400
390- def get_diff_url (self , seq ) :
401+ def get_diff_url (self , seq : int ) -> str :
391402 """ Returns the URL to the diff file for the given sequence id.
392403 """
393404 return '%s/%03i/%03i/%03i.%s' % \
0 commit comments