add typing information for replication functions

lonvia · lonvia · commit a930927be527 · 2022-09-13T21:21:29.000+02:00
diff --git a/src/osmium/_osmium.pyi b/src/osmium/_osmium.pyi
@@ -16,12 +16,12 @@ class BaseHandler: ...
 
 class SimpleHandler(BaseHandler):
     def __init__(self) -> None: ...
-    def apply_buffer(self, buffer: ByteString, format: str, locations: bool = ..., idx: str = ...) -> None: ...
+    def apply_buffer(self, buffer: Union[ByteString, str], format: str, locations: bool = ..., idx: str = ...) -> None: ...
     def apply_file(self, filename: StrPath, locations: bool = ..., idx: str = ...) -> None: ...
 
 class MergeInputReader:
     def __init__(self) -> None: ...
-    def add_buffer(self, buffer: ByteString, format: str) -> int: ...
+    def add_buffer(self, buffer: Union[ByteString, str], format: str) -> int: ...
     def add_file(self, file: str) -> int: ...
     def apply(self, handler: BaseHandler, idx: str = ..., simplify: bool = ...) -> None: ...
     def apply_to_reader(self, reader: osmium.io.Reader, writer: osmium.io.Writer, with_history: bool = ...) -> None: ...
diff --git a/src/osmium/py.typed b/src/osmium/py.typed
diff --git a/src/osmium/replication/_replication.pyi b/src/osmium/replication/_replication.pyi
@@ -0,0 +1,3 @@
+import datetime
+
+def newest_change_from_file(filename: str) -> datetime.datetime: ...
diff --git a/src/osmium/replication/server.py b/src/osmium/replication/server.py
@@ -1,6 +1,6 @@
 """ Helper functions to communicate with replication servers.
 """
-
+from typing import NamedTuple, Optional, Any, Iterator, cast, Mapping, Tuple
 import requests
 import urllib.request as urlrequest
 from urllib.error import URLError
@@ -9,7 +9,7 @@
 from contextlib import contextmanager
 from math import ceil
 
-from osmium import MergeInputReader
+from osmium import MergeInputReader, BaseHandler
 from osmium import io as oio
 from osmium import version
 
@@ -18,8 +18,14 @@
 LOG = logging.getLogger('pyosmium')
 LOG.addHandler(logging.NullHandler())
 
-OsmosisState = namedtuple('OsmosisState', ['sequence', 'timestamp'])
-DownloadResult = namedtuple('DownloadResult', ['id', 'reader', 'newest'])
+class OsmosisState(NamedTuple):
+    sequence: int
+    timestamp: dt.datetime
+
+class DownloadResult(NamedTuple):
+    id: int
+    reader: MergeInputReader
+    newest: int
 
 class ReplicationServer:
     """ Represents a connection to a server that publishes replication data.
@@ -30,37 +36,37 @@ class ReplicationServer:
         internally keeps a connection to the server making downloads faster.
     """
 
-    def __init__(self, url, diff_type='osc.gz'):
+    def __init__(self, url: str, diff_type: str = 'osc.gz') -> None:
         self.baseurl = url
         self.diff_type = diff_type
-        self.session = None
+        self.session: Optional[requests.Session] = None
 
-    def close(self):
+    def close(self) -> None:
         """ Close any open connection to the replication server.
         """
         if self.session is not None:
             self.session.close()
             self.session = None
 
-    def __enter__(self):
+    def __enter__(self) -> 'ReplicationServer':
         self.session = requests.Session()
         return self
 
-    def __exit__(self, exc_type, exc_value, traceback):
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
         self.close()
 
-    def make_request(self, url):
+    def make_request(self, url: str) -> urlrequest.Request:
         headers = {"User-Agent" : "pyosmium/{}".format(version.pyosmium_release)}
         return urlrequest.Request(url, headers=headers)
 
-    def open_url(self, url):
+    def open_url(self, url: urlrequest.Request) -> Any:
         """ Download a resource from the given URL and return a byte sequence
             of the content.
 
             This method has no support for cookies or any special authentication
             methods. If you need these, you have to provide your own custom URL
             opener. Overwrite open_url() with a method that receives an
-            urllib.Request object and returns a ByteIO-like object or a
+            urlrequest.Request object and returns a BytesIO-like object or a
             requests.Response.
 
             Example::
@@ -79,14 +85,14 @@ def open_url(self, url):
             return self.session.get(url.get_full_url(), headers=headers, stream=True)
 
         @contextmanager
-        def _get_url_with_session():
+        def _get_url_with_session() -> Iterator[requests.Response]:
             with requests.Session() as session:
                 request = session.get(url.get_full_url(), headers=headers, stream=True)
                 yield request
 
         return _get_url_with_session()
 
-    def collect_diffs(self, start_id, max_size=1024):
+    def collect_diffs(self, start_id: int, max_size: int = 1024) -> Optional[DownloadResult]:
         """ Create a MergeInputReader and download diffs starting with sequence
             id `start_id` into it. `max_size`
             restricts the number of diffs that are downloaded. The download
@@ -131,7 +137,9 @@ def collect_diffs(self, start_id, max_size=1024):
 
         return DownloadResult(current_id - 1, rd, newest.sequence)
 
-    def apply_diffs(self, handler, start_id, max_size=1024, idx="", simplify=True):
+    def apply_diffs(self, handler: BaseHandler, start_id: int,
+                    max_size: int = 1024, idx: str = "",
+                    simplify: bool = True) -> Optional[int]:
         """ Download diffs starting with sequence id `start_id`, merge them
             together and then apply them to handler `handler`. `max_size`
             restricts the number of diffs that are downloaded. The download
@@ -165,9 +173,11 @@ def apply_diffs(self, handler, start_id, max_size=1024, idx="", simplify=True):
 
         return diffs.id
 
-    def apply_diffs_to_file(self, infile, outfile, start_id, max_size=1024,
-                            set_replication_header=True, extra_headers=None,
-                            outformat=None):
+    def apply_diffs_to_file(self, infile: str, outfile: str,
+                            start_id: int, max_size: int = 1024,
+                            set_replication_header: bool = True,
+                            extra_headers: Optional[Mapping[str, str]] = None,
+                            outformat: Optional[str] = None) -> Optional[Tuple[int, int]]:
         """ Download diffs starting with sequence id `start_id`, merge them
             with the data from the OSM file named `infile` and write the result
             into a file with the name `outfile`. The output file must not yet
@@ -230,7 +240,8 @@ def apply_diffs_to_file(self, infile, outfile, start_id, max_size=1024,
         return (diffs.id, diffs.newest)
 
 
-    def timestamp_to_sequence(self, timestamp, balanced_search=False):
+    def timestamp_to_sequence(self, timestamp: dt.datetime,
+                              balanced_search: bool = False) -> Optional[int]:
         """ Get the sequence number of the replication file that contains the
             given timestamp. The search algorithm is optimised for replication
             servers that publish updates in regular intervals. For servers
@@ -312,7 +323,7 @@ def timestamp_to_sequence(self, timestamp, balanced_search=False):
                 return lower.sequence
 
 
-    def get_state_info(self, seq=None, retries=2):
+    def get_state_info(self, seq: Optional[int] = None, retries: int = 2) -> Optional[OsmosisState]:
         """ Downloads and returns the state information for the given
             sequence. If the download is successful, a namedtuple with
             `sequence` and `timestamp` is returned, otherwise the function
@@ -359,21 +370,21 @@ def get_state_info(self, seq=None, retries=2):
 
         return None
 
-    def get_diff_block(self, seq):
+    def get_diff_block(self, seq: int) -> str:
         """ Downloads the diff with the given sequence number and returns
             it as a byte sequence. Throws a :code:`urllib.error.HTTPError`
             if the file cannot be downloaded.
         """
         with self.open_url(self.make_request(self.get_diff_url(seq))) as resp:
             if hasattr(resp, 'content'):
                 # generated by requests
-                return resp.content
+                return cast(str, resp.content)
 
             # generated by urllib.request
-            return resp.read()
+            return cast(str, resp.read())
 
 
-    def get_state_url(self, seq):
+    def get_state_url(self, seq: Optional[int]) -> str:
         """ Returns the URL of the state.txt files for a given sequence id.
 
             If seq is `None` the URL for the latest state info is returned,
@@ -387,7 +398,7 @@ def get_state_url(self, seq):
                (self.baseurl, seq / 1000000, (seq % 1000000) / 1000, seq % 1000)
 
 
-    def get_diff_url(self, seq):
+    def get_diff_url(self, seq: int) -> str:
         """ Returns the URL to the diff file for the given sequence id.
         """
         return '%s/%03i/%03i/%03i.%s' % \
diff --git a/src/osmium/replication/utils.py b/src/osmium/replication/utils.py
@@ -1,5 +1,5 @@
 """ Helper functions for change file handling. """
-
+from typing import NamedTuple, Optional
 import logging
 import datetime as dt
 from collections import namedtuple
@@ -8,10 +8,13 @@
 
 LOG = logging.getLogger('pyosmium')
 
-ReplicationHeader = namedtuple('ReplicationHeader',
-                               ['url', 'sequence', 'timestamp'])
+class ReplicationHeader(NamedTuple):
+    url: Optional[str]
+    sequence: Optional[int]
+    timestamp: Optional[dt.datetime]
+
 
-def get_replication_header(fname):
+def get_replication_header(fname: str) -> ReplicationHeader:
     """ Scans the given file for an Osmosis replication header. It returns
         a namedtuple with `url`, `sequence` and `timestamp`. Each or all fields
         may be None, if the piece of information is not available. If any of
@@ -24,20 +27,21 @@ def get_replication_header(fname):
     r = oreader(fname, NOTHING)
     h = r.header()
 
-    ts = h.get("osmosis_replication_timestamp")
-    url = h.get("osmosis_replication_base_url")
+    tsstr = h.get("osmosis_replication_timestamp")
+    url: Optional[str] = h.get("osmosis_replication_base_url")
 
-    if url or ts:
+    if url or tsstr:
         LOG.debug("Replication information found in OSM file header.")
 
     if url:
         LOG.debug("Replication URL: %s", url)
         # the sequence ID is only considered valid if a URL is given
-        seq = h.get("osmosis_replication_sequence_number")
-        if seq:
-            LOG.debug("Replication sequence: %s", seq)
+        seqstr = h.get("osmosis_replication_sequence_number")
+        seq: Optional[int]
+        if seqstr:
+            LOG.debug("Replication sequence: %s", seqstr)
             try:
-                seq = int(seq)
+                seq = int(seqstr)
                 if seq < 0:
                     LOG.warning("Sequence id '%d' in OSM file header is negative. Ignored.", seq)
                     seq = None
@@ -50,10 +54,10 @@ def get_replication_header(fname):
         url = None
         seq = None
 
-    if ts:
-        LOG.debug("Replication timestamp: %s", ts)
+    if tsstr:
+        LOG.debug("Replication timestamp: %s", tsstr)
         try:
-            ts = dt.datetime.strptime(ts, "%Y-%m-%dT%H:%M:%SZ")
+            ts = dt.datetime.strptime(tsstr, "%Y-%m-%dT%H:%M:%SZ")
             ts = ts.replace(tzinfo=dt.timezone.utc)
 
         except ValueError:

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+import datetime`
	`2`	`+`
	`3`	`+def newest_change_from_file(filename: str) -> datetime.datetime: ...`