1+ # SPDX-License-Identifier: BSD-2-Clause
2+ #
3+ # This file is part of pyosmium. (https://osmcode.org/pyosmium/)
4+ #
5+ # Copyright (C) 2025 Sarah Hoffmann <[email protected] > and others.
6+ # For a full list of authors see the git log.
17"""
28Fetch diffs from an OSM planet server.
39
2329However, it can read cookies from a Netscape-style cookie jar file, send these
2430cookies to the server and will save received cookies to the jar file.
2531"""
32+ from typing import Optional , List
2633import sys
2734import logging
2835from textwrap import dedent as msgfmt
2936
3037from argparse import ArgumentParser , RawDescriptionHelpFormatter , ArgumentTypeError
3138import datetime as dt
39+ from dataclasses import dataclass
3240import http .cookiejar
3341
3442from osmium .replication import server as rserv
4048log = logging .getLogger ()
4149
4250
43- class ReplicationStart (object ):
51+ @dataclass
52+ class ReplicationStart :
4453 """ Represents the point where changeset download should begin.
4554 """
55+ date : Optional [dt .datetime ] = None
56+ seq_id : Optional [int ] = None
57+ source : Optional [str ] = None
4658
47- def __init__ (self , date = None , seq_id = None , src = None ):
48- self .date = date
49- self .seq_id = seq_id
50- self .source = src
51-
52- def get_sequence (self , svr ):
def get_sequence(self, svr: rserv.ReplicationServer) -> Optional[int]:
    """ Return the sequence ID of the first diff to download.

        When a sequence ID is set, the stored ID is treated as already
        processed and the following one (``seq_id + 1``) is returned.
        Otherwise the ID is looked up on the replication server `svr`
        for the stored date, and the result of
        `svr.timestamp_to_sequence()` is returned unchanged.
    """
    if self.seq_id is not None:
        # Pass the value as a logging argument so that the message is
        # only formatted when debug output is actually enabled.
        log.debug("Using given sequence ID %d", self.seq_id)
        return self.seq_id + 1

    # Invariant: without a sequence ID there must be a date to look up.
    assert self.date is not None
    log.debug("Looking up sequence ID for timestamp %s", self.date)
    return svr.timestamp_to_sequence(self.date)
5967
def get_end_sequence(self, svr: rserv.ReplicationServer) -> Optional[int]:
    """ Return the sequence ID of the last diff to download.

        Unlike `get_sequence()`, a stored sequence ID is returned as is
        (no increment). Without a sequence ID, the ID is looked up on the
        replication server `svr` for the stored date, and the result of
        `svr.timestamp_to_sequence()` is returned unchanged.
    """
    if self.seq_id is not None:
        # Pass the value as a logging argument so that the message is
        # only formatted when debug output is actually enabled.
        log.debug("Using end sequence ID %d", self.seq_id)
        return self.seq_id

    # Invariant: without a sequence ID there must be a date to look up.
    assert self.date is not None
    log.debug("Looking up end sequence ID for timestamp %s", self.date)
    return svr.timestamp_to_sequence(self.date)
76+
6077 @staticmethod
61- def from_id (idstr ) :
78+ def from_id (idstr : str ) -> 'ReplicationStart' :
6279 try :
6380 seq_id = int (idstr )
6481 except ValueError :
@@ -70,7 +87,7 @@ def from_id(idstr):
7087 return ReplicationStart (seq_id = seq_id )
7188
7289 @staticmethod
73- def from_date (datestr ) :
90+ def from_date (datestr : str ) -> 'ReplicationStart' :
7491 try :
7592 date = dt .datetime .strptime (datestr , "%Y-%m-%dT%H:%M:%SZ" )
7693 date = date .replace (tzinfo = dt .timezone .utc )
@@ -81,7 +98,7 @@ def from_date(datestr):
8198 return ReplicationStart (date = date )
8299
83100 @staticmethod
84- def from_osm_file (fname , ignore_headers ) :
101+ def from_osm_file (fname : str , ignore_headers : bool ) -> 'ReplicationStart' :
85102 if ignore_headers :
86103 ts = None
87104 seq = None
@@ -102,10 +119,10 @@ def from_osm_file(fname, ignore_headers):
102119 if ts is None :
103120 raise ArgumentTypeError ("OSM file does not seem to contain valid data." )
104121
105- return ReplicationStart (seq_id = seq , date = ts , src = url )
122+ return ReplicationStart (seq_id = seq , date = ts , source = url )
106123
107124
108- def write_end_sequence (fname , seqid ) :
125+ def write_end_sequence (fname : str , seqid : int ) -> None :
109126 """Either writes out the sequence file or prints the sequence id to stdout.
110127 """
111128 if fname is None :
@@ -115,7 +132,7 @@ def write_end_sequence(fname, seqid):
115132 fd .write (str (seqid ))
116133
117134
118- def get_arg_parser (from_main = False ):
135+ def get_arg_parser (from_main : bool = False ) -> ArgumentParser :
119136 parser = ArgumentParser (prog = 'pyosmium-get-changes' ,
120137 description = __doc__ ,
121138 usage = None if from_main else 'pyosmium-get-changes [options]' ,
@@ -134,8 +151,9 @@ def get_arg_parser(from_main=False):
134151 parser .add_argument ('--cookie' , dest = 'cookie' ,
135152 help = 'Netscape-style cookie jar file to read cookies from '
136153 'and where received cookies will be written to.' )
137- parser .add_argument ('-s' , '--size' , dest = 'outsize' , type = int , default = 100 ,
138- help = 'Maximum data to load in MB (default: 100MB).' )
154+ parser .add_argument ('-s' , '--size' , dest = 'outsize' , type = int ,
155+ help = 'Maximum data to load in MB '
156+ '(Defaults to 100MB when no end date/ID has been set).' )
139157 group = parser .add_mutually_exclusive_group ()
140158 group .add_argument ('-I' , '--start-id' , dest = 'start' ,
141159 type = ReplicationStart .from_id , metavar = 'ID' ,
@@ -145,6 +163,13 @@ def get_arg_parser(from_main=False):
145163 help = 'Date when to start updates' )
146164 group .add_argument ('-O' , '--start-osm-data' , dest = 'start_file' , metavar = 'OSMFILE' ,
147165 help = 'start at the date of the newest OSM object in the file' )
166+ group = parser .add_mutually_exclusive_group ()
167+ group .add_argument ('--end-id' , dest = 'end' ,
168+ type = ReplicationStart .from_id , metavar = 'ID' ,
169+ help = 'Last sequence ID to download.' )
170+ group .add_argument ('-E' , '--end-date' , dest = 'end' , metavar = 'DATE' ,
171+ type = ReplicationStart .from_date ,
172+ help = 'Do not download diffs later than the given date.' )
148173 parser .add_argument ('-f' , '--sequence-file' , dest = 'seq_file' ,
149174 help = 'Sequence file. If the file exists, then updates '
150175 'will start after the id given in the file. At the '
@@ -164,7 +189,7 @@ def get_arg_parser(from_main=False):
164189 return parser
165190
166191
167- def pyosmium_get_changes (args ) :
192+ def pyosmium_get_changes (args : List [ str ]) -> int :
168193 logging .basicConfig (stream = sys .stderr ,
169194 format = '%(asctime)s %(levelname)s: %(message)s' ,
170195 datefmt = '%Y-%m-%d %H:%M:%S' )
@@ -223,14 +248,30 @@ def pyosmium_get_changes(args):
223248 write_end_sequence (options .seq_file , startseq - 1 )
224249 return 0
225250
226- log .debug ("Starting download at ID %d (max %d MB)" % (startseq , options .outsize ))
251+ log .debug ("Starting download at ID %d (max %f MB)"
252+ % (startseq , options .outsize or float ('inf' )))
227253 if options .outformat is not None :
228254 outhandler = SimpleWriter (options .outfile , filetype = options .outformat )
229255 else :
230256 outhandler = SimpleWriter (options .outfile )
231257
232- endseq = svr .apply_diffs (outhandler , startseq , max_size = options .outsize * 1024 ,
233- simplify = options .simplify )
258+ if options .outsize is not None :
259+ max_size = options .outsize * 1024
260+ elif options .end is None :
261+ max_size = 100 * 1024
262+ else :
263+ max_size = None
264+
265+ if options .end is None :
266+ end_id = None
267+ else :
268+ end_id = options .end .get_end_sequence (svr )
269+ if end_id is None :
270+ log .error ("Cannot find the end date/ID on the server." )
271+ return 1
272+
273+ endseq = svr .apply_diffs (outhandler , startseq , max_size = max_size ,
274+ end_id = end_id , simplify = options .simplify )
234275 outhandler .close ()
235276
236277 # save cookies
@@ -247,7 +288,7 @@ def pyosmium_get_changes(args):
247288 return 0
248289
249290
250- def main ():
291+ def main () -> int :
251292 logging .basicConfig (stream = sys .stderr ,
252293 format = '%(asctime)s %(levelname)s: %(message)s' ,
253294 datefmt = '%Y-%m-%d %H:%M:%S' )
0 commit comments