Skip to content

Commit a2f1606

Browse files
committed
add end date to pyosmium-get-changes
1 parent 6ef9865 commit a2f1606

File tree

2 files changed

+91
-20
lines changed

2 files changed

+91
-20
lines changed

src/osmium/tools/pyosmium_get_changes.py

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# SPDX-License-Identifier: BSD-2-Clause
2+
#
3+
# This file is part of pyosmium. (https://osmcode.org/pyosmium/)
4+
#
5+
# Copyright (C) 2025 Sarah Hoffmann <[email protected]> and others.
6+
# For a full list of authors see the git log.
17
"""
28
Fetch diffs from an OSM planet server.
39
@@ -23,12 +29,14 @@
2329
However, it can read cookies from a Netscape-style cookie jar file, send these
2430
cookies to the server and will save received cookies to the jar file.
2531
"""
32+
from typing import Optional, List
2633
import sys
2734
import logging
2835
from textwrap import dedent as msgfmt
2936

3037
from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError
3138
import datetime as dt
39+
from dataclasses import dataclass
3240
import http.cookiejar
3341

3442
from osmium.replication import server as rserv
@@ -40,25 +48,34 @@
4048
log = logging.getLogger()
4149

4250

43-
class ReplicationStart(object):
51+
@dataclass
52+
class ReplicationStart:
4453
""" Represents the point where changeset download should begin.
4554
"""
55+
date: Optional[dt.datetime] = None
56+
seq_id: Optional[int] = None
57+
source: Optional[str] = None
4658

47-
def __init__(self, date=None, seq_id=None, src=None):
48-
self.date = date
49-
self.seq_id = seq_id
50-
self.source = src
51-
52-
def get_sequence(self, svr):
59+
def get_sequence(self, svr: rserv.ReplicationServer) -> Optional[int]:
    """ Return the sequence ID of the first diff to download.

        When a sequence ID is set, the ID following it is returned.
        Otherwise the start date is handed to the replication server
        `svr` for lookup and the result of that lookup is returned
        unchanged.

        Raises a ValueError when neither a sequence ID nor a date is set.
    """
    if self.seq_id is not None:
        # Hand the argument to the logger instead of pre-formatting, so
        # the string is only built when debug output is enabled.
        log.debug("Using given sequence ID %d", self.seq_id)
        return self.seq_id + 1

    # Explicit check instead of `assert`: assertions are removed when
    # Python runs with -O, which would let None slip through to the server.
    if self.date is None:
        raise ValueError("Neither a sequence ID nor a date has been set.")

    log.debug("Looking up sequence ID for timestamp %s", self.date)
    return svr.timestamp_to_sequence(self.date)
5967

68+
def get_end_sequence(self, svr: rserv.ReplicationServer) -> Optional[int]:
    """ Return the sequence ID of the last diff to download.

        When a sequence ID is set, it is returned as is. Otherwise the
        date is handed to the replication server `svr` for lookup and the
        result of that lookup is returned unchanged.

        Raises a ValueError when neither a sequence ID nor a date is set.
    """
    if self.seq_id is not None:
        # Hand the argument to the logger instead of pre-formatting, so
        # the string is only built when debug output is enabled.
        log.debug("Using end sequence ID %d", self.seq_id)
        return self.seq_id

    # Explicit check instead of `assert`: assertions are removed when
    # Python runs with -O, which would let None slip through to the server.
    if self.date is None:
        raise ValueError("Neither a sequence ID nor a date has been set.")

    log.debug("Looking up end sequence ID for timestamp %s", self.date)
    return svr.timestamp_to_sequence(self.date)
76+
6077
@staticmethod
61-
def from_id(idstr):
78+
def from_id(idstr: str) -> 'ReplicationStart':
6279
try:
6380
seq_id = int(idstr)
6481
except ValueError:
@@ -70,7 +87,7 @@ def from_id(idstr):
7087
return ReplicationStart(seq_id=seq_id)
7188

7289
@staticmethod
73-
def from_date(datestr):
90+
def from_date(datestr: str) -> 'ReplicationStart':
7491
try:
7592
date = dt.datetime.strptime(datestr, "%Y-%m-%dT%H:%M:%SZ")
7693
date = date.replace(tzinfo=dt.timezone.utc)
@@ -81,7 +98,7 @@ def from_date(datestr):
8198
return ReplicationStart(date=date)
8299

83100
@staticmethod
84-
def from_osm_file(fname, ignore_headers):
101+
def from_osm_file(fname: str, ignore_headers: bool) -> 'ReplicationStart':
85102
if ignore_headers:
86103
ts = None
87104
seq = None
@@ -102,10 +119,10 @@ def from_osm_file(fname, ignore_headers):
102119
if ts is None:
103120
raise ArgumentTypeError("OSM file does not seem to contain valid data.")
104121

105-
return ReplicationStart(seq_id=seq, date=ts, src=url)
122+
return ReplicationStart(seq_id=seq, date=ts, source=url)
106123

107124

108-
def write_end_sequence(fname, seqid):
125+
def write_end_sequence(fname: str, seqid: int) -> None:
109126
"""Either writes out the sequence file or prints the sequence id to stdout.
110127
"""
111128
if fname is None:
@@ -115,7 +132,7 @@ def write_end_sequence(fname, seqid):
115132
fd.write(str(seqid))
116133

117134

118-
def get_arg_parser(from_main=False):
135+
def get_arg_parser(from_main: bool = False) -> ArgumentParser:
119136
parser = ArgumentParser(prog='pyosmium-get-changes',
120137
description=__doc__,
121138
usage=None if from_main else 'pyosmium-get-changes [options]',
@@ -134,8 +151,9 @@ def get_arg_parser(from_main=False):
134151
parser.add_argument('--cookie', dest='cookie',
135152
help='Netscape-style cookie jar file to read cookies from '
136153
'and where received cookies will be written to.')
137-
parser.add_argument('-s', '--size', dest='outsize', type=int, default=100,
138-
help='Maximum data to load in MB (default: 100MB).')
154+
parser.add_argument('-s', '--size', dest='outsize', type=int,
155+
help='Maximum data to load in MB '
156+
'(Defaults to 100MB when no end date/ID has been set).')
139157
group = parser.add_mutually_exclusive_group()
140158
group.add_argument('-I', '--start-id', dest='start',
141159
type=ReplicationStart.from_id, metavar='ID',
@@ -145,6 +163,13 @@ def get_arg_parser(from_main=False):
145163
help='Date when to start updates')
146164
group.add_argument('-O', '--start-osm-data', dest='start_file', metavar='OSMFILE',
147165
help='start at the date of the newest OSM object in the file')
166+
group = parser.add_mutually_exclusive_group()
167+
group.add_argument('--end-id', dest='end',
168+
type=ReplicationStart.from_id, metavar='ID',
169+
help='Last sequence ID to download.')
170+
group.add_argument('-E', '--end-date', dest='end', metavar='DATE',
171+
type=ReplicationStart.from_date,
172+
help='Do not download diffs later than the given date.')
148173
parser.add_argument('-f', '--sequence-file', dest='seq_file',
149174
help='Sequence file. If the file exists, then updates '
150175
'will start after the id given in the file. At the '
@@ -164,7 +189,7 @@ def get_arg_parser(from_main=False):
164189
return parser
165190

166191

167-
def pyosmium_get_changes(args):
192+
def pyosmium_get_changes(args: List[str]) -> int:
168193
logging.basicConfig(stream=sys.stderr,
169194
format='%(asctime)s %(levelname)s: %(message)s',
170195
datefmt='%Y-%m-%d %H:%M:%S')
@@ -223,14 +248,30 @@ def pyosmium_get_changes(args):
223248
write_end_sequence(options.seq_file, startseq - 1)
224249
return 0
225250

226-
log.debug("Starting download at ID %d (max %d MB)" % (startseq, options.outsize))
251+
log.debug("Starting download at ID %d (max %f MB)"
252+
% (startseq, options.outsize or float('inf')))
227253
if options.outformat is not None:
228254
outhandler = SimpleWriter(options.outfile, filetype=options.outformat)
229255
else:
230256
outhandler = SimpleWriter(options.outfile)
231257

232-
endseq = svr.apply_diffs(outhandler, startseq, max_size=options.outsize*1024,
233-
simplify=options.simplify)
258+
if options.outsize is not None:
259+
max_size = options.outsize * 1024
260+
elif options.end is None:
261+
max_size = 100 * 1024
262+
else:
263+
max_size = None
264+
265+
if options.end is None:
266+
end_id = None
267+
else:
268+
end_id = options.end.get_end_sequence(svr)
269+
if end_id is None:
270+
log.error("Cannot find the end date/ID on the server.")
271+
return 1
272+
273+
endseq = svr.apply_diffs(outhandler, startseq, max_size=max_size,
274+
end_id=end_id, simplify=options.simplify)
234275
outhandler.close()
235276

236277
# save cookies
@@ -247,7 +288,7 @@ def pyosmium_get_changes(args):
247288
return 0
248289

249290

250-
def main():
291+
def main() -> int:
251292
logging.basicConfig(stream=sys.stderr,
252293
format='%(asctime)s %(levelname)s: %(message)s',
253294
datefmt='%Y-%m-%d %H:%M:%S')

test/test_pyosmium_get_changes.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from textwrap import dedent
1010
import uuid
1111

12+
import pytest
13+
1214
import osmium.replication.server
1315
import osmium
1416
from osmium.tools.pyosmium_get_changes import pyosmium_get_changes
@@ -105,3 +107,31 @@ def test_get_simple_update(self, tmp_path, httpserver):
105107
assert ids.nodes == [12, 13]
106108
assert ids.ways == [2]
107109
assert ids.relations == []
110+
111+
@pytest.mark.parametrize('end_id,max_size,actual_end', [(107, None, 107),
                                                        (None, 1, 108),
                                                        (105, 1, 105),
                                                        (110, 1, 108)])
def test_apply_diffs_endid(self, tmp_path, httpserver, end_id, max_size, actual_end):
    """ Download diffs starting after sequence 100 with an optional end ID
        and an optional size limit, then check which relations ended up
        in the output file.
    """
    # The state file announces sequence 140 as the newest available diff.
    httpserver.expect_request('/state.txt').respond_with_data("""\
sequenceNumber=140
timestamp=2017-08-26T11\\:04\\:02Z
""")

    # Each diff consists of a single relation that carries the sequence
    # number as its ID. The member list is the same for all of them.
    members = ",".join(f"n{nid}@" for nid in range(1, 6000))
    for seq in range(100, 141):
        httpserver.expect_request(f'/000/000/{seq}.opl')\
                  .respond_with_data(f"r{seq} M" + members)

    outfile = tmp_path / f"{uuid.uuid4()}.opl"
    params = [httpserver, '--diff-type', 'opl', '-I', '100', '-o', str(outfile)]
    if end_id is not None:
        params += ['--end-id', str(end_id)]
    if max_size is not None:
        params += ['-s', str(max_size)]

    assert self.main(*params) == 0

    ids = IDCollector()
    osmium.apply(str(outfile), ids)

    # Relations 101..actual_end must have been written, nothing beyond.
    assert ids.relations == list(range(101, actual_end + 1))

0 commit comments

Comments
 (0)