Skip to content

Commit 353e3bf

Browse files
authored
Merge pull request #1369 from joto/refactor-osmdata
Refactor osmdata
2 parents d7843de + 3549a3c commit 353e3bf

File tree

11 files changed

+476
-402
lines changed

11 files changed

+476
-402
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ set(osm2pgsql_lib_SOURCES
66
expire-tiles.cpp
77
gazetteer-style.cpp
88
geometry-processor.cpp
9+
input.cpp
910
logging.cpp
1011
middle-pgsql.cpp
1112
middle-ram.cpp

src/input.cpp

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
2+
#include <memory>
3+
#include <queue>
4+
#include <stdexcept>
5+
#include <vector>
6+
7+
#include <osmium/io/any_input.hpp>
8+
#include <osmium/visitor.hpp>
9+
10+
#include "format.hpp"
11+
#include "input.hpp"
12+
#include "logging.hpp"
13+
#include "osmdata.hpp"
14+
#include "progress-display.hpp"
15+
16+
/**
 * Validate the (type, id, version) of the current object against the one
 * seen before it.
 *
 * \param last The previously accepted (type, id, version).
 * \param curr The (type, id, version) of the object being checked.
 * \returns curr, so the caller can store it as the new "last".
 * \throws std::runtime_error if curr has a negative id or does not sort
 *         strictly after last.
 */
type_id_version check_input(type_id_version const &last, type_id_version curr)
{
    if (curr.id < 0) {
        throw std::runtime_error{
            "Negative OSM object ids are not allowed: {} id {}."_format(
                osmium::item_type_to_name(curr.type), curr.id)};
    }

    // Different object types: only the relative order of the types matters.
    if (last.type != curr.type) {
        if (item_type_to_nwr_index(last.type) >
            item_type_to_nwr_index(curr.type)) {
            throw std::runtime_error{
                "Input data is not ordered: {} after {}."_format(
                    osmium::item_type_to_name(curr.type),
                    osmium::item_type_to_name(last.type))};
        }
        return curr;
    }

    // Same object type from here on: ids must not go backwards...
    if (curr.id < last.id) {
        throw std::runtime_error{
            "Input data is not ordered: {} id {} after {}."_format(
                osmium::item_type_to_name(last.type), curr.id, last.id)};
    }

    // ...and for the same id the version must strictly increase.
    if (curr.id == last.id && curr.version <= last.version) {
        throw std::runtime_error{
            "Input data is not ordered: {} id {} version {} after {}."_format(
                osmium::item_type_to_name(last.type), curr.id, curr.version,
                last.version)};
    }

    return curr;
}
54+
55+
/**
 * Convenience overload: take type, id, and version from an OSM object and
 * run them through the tuple-based check_input().
 *
 * \param last The previously accepted (type, id, version).
 * \param object The OSM object to check.
 * \returns The (type, id, version) of object.
 */
type_id_version check_input(type_id_version const &last,
                            osmium::OSMObject const &object)
{
    type_id_version const curr{object.type(), object.id(), object.version()};
    return check_input(last, curr);
}
60+
61+
/**
62+
* A data source is where we get the OSM objects from, one at a time. It
63+
* wraps the osmium::io::Reader.
64+
*/
65+
class data_source_t
66+
{
67+
public:
68+
explicit data_source_t(osmium::io::File const &file)
69+
: m_reader(new osmium::io::Reader{file})
70+
{
71+
get_next_nonempty_buffer();
72+
m_last = check_input(m_last, *m_it);
73+
}
74+
75+
bool empty() const noexcept { return !m_buffer; }
76+
77+
bool next()
78+
{
79+
assert(!empty());
80+
++m_it;
81+
82+
while (m_it == m_end) {
83+
if (!get_next_nonempty_buffer()) {
84+
return false;
85+
}
86+
}
87+
88+
m_last = check_input(m_last, *m_it);
89+
return true;
90+
}
91+
92+
osmium::OSMObject *get() noexcept
93+
{
94+
assert(!empty());
95+
return &*m_it;
96+
}
97+
98+
std::size_t offset() const noexcept { return m_reader->offset(); }
99+
100+
void close()
101+
{
102+
m_reader->close();
103+
m_reader.reset();
104+
}
105+
106+
private:
107+
bool get_next_nonempty_buffer()
108+
{
109+
while ((m_buffer = m_reader->read())) {
110+
m_it = m_buffer.begin<osmium::OSMObject>();
111+
m_end = m_buffer.end<osmium::OSMObject>();
112+
if (m_it != m_end) {
113+
return true;
114+
}
115+
}
116+
return false;
117+
}
118+
119+
using iterator = osmium::memory::Buffer::t_iterator<osmium::OSMObject>;
120+
121+
std::unique_ptr<osmium::io::Reader> m_reader;
122+
osmium::memory::Buffer m_buffer{};
123+
iterator m_it{};
124+
iterator m_end{};
125+
type_id_version m_last = {osmium::item_type::node, 0, 0};
126+
127+
}; // class data_source_t
128+
129+
/**
130+
* A element in a priority queue of OSM objects. Holds a pointer to the OSM
131+
* object as well as a pointer to the source the OSM object came from.
132+
*/
133+
class queue_element_t
134+
{
135+
public:
136+
queue_element_t(osmium::OSMObject *object, data_source_t *source) noexcept
137+
: m_object(object), m_source(source)
138+
{}
139+
140+
osmium::OSMObject const &object() const noexcept { return *m_object; }
141+
142+
osmium::OSMObject &object() noexcept { return *m_object; }
143+
144+
data_source_t *data_source() const noexcept { return m_source; }
145+
146+
friend bool operator<(queue_element_t const &lhs,
147+
queue_element_t const &rhs) noexcept
148+
{
149+
// This is needed for the priority queue. We want objects with smaller
150+
// id (and earlier versions of the same object) to come first, but
151+
// the priority queue expects largest first. So we need to reverse the
152+
// comparison here.
153+
return lhs.object() > rhs.object();
154+
}
155+
156+
friend bool operator==(queue_element_t const &lhs,
157+
queue_element_t const &rhs) noexcept
158+
{
159+
return lhs.object().type() == rhs.object().type() &&
160+
lhs.object().id() == rhs.object().id();
161+
}
162+
163+
friend bool operator!=(queue_element_t const &lhs,
164+
queue_element_t const &rhs) noexcept
165+
{
166+
return !(lhs == rhs);
167+
}
168+
169+
private:
170+
osmium::OSMObject *m_object;
171+
data_source_t *m_source;
172+
173+
}; // class queue_element_t
174+
175+
std::vector<osmium::io::File>
176+
prepare_input_files(std::vector<std::string> const &input_files,
177+
std::string const &input_format, bool append)
178+
{
179+
std::vector<osmium::io::File> files;
180+
181+
for (auto const &filename : input_files) {
182+
osmium::io::File file{filename, input_format};
183+
184+
if (file.format() == osmium::io::file_format::unknown) {
185+
if (input_format.empty()) {
186+
throw std::runtime_error{
187+
"Cannot detect file format for '{}'. Try using -r."_format(
188+
filename)};
189+
}
190+
throw std::runtime_error{
191+
"Unknown file format '{}'."_format(input_format)};
192+
}
193+
194+
if (!append && file.has_multiple_object_versions()) {
195+
throw std::runtime_error{
196+
"Reading an OSM change file only works in append mode."};
197+
}
198+
199+
log_debug("Reading file: {}", filename);
200+
201+
files.emplace_back(file);
202+
}
203+
204+
return files;
205+
}
206+
207+
/**
 * Hand one OSM object to osmdata and the progress display, signalling the
 * end of the node and way sections when the object type changes.
 *
 * The type of the previously seen object is kept in a function-local static,
 * so it persists across calls (and across input files).
 *
 * \param object The object to process. Callers run check_input() on every
 *        object first, so types arrive in order node, way, relation.
 * \param osmdata Receives after_nodes()/after_ways() and the object itself.
 * \param progress Receives the counter switches and the object itself.
 */
static void apply(osmium::OSMObject &object, osmdata_t &osmdata,
                  progress_display_t &progress)
{
    static osmium::item_type last_type = osmium::item_type::node;

    if (last_type != object.type()) {
        if (last_type == osmium::item_type::node) {
            osmdata.after_nodes();
            progress.start_way_counter();
        }
        // Keyed on the new type instead of the old one so that input
        // without any ways (node directly followed by relation) still
        // ends the way section and starts the relation counter.
        if (object.type() == osmium::item_type::relation) {
            osmdata.after_ways();
            progress.start_relation_counter();
        }
        last_type = object.type();
    }

    osmium::apply_item(object, osmdata, progress);
}
225+
226+
/**
 * Read all objects from a single input file, check their order, and apply
 * them one by one.
 *
 * \throws std::runtime_error from check_input() on bad input, or if a
 *         deleted object is found while append is false.
 */
static void process_single_file(osmium::io::File const &file,
                                osmdata_t &osmdata,
                                progress_display_t &progress, bool append)
{
    osmium::io::Reader reader{file};
    type_id_version previous{osmium::item_type::node, 0, 0};
    bool const reject_deleted = !append;

    for (;;) {
        osmium::memory::Buffer buffer = reader.read();
        if (!buffer) {
            // An invalid buffer ends the read loop.
            break;
        }

        for (auto &object : buffer.select<osmium::OSMObject>()) {
            previous = check_input(previous, object);

            if (reject_deleted && object.deleted()) {
                throw std::runtime_error{
                    "Input file contains deleted objects but "
                    "you are not in append mode."};
            }

            apply(object, osmdata, progress);
        }
    }

    reader.close();
}
247+
248+
/**
 * Merge the objects from several input files into one ordered stream and
 * apply them. If the same object (type and id) comes up several times in
 * a row, only the last one taken from the queue is applied.
 *
 * \throws std::runtime_error from the order checks in the data sources, or
 *         if a deleted object is to be applied while append is false.
 */
static void process_multiple_files(std::vector<osmium::io::File> const &files,
                                   osmdata_t &osmdata,
                                   progress_display_t &progress, bool append)
{
    // The queue elements hold pointers into this vector, so reserve the
    // full size up front to keep the sources from being relocated.
    std::vector<data_source_t> sources;
    sources.reserve(files.size());

    std::priority_queue<queue_element_t> pending;

    for (auto const &file : files) {
        sources.emplace_back(file);

        auto &source = sources.back();
        if (!source.empty()) {
            pending.emplace(source.get(), &source);
        }
    }

    while (!pending.empty()) {
        auto current = pending.top();
        pending.pop();

        // Skip this object if the next one in the queue is the same object.
        bool const superseded = !pending.empty() && current == pending.top();
        if (!superseded) {
            if (!append && current.object().deleted()) {
                throw std::runtime_error{
                    "Input file contains deleted objects but "
                    "you are not in append mode."};
            }
            apply(current.object(), osmdata, progress);
        }

        // Only advance the source after the object has been used, because
        // advancing can replace the buffer the object lives in.
        auto *origin = current.data_source();
        if (origin->next()) {
            pending.emplace(origin->get(), origin);
        }
    }

    for (auto &source : sources) {
        source.close();
    }
}
287+
288+
/**
 * Process all input files: exactly one file is read directly, any other
 * number of files goes through the merging code. Afterwards the end of the
 * relations is signalled and the progress summary is printed.
 */
void process_files(std::vector<osmium::io::File> const &files,
                   osmdata_t &osmdata, bool append, bool show_progress)
{
    progress_display_t progress{show_progress};

    if (files.size() != 1) {
        process_multiple_files(files, osmdata, progress, append);
    } else {
        process_single_file(files.front(), osmdata, progress, append);
    }

    osmdata.after_relations();
    progress.print_summary();
}

src/input.hpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#ifndef OSM2PGSQL_INPUT_HPP
2+
#define OSM2PGSQL_INPUT_HPP
3+
4+
/**
5+
* \file
6+
*
7+
* This file is part of osm2pgsql (https://github.com/openstreetmap/osm2pgsql).
8+
*
9+
* It contains the functions reading and checking the input data.
10+
*/
11+
12+
#include <string>
13+
#include <vector>
14+
15+
#include <osmium/fwd.hpp>
16+
#include <osmium/io/file.hpp>
17+
18+
#include "osmtypes.hpp"
19+
20+
class osmdata_t;
21+
class progress_display_t;
22+
23+
// The (type, id, version) triple of an OSM object, as used by check_input()
// to compare consecutive objects in the input.
struct type_id_version
24+
{
25+
osmium::item_type type; // object type (e.g. osmium::item_type::node)
26+
osmid_t id; // object id; check_input() rejects negative values
27+
osmium::object_version_type version; // object version
28+
};
29+
30+
/**
31+
* Compare two tuples (type, id, version) and throw a descriptive error if either
32+
* the curr id is negative or if the data is not ordered.
33+
*/
34+
type_id_version check_input(type_id_version const &last, type_id_version curr);
35+
36+
type_id_version check_input(type_id_version const &last,
37+
osmium::OSMObject const &object);
38+
39+
/**
40+
* Prepare input file(s). Does format checks as far as this is possible
41+
* without actually opening the files.
42+
*/
43+
std::vector<osmium::io::File>
44+
prepare_input_files(std::vector<std::string> const &input_files,
45+
std::string const &input_format, bool append);
46+
47+
/**
48+
* Process the specified OSM files (stage 1a).
49+
*/
50+
void process_files(std::vector<osmium::io::File> const &files,
51+
osmdata_t &osmdata, bool append, bool show_progress);
52+
53+
#endif // OSM2PGSQL_INPUT_HPP

0 commit comments

Comments
 (0)