Skip to content

Commit 50cd39b

Browse files
committed
New ram middle that's much more memory-efficient
Replaces the somewhat dated middle_ram_t with a completely new implementation for importing small to medium-sized files into a non-updateable database. It works completely in memory; no data is written to disk. The following traits of OSM objects can be stored. All are optional:
- Node locations for building geometries of ways.
- Way node ids for building geometries of relations based on ways.
- Tags and attributes for nodes, ways, and/or relations for full 2-stage-processing support.
- Attributes for untagged nodes.
1 parent 913632c commit 50cd39b

File tree

3 files changed

+325
-227
lines changed

3 files changed

+325
-227
lines changed

src/middle-ram.cpp

Lines changed: 245 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -7,66 +7,197 @@
77
* For a full list of authors see the git log.
88
*/
99

10-
/* Implements the mid-layer processing for osm2pgsql
11-
* using several arrays in RAM. This is fastest if you
12-
* have sufficient RAM+Swap.
13-
*
14-
* This layer stores data read in from the planet.osm file
15-
* and is then read by the backend processing code to
16-
* emit the final geometry-enabled output formats
17-
*/
10+
#include "logging.hpp"
11+
#include "middle-ram.hpp"
12+
#include "options.hpp"
13+
14+
#include <osmium/builder/osm_object_builder.hpp>
15+
#include <osmium/util/delta.hpp>
16+
#include <osmium/util/string.hpp>
17+
18+
// Workaround: This must be included before buffer_string.hpp due to a missing
19+
// include in the upstream code. https://github.com/mapbox/protozero/pull/104
20+
#include <protozero/config.hpp>
21+
22+
#include <protozero/buffer_string.hpp>
23+
#include <protozero/varint.hpp>
1824

25+
#include <algorithm>
1926
#include <cassert>
27+
#include <limits>
2028
#include <memory>
2129

22-
#include <osmium/builder/attr.hpp>
30+
middle_ram_t::middle_ram_t(options_t const *options)
31+
{
32+
assert(options);
2333

24-
#include "middle-ram.hpp"
25-
#include "node-ram-cache.hpp"
26-
#include "options.hpp"
34+
if (options->extra_attributes) {
35+
m_store_options.untagged_nodes = true;
36+
}
37+
}
2738

28-
/* Object storage now uses 2 levels of storage arrays.
29-
*
30-
* - Low level storage of 2^16 (~65k) objects in an indexed array
31-
* These are allocated dynamically when we need to first store data with
32-
* an ID in this block
33-
*
34-
* - Fixed array of 2^(32 - 16) = 65k pointers to the dynamically allocated arrays.
35-
*
36-
* This allows memory usage to be efficient and scale dynamically without needing to
37-
* hard code maximum IDs. We now support an ID range of -2^31 to +2^31.
38-
* The negative IDs often occur in non-uploaded JOSM data or other data import scripts.
39-
*
40-
*/
39+
void middle_ram_t::set_requirements(output_requirements const &requirements)
40+
{
41+
if (requirements.full_nodes) {
42+
m_store_options.nodes = true;
43+
}
44+
45+
if (requirements.full_ways) {
46+
m_store_options.ways = true;
47+
m_store_options.way_nodes = false;
48+
}
49+
50+
if (requirements.full_relations) {
51+
m_store_options.relations = true;
52+
}
53+
54+
log_debug("Middle 'ram' options:");
55+
log_debug(" locations: {}", m_store_options.locations);
56+
log_debug(" way_nodes: {}", m_store_options.way_nodes);
57+
log_debug(" nodes: {}", m_store_options.nodes);
58+
log_debug(" untagged_nodes: {}", m_store_options.untagged_nodes);
59+
log_debug(" ways: {}", m_store_options.ways);
60+
log_debug(" relations: {}", m_store_options.relations);
61+
}
62+
63+
void middle_ram_t::stop(thread_pool_t &)
64+
{
65+
auto const mbyte = 1024 * 1024;
66+
67+
log_debug("Middle 'ram': Node locations: size={} bytes={}M",
68+
m_node_locations.size(), m_node_locations.used_memory() / mbyte);
69+
70+
log_debug("Middle 'ram': Way nodes data: size={} capacity={} bytes={}M",
71+
m_way_nodes_data.size(), m_way_nodes_data.capacity(),
72+
m_way_nodes_data.capacity() / mbyte);
73+
74+
log_debug("Middle 'ram': Way nodes index: size={} capacity={} bytes={}M",
75+
m_way_nodes_index.size(), m_way_nodes_index.capacity(),
76+
m_way_nodes_index.used_memory() / mbyte);
77+
78+
log_debug("Middle 'ram': Object data: size={} capacity={} bytes={}M",
79+
m_object_buffer.committed(), m_object_buffer.capacity(),
80+
m_object_buffer.capacity() / mbyte);
81+
82+
std::size_t index_size = 0;
83+
std::size_t index_capacity = 0;
84+
std::size_t index_mem = 0;
85+
for (auto const &index : m_object_index) {
86+
index_size += index.size();
87+
index_capacity += index.capacity();
88+
index_mem += index.used_memory();
89+
}
90+
log_debug("Middle 'ram': Object indexes: size={} capacity={} bytes={}M",
91+
index_size, index_capacity, index_mem / mbyte);
92+
93+
log_debug("Middle 'ram': Memory used overall: {}MBytes",
94+
(m_node_locations.used_memory() + m_way_nodes_data.capacity() +
95+
m_way_nodes_index.used_memory() + m_object_buffer.capacity() +
96+
index_mem) /
97+
mbyte);
98+
99+
m_node_locations.clear();
100+
101+
m_way_nodes_index.clear();
102+
m_way_nodes_data.clear();
103+
m_way_nodes_data.shrink_to_fit();
104+
105+
m_object_buffer = osmium::memory::Buffer{};
106+
107+
for (auto &index : m_object_index) {
108+
index.clear();
109+
}
110+
}
111+
112+
void middle_ram_t::store_object(osmium::OSMObject const &object)
113+
{
114+
auto const offset = m_object_buffer.committed();
115+
m_object_buffer.add_item(object);
116+
m_object_buffer.commit();
117+
m_object_index(object.type()).add(object.id(), offset);
118+
}
119+
120+
bool middle_ram_t::get_object(osmium::item_type type, osmid_t id,
121+
osmium::memory::Buffer *buffer) const
122+
{
123+
assert(buffer);
124+
125+
auto const offset = m_object_index(type).get(id);
126+
if (offset == ordered_index_t::not_found_value()) {
127+
return false;
128+
}
129+
buffer->add_item(m_object_buffer.get<osmium::memory::Item>(offset));
130+
buffer->commit();
131+
return true;
132+
}
133+
134+
static void add_delta_encoded_way_node_list(std::string *data,
135+
osmium::WayNodeList const &wnl)
136+
{
137+
assert(data);
138+
139+
// Add number of nodes in list
140+
protozero::add_varint_to_buffer(data, wnl.size());
141+
142+
// Add delta encoded node ids
143+
osmium::DeltaEncode<osmid_t> delta;
144+
for (auto const &nr : wnl) {
145+
protozero::add_varint_to_buffer(data, delta.update(nr.ref()));
146+
}
147+
}
41148

42149
void middle_ram_t::node(osmium::Node const &node)
43150
{
44151
assert(node.visible());
45-
m_cache->set(node.id(), node.location());
152+
153+
if (m_store_options.locations) {
154+
m_node_locations.set(node.positive_id(), node.location());
155+
}
156+
157+
if (m_store_options.nodes &&
158+
(!node.tags().empty() || m_store_options.untagged_nodes)) {
159+
store_object(node);
160+
}
46161
}
47162

48163
void middle_ram_t::way(osmium::Way const &way)
49164
{
50165
assert(way.visible());
51-
m_ways.set(way.id(), new ramWay{way, m_extra_attributes});
166+
167+
if (m_store_options.way_nodes) {
168+
auto const offset = m_way_nodes_data.size();
169+
add_delta_encoded_way_node_list(&m_way_nodes_data, way.nodes());
170+
m_way_nodes_index.add(way.id(), offset);
171+
}
172+
173+
if (m_store_options.ways) {
174+
store_object(way);
175+
}
52176
}
53177

54-
void middle_ram_t::relation(osmium::Relation const &rel)
178+
void middle_ram_t::relation(osmium::Relation const &relation)
55179
{
56-
assert(rel.visible());
57-
m_rels.set(rel.id(), new ramRel{rel, m_extra_attributes});
180+
assert(relation.visible());
181+
182+
if (m_store_options.relations) {
183+
store_object(relation);
184+
}
58185
}
59186

60-
size_t middle_ram_t::nodes_get_list(osmium::WayNodeList *nodes) const
187+
void middle_ram_t::after_nodes() { m_node_locations.freeze(); }
188+
189+
std::size_t middle_ram_t::nodes_get_list(osmium::WayNodeList *nodes) const
61190
{
62191
assert(nodes);
63-
size_t count = 0;
64192

65-
for (auto &n : *nodes) {
66-
auto loc = m_cache->get(n.ref());
67-
n.set_location(loc);
68-
if (loc.valid()) {
69-
++count;
193+
std::size_t count = 0;
194+
195+
if (m_store_options.locations) {
196+
for (auto &nr : *nodes) {
197+
nr.set_location(m_node_locations.get(nr.positive_ref()));
198+
if (nr.location().valid()) {
199+
++count;
200+
}
70201
}
71202
}
72203

@@ -77,33 +208,89 @@ bool middle_ram_t::way_get(osmid_t id, osmium::memory::Buffer *buffer) const
77208
{
78209
assert(buffer);
79210

80-
auto const *ele = m_ways.get(id);
81-
82-
if (!ele) {
83-
return false;
211+
if (m_store_options.ways) {
212+
return get_object(osmium::item_type::way, id, buffer);
84213
}
214+
return false;
215+
}
85216

86-
// NOLINTNEXTLINE(google-build-using-namespace)
87-
using namespace osmium::builder::attr;
88-
osmium::builder::add_way(*buffer, _id(id), _tags(ele->tags),
89-
_nodes(ele->ndids));
217+
static void
218+
get_delta_encoded_way_nodes_list(std::string const &data, std::size_t offset,
219+
osmium::builder::WayBuilder *builder)
220+
{
221+
assert(builder);
90222

91-
return true;
223+
char const *begin = data.data() + offset;
224+
char const *const end = data.data() + data.size();
225+
226+
auto count = protozero::decode_varint(&begin, end);
227+
228+
osmium::DeltaDecode<osmid_t> delta;
229+
osmium::builder::WayNodeListBuilder wnl_builder{*builder};
230+
while (count > 0) {
231+
auto const val = protozero::decode_varint(&begin, end);
232+
wnl_builder.add_node_ref(delta.update(val));
233+
--count;
234+
}
92235
}
93236

94-
size_t middle_ram_t::rel_way_members_get(osmium::Relation const &rel,
95-
rolelist_t *roles,
96-
osmium::memory::Buffer *buffer) const
237+
std::size_t
238+
middle_ram_t::rel_way_members_get(osmium::Relation const &rel,
239+
rolelist_t *roles,
240+
osmium::memory::Buffer *buffer) const
97241
{
98242
assert(buffer);
99243

100-
size_t count = 0;
101-
for (auto const &m : rel.members()) {
102-
if (m.type() == osmium::item_type::way && way_get(m.ref(), buffer)) {
103-
if (roles) {
104-
roles->emplace_back(m.role());
244+
std::size_t count = 0;
245+
246+
for (auto const &member : rel.members()) {
247+
switch (member.type()) {
248+
case osmium::item_type::node:
249+
if (m_store_options.nodes) {
250+
auto const offset =
251+
m_object_index(osmium::item_type::node).get(member.ref());
252+
if (offset != ordered_index_t::not_found_value()) {
253+
buffer->add_item(m_object_buffer.get<osmium::Node>(offset));
254+
buffer->commit();
255+
++count;
256+
}
257+
}
258+
break;
259+
case osmium::item_type::way:
260+
if (m_store_options.ways) {
261+
auto const offset =
262+
m_object_index(osmium::item_type::way).get(member.ref());
263+
if (offset != ordered_index_t::not_found_value()) {
264+
buffer->add_item(m_object_buffer.get<osmium::Way>(offset));
265+
buffer->commit();
266+
if (roles) {
267+
roles->emplace_back(member.role());
268+
}
269+
++count;
270+
}
271+
} else if (m_store_options.way_nodes) {
272+
auto const offset = m_way_nodes_index.get(member.ref());
273+
if (offset != ordered_index_t::not_found_value()) {
274+
osmium::builder::WayBuilder builder{*buffer};
275+
builder.set_id(member.ref());
276+
get_delta_encoded_way_nodes_list(m_way_nodes_data, offset,
277+
&builder);
278+
}
279+
buffer->commit();
280+
++count;
281+
}
282+
break;
283+
default: // osmium::item_type::relation
284+
if (m_store_options.relations) {
285+
auto const offset = m_object_index(osmium::item_type::relation)
286+
.get(member.ref());
287+
if (offset != ordered_index_t::not_found_value()) {
288+
buffer->add_item(
289+
m_object_buffer.get<osmium::Relation>(offset));
290+
buffer->commit();
291+
++count;
292+
}
105293
}
106-
++count;
107294
}
108295
}
109296

@@ -115,33 +302,12 @@ bool middle_ram_t::relation_get(osmid_t id,
115302
{
116303
assert(buffer);
117304

118-
auto const *ele = m_rels.get(id);
119-
120-
if (!ele) {
121-
return false;
305+
if (m_store_options.relations) {
306+
return get_object(osmium::item_type::relation, id, buffer);
122307
}
123-
124-
// NOLINTNEXTLINE(google-build-using-namespace)
125-
using namespace osmium::builder::attr;
126-
osmium::builder::add_relation(*buffer, _id(id),
127-
_members(ele->members.for_builder()),
128-
_tags(ele->tags));
129-
130-
return true;
308+
return false;
131309
}
132310

133-
void middle_ram_t::stop(thread_pool_t &)
134-
{
135-
m_cache.reset();
136-
m_ways.clear();
137-
m_rels.clear();
138-
}
139-
140-
middle_ram_t::middle_ram_t(options_t const *options)
141-
: m_cache(new node_ram_cache{options->alloc_chunkwise, options->cache}),
142-
m_extra_attributes(options->extra_attributes)
143-
{}
144-
145311
std::shared_ptr<middle_query_t> middle_ram_t::get_query_instance()
146312
{
147313
return shared_from_this();

0 commit comments

Comments
 (0)