Skip to content

Commit 913632c

Browse files
committed
New node_locations_t class for storing node locations in memory
This is a very memory-efficient storage which will be used for the new ram middle.
1 parent 4680152 commit 913632c

File tree

5 files changed

+295
-0
lines changed

5 files changed

+295
-0
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set(osm2pgsql_lib_SOURCES
1111
middle.cpp
1212
middle-pgsql.cpp
1313
middle-ram.cpp
14+
node-locations.cpp
1415
node-persistent-cache.cpp
1516
node-ram-cache.cpp
1617
options.cpp

src/node-locations.cpp

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/**
2+
* SPDX-License-Identifier: GPL-2.0-or-later
3+
*
4+
* This file is part of osm2pgsql (https://osm2pgsql.org/).
5+
*
6+
* Copyright (C) 2006-2021 by the osm2pgsql developer community.
7+
* For a full list of authors see the git log.
8+
*/
9+
10+
#include "node-locations.hpp"
11+
12+
#include <osmium/osm/location.hpp>
13+
#include <osmium/util/delta.hpp>
14+
15+
// Workaround: This must be included before buffer_string.hpp due to a missing
16+
// include in the upstream code. https://github.com/mapbox/protozero/pull/104
17+
#include <protozero/config.hpp>
18+
19+
#include <protozero/buffer_string.hpp>
20+
#include <protozero/varint.hpp>
21+
22+
#include <cassert>
23+
#include <limits>
24+
25+
/**
 * Append one (id, location) pair to the block currently being filled.
 * Once the block holds block_size entries it is flushed via freeze().
 *
 * \pre id must be strictly larger than the id stored just before it in
 *      the same block (checked by assertion).
 */
void node_locations_t::set(osmid_t id, osmium::Location location)
{
    auto const slot = block_index();

    // Only the predecessor inside the current block is available for the
    // ordering check; the first slot of a block has nothing to compare to.
    assert(slot == 0 || m_block[slot - 1].first < id);

    auto &entry = m_block[slot];
    entry.first = id;
    entry.second = location;

    ++m_count;

    // After the increment the index wraps to 0 exactly when the block
    // has just been filled completely.
    bool const block_is_full = (block_index() == 0);
    if (block_is_full) {
        freeze();
    }
}
35+
36+
/**
 * Look up the location stored for a node id.
 *
 * The index yields the byte offset of the block that could contain the
 * id. A block consists of block_size delta-encoded ids followed by
 * block_size delta-encoded (x, y) pairs, so the id section is always read
 * completely before the coordinates can be decoded.
 *
 * \param id The node id to look up.
 * \returns The stored location, or a default-constructed osmium::Location
 *          if the index has no block for the id or the block does not
 *          contain it.
 */
osmium::Location node_locations_t::get(osmid_t id) const
{
    auto const block_offset = m_index.get_block(id);
    if (block_offset == ordered_index_t::not_found_value()) {
        return osmium::Location{};
    }

    assert(block_offset < m_data.size());

    char const *pos = m_data.data() + block_offset;
    char const *const data_end = m_data.data() + m_data.size();

    // Pass 1: walk the id section. `slot` keeps the value block_size
    // while the id has not been seen.
    osmium::DeltaDecode<osmid_t> id_decoder;
    std::size_t slot = block_size;
    for (std::size_t i = 0; i != block_size; ++i) {
        auto const current =
            id_decoder.update(protozero::decode_varint(&pos, data_end));
        if (current == id) {
            slot = i;
        } else if (slot == block_size && current > id) {
            // Ids are ascending, so the id can not show up any more.
            return osmium::Location{};
        }
    }

    if (slot == block_size) {
        return osmium::Location{};
    }

    // Pass 2: the coordinates are delta encoded, so every pair up to and
    // including the one at `slot` has to be decoded.
    osmium::DeltaDecode<int64_t> x_decoder;
    osmium::DeltaDecode<int64_t> y_decoder;
    int32_t x = 0;
    int32_t y = 0;
    for (std::size_t remaining = slot + 1; remaining > 0; --remaining) {
        x = x_decoder.update(protozero::decode_zigzag64(
            protozero::decode_varint(&pos, data_end)));
        y = y_decoder.update(protozero::decode_zigzag64(
            protozero::decode_varint(&pos, data_end)));
    }

    return osmium::Location{x, y};
}
76+
77+
void node_locations_t::freeze()
78+
{
79+
encode_block();
80+
clear_block();
81+
}
82+
83+
void node_locations_t::clear()
84+
{
85+
m_data.clear();
86+
m_data.shrink_to_fit();
87+
m_index.clear();
88+
clear_block();
89+
m_count = 0;
90+
}
91+
92+
void node_locations_t::encode_block()
93+
{
94+
auto const offset = m_data.size();
95+
osmium::DeltaEncode<osmid_t> did;
96+
osmium::DeltaEncode<int64_t> dx;
97+
osmium::DeltaEncode<int64_t> dy;
98+
for (auto const &nl : m_block) {
99+
protozero::add_varint_to_buffer(&m_data, did.update(nl.first));
100+
}
101+
for (auto const &nl : m_block) {
102+
protozero::add_varint_to_buffer(
103+
&m_data, protozero::encode_zigzag64(dx.update(nl.second.x())));
104+
protozero::add_varint_to_buffer(
105+
&m_data, protozero::encode_zigzag64(dy.update(nl.second.y())));
106+
}
107+
m_index.add(m_block[0].first, offset);
108+
}
109+
110+
void node_locations_t::clear_block()
111+
{
112+
for (auto &nl : m_block) {
113+
nl.first = std::numeric_limits<osmid_t>::max();
114+
nl.second = osmium::Location{};
115+
}
116+
}
117+

src/node-locations.hpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#ifndef OSM2PGSQL_NODE_LOCATIONS_HPP
2+
#define OSM2PGSQL_NODE_LOCATIONS_HPP
3+
4+
/**
5+
* SPDX-License-Identifier: GPL-2.0-or-later
6+
*
7+
* This file is part of osm2pgsql (https://osm2pgsql.org/).
8+
*
9+
* Copyright (C) 2006-2021 by the osm2pgsql developer community.
10+
* For a full list of authors see the git log.
11+
*/
12+
13+
#include "ordered-index.hpp"
14+
#include "osmtypes.hpp"
15+
16+
#include <array>
17+
#include <cstddef>
18+
#include <string>
19+
#include <utility>
20+
21+
/**
22+
* Node locations storage. This implementation encodes ids and locations
23+
* with delta encoding and varints making it very memory-efficient but a bit
24+
* slower than other implementations.
25+
*
26+
* Internally nodes are stored in blocks of `block_size` (id, location) pairs.
27+
* Ids inside a block and the x and y coordinates of each location are first
28+
* delta encoded and then stored as varints. To access a stored location a
29+
* full block must be decoded.
30+
*
31+
* Ids must be added in strictly ascending order. After all ids are stored,
32+
* the `freeze()` function must be called. Only after that can the store
33+
* be queried.
34+
*/
35+
class node_locations_t
36+
{
37+
public:
38+
node_locations_t() { clear_block(); }
39+
40+
/**
41+
* Store a node location.
42+
*
43+
* \pre id must be strictly larger than all ids stored before.
44+
*/
45+
void set(osmid_t id, osmium::Location location);
46+
47+
/**
48+
* Retrieve a node location. If the location wasn't stored before, an
49+
* invalid Location will be returned.
50+
*/
51+
osmium::Location get(osmid_t id) const;
52+
53+
/// The number of locations stored.
54+
std::size_t size() const noexcept { return m_count; }
55+
56+
/// Return the approximate number of bytes used for internal storage.
57+
std::size_t used_memory() const noexcept
58+
{
59+
return m_data.capacity() + m_index.used_memory();
60+
}
61+
62+
/**
63+
* Freeze storage. Muste be called after set()ing all the ids and before
64+
* get()ing the first one.
65+
*/
66+
void freeze();
67+
68+
/**
69+
* Clear the memory used by this object. The object can not be reused
70+
* after that.
71+
*/
72+
void clear();
73+
74+
private:
75+
std::size_t block_index() const noexcept { return m_count % block_size; }
76+
77+
void encode_block();
78+
void clear_block();
79+
80+
/**
81+
* The block size used for internal blocks. The larger the block size
82+
* the less memory is consumed but the more expensive the access is.
83+
*/
84+
static constexpr const std::size_t block_size = 32;
85+
86+
std::array<std::pair<osmid_t, osmium::Location>, block_size> m_block;
87+
ordered_index_t m_index;
88+
std::string m_data;
89+
90+
/// The number of (id, location) pairs stored.
91+
std::size_t m_count = 0;
92+
}; // class node_locations_t
93+
94+
#endif // OSM2PGSQL_NODE_LOCATIONS_HPP

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ set_test(test-domain-matcher LABELS NoDB)
4545
set_test(test-expire-tiles LABELS NoDB)
4646
set_test(test-geom LABELS NoDB)
4747
set_test(test-middle)
48+
set_test(test-node-locations LABELS NoDB)
4849
set_test(test-options-database LABELS NoDB)
4950
set_test(test-options-parse LABELS NoDB)
5051
set_test(test-options-projection)

tests/test-node-locations.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/**
2+
* SPDX-License-Identifier: GPL-2.0-or-later
3+
*
4+
* This file is part of osm2pgsql (https://osm2pgsql.org/).
5+
*
6+
* Copyright (C) 2006-2021 by the osm2pgsql developer community.
7+
* For a full list of authors see the git log.
8+
*/
9+
10+
#include <catch.hpp>
11+
12+
#include "node-locations.hpp"
13+
14+
// Stores two nodes in a single, partially filled block and checks size
// reporting, lookups of stored ids and lookups of ids that are absent.
TEST_CASE("node locations basics", "[NoDB]")
{
    node_locations_t store;
    REQUIRE(store.size() == 0);

    store.set(3, {1.2, 3.4});
    store.set(5, {5.6, 7.8});

    REQUIRE(store.size() == 2);

    // Freezing must not change the number of stored entries.
    store.freeze();
    REQUIRE(store.size() == 2);

    // Ids before, between and after the stored ones are not found.
    REQUIRE(store.get(1) == osmium::Location{});
    REQUIRE(store.get(4) == osmium::Location{});
    REQUIRE(store.get(6) == osmium::Location{});
    REQUIRE(store.get(100) == osmium::Location{});

    // Stored ids return exactly what was put in.
    REQUIRE(store.get(3) == osmium::Location{1.2, 3.4});
    REQUIRE(store.get(5) == osmium::Location{5.6, 7.8});

    store.clear();
    REQUIRE(store.size() == 0);
}
38+
39+
// Fills the store with ids 1..max_id for several values of max_id chosen
// around multiples of the internal block size (32), then reads every
// entry back.
TEST_CASE("node locations in more than one block", "[NoDB]")
{
    node_locations_t store;

    std::size_t max_id = 0;

    SECTION("max_id 0") { max_id = 0; }

    SECTION("max_id 31") { max_id = 31; }

    SECTION("max_id 32") { max_id = 32; }

    SECTION("max_id 33") { max_id = 33; }

    SECTION("max_id 64") { max_id = 64; }

    SECTION("max_id 80") { max_id = 80; }

    // Each node gets coordinates derived from its id so that every entry
    // is distinguishable when read back.
    for (std::size_t id = 1; id <= max_id; ++id) {
        store.set(id, {id + 0.1, id + 0.2});
    }

    store.freeze();
    REQUIRE(store.size() == max_id);

    for (std::size_t id = 1; id <= max_id; ++id) {
        auto const found = store.get(id);
        REQUIRE(found.lon() == id + 0.1);
        REQUIRE(found.lat() == id + 0.2);
    }
}
82+

0 commit comments

Comments
 (0)