Skip to content

Commit 777b7ff

Browse files
authored
Merge pull request #1490 from joto/node-location-store-improvements
Node location store improvements
2 parents db6d89d + 2285e0e commit 777b7ff

File tree

6 files changed

+89
-91
lines changed

6 files changed

+89
-91
lines changed

src/middle-ram.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,6 @@ void middle_ram_t::relation(osmium::Relation const &relation)
184184
}
185185
}
186186

187-
void middle_ram_t::after_nodes() { m_node_locations.freeze(); }
188-
189187
std::size_t middle_ram_t::nodes_get_list(osmium::WayNodeList *nodes) const
190188
{
191189
assert(nodes);

src/middle-ram.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@ class middle_ram_t : public middle_t, public middle_query_t
5252
void way(osmium::Way const &way) override;
5353
void relation(osmium::Relation const &) override;
5454

55-
void after_nodes() override;
56-
5755
std::size_t nodes_get_list(osmium::WayNodeList *nodes) const override;
5856

5957
bool way_get(osmid_t id, osmium::memory::Buffer *buffer) const override;

src/node-locations.cpp

Lines changed: 31 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "node-locations.hpp"
1111

1212
#include <osmium/osm/location.hpp>
13-
#include <osmium/util/delta.hpp>
1413

1514
// Workaround: This must be included before buffer_string.hpp due to a missing
1615
// include in the upstream code. https://github.com/mapbox/protozero/pull/104
@@ -22,15 +21,28 @@
2221
#include <cassert>
2322
#include <limits>
2423

25-
void node_locations_t::set(osmid_t id, osmium::Location location)
24+
bool node_locations_t::set(osmid_t id, osmium::Location location)
2625
{
27-
assert(block_index() == 0 || m_block[block_index() - 1].first < id);
26+
if (used_memory() >= m_max_size && will_resize()) {
27+
return false;
28+
}
2829

29-
m_block[block_index()] = {id, location};
30-
++m_count;
31-
if (block_index() == 0) {
32-
freeze();
30+
if (first_entry_in_block()) {
31+
m_did.clear();
32+
m_dx.clear();
33+
m_dy.clear();
34+
m_index.add(id, m_data.size());
3335
}
36+
37+
protozero::add_varint_to_buffer(&m_data, m_did.update(id));
38+
protozero::add_varint_to_buffer(
39+
&m_data, protozero::encode_zigzag64(m_dx.update(location.x())));
40+
protozero::add_varint_to_buffer(
41+
&m_data, protozero::encode_zigzag64(m_dy.update(location.y())));
42+
43+
++m_count;
44+
45+
return true;
3446
}
3547

3648
osmium::Location node_locations_t::get(osmid_t id) const
@@ -46,72 +58,29 @@ osmium::Location node_locations_t::get(osmid_t id) const
4658
char const *const end = m_data.data() + m_data.size();
4759

4860
osmium::DeltaDecode<osmid_t> did;
49-
std::size_t num = block_size;
50-
for (std::size_t n = 0; n < block_size; ++n) {
51-
auto bid = did.update(protozero::decode_varint(&begin, end));
52-
if (bid == id) {
53-
num = n;
54-
}
55-
if (bid > id && num == block_size) {
56-
return osmium::Location{};
57-
}
58-
}
59-
if (num == block_size) {
60-
return osmium::Location{};
61-
}
62-
6361
osmium::DeltaDecode<int64_t> dx;
6462
osmium::DeltaDecode<int64_t> dy;
65-
int32_t x = 0;
66-
int32_t y = 0;
67-
for (std::size_t n = 0; n <= num; ++n) {
68-
x = dx.update(
63+
64+
for (std::size_t n = 0; n < block_size && begin != end; ++n) {
65+
auto const bid = did.update(protozero::decode_varint(&begin, end));
66+
int32_t const x = dx.update(
6967
protozero::decode_zigzag64(protozero::decode_varint(&begin, end)));
70-
y = dy.update(
68+
int32_t const y = dy.update(
7169
protozero::decode_zigzag64(protozero::decode_varint(&begin, end)));
70+
if (bid == id) {
71+
return osmium::Location{x, y};
72+
}
73+
if (bid > id) {
74+
break;
75+
}
7276
}
73-
74-
return osmium::Location{x, y};
75-
}
76-
77-
void node_locations_t::freeze()
78-
{
79-
encode_block();
80-
clear_block();
77+
return osmium::Location{};
8178
}
8279

8380
void node_locations_t::clear()
8481
{
8582
m_data.clear();
8683
m_data.shrink_to_fit();
8784
m_index.clear();
88-
clear_block();
8985
m_count = 0;
9086
}
91-
92-
void node_locations_t::encode_block()
93-
{
94-
auto const offset = m_data.size();
95-
osmium::DeltaEncode<osmid_t> did;
96-
osmium::DeltaEncode<int64_t> dx;
97-
osmium::DeltaEncode<int64_t> dy;
98-
for (auto const &nl : m_block) {
99-
protozero::add_varint_to_buffer(&m_data, did.update(nl.first));
100-
}
101-
for (auto const &nl : m_block) {
102-
protozero::add_varint_to_buffer(
103-
&m_data, protozero::encode_zigzag64(dx.update(nl.second.x())));
104-
protozero::add_varint_to_buffer(
105-
&m_data, protozero::encode_zigzag64(dy.update(nl.second.y())));
106-
}
107-
m_index.add(m_block[0].first, offset);
108-
}
109-
110-
void node_locations_t::clear_block()
111-
{
112-
for (auto &nl : m_block) {
113-
nl.first = std::numeric_limits<osmid_t>::max();
114-
nl.second = osmium::Location{};
115-
}
116-
}
117-

src/node-locations.hpp

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
#include "ordered-index.hpp"
1414
#include "osmtypes.hpp"
1515

16+
#include <osmium/util/delta.hpp>
17+
1618
#include <array>
1719
#include <cstddef>
20+
#include <limits>
1821
#include <string>
1922
#include <utility>
2023

@@ -25,24 +28,33 @@
2528
*
2629
* Internally nodes are stored in blocks of `block_size` (id, location) pairs.
2730
* Ids inside a block and the x and y coordinates of each location are first
28-
* delta encoded and then stored as varints. To access a stored location a
29-
* full block must be decoded.
31+
* delta encoded and then stored as varints. To access a stored location the
32+
* block must be decoded until the id is found.
3033
*
31-
* Ids must be added in strictly ascending order. After all ids are stored,
32-
* the `freeze()` function must be called. Only after that can the store
33-
* be queried.
34+
* Ids must be added in strictly ascending order.
3435
*/
3536
class node_locations_t
3637
{
3738
public:
38-
node_locations_t() { clear_block(); }
39+
/**
40+
* Construct a node locations store. Takes a single optional argument
41+
* which gives the maximum number of bytes this store should be allowed
42+
* to use. If this is not specified, the size is only limited by available
43+
* memory. The store will try to keep the memory used under what's
44+
* specified here.
45+
*/
46+
explicit node_locations_t(
47+
std::size_t max_size = std::numeric_limits<std::size_t>::max())
48+
: m_max_size(max_size)
49+
{}
3950

4051
/**
4152
* Store a node location.
4253
*
4354
* \pre id must be strictly larger than all ids stored before.
55+
* \return True if the entry was added, false if the index is full.
4456
*/
45-
void set(osmid_t id, osmium::Location location);
57+
bool set(osmid_t id, osmium::Location location);
4658

4759
/**
4860
* Retrieve a node location. If the location wasn't stored before, an
@@ -60,35 +72,46 @@ class node_locations_t
6072
}
6173

6274
/**
63-
* Freeze storage. Muste be called after set()ing all the ids and before
64-
* get()ing the first one.
65-
*/
66-
void freeze();
67-
68-
/**
69-
* Clear the memory used by this object. The object can not be reused
70-
* after that.
75+
* Clear the memory used by this object. The object can be reused after
76+
* that.
7177
*/
7278
void clear();
7379

7480
private:
75-
std::size_t block_index() const noexcept { return m_count % block_size; }
81+
bool first_entry_in_block() const noexcept
82+
{
83+
return m_count % block_size == 0;
84+
}
7685

77-
void encode_block();
78-
void clear_block();
86+
/// The maximum number of bytes an entry will need in storage.
87+
constexpr static std::size_t max_bytes_per_entry() noexcept {
88+
return 10U /*max varint length*/ * 3U /*id, x, y*/;
89+
}
90+
91+
bool will_resize() const noexcept
92+
{
93+
return m_index.will_resize() ||
94+
(m_data.size() + max_bytes_per_entry() >= m_data.capacity());
95+
}
7996

8097
/**
8198
* The block size used for internal blocks. The larger the block size
8299
* the less memory is consumed but the more expensive the access is.
83100
*/
84101
static constexpr const std::size_t block_size = 32;
85102

86-
std::array<std::pair<osmid_t, osmium::Location>, block_size> m_block;
87103
ordered_index_t m_index;
88104
std::string m_data;
89105

106+
/// Maximum size in bytes this object may allocate.
107+
std::size_t m_max_size;
108+
90109
/// The number of (id, location) pairs stored.
91110
std::size_t m_count = 0;
111+
112+
osmium::DeltaEncode<osmid_t> m_did;
113+
osmium::DeltaEncode<int64_t> m_dx;
114+
osmium::DeltaEncode<int64_t> m_dy;
92115
}; // class node_locations_t
93116

94117
#endif // OSM2PGSQL_NODE_LOCATIONS_HPP

src/ordered-index.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ class ordered_index_t
150150
m_size = 0;
151151
}
152152

153+
/// Return true if adding an entry to the index will make it resize.
154+
bool will_resize() const noexcept { return m_size + 1 >= m_capacity; }
155+
153156
private:
154157
struct second_level_index_entry
155158
{

tests/test-node-locations.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,11 @@ TEST_CASE("node locations basics", "[NoDB]")
1616
node_locations_t nl;
1717
REQUIRE(nl.size() == 0);
1818

19-
nl.set(3, {1.2, 3.4});
20-
nl.set(5, {5.6, 7.8});
19+
REQUIRE(nl.set(3, {1.2, 3.4}));
20+
REQUIRE(nl.set(5, {5.6, 7.8}));
2121

2222
REQUIRE(nl.size() == 2);
2323

24-
nl.freeze();
25-
REQUIRE(nl.size() == 2);
26-
2724
REQUIRE(nl.get(1) == osmium::Location{});
2825
REQUIRE(nl.get(4) == osmium::Location{});
2926
REQUIRE(nl.get(6) == osmium::Location{});
@@ -70,7 +67,6 @@ TEST_CASE("node locations in more than one block", "[NoDB]")
7067
nl.set(id, {id + 0.1, id + 0.2});
7168
}
7269

73-
nl.freeze();
7470
REQUIRE(nl.size() == max_id);
7571

7672
for (std::size_t id = 1; id <= max_id; ++id) {
@@ -80,3 +76,14 @@ TEST_CASE("node locations in more than one block", "[NoDB]")
8076
}
8177
}
8278

79+
TEST_CASE("full node locations store", "[NoDB]")
80+
{
81+
node_locations_t nl{30};
82+
REQUIRE(nl.size() == 0);
83+
84+
REQUIRE(nl.set(3, {1.2, 3.4}));
85+
REQUIRE_FALSE(nl.set(5, {5.6, 7.8}));
86+
87+
REQUIRE(nl.size() == 1);
88+
}
89+

0 commit comments

Comments
 (0)