Skip to content

Commit ea7858a

Browse files
committed
Find node dependencies faster by doing a single lookup
For each changed node osm2pgsql has to find all ways and all relations referencing that node and make sure they are updated as well. This was done by doing one SQL query per node which is quite inefficient. Instead the new code collects all node ids and does only one query for the parent ways and one for the parent relations after all nodes have been read. This implementation uses temporary tables. The changed nodes are written to a temp table and from that temp tables are filled with the parent nodes and ways. Strictly speaking the latter tables are not necessary, we could read the data directly from the query. But maybe those tables could be useful in the future for some additional processing in the database. (They'd need to be converted to non-temp tables then, though.)
1 parent e9c075a commit ea7858a

File tree

7 files changed

+167
-73
lines changed

7 files changed

+167
-73
lines changed

src/dependency-manager.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,7 @@
1515

1616
void full_dependency_manager_t::node_changed(osmid_t id)
1717
{
18-
for (auto const way_id : m_object_store->get_ways_by_node(id)) {
19-
way_changed(way_id);
20-
m_ways_pending_tracker.set(way_id);
21-
}
22-
23-
for (auto const rel_id : m_object_store->get_rels_by_node(id)) {
24-
m_rels_pending_tracker.set(rel_id);
25-
}
18+
m_changed_nodes.set(id);
2619
}
2720

2821
void full_dependency_manager_t::way_changed(osmid_t id)
@@ -36,6 +29,23 @@ void full_dependency_manager_t::way_changed(osmid_t id)
3629
}
3730
}
3831

32+
void full_dependency_manager_t::after_nodes()
33+
{
34+
if (m_changed_nodes.empty()) {
35+
return;
36+
}
37+
38+
m_object_store->get_node_parents(m_changed_nodes, &m_ways_pending_tracker,
39+
&m_rels_pending_tracker);
40+
m_changed_nodes.clear();
41+
42+
for (auto const way_id : m_ways_pending_tracker) {
43+
for (auto const rel_id : m_object_store->get_rels_by_way(way_id)) {
44+
m_rels_pending_tracker.set(rel_id);
45+
}
46+
}
47+
}
48+
3949
bool full_dependency_manager_t::has_pending() const noexcept
4050
{
4151
return !m_ways_pending_tracker.empty() || !m_rels_pending_tracker.empty();

src/dependency-manager.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ class dependency_manager_t
5252
*/
5353
virtual void way_changed(osmid_t) {}
5454

55+
virtual void after_nodes() {}
56+
5557
/// Are there pending objects that need to be processed?
5658
virtual bool has_pending() const noexcept { return false; }
5759

@@ -95,6 +97,8 @@ class full_dependency_manager_t : public dependency_manager_t
9597
void node_changed(osmid_t id) override;
9698
void way_changed(osmid_t id) override;
9799

100+
void after_nodes() override;
101+
98102
bool has_pending() const noexcept override;
99103

100104
idlist_t get_pending_way_ids() override
@@ -112,6 +116,16 @@ class full_dependency_manager_t : public dependency_manager_t
112116

113117
std::shared_ptr<middle_t> m_object_store;
114118

119+
/**
120+
* In append mode all new and changed nodes will be added to this. After
121+
* all nodes are read this is used to figure out which parent ways and
122+
* relations reference these nodes. Deleted nodes are not stored in here,
123+
* because all ways and relations that referenced deleted nodes must be in
124+
* the change file, too, and so we don't have to find out which ones they
125+
* are.
126+
*/
127+
osmium::index::IdSetSmall<osmid_t> m_changed_nodes;
128+
115129
osmium::index::IdSetSmall<osmid_t> m_ways_pending_tracker;
116130
osmium::index::IdSetSmall<osmid_t> m_rels_pending_tracker;
117131
};

src/middle-pgsql.cpp

Lines changed: 114 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <cassert>
1919
#include <cstdint>
2020
#include <cstdlib>
21+
#include <iterator>
2122
#include <memory>
2223
#include <stdexcept>
2324
#include <string>
@@ -41,6 +42,43 @@
4142
#include "pgsql-helper.hpp"
4243
#include "util.hpp"
4344

45+
static bool check_bucket_index(pg_conn_t const *db_connection,
46+
std::string const &prefix)
47+
{
48+
auto const res =
49+
db_connection->exec("SELECT relname FROM pg_class"
50+
" WHERE relkind='i'"
51+
" AND relname = '{}_ways_nodes_bucket_idx'",
52+
prefix);
53+
return res.num_tuples() > 0;
54+
}
55+
56+
static void send_id_list(pg_conn_t const &db_connection,
57+
std::string const &table,
58+
osmium::index::IdSetSmall<osmid_t> const &ids)
59+
{
60+
std::string data;
61+
for (auto const id : ids) {
62+
fmt::format_to(std::back_inserter(data), FMT_STRING("{}\n"), id);
63+
}
64+
65+
auto const sql = fmt::format("COPY {} FROM STDIN", table);
66+
db_connection.copy_start(sql.c_str());
67+
db_connection.copy_send(data, table);
68+
db_connection.copy_end(table);
69+
}
70+
71+
static void load_id_list(pg_conn_t const &db_connection,
72+
std::string const &table,
73+
osmium::index::IdSetSmall<osmid_t> *ids)
74+
{
75+
auto const res =
76+
db_connection.exec(fmt::format("SELECT id FROM {} ORDER BY id", table));
77+
for (int n = 0; n < res.num_tuples(); ++n) {
78+
ids->set(osmium::string_to_object_id(res.get_value(n, 0)));
79+
}
80+
}
81+
4482
static std::string build_sql(options_t const &options, std::string const &templ)
4583
{
4684
std::string const using_tablespace{options.tblsslim_index.empty()
@@ -746,14 +784,79 @@ void middle_pgsql_t::node_delete(osmid_t osm_id)
746784
}
747785
}
748786

749-
idlist_t middle_pgsql_t::get_ways_by_node(osmid_t osm_id)
787+
void middle_pgsql_t::get_node_parents(
788+
osmium::index::IdSetSmall<osmid_t> const &changed_nodes,
789+
osmium::index::IdSetSmall<osmid_t> *parent_ways,
790+
osmium::index::IdSetSmall<osmid_t> *parent_relations) const
750791
{
751-
return get_ids_from_db(&m_db_connection, "mark_ways_by_node", osm_id);
752-
}
792+
util::timer_t timer;
753793

754-
idlist_t middle_pgsql_t::get_rels_by_node(osmid_t osm_id)
755-
{
756-
return get_ids_from_db(&m_db_connection, "mark_rels_by_node", osm_id);
794+
m_db_connection.exec("BEGIN");
795+
m_db_connection.exec("CREATE TEMP TABLE osm2pgsql_changed_nodes"
796+
" (id int8 NOT NULL) ON COMMIT DROP");
797+
m_db_connection.exec("CREATE TEMP TABLE osm2pgsql_changed_ways"
798+
" (id int8 NOT NULL) ON COMMIT DROP");
799+
m_db_connection.exec("CREATE TEMP TABLE osm2pgsql_changed_relations"
800+
" (id int8 NOT NULL) ON COMMIT DROP");
801+
802+
send_id_list(m_db_connection, "osm2pgsql_changed_nodes", changed_nodes);
803+
804+
m_db_connection.exec("ANALYZE osm2pgsql_changed_nodes");
805+
806+
bool const has_bucket_index =
807+
check_bucket_index(&m_db_connection, m_options->prefix);
808+
809+
if (has_bucket_index) {
810+
m_db_connection.exec(build_sql(*m_options, R"(
811+
WITH changed_buckets AS (
812+
SELECT array_agg(id) AS node_ids, id >> {way_node_index_id_shift} AS bucket
813+
FROM osm2pgsql_changed_nodes GROUP BY id >> {way_node_index_id_shift}
814+
)
815+
INSERT INTO osm2pgsql_changed_ways
816+
SELECT DISTINCT w.id
817+
FROM {schema}"{prefix}_ways" w, changed_buckets b
818+
WHERE w.nodes && b.node_ids
819+
AND {schema}"{prefix}_index_bucket"(w.nodes)
820+
&& ARRAY[b.bucket];
821+
)"));
822+
} else {
823+
m_db_connection.exec(build_sql(*m_options, R"(
824+
INSERT INTO osm2pgsql_changed_ways
825+
SELECT DISTINCT w.id
826+
FROM {schema}"{prefix}_ways" w, osm2pgsql_changed_nodes n
827+
WHERE w.nodes && ARRAY[n.id]
828+
)"));
829+
}
830+
831+
if (m_options->middle_database_format == 1) {
832+
m_db_connection.exec(build_sql(*m_options, R"(
833+
INSERT INTO osm2pgsql_changed_relations
834+
SELECT DISTINCT r.id
835+
FROM {schema}"{prefix}_rels" r, osm2pgsql_changed_nodes n
836+
WHERE r.parts && ARRAY[n.id]
837+
AND r.parts[1:way_off] && ARRAY[n.id]
838+
)"));
839+
} else {
840+
m_db_connection.exec(build_sql(*m_options, R"(
841+
INSERT INTO osm2pgsql_changed_relations
842+
SELECT DISTINCT r.id
843+
FROM {schema}"{prefix}_rels" r, osm2pgsql_changed_nodes c
844+
WHERE {schema}"{prefix}_member_ids"(r.members, 'N'::char) && ARRAY[c.id];
845+
)"));
846+
}
847+
848+
load_id_list(m_db_connection, "osm2pgsql_changed_ways", parent_ways);
849+
load_id_list(m_db_connection, "osm2pgsql_changed_relations",
850+
parent_relations);
851+
852+
m_db_connection.exec("COMMIT");
853+
854+
timer.stop();
855+
856+
log_debug("Found {} new/changed nodes in input.", changed_nodes.size());
857+
log_debug(" Found in {} their {} parent ways and {} parent relations.",
858+
std::chrono::duration_cast<std::chrono::seconds>(timer.elapsed()),
859+
parent_ways->size(), parent_relations->size());
757860
}
758861

759862
idlist_t middle_pgsql_t::get_rels_by_way(osmid_t osm_id)
@@ -1279,8 +1382,7 @@ static table_sql sql_for_nodes_format2(middle_pgsql_options const &options)
12791382
return sql;
12801383
}
12811384

1282-
static table_sql sql_for_ways_format1(bool has_bucket_index,
1283-
uint8_t way_node_index_id_shift)
1385+
static table_sql sql_for_ways_format1(uint8_t way_node_index_id_shift)
12841386
{
12851387
table_sql sql{};
12861388

@@ -1300,20 +1402,6 @@ static table_sql sql_for_ways_format1(bool has_bucket_index,
13001402
" FROM {schema}\"{prefix}_ways\""
13011403
" WHERE id = ANY($1::int8[]);\n";
13021404

1303-
if (has_bucket_index) {
1304-
sql.prepare_fw_dep_lookups =
1305-
"PREPARE mark_ways_by_node(int8) AS"
1306-
" SELECT id FROM {schema}\"{prefix}_ways\" w"
1307-
" WHERE $1 = ANY(nodes)"
1308-
" AND {schema}\"{prefix}_index_bucket\"(w.nodes)"
1309-
" && {schema}\"{prefix}_index_bucket\"(ARRAY[$1]);\n";
1310-
} else {
1311-
sql.prepare_fw_dep_lookups =
1312-
"PREPARE mark_ways_by_node(int8) AS"
1313-
" SELECT id FROM {schema}\"{prefix}_ways\""
1314-
" WHERE nodes && ARRAY[$1];\n";
1315-
}
1316-
13171405
if (way_node_index_id_shift == 0) {
13181406
sql.create_fw_dep_indexes =
13191407
"CREATE INDEX ON {schema}\"{prefix}_ways\" USING GIN (nodes)"
@@ -1335,8 +1423,7 @@ static table_sql sql_for_ways_format1(bool has_bucket_index,
13351423
return sql;
13361424
}
13371425

1338-
static table_sql sql_for_ways_format2(bool has_bucket_index,
1339-
middle_pgsql_options const &options)
1426+
static table_sql sql_for_ways_format2(middle_pgsql_options const &options)
13401427
{
13411428
table_sql sql{};
13421429

@@ -1361,20 +1448,6 @@ static table_sql sql_for_ways_format2(bool has_bucket_index,
13611448
" {users_table_access}"
13621449
" WHERE o.id = ANY($1::int8[]);\n";
13631450

1364-
if (has_bucket_index) {
1365-
sql.prepare_fw_dep_lookups =
1366-
"PREPARE mark_ways_by_node(int8) AS"
1367-
" SELECT DISTINCT id FROM {schema}\"{prefix}_ways\" w"
1368-
" WHERE $1 = ANY(nodes)"
1369-
" AND {schema}\"{prefix}_index_bucket\"(w.nodes)"
1370-
" && {schema}\"{prefix}_index_bucket\"(ARRAY[$1]);\n";
1371-
} else {
1372-
sql.prepare_fw_dep_lookups =
1373-
"PREPARE mark_ways_by_node(int8) AS"
1374-
" SELECT DISTINCT id FROM {schema}\"{prefix}_ways\""
1375-
" WHERE nodes && ARRAY[$1];\n";
1376-
}
1377-
13781451
if (options.way_node_index_id_shift == 0) {
13791452
sql.create_fw_dep_indexes =
13801453
"CREATE INDEX ON {schema}\"{prefix}_ways\" USING GIN (nodes)"
@@ -1416,10 +1489,6 @@ static table_sql sql_for_relations_format1()
14161489
" FROM {schema}\"{prefix}_rels\" WHERE id = $1;\n";
14171490

14181491
sql.prepare_fw_dep_lookups =
1419-
"PREPARE mark_rels_by_node(int8) AS"
1420-
" SELECT id FROM {schema}\"{prefix}_rels\""
1421-
" WHERE parts && ARRAY[$1]"
1422-
" AND parts[1:way_off] && ARRAY[$1];\n"
14231492
"PREPARE mark_rels_by_way(int8) AS"
14241493
" SELECT id FROM {schema}\"{prefix}_rels\""
14251494
" WHERE parts && ARRAY[$1]"
@@ -1460,10 +1529,6 @@ static table_sql sql_for_relations_format2()
14601529
" WHERE o.id = $1;\n";
14611530

14621531
sql.prepare_fw_dep_lookups =
1463-
"PREPARE mark_rels_by_node(int8) AS"
1464-
" SELECT DISTINCT id FROM {schema}\"{prefix}_rels\""
1465-
" WHERE {schema}\"{prefix}_member_ids\"(members, 'N'::char)"
1466-
" @> ARRAY[$1::bigint];\n"
14671532
"PREPARE mark_rels_by_way(int8) AS"
14681533
" SELECT DISTINCT id FROM {schema}\"{prefix}_rels\""
14691534
" WHERE {schema}\"{prefix}_member_ids\"(members, 'W'::char)"
@@ -1482,17 +1547,6 @@ static table_sql sql_for_relations_format2()
14821547
return sql;
14831548
}
14841549

1485-
static bool check_bucket_index(pg_conn_t *db_connection,
1486-
std::string const &prefix)
1487-
{
1488-
auto const res =
1489-
db_connection->exec("SELECT relname FROM pg_class"
1490-
" WHERE relkind='i'"
1491-
" AND relname = '{}_ways_nodes_bucket_idx'",
1492-
prefix);
1493-
return res.num_tuples() > 0;
1494-
}
1495-
14961550
middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
14971551
options_t const *options)
14981552
: middle_t(std::move(thread_pool)), m_options(options),
@@ -1537,17 +1591,16 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
15371591

15381592
m_tables.ways() = table_desc{
15391593
*options,
1540-
sql_for_ways_format1(has_bucket_index,
1541-
m_store_options.way_node_index_id_shift)};
1594+
sql_for_ways_format1(m_store_options.way_node_index_id_shift)};
15421595

15431596
m_tables.relations() =
15441597
table_desc{*options, sql_for_relations_format1()};
15451598
} else {
15461599
m_tables.nodes() =
15471600
table_desc{*options, sql_for_nodes_format2(m_store_options)};
15481601

1549-
m_tables.ways() = table_desc{
1550-
*options, sql_for_ways_format2(has_bucket_index, m_store_options)};
1602+
m_tables.ways() =
1603+
table_desc{*options, sql_for_ways_format2(m_store_options)};
15511604

15521605
m_tables.relations() =
15531606
table_desc{*options, sql_for_relations_format2()};

src/middle-pgsql.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,11 @@ struct middle_pgsql_t : public middle_t
117117
void after_ways() override;
118118
void after_relations() override;
119119

120-
idlist_t get_ways_by_node(osmid_t osm_id) override;
121-
idlist_t get_rels_by_node(osmid_t osm_id) override;
120+
void get_node_parents(
121+
osmium::index::IdSetSmall<osmid_t> const &changed_nodes,
122+
osmium::index::IdSetSmall<osmid_t> *parent_ways,
123+
osmium::index::IdSetSmall<osmid_t> *parent_relations) const override;
124+
122125
idlist_t get_rels_by_way(osmid_t osm_id) override;
123126

124127
class table_desc

src/middle.hpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* For a full list of authors see the git log.
1111
*/
1212

13+
#include <osmium/index/id_set.hpp>
1314
#include <osmium/memory/buffer.hpp>
1415
#include <osmium/osm/entity_bits.hpp>
1516

@@ -148,8 +149,13 @@ class middle_t
148149
#endif
149150
}
150151

151-
virtual idlist_t get_ways_by_node(osmid_t) { return {}; }
152-
virtual idlist_t get_rels_by_node(osmid_t) { return {}; }
152+
virtual void get_node_parents(
153+
osmium::index::IdSetSmall<osmid_t> const & /*changed_nodes*/,
154+
osmium::index::IdSetSmall<osmid_t> * /*parent_ways*/,
155+
osmium::index::IdSetSmall<osmid_t> * /*parent_relations*/) const
156+
{
157+
}
158+
153159
virtual idlist_t get_rels_by_way(osmid_t) { return {}; }
154160

155161
virtual std::shared_ptr<middle_query_t> get_query_instance() = 0;

src/osmdata.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ void osmdata_t::after_nodes()
7878
{
7979
m_mid->after_nodes();
8080
m_output->after_nodes();
81+
if (m_append) {
82+
m_dependency_manager->after_nodes();
83+
}
8184
}
8285

8386
void osmdata_t::way(osmium::Way &way)

0 commit comments

Comments
 (0)