Skip to content

Commit c885922

Browse files
committed
Append mode: Do not try to delete objects that can't exist in middle
When osm2pgsql runs in append mode, it first deletes from the middle tables every object for which it receives a new version, and only then adds the new version. For a typical diff, many of these deletes are unnecessary because the objects are new. With this commit the behaviour changes slightly: we first get the maximum id from each of the nodes/ways/relations middle tables. This operation is fast, because PostgreSQL can answer max() directly from the btree index on those tables. Later, before we delete an object, we check its id against that maximum id; if the id is larger, the object can't be in the table and we skip the delete. (Note that in theory we could use the fact that an object has version number 1 to figure out that it must be new. But this is much less robust than what we are doing here, for instance when the diff overlaps with the original import.)
1 parent 3f927c0 commit c885922

File tree

2 files changed

+39
-5
lines changed

2 files changed

+39
-5
lines changed

src/middle-pgsql.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,18 @@ void middle_pgsql_t::table_desc::build_index(std::string const &conninfo) const
178178
}
179179
}
180180

181+
void middle_pgsql_t::table_desc::init_max_id(pg_conn_t const &db_connection)
182+
{
183+
auto const qual_name = qualified_name(schema(), name());
184+
auto const res = db_connection.exec("SELECT max(id) FROM {}", qual_name);
185+
186+
if (res.is_null(0, 0)) {
187+
return;
188+
}
189+
190+
m_max_id = osmium::string_to_object_id(res.get_value(0, 0));
191+
}
192+
181193
/**
182194
* Decode item in an array literal from PostgreSQL to the next delimiter.
183195
*
@@ -790,7 +802,7 @@ void middle_pgsql_t::node_delete(osmid_t osm_id)
790802
m_persistent_cache->set(osm_id, osmium::Location{});
791803
}
792804

793-
if (m_store_options.nodes) {
805+
if (m_store_options.nodes && osm_id <= m_tables.nodes().max_id()) {
794806
m_db_copy.new_line(m_tables.nodes().copy_target());
795807
m_db_copy.delete_object(osm_id);
796808
}
@@ -1049,8 +1061,11 @@ middle_query_pgsql_t::rel_members_get(osmium::Relation const &rel,
10491061
void middle_pgsql_t::way_delete(osmid_t osm_id)
10501062
{
10511063
assert(m_options->append);
1052-
m_db_copy.new_line(m_tables.ways().copy_target());
1053-
m_db_copy.delete_object(osm_id);
1064+
1065+
if (osm_id <= m_tables.ways().max_id()) {
1066+
m_db_copy.new_line(m_tables.ways().copy_target());
1067+
m_db_copy.delete_object(osm_id);
1068+
}
10541069
}
10551070

10561071
void middle_pgsql_t::relation_set_format1(osmium::Relation const &rel)
@@ -1189,8 +1204,10 @@ void middle_pgsql_t::relation_delete(osmid_t osm_id)
11891204
{
11901205
assert(m_options->append);
11911206

1192-
m_db_copy.new_line(m_tables.relations().copy_target());
1193-
m_db_copy.delete_object(osm_id);
1207+
if (osm_id <= m_tables.relations().max_id()) {
1208+
m_db_copy.new_line(m_tables.relations().copy_target());
1209+
m_db_copy.delete_object(osm_id);
1210+
}
11941211
}
11951212

11961213
void middle_pgsql_t::after_nodes()
@@ -1283,6 +1300,16 @@ void middle_pgsql_t::start()
12831300
// problems when accessing the intarrays.
12841301
m_db_connection.set_config("jit_above_cost", "-1");
12851302
m_db_connection.set_config("max_parallel_workers_per_gather", "0");
1303+
1304+
// Remember the maximum OSM ids in the middle tables. This is a very
1305+
// fast operation due to the index on the table. Later when we need
1306+
// to delete entries, we don't have to bother with entries that are
1307+
// definitely not in the table.
1308+
if (m_store_options.nodes) {
1309+
m_tables.nodes().init_max_id(m_db_connection);
1310+
}
1311+
m_tables.ways().init_max_id(m_db_connection);
1312+
m_tables.relations().init_max_id(m_db_connection);
12861313
} else {
12871314
if (m_store_options.db_format == 2) {
12881315
table_setup(m_db_connection, m_users_table);

src/middle-pgsql.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,16 @@ struct middle_pgsql_t : public middle_t
160160

161161
std::chrono::microseconds task_wait() { return m_task_result.wait(); }
162162

163+
void init_max_id(pg_conn_t const &db_connection);
164+
165+
osmid_t max_id() const noexcept { return m_max_id; }
166+
163167
private:
164168
std::shared_ptr<db_target_descr_t> m_copy_target;
165169
task_result_t m_task_result;
170+
171+
/// The maximum id in the table (used only in append mode)
172+
osmid_t m_max_id = 0;
166173
};
167174

168175
std::shared_ptr<middle_query_t> get_query_instance() override;

0 commit comments

Comments
 (0)