Skip to content

Commit a0bd896

Browse files
committed
convert output-gazetteer to use db copy manager
1 parent c437d96 commit a0bd896

File tree

5 files changed

+223
-288
lines changed

5 files changed

+223
-288
lines changed

db-copy.hpp

Lines changed: 71 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ struct db_target_descr_t
3333
{
3434
return (this == &other) || (name == other.name && rows == other.rows);
3535
}
36+
37+
db_target_descr_t() = default;
38+
db_target_descr_t(char const *n, char const *i, char const *r = "")
39+
: name(n), rows(r), id(i)
40+
{
41+
}
3642
};
3743

3844
/**
@@ -200,29 +206,7 @@ class db_copy_mgr_t
200206
{
201207
assert(m_current);
202208
m_current->buffer += '"';
203-
for (char const *c = s; *c; ++c) {
204-
switch (*c) {
205-
case '"':
206-
m_current->buffer += "\\\\\"";
207-
break;
208-
case '\\':
209-
m_current->buffer += "\\\\\\\\";
210-
break;
211-
case '\n':
212-
m_current->buffer += "\\n";
213-
break;
214-
case '\r':
215-
m_current->buffer += "\\r";
216-
break;
217-
case '\t':
218-
m_current->buffer += "\\t";
219-
break;
220-
default:
221-
m_current->buffer += *c;
222-
break;
223-
}
224-
}
225-
209+
add_escaped_string(s);
226210
m_current->buffer += "\",";
227211
}
228212

@@ -237,6 +221,44 @@ class db_copy_mgr_t
237221
m_current->buffer += '\t';
238222
}
239223

224+
/// Begin a hash column. Nothing is written to the buffer at this point.
void new_hash() {}
226+
227+
/// Append one key/value pair given as std::string objects by forwarding
/// their C strings to the `char const *` overload.
void add_hash_elem(std::string const &k, std::string const &v)
{
    char const *const key = k.c_str();
    char const *const val = v.c_str();
    add_hash_elem(key, val);
}
231+
232+
void add_hash_elem(char const *k, char const *v)
233+
{
234+
m_current->buffer += '"';
235+
add_escaped_string(k);
236+
m_current->buffer += "\"=>\"";
237+
add_escaped_string(v);
238+
m_current->buffer += "\",";
239+
}
240+
241+
void finish_hash()
242+
{
243+
auto idx = m_current->buffer.size() - 1;
244+
if (!m_current->buffer.empty() && m_current->buffer[idx] == ',') {
245+
m_current->buffer[idx] = '\t';
246+
} else {
247+
m_current->buffer += '\t';
248+
}
249+
}
250+
251+
void add_hex_geom(std::string const &wkb)
252+
{
253+
char const *lookup_hex = "0123456789ABCDEF";
254+
255+
for (char c : wkb) {
256+
m_current->buffer += lookup_hex[(c >> 4) & 0xf];
257+
m_current->buffer += lookup_hex[c & 0xf];
258+
}
259+
m_current->buffer += '\t';
260+
}
261+
240262
/**
241263
* Mark an OSM object for deletion in the current table.
242264
*
@@ -256,7 +278,6 @@ class db_copy_mgr_t
256278
template <typename T>
257279
void add_value(T value)
258280
{
259-
assert(m_current);
260281
m_current->buffer += std::to_string(value);
261282
}
262283

@@ -289,6 +310,32 @@ class db_copy_mgr_t
289310
}
290311
}
291312

313+
void add_escaped_string(char const *s)
314+
{
315+
for (char const *c = s; *c; ++c) {
316+
switch (*c) {
317+
case '"':
318+
m_current->buffer += "\\\\\"";
319+
break;
320+
case '\\':
321+
m_current->buffer += "\\\\\\\\";
322+
break;
323+
case '\n':
324+
m_current->buffer += "\\n";
325+
break;
326+
case '\r':
327+
m_current->buffer += "\\r";
328+
break;
329+
case '\t':
330+
m_current->buffer += "\\t";
331+
break;
332+
default:
333+
m_current->buffer += *c;
334+
break;
335+
}
336+
}
337+
}
338+
292339
std::shared_ptr<db_copy_thread_t> m_processor;
293340
std::unique_ptr<db_cmd_copy_t> m_current;
294341
unsigned m_last_line;

gazetteer-style.cpp

Lines changed: 54 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -13,65 +13,30 @@ enum : int
1313
MAX_ADMINLEVEL = 15
1414
};
1515

16-
namespace {
17-
void escape_array_record(char const *in, std::string &out)
16+
static std::vector<osmium::Tag const *>
17+
domain_names(char const *cls, osmium::TagList const &tags)
1818
{
19-
for (char const *c = in; *c; ++c) {
20-
switch (*c) {
21-
case '\\':
22-
// Tripple escaping required: string escaping leaves us
23-
// with 4 backslashes, COPY then reduces it to two, which
24-
// are then interpreted as a single backslash by the hash
25-
// parsing code.
26-
out += "\\\\\\\\";
27-
break;
28-
case '\n':
29-
case '\r':
30-
case '\t':
31-
case '"':
32-
/* This is a bit naughty - we know that nominatim ignored these characters so just drop them now for simplicity */
33-
out += ' ';
34-
break;
35-
default:
36-
out += *c;
37-
break;
38-
}
39-
}
40-
}
41-
42-
std::string domain_name(char const *cls, osmium::TagList const &tags)
43-
{
44-
std::string ret;
45-
bool hasname = false;
19+
std::vector<osmium::Tag const *> ret;
4620

47-
std::string prefix(cls);
48-
auto clen = prefix.length() + 1;
49-
prefix += ":name";
21+
std::string const prefix = cls + std::string(":name");
5022
auto plen = prefix.length();
5123

5224
for (auto const &item : tags) {
5325
char const *k = item.key();
5426
if (prefix.compare(0, plen, k) == 0 &&
5527
(k[plen] == '\0' || k[plen] == ':')) {
56-
if (!hasname) {
57-
hasname = true;
58-
} else {
59-
ret += ",";
60-
}
61-
ret += "\"";
62-
escape_array_record(k + clen, ret);
63-
ret += "\"=>\"";
64-
escape_array_record(item.value(), ret);
65-
ret += "\"";
28+
ret.push_back(&item);
6629
}
6730
}
6831

6932
return ret;
7033
}
71-
}
7234

7335
namespace pt = boost::property_tree;
7436

37+
static auto place_table =
38+
std::make_shared<db_target_descr_t>("place", "place_id");
39+
7540
void gazetteer_style_t::clear()
7641
{
7742
m_main.clear();
@@ -387,7 +352,7 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
387352
}
388353

389354
void gazetteer_style_t::copy_out(osmium::OSMObject const &o,
390-
std::string const &geom, std::string &buffer)
355+
std::string const &geom, db_copy_mgr_t &buffer)
391356
{
392357
bool any = false;
393358
for (auto const &main : m_main) {
@@ -409,104 +374,101 @@ void gazetteer_style_t::copy_out(osmium::OSMObject const &o,
409374
bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag,
410375
osmium::OSMObject const &o,
411376
std::string const &geom,
412-
std::string &buffer)
377+
db_copy_mgr_t &buffer)
413378
{
414-
std::string name;
379+
std::vector<osmium::Tag const *> domain_name;
415380
if (std::get<2>(tag) & SF_MAIN_NAMED_KEY) {
416-
name = domain_name(std::get<0>(tag), o.tags());
417-
if (name.empty())
381+
domain_name = domain_names(std::get<0>(tag), o.tags());
382+
if (domain_name.empty())
418383
return false;
419384
}
420385

421386
if (std::get<2>(tag) & SF_MAIN_NAMED) {
422-
if (name.empty() && !m_is_named) {
387+
if (domain_name.empty() && !m_is_named) {
423388
return false;
424389
}
425390
}
426391

427-
// osm_type
428-
buffer += (char)toupper(osmium::item_type_to_char(o.type()));
429-
buffer += '\t';
392+
buffer.new_line(place_table);
430393
// osm_id
431-
buffer += (m_single_fmt % o.id()).str();
394+
buffer.add_column(o.id());
395+
// osm_type
396+
char const osm_type[2] = { (char)toupper(osmium::item_type_to_char(o.type())), '\0'};
397+
buffer.add_column(osm_type);
432398
// class
433-
escape(std::get<0>(tag), buffer);
434-
buffer += '\t';
399+
buffer.add_column(std::get<0>(tag));
435400
// type
436-
escape(std::get<1>(tag), buffer);
437-
buffer += '\t';
401+
buffer.add_column(std::get<1>(tag));
438402
// names
439-
if (!name.empty()) {
440-
buffer += name;
441-
buffer += '\t';
403+
if (!domain_name.empty()) {
404+
auto prefix_len = strlen(std::get<0>(tag)) + 1; // class name and ':'
405+
buffer.new_hash();
406+
for (auto *t : domain_name) {
407+
buffer.add_hash_elem(t->key() + prefix_len, t->value());
408+
}
409+
buffer.finish_hash();
442410
} else {
443411
bool first = true;
444412
// operator will be ignored on anything but these classes
445413
if (m_operator && (std::get<2>(tag) & SF_MAIN_OPERATOR)) {
446-
buffer += "\"operator\"=>\"";
447-
escape_array_record(m_operator, buffer);
448-
buffer += "\"";
414+
buffer.new_hash();
415+
buffer.add_hash_elem("operator", m_operator);
449416
first = false;
450417
}
451418
for (auto const &entry : m_names) {
452419
if (first) {
420+
buffer.new_hash();
453421
first = false;
454-
} else {
455-
buffer += ',';
456422
}
457423

458-
buffer += "\"";
459-
escape_array_record(entry.first, buffer);
460-
buffer += "\"=>\"";
461-
escape_array_record(entry.second, buffer);
462-
buffer += "\"";
424+
buffer.add_hash_elem(entry.first, entry.second);
463425
}
464426

465-
buffer += first ? "\\N\t" : "\t";
427+
if (first) {
428+
buffer.add_null_column();
429+
} else {
430+
buffer.finish_hash();
431+
}
466432
}
467433
// admin_level
468-
buffer += (m_single_fmt % m_admin_level).str();
434+
buffer.add_column(m_admin_level);
469435
// address
470436
if (m_address.empty()) {
471-
buffer += "\\N\t";
437+
buffer.add_null_column();
472438
} else {
439+
buffer.new_hash();
473440
for (auto const &a : m_address) {
474-
buffer += "\"";
475-
escape_array_record(a.first, buffer);
476-
buffer += "\"=>\"";
477441
if (strcmp(a.first, "tiger:county") == 0) {
442+
std::string term;
478443
auto *end = strchr(a.second, ',');
479444
if (end) {
480445
auto len = (std::string::size_type)(end - a.second);
481-
escape_array_record(std::string(a.second, len).c_str(),
482-
buffer);
446+
term = std::string(a.second, len);
483447
} else {
484-
escape_array_record(a.second, buffer);
448+
term = a.second;
485449
}
486-
buffer += " county";
450+
term += " county";
451+
buffer.add_hash_elem(a.first, term);
487452
} else {
488-
escape_array_record(a.second, buffer);
453+
buffer.add_hash_elem(a.first, a.second);
489454
}
490-
buffer += "\",";
491455
}
492-
buffer[buffer.length() - 1] = '\t';
456+
buffer.finish_hash();
493457
}
494458
// extra tags
495459
if (m_extra.empty()) {
496-
buffer += "\\N\t";
460+
buffer.add_null_column();
497461
} else {
462+
buffer.new_hash();
498463
for (auto const &entry : m_extra) {
499-
buffer += "\"";
500-
escape_array_record(entry.first, buffer);
501-
buffer += "\"=>\"";
502-
escape_array_record(entry.second, buffer);
503-
buffer += "\",";
464+
buffer.add_hash_elem(entry.first, entry.second);
504465
}
505-
buffer[buffer.length() - 1] = '\t';
466+
buffer.finish_hash();
506467
}
507468
// add the geometry - encoding it to hex along the way
508-
ewkb::writer_t::write_as_hex(buffer, geom);
509-
buffer += '\n';
469+
buffer.add_hex_geom(geom);
470+
471+
buffer.finish_line();
510472

511473
return true;
512474
}

gazetteer-style.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include <boost/format.hpp>
1010

11+
#include "db-copy.hpp"
12+
1113
class gazetteer_style_t
1214
{
1315
using flag_t = uint16_t;
@@ -59,7 +61,7 @@ class gazetteer_style_t
5961
void load_style(std::string const &filename);
6062
void process_tags(osmium::OSMObject const &o);
6163
void copy_out(osmium::OSMObject const &o, std::string const &geom,
62-
std::string &buffer);
64+
db_copy_mgr_t &buffer);
6365
bool has_place(std::string const &cls) const;
6466

6567
bool has_data() const { return !m_main.empty(); }
@@ -70,7 +72,7 @@ class gazetteer_style_t
7072
flag_t parse_flags(std::string const &str);
7173
flag_t find_flag(char const *k, char const *v) const;
7274
bool copy_out_maintag(pmaintag_t const &tag, osmium::OSMObject const &o,
73-
std::string const &geom, std::string &buffer);
75+
std::string const &geom, db_copy_mgr_t &buffer);
7476
void clear();
7577

7678
// Style data.
@@ -94,8 +96,6 @@ class gazetteer_style_t
9496
int m_admin_level;
9597
/// True if there is an actual name to the object (not a ref).
9698
bool m_is_named;
97-
98-
boost::format m_single_fmt{"%1%\t"};
9999
};
100100

101101
#endif

0 commit comments

Comments
 (0)