Skip to content

Commit 001137d

Browse files
authored
Better progress bar and decreased disk IO during dump phase (ad-freiburg#115)
* Output a "state" postfix at the end of the progress bar during Phase 2 (dump): either N (node parsing), W (way parsing), or R (relation parsing).
* Prefix our progress bar with a timestamp.
* Fix the percentage output: the max value was computed incorrectly and was, for some osm2rdf configurations, off by a factor of 2.
* Update libspatialjoin, which now sets a 4 MB write buffer in the GeometryCache. The default seems to be 8 KB (cat /usr/include/stdio.h | grep -i bufsiz). Reasoning: during a multithreaded Phase 2, many threads are doing concurrent random reads on the disk. Previously, the GeometryCache (which is also filled by these threads) was also writing 8 KB blocks concurrently.
1 parent 8c5a09c commit 001137d

File tree

11 files changed

+180
-146
lines changed

11 files changed

+180
-146
lines changed

include/osm2rdf/config/Constants.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ const static inline std::string SECTION_IO =
3838
SECTION_MARKER + " I/O " + SECTION_MARKER;
3939
const static inline std::string SECTION_FACTS =
4040
SECTION_MARKER + " Facts " + SECTION_MARKER;
41-
const static inline std::string SECTION_CONTAINS =
42-
SECTION_MARKER + " Contains " + SECTION_MARKER;
41+
const static inline std::string SECTION_SPATIAL_RELATION_TRIPLES =
42+
SECTION_MARKER + " Spatial relations " + SECTION_MARKER;
4343
const static inline std::string SECTION_MISCELLANEOUS =
4444
SECTION_MARKER + " Miscellaneous " + SECTION_MARKER;
4545
const static inline std::string SECTION_OPENMP =

include/osm2rdf/util/ProgressBar.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ static const int k100Percent = 100;
2323
static const int kTerminalWidth = 80;
2424
#include <cstdio>
2525
#include <ctime>
26+
#include <string>
2627

2728
namespace osm2rdf::util {
2829

@@ -34,6 +35,10 @@ class ProgressBar {
3435
ProgressBar() = default;
3536
// Updates the progress bar.
3637
void update(std::size_t count);
38+
39+
// Updates the progress bar, sets a phase.
40+
void update(std::size_t count, char phase);
41+
3742
// Marks progress bar as done (calling update with _maxValue).
3843
void done();
3944

@@ -55,6 +60,8 @@ class ProgressBar {
5560
std::time_t _last;
5661
// Print to std::cerr or not.
5762
bool _show = false;
63+
64+
char _phase = 0;
5865
};
5966

6067
} // namespace osm2rdf::util

src/config/Config.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
// You should have received a copy of the GNU General Public License
1818
// along with osm2rdf. If not, see <https://www.gnu.org/licenses/>.
1919

20+
#include "osm2rdf/config/Config.h"
21+
2022
#include <filesystem>
2123
#include <iostream>
2224
#include <string>
2325

24-
#include "osm2rdf/config/Config.h"
25-
2626
#if defined(_OPENMP)
2727
#include "omp.h"
2828
#endif
@@ -134,7 +134,8 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const {
134134
}
135135
}
136136
}
137-
oss << "\n" << prefix << osm2rdf::config::constants::SECTION_CONTAINS;
137+
oss << "\n"
138+
<< prefix << osm2rdf::config::constants::SECTION_SPATIAL_RELATION_TRIPLES;
138139
std::string modeStrings[2] = {"none", "full"};
139140

140141
oss << "\n"
@@ -242,11 +243,10 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
242243
osm2rdf::config::constants::NO_WAY_FACTS_OPTION_LONG,
243244
osm2rdf::config::constants::NO_WAY_FACTS_OPTION_HELP);
244245

245-
auto addZeroFactNumberOp =
246-
parser.add<popl::Switch, popl::Attribute::expert>(
247-
osm2rdf::config::constants::ADD_ZERO_FACT_NUMBER_OPTION_SHORT,
248-
osm2rdf::config::constants::ADD_ZERO_FACT_NUMBER_OPTION_LONG,
249-
osm2rdf::config::constants::ADD_ZERO_FACT_NUMBER_OPTION_HELP);
246+
auto addZeroFactNumberOp = parser.add<popl::Switch, popl::Attribute::expert>(
247+
osm2rdf::config::constants::ADD_ZERO_FACT_NUMBER_OPTION_SHORT,
248+
osm2rdf::config::constants::ADD_ZERO_FACT_NUMBER_OPTION_LONG,
249+
osm2rdf::config::constants::ADD_ZERO_FACT_NUMBER_OPTION_HELP);
250250

251251
auto sourceDatasetOp =
252252
parser.add<popl::Value<std::string>, popl::Attribute::advanced>(
@@ -306,11 +306,11 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
306306
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_LONG,
307307
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_HELP);
308308

309-
auto untaggedNodesSpatialRelsOp = parser.add<popl::Switch,
310-
popl::Attribute::expert>(
311-
osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_SHORT,
312-
osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_LONG,
313-
osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_HELP);
309+
auto untaggedNodesSpatialRelsOp =
310+
parser.add<popl::Switch, popl::Attribute::expert>(
311+
osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_SHORT,
312+
osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_LONG,
313+
osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_HELP);
314314

315315
auto noUntaggedNodesOp = parser.add<popl::Switch, popl::Attribute::expert>(
316316
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_SHORT,

src/osm/GeometryHandler.cpp

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,12 @@ GeometryHandler<W>::GeometryHandler(const osm2rdf::config::Config& config,
8080
true,
8181
false,
8282
false,
83+
-1,
8384
[this](size_t t, const std::string& a, const std::string& b,
8485
const std::string& pred) { this->writeRelCb(t, a, b, pred); },
8586
{},
8687
{},
87-
[this](size_t progr) { this->progressCb(progr); }},
88+
[this](size_t progr) { this->progressCb(progr); }, {}},
8889
config.cache, ""),
8990
_parseBatches(config.numThreads) {}
9091

@@ -276,24 +277,15 @@ void GeometryHandler<W>::calculateRelations() {
276277
throw std::runtime_error("Could not read auxiliary geo file " + auxFile);
277278
}
278279

279-
::util::JobQueue<sj::ParseBatch> jobs(1000); // the WKT parse jobs
280-
std::vector<std::thread> thrds(_config.numThreads); // the parse workers
281-
for (size_t i = 0; i < thrds.size(); i++)
282-
thrds[i] = std::thread(&sj::processQueue, &jobs, i, &_sweeper);
280+
sj::WKTParser parser(&_sweeper, _config.numThreads);
283281

284282
ssize_t len;
285-
std::string dangling;
286-
size_t gid = 0;
287283

288284
while ((len = ::util::readAll(file, buf, CACHE_SIZE)) > 0) {
289-
sj::parse(reinterpret_cast<char*>(buf), len, dangling, &gid, jobs, 0);
285+
parser.parse(reinterpret_cast<char*>(buf), len, 0);
290286
}
291287

292-
// end event
293-
jobs.add({});
294-
295-
// wait for all parse workers to finish
296-
for (auto& thr : thrds) thr.join();
288+
parser.done();
297289

298290
delete[] buf;
299291
}

src/osm/OsmiumHandler.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -205,25 +205,26 @@ void osm2rdf::osm::OsmiumHandler<W>::node(const osmium::Node& node) {
205205
#pragma omp critical(progress)
206206
{
207207
_nodesDumped++;
208-
_progressBar.update(_numTasksDone++);
208+
_progressBar.update(_numTasksDone++, 'N');
209209
}
210210
}
211-
if (!_config.noGeometricRelations && !_config.noNodeGeometricRelations &&
212-
(!osmNode.tags().empty() || _config.addSpatialRelsForUntaggedNodes)) {
213-
_geometryHandler->node(osmNode);
211+
if (!_config.noGeometricRelations && !_config.noNodeGeometricRelations) {
212+
if (!osmNode.tags().empty() || _config.addSpatialRelsForUntaggedNodes) {
213+
_geometryHandler->node(osmNode);
214+
}
214215
#pragma omp critical(progress)
215216
{
216217
_nodeGeometriesHandled++;
217-
_progressBar.update(_numTasksDone++);
218+
_progressBar.update(_numTasksDone++, 'N');
218219
}
219220
}
220221
};
221222
} catch (const osmium::invalid_location& e) {
222223
if (!_config.noFacts && !_config.noNodeFacts) {
223-
_progressBar.update(_numTasksDone++);
224+
_progressBar.update(_numTasksDone++, 'N');
224225
}
225226
if (!_config.noGeometricRelations && !_config.noNodeGeometricRelations) {
226-
_progressBar.update(_numTasksDone++);
227+
_progressBar.update(_numTasksDone++, 'N');
227228
}
228229
return;
229230
}
@@ -254,7 +255,7 @@ void osm2rdf::osm::OsmiumHandler<W>::relation(
254255
#pragma omp critical(progress)
255256
{
256257
_relationsDumped++;
257-
_progressBar.update(_numTasksDone++);
258+
_progressBar.update(_numTasksDone++, 'R');
258259
}
259260
}
260261

@@ -266,18 +267,18 @@ void osm2rdf::osm::OsmiumHandler<W>::relation(
266267
if (osmRelation.isArea() || osmRelation.hasGeometry()) {
267268
_relationGeometriesHandled++;
268269
}
269-
_progressBar.update(_numTasksDone++);
270+
_progressBar.update(_numTasksDone++, 'R');
270271
}
271272
}
272273
}
273274
} catch (const osmium::invalid_location& e) {
274275
if (!_config.noFacts && !_config.noRelationFacts) {
275-
_progressBar.update(_numTasksDone++);
276+
_progressBar.update(_numTasksDone++, 'R');
276277
}
277278

278279
if (!_config.noGeometricRelations &&
279280
!_config.noRelationGeometricRelations) {
280-
_progressBar.update(_numTasksDone++);
281+
_progressBar.update(_numTasksDone++, 'R');
281282
}
282283
return;
283284
}
@@ -305,7 +306,7 @@ void osm2rdf::osm::OsmiumHandler<W>::way(const osmium::Way& way) {
305306
#pragma omp critical(progress)
306307
{
307308
_waysDumped++;
308-
_progressBar.update(_numTasksDone++);
309+
_progressBar.update(_numTasksDone++, 'W');
309310
}
310311
}
311312

@@ -314,16 +315,16 @@ void osm2rdf::osm::OsmiumHandler<W>::way(const osmium::Way& way) {
314315
#pragma omp critical(progress)
315316
{
316317
_wayGeometriesHandled++;
317-
_progressBar.update(_numTasksDone++);
318+
_progressBar.update(_numTasksDone++, 'W');
318319
}
319320
}
320321
}
321322
} catch (const osmium::invalid_location& e) {
322323
if (!_config.noFacts && !_config.noWayFacts) {
323-
_progressBar.update(_numTasksDone++);
324+
_progressBar.update(_numTasksDone++, 'W');
324325
}
325326
if (!_config.noGeometricRelations && !_config.noWayGeometricRelations) {
326-
_progressBar.update(_numTasksDone++);
327+
_progressBar.update(_numTasksDone++, 'W');
327328
}
328329
return;
329330
}

src/util/ProgressBar.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717
// along with osm2rdf. If not, see <https://www.gnu.org/licenses/>.
1818

1919
#include "osm2rdf/util/ProgressBar.h"
20+
#include "osm2rdf/util/Time.h"
2021

2122
#include <cassert>
2223
#include <chrono>
2324
#include <cmath>
2425
#include <cstdio>
2526
#include <iomanip>
2627
#include <iostream>
28+
#include <string>
2729

2830
// ____________________________________________________________________________
2931
osm2rdf::util::ProgressBar::ProgressBar(std::size_t maxValue, bool show)
@@ -36,7 +38,13 @@ osm2rdf::util::ProgressBar::ProgressBar(std::size_t maxValue, bool show)
3638
if (maxValue == 0) {
3739
_countWidth = 1;
3840
}
39-
_width = kTerminalWidth - _countWidth * 2 - 4 - 5 - 2;
41+
_width = kTerminalWidth - _countWidth * 2 - 4 - 5 - 2 - 4 - 20;
42+
}
43+
44+
// ____________________________________________________________________________
45+
void osm2rdf::util::ProgressBar::update(std::size_t count, char phase) {
46+
_phase = phase;
47+
update(count);
4048
}
4149

4250
// ____________________________________________________________________________
@@ -59,6 +67,10 @@ void osm2rdf::util::ProgressBar::update(std::size_t count) {
5967
// Store new values.
6068
_percent = percent;
6169
_oldValue = count;
70+
71+
// Add time
72+
std::cerr << osm2rdf::util::currentTimeFormatted();
73+
6274
// Open progress bar part with [ ...
6375
std::cerr << '[';
6476
// ... add = to indicate done parts ...
@@ -79,12 +91,19 @@ void osm2rdf::util::ProgressBar::update(std::size_t count) {
7991
// Add percentage display %
8092
std::cerr << ' ' << std::setw(3) << std::right << percent << "%";
8193

94+
// Update last update time
95+
_last = std::time(nullptr);
96+
8297
// Add absolute progress [x/y]
8398
std::cerr << " [" << std::setw(_countWidth) << std::right << count << "/"
84-
<< _maxValue << "]\r";
99+
<< _maxValue << "]";
100+
101+
// Add phase
102+
if (_phase) std::cerr << " [" << _phase << "]";
103+
else std::cerr << " ";
104+
105+
std::cerr << "\r";
85106

86-
// Update last update time
87-
_last = std::time(nullptr);
88107
}
89108

90109
// ____________________________________________________________________________

tests/config/Config.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,8 @@ TEST(CONFIG_Config, fromArgsStoreLocationsLongSparse) {
339339
assertDefaultConfig(config);
340340
osm2rdf::util::CacheFile cf("/tmp/dummyInput");
341341

342-
const auto arg = "--" +
343-
osm2rdf::config::constants::STORE_LOCATIONS_LONG +
344-
"=sparse";
342+
const auto arg =
343+
"--" + osm2rdf::config::constants::STORE_LOCATIONS_LONG + "=sparse";
345344
const int argc = 3;
346345
char* argv[argc] = {const_cast<char*>(""), const_cast<char*>(arg.c_str()),
347346
const_cast<char*>("/tmp/dummyInput")};
@@ -356,9 +355,8 @@ TEST(CONFIG_Config, fromArgsStoreLocationsLongDense) {
356355
assertDefaultConfig(config);
357356
osm2rdf::util::CacheFile cf("/tmp/dummyInput");
358357

359-
const auto arg = "--" +
360-
osm2rdf::config::constants::STORE_LOCATIONS_LONG +
361-
"=dense";
358+
const auto arg =
359+
"--" + osm2rdf::config::constants::STORE_LOCATIONS_LONG + "=dense";
362360
const int argc = 3;
363361
char* argv[argc] = {const_cast<char*>(""), const_cast<char*>(arg.c_str()),
364362
const_cast<char*>("/tmp/dummyInput")};
@@ -776,7 +774,8 @@ TEST(CONFIG_Config, getInfoHasSections) {
776774
ASSERT_THAT(res,
777775
::testing::HasSubstr(osm2rdf::config::constants::SECTION_FACTS));
778776
ASSERT_THAT(
779-
res, ::testing::HasSubstr(osm2rdf::config::constants::SECTION_CONTAINS));
777+
res, ::testing::HasSubstr(
778+
osm2rdf::config::constants::SECTION_SPATIAL_RELATION_TRIPLES));
780779
ASSERT_THAT(res, ::testing::HasSubstr(
781780
osm2rdf::config::constants::SECTION_MISCELLANEOUS));
782781
}

0 commit comments

Comments
 (0)