Skip to content

Commit e2e398e

Browse files
oxidase authored and TheMarex committed
Remove STXXL from OSM parsing and enable in CMake by default
1 parent b873661 commit e2e398e

File tree

3 files changed

+26
-109
lines changed

3 files changed

+26
-109
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ option(BUILD_PACKAGE "Build OSRM package" OFF)
2121
option(ENABLE_ASSERTIONS "Use assertions in release mode" OFF)
2222
option(ENABLE_COVERAGE "Build with coverage instrumentalisation" OFF)
2323
option(ENABLE_SANITIZER "Use memory sanitizer for Debug build" OFF)
24-
option(ENABLE_STXXL "Use STXXL library" OFF)
24+
option(ENABLE_STXXL "Use STXXL library" ON)
2525
option(ENABLE_LTO "Use LTO if available" OFF)
2626
option(ENABLE_FUZZING "Fuzz testing using LLVM's libFuzzer" OFF)
2727
option(ENABLE_GOLD_LINKER "Use GNU gold linker if available" ON)

include/extractor/extraction_containers.hpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,19 @@
1010

1111
#include "storage/io.hpp"
1212

13-
#include <cstdint>
14-
#include <unordered_map>
15-
16-
#if USE_STXXL_LIBRARY
17-
#include <stxxl/vector>
18-
#endif
19-
2013
namespace osrm
2114
{
2215
namespace extractor
2316
{
2417

2518
/**
26-
* Uses external memory containers from stxxl to store all the data that
19+
* Uses memory containers to store all the data that
2720
* is collected by the extractor callbacks.
2821
*
2922
* The data is then filtered, aggregated and finally written to disk.
3023
*/
3124
class ExtractionContainers
3225
{
33-
#if USE_STXXL_LIBRARY
34-
template <typename T> using ExternalVector = stxxl::vector<T>;
35-
#else
36-
template <typename T> using ExternalVector = std::vector<T>;
37-
#endif
38-
39-
void FlushVectors();
4026
void PrepareNodes();
4127
void PrepareRestrictions();
4228
void PrepareEdges(ScriptingEnvironment &scripting_environment);
@@ -47,13 +33,13 @@ class ExtractionContainers
4733
void WriteCharData(const std::string &file_name);
4834

4935
public:
50-
using NodeIDVector = ExternalVector<OSMNodeID>;
51-
using NodeVector = ExternalVector<QueryNode>;
52-
using EdgeVector = ExternalVector<InternalExtractorEdge>;
36+
using NodeIDVector = std::vector<OSMNodeID>;
37+
using NodeVector = std::vector<QueryNode>;
38+
using EdgeVector = std::vector<InternalExtractorEdge>;
5339
using RestrictionsVector = std::vector<InputRestrictionContainer>;
54-
using WayIDStartEndVector = ExternalVector<FirstAndLastSegmentOfWay>;
55-
using NameCharData = ExternalVector<unsigned char>;
56-
using NameOffsets = ExternalVector<unsigned>;
40+
using WayIDStartEndVector = std::vector<FirstAndLastSegmentOfWay>;
41+
using NameCharData = std::vector<unsigned char>;
42+
using NameOffsets = std::vector<unsigned>;
5743

5844
std::vector<OSMNodeID> barrier_nodes;
5945
std::vector<OSMNodeID> traffic_lights;

src/extractor/extraction_containers.cpp

Lines changed: 18 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -28,37 +28,17 @@
2828
#include <mutex>
2929
#include <sstream>
3030

31-
#if USE_STXXL_LIBRARY
32-
#include <stxxl/sort>
33-
#endif
34-
3531
namespace
3632
{
3733
namespace oe = osrm::extractor;
3834

39-
// Needed for STXXL comparison - STXXL requires max_value(), min_value(), so we can not use
40-
// std::less<OSMNodeId>{}. Anonymous namespace to keep translation unit local.
41-
struct OSMNodeIDLess
42-
{
43-
OSMNodeIDLess() {}
44-
45-
using value_type = OSMNodeID;
46-
bool operator()(const value_type left, const value_type right) const { return left < right; }
47-
48-
value_type max_value() { return MAX_OSM_NODEID; }
49-
value_type min_value() { return MIN_OSM_NODEID; }
50-
};
51-
5235
struct CmpEdgeByOSMStartID
5336
{
5437
using value_type = oe::InternalExtractorEdge;
5538
bool operator()(const value_type &lhs, const value_type &rhs) const
5639
{
5740
return lhs.result.osm_source_id < rhs.result.osm_source_id;
5841
}
59-
60-
value_type max_value() { return value_type::max_osm_value(); }
61-
value_type min_value() { return value_type::min_osm_value(); }
6242
};
6343

6444
struct CmpEdgeByOSMTargetID
@@ -68,9 +48,6 @@ struct CmpEdgeByOSMTargetID
6848
{
6949
return lhs.result.osm_target_id < rhs.result.osm_target_id;
7050
}
71-
72-
value_type max_value() { return value_type::max_osm_value(); }
73-
value_type min_value() { return value_type::min_osm_value(); }
7451
};
7552

7653
struct CmpEdgeByInternalSourceTargetAndName
@@ -99,7 +76,6 @@ struct CmpEdgeByInternalSourceTargetAndName
9976
if (rhs.result.name_id == EMPTY_NAMEID)
10077
return true;
10178

102-
std::lock_guard<std::mutex> lock(mutex);
10379
BOOST_ASSERT(!name_offsets.empty() && name_offsets.back() == name_data.size());
10480
const oe::ExtractionContainers::NameCharData::const_iterator data = name_data.begin();
10581
return std::lexicographical_compare(data + name_offsets[lhs.result.name_id],
@@ -108,37 +84,16 @@ struct CmpEdgeByInternalSourceTargetAndName
10884
data + name_offsets[rhs.result.name_id + 1]);
10985
}
11086

111-
value_type max_value() { return value_type::max_internal_value(); }
112-
value_type min_value() { return value_type::min_internal_value(); }
113-
114-
std::mutex &mutex;
11587
const oe::ExtractionContainers::NameCharData &name_data;
11688
const oe::ExtractionContainers::NameOffsets &name_offsets;
11789
};
11890

11991
template <typename Iter>
12092
inline NodeID mapExternalToInternalNodeID(Iter first, Iter last, const OSMNodeID value)
12193
{
122-
const OSMNodeIDLess compare;
123-
const auto it = std::lower_bound(first, last, value, compare);
124-
return (it == last || compare(value, *it)) ? SPECIAL_NODEID
125-
: static_cast<NodeID>(std::distance(first, it));
126-
}
127-
128-
template <typename T, typename Func> void sort_external_vector(T &vector, const Func &func)
129-
{
130-
#if USE_STXXL_LIBRARY
131-
#ifndef _MSC_VER
132-
constexpr static unsigned stxxl_memory =
133-
((sizeof(std::size_t) == 4) ? std::numeric_limits<int>::max()
134-
: std::numeric_limits<unsigned>::max());
135-
#else
136-
const static unsigned stxxl_memory = ((sizeof(std::size_t) == 4) ? INT_MAX : UINT_MAX);
137-
#endif
138-
stxxl::sort(vector.begin(), vector.end(), func, stxxl_memory);
139-
#else
140-
tbb::parallel_sort(vector.begin(), vector.end(), func);
141-
#endif
94+
const auto it = std::lower_bound(first, last, value);
95+
return (it == last || value < *it) ? SPECIAL_NODEID
96+
: static_cast<NodeID>(std::distance(first, it));
14297
}
14398
}
14499

@@ -149,11 +104,6 @@ namespace extractor
149104

150105
ExtractionContainers::ExtractionContainers()
151106
{
152-
#if USE_STXXL_LIBRARY
153-
// Check if stxxl can be instantiated
154-
stxxl::vector<unsigned> dummy_vector;
155-
#endif
156-
157107
// Insert five empty string offsets for name, ref, destination, pronunciation, and exits
158108
name_offsets.push_back(0);
159109
name_offsets.push_back(0);
@@ -164,18 +114,6 @@ ExtractionContainers::ExtractionContainers()
164114
name_offsets.push_back(0);
165115
}
166116

167-
void ExtractionContainers::FlushVectors()
168-
{
169-
#if USE_STXXL_LIBRARY
170-
used_node_id_list.flush();
171-
all_nodes_list.flush();
172-
all_edges_list.flush();
173-
name_char_data.flush();
174-
name_offsets.flush();
175-
way_start_end_id_list.flush();
176-
#endif
177-
}
178-
179117
/**
180118
* Processes the collected data and serializes it.
181119
* At this point nodes are still referenced by their OSM id.
@@ -194,11 +132,11 @@ void ExtractionContainers::PrepareData(ScriptingEnvironment &scripting_environme
194132
storage::io::FileWriter file_out(output_file_name,
195133
storage::io::FileWriter::GenerateFingerprint);
196134

197-
FlushVectors();
198-
199135
PrepareNodes();
200136
WriteNodes(file_out);
201137
PrepareEdges(scripting_environment);
138+
all_nodes_list.clear(); // free all_nodes_list before allocation of normal_edges
139+
all_nodes_list.shrink_to_fit();
202140
WriteEdges(file_out);
203141

204142
PrepareRestrictions();
@@ -226,7 +164,7 @@ void ExtractionContainers::PrepareNodes()
226164
util::UnbufferedLog log;
227165
log << "Sorting used nodes ... " << std::flush;
228166
TIMER_START(sorting_used_nodes);
229-
sort_external_vector(used_node_id_list, OSMNodeIDLess());
167+
tbb::parallel_sort(used_node_id_list.begin(), used_node_id_list.end());
230168
TIMER_STOP(sorting_used_nodes);
231169
log << "ok, after " << TIMER_SEC(sorting_used_nodes) << "s";
232170
}
@@ -242,22 +180,13 @@ void ExtractionContainers::PrepareNodes()
242180
}
243181

244182
{
245-
struct QueryNodeCompare
246-
{
247-
using value_type = QueryNode;
248-
value_type max_value() { return value_type::max_value(); }
249-
value_type min_value() { return value_type::min_value(); }
250-
251-
bool operator()(const value_type &left, const value_type &right) const
252-
{
253-
return left.node_id < right.node_id;
254-
}
255-
};
256-
257183
util::UnbufferedLog log;
258184
log << "Sorting all nodes ... " << std::flush;
259185
TIMER_START(sorting_nodes);
260-
sort_external_vector(all_nodes_list, QueryNodeCompare());
186+
tbb::parallel_sort(
187+
all_nodes_list.begin(), all_nodes_list.end(), [](const auto &left, const auto &right) {
188+
return left.node_id < right.node_id;
189+
});
261190
TIMER_STOP(sorting_nodes);
262191
log << "ok, after " << TIMER_SEC(sorting_nodes) << "s";
263192
}
@@ -313,7 +242,7 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
313242
util::UnbufferedLog log;
314243
log << "Sorting edges by start ... " << std::flush;
315244
TIMER_START(sort_edges_by_start);
316-
sort_external_vector(all_edges_list, CmpEdgeByOSMStartID());
245+
tbb::parallel_sort(all_edges_list.begin(), all_edges_list.end(), CmpEdgeByOSMStartID());
317246
TIMER_STOP(sort_edges_by_start);
318247
log << "ok, after " << TIMER_SEC(sort_edges_by_start) << "s";
319248
}
@@ -384,7 +313,7 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
384313
util::UnbufferedLog log;
385314
log << "Sorting edges by target ... " << std::flush;
386315
TIMER_START(sort_edges_by_target);
387-
sort_external_vector(all_edges_list, CmpEdgeByOSMTargetID());
316+
tbb::parallel_sort(all_edges_list.begin(), all_edges_list.end(), CmpEdgeByOSMTargetID());
388317
TIMER_STOP(sort_edges_by_target);
389318
log << "ok, after " << TIMER_SEC(sort_edges_by_target) << "s";
390319
}
@@ -487,9 +416,9 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
487416
log << "Sorting edges by renumbered start ... ";
488417
TIMER_START(sort_edges_by_renumbered_start);
489418
std::mutex name_data_mutex;
490-
sort_external_vector(
491-
all_edges_list,
492-
CmpEdgeByInternalSourceTargetAndName{name_data_mutex, name_char_data, name_offsets});
419+
tbb::parallel_sort(all_edges_list.begin(),
420+
all_edges_list.end(),
421+
CmpEdgeByInternalSourceTargetAndName{name_char_data, name_offsets});
493422
TIMER_STOP(sort_edges_by_renumbered_start);
494423
log << "ok, after " << TIMER_SEC(sort_edges_by_renumbered_start) << "s";
495424
}
@@ -749,7 +678,9 @@ void ExtractionContainers::PrepareRestrictions()
749678
util::UnbufferedLog log;
750679
log << "Sorting used ways ... ";
751680
TIMER_START(sort_ways);
752-
sort_external_vector(way_start_end_id_list, FirstAndLastSegmentOfWayCompare());
681+
tbb::parallel_sort(way_start_end_id_list.begin(),
682+
way_start_end_id_list.end(),
683+
FirstAndLastSegmentOfWayCompare());
753684
TIMER_STOP(sort_ways);
754685
log << "ok, after " << TIMER_SEC(sort_ways) << "s";
755686
}

0 commit comments

Comments
 (0)