diff --git a/include/osm2rdf/config/Config.h b/include/osm2rdf/config/Config.h index b580e0ad..2131eb24 100644 --- a/include/osm2rdf/config/Config.h +++ b/include/osm2rdf/config/Config.h @@ -85,6 +85,8 @@ struct Config { std::string iriPrefixForUntaggedNodes = osm2rdf::ttl::constants::IRI_PREFIX__OSM_NODE_UNTAGGED; + bool noBlankNodes = false; + int numThreads = std::thread::hardware_concurrency(); // Default settings for data diff --git a/include/osm2rdf/config/Constants.h b/include/osm2rdf/config/Constants.h index 4927f3ad..59553bf1 100644 --- a/include/osm2rdf/config/Constants.h +++ b/include/osm2rdf/config/Constants.h @@ -382,6 +382,13 @@ const static inline std::string WKT_PRECISION_OPTION_LONG = "wkt-precision"; const static inline std::string WKT_PRECISION_OPTION_HELP = "Precision (number of decimal digits) for WKT coordinates"; +const static inline std::string BLANK_NODES_INFO = + "Blank nodes are masked"; +const static inline std::string BLANK_NODES_OPTION_SHORT = ""; +const static inline std::string BLANK_NODES_OPTION_LONG = "no-blank-nodes"; +const static inline std::string BLANK_NODES_OPTION_HELP = + "Avoid blank nodes by using a unique identifier for each member"; + const static inline std::string WRITE_RDF_STATISTICS_INFO = "Storing RDF statistics as .stats.json"; const static inline std::string WRITE_RDF_STATISTICS_OPTION_SHORT = ""; diff --git a/include/osm2rdf/ttl/Constants.h b/include/osm2rdf/ttl/Constants.h index b108ef06..496b281d 100644 --- a/include/osm2rdf/ttl/Constants.h +++ b/include/osm2rdf/ttl/Constants.h @@ -49,6 +49,7 @@ const static inline std::string NAMESPACE__OSM2RDF_TAG = "osm2rdfkey"; const static inline std::string NAMESPACE__RDF = "rdf"; const static inline std::string NAMESPACE__WIKIDATA_ENTITY = "wd"; const static inline std::string NAMESPACE__XML_SCHEMA = "xsd"; +const static inline std::string NAMESPACE__GENID = "genid"; // IRI Prefixes const static inline std::string IRI_PREFIX__OSM_NODE_TAGGED = 
"https://www.openstreetmap.org/node/"; diff --git a/include/osm2rdf/ttl/Writer.h b/include/osm2rdf/ttl/Writer.h index d152c970..6aef16ff 100644 --- a/include/osm2rdf/ttl/Writer.h +++ b/include/osm2rdf/ttl/Writer.h @@ -168,6 +168,26 @@ class Writer { // generateBlankNode creates a new unique identifier for a blank node. std::string generateBlankNode(); + // generateSkolem creates an IRI in the genid namespace from the given id. + std::string generateSkolem(const std::string& id); + + // generateSkolemForRelationMember creates a unique identifier for a member of + // a relation: 'r' and the relation id, the fourth character of memberType + // (e.g. 'w' for "osmway", 'n' for "osmnode") and the member id, then 'p' and + // relPos, the position of the member in the relation. Example: "r1234w5678p3" + std::string generateSkolemForRelationMember(const uint64_t& relationId, + const uint64_t& memberId, + const std::string& memberType, + const size_t& relPos); + + // generateSkolemForWayMember creates a unique identifier for a node of a way: + // 'w' and the way id, 'n' and the node id, then 'p' and relPos, the position + // of the node in the way. Unlike for relation members, the type letter is + // always 'n'. Example: "w1234n5678p0" + std::string generateSkolemForWayMember(const uint64_t& wayId, + const uint64_t& nodeId, + const size_t& relPos); + // Creates a IRI from given prefix p and string value v. 
// Assumes that both p and v are "safe", that is, they can be used // directly in the TTL diff --git a/src/config/Config.cpp b/src/config/Config.cpp index 75ed8244..42f80aec 100644 --- a/src/config/Config.cpp +++ b/src/config/Config.cpp @@ -175,6 +175,12 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const { oss << "\n" << prefix << osm2rdf::config::constants::SECTION_MISCELLANEOUS; oss << "\n" << prefix << "Num Threads: " << numThreads; + if (noBlankNodes) { + oss << "\n" + << prefix + << osm2rdf::config::constants::BLANK_NODES_INFO; + } + if (!storeLocations.empty()) { oss << "\n" << prefix << osm2rdf::config::constants::STORE_LOCATIONS_INFO << " " @@ -408,6 +414,12 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) { osm2rdf::config::constants::WKT_PRECISION_OPTION_LONG, osm2rdf::config::constants::WKT_PRECISION_OPTION_HELP, wktPrecision); + auto noBlankNodesOp = + parser.add( + osm2rdf::config::constants::BLANK_NODES_OPTION_SHORT, + osm2rdf::config::constants::BLANK_NODES_OPTION_LONG, + osm2rdf::config::constants::BLANK_NODES_OPTION_HELP); + auto writeRDFStatisticsOp = parser.add( osm2rdf::config::constants::WRITE_RDF_STATISTICS_OPTION_SHORT, @@ -535,6 +547,8 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) { addSpatialRelsForUntaggedNodes = untaggedNodesSpatialRelsOp->is_set(); + noBlankNodes = noBlankNodesOp->is_set(); + addUntaggedNodes = !noUntaggedNodesOp->is_set(); if (iriPrefixForUntaggedNodesOp->is_set() && iriPrefixForUntaggedNodesOp->value().size() > 0) { diff --git a/src/osm/FactHandler.cpp b/src/osm/FactHandler.cpp index 44b14252..9c1b2ad1 100644 --- a/src/osm/FactHandler.cpp +++ b/src/osm/FactHandler.cpp @@ -277,16 +277,22 @@ void osm2rdf::osm::FactHandler::relation( std::string role = member.role(); if (role.empty()) role = "member"; - const std::string& blankNode = _writer->generateBlankNode(); + + const std::string memberIRI = _config.noBlankNodes + ? 
_writer->generateSkolemForRelationMember( + relation.id(), member.positive_ref(), + type, inRelPos) + : _writer->generateBlankNode(); + _writer->writeTriple( subj, _writer->generateIRIUnsafe(NAMESPACE__OSM_RELATION, "member"), - blankNode); + memberIRI); - _writer->writeTriple(blankNode, IRI__OSMREL__MEMBER_ID, + _writer->writeTriple(memberIRI, IRI__OSMREL__MEMBER_ID, _writer->generateIRI(type, member.positive_ref())); - _writer->writeTriple(blankNode, IRI__OSMREL__MEMBER_ROLE, + _writer->writeTriple(memberIRI, IRI__OSMREL__MEMBER_ROLE, _writer->generateLiteral(role)); - _writer->writeLiteralTripleUnsafe(blankNode, IRI__OSMREL__MEMBER_POS, + _writer->writeLiteralTripleUnsafe(memberIRI, IRI__OSMREL__MEMBER_POS, std::to_string(inRelPos++), _iriXSDInteger); } @@ -348,11 +354,15 @@ void osm2rdf::osm::FactHandler::way(const osm2rdf::osm::Way& way) { if (_config.addMemberTriples && way.nodes().size()) { size_t wayOrder = 0; - std::string lastBlankNode; + std::string lastMemberIRI; auto lastNode = way.nodes().front(); for (const auto& node : way.nodes()) { - const std::string& blankNode = _writer->generateBlankNode(); - _writer->writeTriple(subj, IRI__OSMWAY__NODE, blankNode); + const std::string memberIRI = _config.noBlankNodes + ? 
_writer->generateSkolemForWayMember( + way.id(), node.positive_ref(), wayOrder) + : _writer->generateBlankNode(); + + _writer->writeTriple(subj, IRI__OSMWAY__NODE, memberIRI); std::string nodeNamespace; if (_config.iriPrefixForUntaggedNodes == @@ -365,17 +375,17 @@ void osm2rdf::osm::FactHandler::way(const osm2rdf::osm::Way& way) { } _writer->writeTriple( - blankNode, osm2rdf::ttl::constants::IRI__OSMWAY__MEMBER_ID, + memberIRI, osm2rdf::ttl::constants::IRI__OSMWAY__MEMBER_ID, _writer->generateIRI(nodeNamespace, node.positive_ref())); _writer->writeLiteralTripleUnsafe( - blankNode, osm2rdf::ttl::constants::IRI__OSMWAY__MEMBER_POS, + memberIRI, osm2rdf::ttl::constants::IRI__OSMWAY__MEMBER_POS, std::to_string(wayOrder++), _iriXSDInteger); - if (_config.addWayNodeSpatialMetadata && !lastBlankNode.empty() && + if (_config.addWayNodeSpatialMetadata && !lastMemberIRI.empty() && node.location().valid() && lastNode.location().valid()) { _writer->writeTriple( - lastBlankNode, IRI__OSMWAY__NEXT_NODE, + lastMemberIRI, IRI__OSMWAY__NEXT_NODE, _writer->generateIRI(nodeNamespace, node.positive_ref())); // Haversine distance const double distanceLat = (node.location().lat_without_check() - @@ -393,10 +403,10 @@ void osm2rdf::osm::FactHandler::way(const osm2rdf::osm::Way& way) { osm2rdf::osm::constants::METERS_IN_KM * 2 * asin(sqrt(haversine)); _writer->writeLiteralTripleUnsafe( - lastBlankNode, IRI__OSMWAY__NEXT_NODE_DISTANCE, + lastMemberIRI, IRI__OSMWAY__NEXT_NODE_DISTANCE, std::to_string(distance), "^^" + IRI__XSD__DECIMAL); } - lastBlankNode = blankNode; + lastMemberIRI = memberIRI; lastNode = node; } } diff --git a/src/ttl/Writer.cpp b/src/ttl/Writer.cpp index 2de57c00..0e482c30 100644 --- a/src/ttl/Writer.cpp +++ b/src/ttl/Writer.cpp @@ -61,6 +61,8 @@ osm2rdf::ttl::Writer::Writer(const osm2rdf::config::Config& config, "https://osm2rdf.cs.uni-freiburg.de/rdf/key#"}, {osm2rdf::ttl::constants::NAMESPACE__OSM2RDF_META, "https://osm2rdf.cs.uni-freiburg.de/rdf/meta#"}, + 
{osm2rdf::ttl::constants::NAMESPACE__GENID, + "http://osm2rdf.cs.uni-freiburg.de/.well-known/genid/"}, // https://wiki.openstreetmap.org/wiki/Sophox#How_OSM_data_is_stored // https://github.com/Sophox/sophox/blob/master/osm2rdf/osmutils.py#L35-L39 // osm prefixes @@ -388,7 +390,8 @@ void osm2rdf::ttl::Writer::writeMetadata() { writeOptionTriple( osm2rdf::config::constants::UNTAGGED_NODES_SPATIAL_RELS_OPTION_LONG, generateBooleanLiteral(_config.addSpatialRelsForUntaggedNodes)); - + writeOptionTriple(osm2rdf::config::constants::BLANK_NODES_OPTION_LONG, + generateBooleanLiteral(_config.noBlankNodes)); writeOptionTriple(osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_LONG, generateBooleanLiteral(!_config.addUntaggedNodes)); writeOptionTriple(osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_LONG, @@ -426,6 +429,39 @@ std::string osm2rdf::ttl::Writer::generateBlankNode() { std::to_string(_blankNodeCount[threadId]++); } +// ____________________________________________________________________________ +template +std::string osm2rdf::ttl::Writer::generateSkolem(const std::string& id) { + return generateIRIUnsafe(osm2rdf::ttl::constants::NAMESPACE__GENID, id); +} + +// ____________________________________________________________________________ +template +std::string osm2rdf::ttl::Writer::generateSkolemForRelationMember( + const uint64_t& relationId, + const uint64_t& memberId, + const std::string& memberType, + const size_t& relPos) { + const std::string skolemId = "r" + std::to_string(relationId) + + // The fourth character of memberType (e.g. 'w' in "osmway") is the letter + // identifying the member's object type; at() throws if it is too short. + memberType.at(3) + std::to_string(memberId) + + "p" + std::to_string(relPos); + return generateSkolem(skolemId); +} + +// ____________________________________________________________________________ +template +std::string osm2rdf::ttl::Writer::generateSkolemForWayMember( + const uint64_t& wayId, + const uint64_t& nodeId, + const size_t& relPos) { + const std::string skolemId = "w" + 
std::to_string(wayId) + + "n" + std::to_string(nodeId) + + "p" + std::to_string(relPos); + return generateSkolem(skolemId); +} + // ____________________________________________________________________________ template void osm2rdf::ttl::Writer::writeIRI(std::string_view p, uint64_t v, diff --git a/tests/config/Config.cpp b/tests/config/Config.cpp index cd5ce861..f7b66aad 100644 --- a/tests/config/Config.cpp +++ b/tests/config/Config.cpp @@ -51,6 +51,8 @@ void assertDefaultConfig(const osm2rdf::config::Config& config) { ASSERT_FALSE(config.writeRDFStatistics); + ASSERT_FALSE(config.noBlankNodes); + ASSERT_EQ(0, config.simplifyGeometries); ASSERT_EQ(0, config.simplifyWKT); ASSERT_EQ(5, config.wktDeviation); @@ -658,6 +660,21 @@ TEST(CONFIG_Config, fromArgsSimplifyWKTLong) { ASSERT_EQ(25, config.simplifyWKT); } +// ____________________________________________________________________________ +TEST(CONFIG_Config, fromArgsNoBlankNodesLong) { + osm2rdf::config::Config config; + assertDefaultConfig(config); + osm2rdf::util::CacheFile cf("/tmp/dummyInput"); + + const auto arg = "--" + osm2rdf::config::constants::BLANK_NODES_OPTION_LONG; + const int argc = 3; + char* argv[argc] = {const_cast(""), const_cast(arg.c_str()), + const_cast("/tmp/dummyInput")}; + config.fromArgs(argc, argv); + ASSERT_EQ("", config.output.string()); + ASSERT_TRUE(config.noBlankNodes); +} + // ____________________________________________________________________________ TEST(CONFIG_Config, fromArgsSimplifyWKTDeviationLong) { osm2rdf::config::Config config; @@ -928,6 +945,17 @@ TEST(CONFIG_Config, getInfoSimplifyWKT) { res, ::testing::HasSubstr(osm2rdf::config::constants::SIMPLIFY_WKT_INFO)); } +// ____________________________________________________________________________ +TEST(CONFIG_Config, getInfoNoBlankNodes) { + osm2rdf::config::Config config; + assertDefaultConfig(config); + config.noBlankNodes = true; + + const std::string res = config.getInfo(""); + ASSERT_THAT( + res, 
::testing::HasSubstr(osm2rdf::config::constants::BLANK_NODES_INFO)); +} + // ____________________________________________________________________________ TEST(CONFIG_Config, getInfoSkipWikiLinks) { osm2rdf::config::Config config; diff --git a/tests/ttl/Writer.cpp b/tests/ttl/Writer.cpp index 6d741d00..5674ff09 100644 --- a/tests/ttl/Writer.cpp +++ b/tests/ttl/Writer.cpp @@ -252,6 +252,60 @@ TEST(TTL_WriterQLEVER, generateBlankNode) { } } +// ____________________________________________________________________________ +TEST(TTL_WriterQLEVER, generateSkolem) { + osm2rdf::config::Config config; + osm2rdf::ttl::Writer w{config, nullptr}; + { + const std::string res = w.generateSkolem("1"); + ASSERT_STREQ("genid:1", res.c_str()); + } + { + const std::string res = w.generateSkolem("2"); + ASSERT_STREQ("genid:2", res.c_str()); + } + { + const std::string res = w.generateSkolem("3"); + ASSERT_STREQ("genid:3", res.c_str()); + } +} + +// ____________________________________________________________________________ +TEST(TTL_WriterQLEVER, generateSkolemForRelationMember) { + osm2rdf::config::Config config; + osm2rdf::ttl::Writer w{config, nullptr}; + { + const std::string res = w.generateSkolemForRelationMember(1, 2, "osmnode", 3); + ASSERT_STREQ("genid:r1n2p3", res.c_str()); + } + { + const std::string res = w.generateSkolemForRelationMember(1, 2, "osmway", 3); + ASSERT_STREQ("genid:r1w2p3", res.c_str()); + } + { + const std::string res = w.generateSkolemForRelationMember(1, 2, "ohmnode", 3); + ASSERT_STREQ("genid:r1n2p3", res.c_str()); + } +} + +// ____________________________________________________________________________ +TEST(TTL_WriterQLEVER, generateSkolemForWayMember) { + osm2rdf::config::Config config; + osm2rdf::ttl::Writer w{config, nullptr}; + { + const std::string res = w.generateSkolemForWayMember(1, 1, 1); + ASSERT_STREQ("genid:w1n1p1", res.c_str()); + } + { + const std::string res = w.generateSkolemForWayMember(2, 3, 4); + ASSERT_STREQ("genid:w2n3p4", 
res.c_str()); + } + { + const std::string res = w.generateSkolemForWayMember(3, 5, 7); + ASSERT_STREQ("genid:w3n5p7", res.c_str()); + } +} + // ____________________________________________________________________________ TEST(TTL_WriterNT, generateIRI_ID) { osm2rdf::config::Config config;