diff --git a/.gitignore b/.gitignore index 2f754d9b..6cfbaf9f 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ Brewfile.lock.json .DS_Store *.snap /node_modules +/.cache diff --git a/DEPENDENCIES b/DEPENDENCIES index a8472a38..ff6faa61 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,6 +1,6 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 94ff49bd58ca63c5e5a7fb2435b3bc72872517c4 -jsonbinpack https://github.com/sourcemeta/jsonbinpack 8fae212dc7ec02af4bb0cd4e7fccd42a2471f1c1 -blaze https://github.com/sourcemeta/blaze 8dba65f8aebfe1ac976168b76e01c20dd406c517 +core https://github.com/sourcemeta/core 376f397db98e72be48d8401e2fbf2ee96f033966 +jsonbinpack https://github.com/sourcemeta/jsonbinpack 0c2340990bf31c630155991a93306990d9d94fd4 +blaze https://github.com/sourcemeta/blaze 93342104a85814bc0fd11792d305c4e83de259c0 hydra https://github.com/sourcemeta/hydra c86d2165a2f27f838837af1a5af24b1055a35317 ctrf https://github.com/ctrf-io/ctrf 93ea827d951390190171d37443bff169cf47c808 diff --git a/src/command_inspect.cc b/src/command_inspect.cc index fea4188e..663677a9 100644 --- a/src/command_inspect.cc +++ b/src/command_inspect.cc @@ -50,7 +50,7 @@ auto print_frame(std::ostream &stream, } stream << " Root : " - << location.second.root.value_or("") << "\n"; + << (frame.root().empty() ? "" : frame.root()) << "\n"; if (location.second.pointer.empty()) { stream << " Pointer :\n"; @@ -60,7 +60,8 @@ auto print_frame(std::ostream &stream, stream << "\n"; } - const auto position{positions.get(location.second.pointer)}; + const auto position{ + positions.get(sourcemeta::core::to_pointer(location.second.pointer))}; if (position.has_value()) { stream << " File Position : " << std::get<0>(position.value()) << ":" << std::get<1>(position.value()) << "\n"; @@ -70,17 +71,19 @@ auto print_frame(std::ostream &stream, stream << " Base : " << location.second.base << "\n"; - if (location.second.relative_pointer.empty()) { + const auto relative_pointer{ + location.second.pointer.slice(location.second.relative_pointer)}; + if (relative_pointer.empty()) { stream << " Relative Pointer :\n"; } else { stream << " Relative Pointer : "; - sourcemeta::core::stringify(location.second.relative_pointer, stream); + sourcemeta::core::stringify(relative_pointer, stream); stream << "\n"; } stream << " Dialect : " << location.second.dialect << "\n"; - stream << " Base Dialect : " << location.second.base_dialect - << "\n"; + stream << " Base Dialect : " + << sourcemeta::core::to_string(location.second.base_dialect) << "\n"; if (location.second.parent.has_value()) { if (location.second.parent.value().empty()) { @@ -114,7 +117,8 @@ auto print_frame(std::ostream &stream, stream << " Type : Dynamic\n"; } - const auto position{positions.get(reference.first.second)}; + const auto position{ + positions.get(sourcemeta::core::to_pointer(reference.first.second))}; if (position.has_value()) { stream << " File Position : " << std::get<0>(position.value()) << ":" << std::get<1>(position.value()) << "\n"; @@ -125,7 +129,8 @@ auto print_frame(std::ostream &stream, stream << " Destination : " << reference.second.destination << "\n"; stream << " - (w/o fragment) : " - << reference.second.base.value_or("") << "\n"; + << (reference.second.base.empty() ? "" : reference.second.base) + << "\n"; stream << " - (fragment) : " << reference.second.fragment.value_or("") << "\n"; } @@ -157,17 +162,17 @@ auto sourcemeta::jsonschema::inspect(const sourcemeta::core::Options &options) const auto identifier{ sourcemeta::core::identify(schema, custom_resolver, dialect)}; - frame.analyse( - schema, sourcemeta::core::schema_walker, custom_resolver, dialect, - - // Only use the file-based URI if the schema has no identifier, - // as otherwise we make the output unnecessarily hard when it - // comes to debugging schemas - identifier.has_value() - ? std::optional(std::nullopt) - : sourcemeta::core::URI::from_path( - sourcemeta::core::weakly_canonical(schema_path)) - .recompose()); + frame.analyse(schema, sourcemeta::core::schema_walker, custom_resolver, + dialect, + + // Only use the file-based URI if the schema has no + // identifier, as otherwise we make the output unnecessarily + // hard when it comes to debugging schemas + !identifier.empty() + ? "" + : sourcemeta::core::URI::from_path( + sourcemeta::core::weakly_canonical(schema_path)) + .recompose()); } catch ( const sourcemeta::core::SchemaRelativeMetaschemaResolutionError &error) { throw FileError( diff --git a/src/command_lint.cc b/src/command_lint.cc index fbb2e262..763f09d2 100644 --- a/src/command_lint.cc +++ b/src/command_lint.cc @@ -155,24 +155,21 @@ auto sourcemeta::jsonschema::lint(const sourcemeta::core::Options &options) } if (options.contains("list")) { - std::vector, - std::reference_wrapper>> - rules; + std::vector> rules; for (const auto &entry : bundle) { rules.emplace_back(entry->name(), entry->message()); } - std::sort(rules.begin(), rules.end(), - [](const auto &left, const auto &right) { - return left.first.get() < right.first.get() || - (left.first.get() == right.first.get() && - left.second.get() < right.second.get()); - }); + std::sort( + rules.begin(), rules.end(), [](const auto &left, const auto &right) { + return left.first < right.first || + (left.first == right.first && left.second < right.second); + }); std::size_t count{0}; for (const auto &entry : rules) { - std::cout << entry.first.get() << "\n"; - std::cout << " " << entry.second.get() << "\n\n"; + std::cout << entry.first << "\n"; + std::cout << " " << entry.second << "\n\n"; count += 1; } diff --git a/src/command_metaschema.cc b/src/command_metaschema.cc index 3d59c1bc..5c0f46e5 100644 --- a/src/command_metaschema.cc +++ b/src/command_metaschema.cc @@ -47,7 +47,7 @@ auto sourcemeta::jsonschema::metaschema( try { const auto dialect{ sourcemeta::core::dialect(entry.second, default_dialect_option)}; - if (!dialect) { + if (dialect.empty()) { throw FileError( entry.first); } @@ -62,27 +62,27 @@ auto sourcemeta::jsonschema::metaschema( frame.analyse(bundled, sourcemeta::core::schema_walker, custom_resolver, default_dialect_option); - if (!cache.contains(dialect.value())) { + if (!cache.contains(std::string{dialect})) { const auto metaschema_template{sourcemeta::blaze::compile( bundled, sourcemeta::core::schema_walker, custom_resolver, sourcemeta::blaze::default_schema_compiler, frame, sourcemeta::blaze::Mode::Exhaustive, default_dialect_option)}; - cache.insert({dialect.value(), metaschema_template}); + cache.insert({std::string{dialect}, metaschema_template}); } if (trace) { sourcemeta::blaze::TraceOutput output{ sourcemeta::core::schema_walker, custom_resolver, sourcemeta::core::empty_weak_pointer, frame}; - result = evaluator.validate(cache.at(dialect.value()), entry.second, - std::ref(output)); + result = evaluator.validate(cache.at(std::string{dialect}), + entry.second, std::ref(output)); print(output, entry.positions, std::cout); } else if (json_output) { // Otherwise its impossible to correlate the output // when validating i.e. a directory of schemas std::cerr << entry.first.string() << "\n"; const auto output{sourcemeta::blaze::standard( - evaluator, cache.at(dialect.value()), entry.second, + evaluator, cache.at(std::string{dialect}), entry.second, sourcemeta::blaze::StandardOutput::Basic, entry.positions)}; assert(output.is_object()); assert(output.defines("valid")); @@ -95,12 +95,12 @@ auto sourcemeta::jsonschema::metaschema( std::cout << "\n"; } else { sourcemeta::blaze::SimpleOutput output{entry.second}; - if (evaluator.validate(cache.at(dialect.value()), entry.second, + if (evaluator.validate(cache.at(std::string{dialect}), entry.second, std::ref(output))) { LOG_VERBOSE(options) << "ok: " << sourcemeta::core::weakly_canonical(entry.first).string() - << "\n matches " << dialect.value() << "\n"; + << "\n matches " << dialect << "\n"; } else { std::cerr << "fail: " << sourcemeta::core::weakly_canonical(entry.first).string() diff --git a/src/command_test.cc b/src/command_test.cc index f40c17be..46d82c10 100644 --- a/src/command_test.cc +++ b/src/command_test.cc @@ -3,13 +3,13 @@ #include -#include // std::chrono -#include // EXIT_FAILURE -#include // std::cerr, std::cout -#include // std::ostringstream -#include // std::string -#include // std::this_thread -#include // std::vector +#include // std::chrono +#include // EXIT_FAILURE +#include // std::cerr, std::cout +#include // std::ostringstream +#include // std::string +#include // std::string_view +#include // std::this_thread #include "command.h" #include "configuration.h" @@ -24,8 +24,8 @@ namespace { auto parse_test_suite(const sourcemeta::jsonschema::InputJSON &entry, const sourcemeta::core::SchemaResolver &schema_resolver, - const std::optional &dialect, - const bool json_output) -> sourcemeta::blaze::TestSuite { + const std::string_view dialect, const bool json_output) + -> sourcemeta::blaze::TestSuite { try { return sourcemeta::blaze::TestSuite::parse( entry.second, entry.positions, entry.first.parent_path(), diff --git a/src/command_validate.cc b/src/command_validate.cc index 8657a7b3..afb86981 100644 --- a/src/command_validate.cc +++ b/src/command_validate.cc @@ -8,10 +8,11 @@ #include #include -#include // std::chrono -#include // std::sqrt -#include // std::cerr -#include // std::string +#include // std::chrono +#include // std::sqrt +#include // std::cerr +#include // std::string +#include // std::string_view #include "command.h" #include "configuration.h" @@ -36,8 +37,8 @@ auto get_precompiled_schema_template_path( auto get_schema_template(const sourcemeta::core::JSON &bundled, const sourcemeta::core::SchemaResolver &resolver, const sourcemeta::core::SchemaFrame &frame, - const std::optional &default_dialect, - const std::optional &default_id, + const std::string_view default_dialect, + const std::string_view default_id, const bool fast_mode, const sourcemeta::core::Options &options) -> sourcemeta::blaze::Template { diff --git a/src/error.h b/src/error.h index bdd7a2c6..996ad225 100644 --- a/src/error.h +++ b/src/error.h @@ -298,10 +298,11 @@ inline auto print_exception(const bool is_json, const Exception &exception) } if constexpr (requires(const Exception ¤t) { - { current.option() } -> std::convertible_to; + { current.option() } -> std::convertible_to; }) { if (is_json) { - error_json.assign("option", sourcemeta::core::JSON{exception.option()}); + error_json.assign( + "option", sourcemeta::core::JSON{std::string{exception.option()}}); } else { std::cerr << " at option " << exception.option() << "\n"; } diff --git a/src/resolver.h b/src/resolver.h index 712a3b55..3d69bfa4 100644 --- a/src/resolver.h +++ b/src/resolver.h @@ -80,7 +80,7 @@ class CustomResolver { CustomResolver( const sourcemeta::core::Options &options, const std::optional &configuration, - const bool remote, const std::optional &default_dialect) + const bool remote, const std::string_view default_dialect) : options_{options}, configuration_{configuration}, remote_{remote} { if (options.contains("resolve")) { for (const auto &entry : for_each_json(options.at("resolve"), options)) { @@ -121,8 +121,8 @@ class CustomResolver { } auto add(const sourcemeta::core::JSON &schema, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + const std::string_view default_dialect = "", + const std::string_view default_id = "", const std::function &callback = nullptr) -> bool { assert(sourcemeta::core::is_schema(schema)); @@ -223,12 +223,12 @@ class CustomResolver { inline auto resolver(const sourcemeta::core::Options &options, const bool remote, - const std::optional &default_dialect, + const std::string_view default_dialect, const std::optional &configuration) -> const CustomResolver & { - using CacheKey = std::pair>; + using CacheKey = std::pair; static std::map resolver_cache; - const CacheKey cache_key{remote, default_dialect}; + const CacheKey cache_key{remote, std::string{default_dialect}}; // Check if resolver is already cached auto iterator{resolver_cache.find(cache_key)}; diff --git a/src/utils.h b/src/utils.h index 66c6ff87..fbde10ba 100644 --- a/src/utils.h +++ b/src/utils.h @@ -8,28 +8,29 @@ #include -#include // assert -#include // std::next -#include // std::optional -#include // std::ostream -#include // std::ostringstream -#include // std::string, std::stoull -#include // std::get -#include // std::visit +#include // assert +#include // std::next +#include // std::optional +#include // std::ostream +#include // std::string, std::stoull +#include // std::string_view +#include // std::get +#include // std::visit namespace sourcemeta::jsonschema { inline auto default_dialect( const sourcemeta::core::Options &options, const std::optional &configuration) - -> std::optional { + -> std::string_view { if (options.contains("default-dialect")) { - return std::string{options.at("default-dialect").front()}; - } else if (configuration.has_value()) { - return configuration.value().default_dialect; + return options.at("default-dialect").front(); + } else if (configuration.has_value() && + configuration.value().default_dialect.has_value()) { + return configuration.value().default_dialect.value(); } - return std::nullopt; + return ""; } inline auto parse_indentation(const sourcemeta::core::Options &options) diff --git a/vendor/blaze/src/compiler/compile.cc b/vendor/blaze/src/compiler/compile.cc index 1ef46676..3bf7d068 100644 --- a/vendor/blaze/src/compiler/compile.cc +++ b/vendor/blaze/src/compiler/compile.cc @@ -3,11 +3,12 @@ #include -#include // std::move, std::sort, std::unique -#include // assert -#include // std::back_inserter -#include // std::tuple, std::get -#include // std::move, std::pair +#include // std::move, std::sort, std::unique +#include // assert +#include // std::back_inserter +#include // std::string_view +#include // std::tuple, std::get +#include // std::move, std::pair #include "compile_helpers.h" @@ -16,7 +17,7 @@ namespace { auto compile_subschema(const sourcemeta::blaze::Context &context, const sourcemeta::blaze::SchemaContext &schema_context, const sourcemeta::blaze::DynamicContext &dynamic_context, - const std::optional &default_dialect) + const std::string_view default_dialect) -> sourcemeta::blaze::Instructions { using namespace sourcemeta::blaze; assert(is_schema(schema_context.schema)); @@ -48,8 +49,8 @@ auto compile_subschema(const sourcemeta::blaze::Context &context, assert(!schema_context.base.fragment().has_value()); for (auto &&step : context.compiler( context, - {.relative_pointer = - schema_context.relative_pointer.concat({keyword}), + {.relative_pointer = schema_context.relative_pointer.concat( + make_weak_pointer(keyword)), .schema = schema_context.schema, .vocabularies = entry.vocabularies, .base = schema_context.base, @@ -83,7 +84,7 @@ auto precompile( sourcemeta::blaze::schema_resource_id( context.resources, anchor_uri.recompose_without_fragment().value_or("")), - std::string{anchor_uri.fragment().value_or("")})}; + anchor_uri.fragment().value_or(""))}; schema_context.labels.insert(label); // Configure a schema context that corresponds to the @@ -91,11 +92,14 @@ auto precompile( auto subschema{sourcemeta::core::get(context.root, entry.second.pointer)}; auto nested_vocabularies{sourcemeta::core::vocabularies( subschema, context.resolver, entry.second.dialect)}; + const auto nested_relative_pointer{ + entry.second.pointer.slice(entry.second.relative_pointer)}; + const sourcemeta::core::URI nested_base{entry.second.base}; const sourcemeta::blaze::SchemaContext nested_schema_context{ - .relative_pointer = entry.second.relative_pointer, + .relative_pointer = nested_relative_pointer, .schema = std::move(subschema), .vocabularies = std::move(nested_vocabularies), - .base = entry.second.base, + .base = nested_base, .labels = {}, .is_property_name = schema_context.is_property_name}; @@ -105,8 +109,8 @@ auto precompile( sourcemeta::blaze::compile( context, nested_schema_context, sourcemeta::blaze::relative_dynamic_context(), - sourcemeta::core::empty_pointer, - sourcemeta::core::empty_pointer, entry.first.second))}; + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, entry.first.second))}; } } // namespace @@ -118,8 +122,8 @@ auto compile(const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, const Compiler &compiler, const sourcemeta::core::SchemaFrame &frame, const Mode mode, - const std::optional &default_dialect, - const std::optional &default_id, + const std::string_view default_dialect, + const std::string_view default_id, const std::optional &tweaks) -> Template { assert(is_schema(schema)); const auto effective_tweaks{tweaks.value_or(Tweaks{})}; @@ -133,9 +137,8 @@ auto compile(const sourcemeta::core::JSON &schema, const auto identifier{ base_dialect.has_value() ? sourcemeta::core::identify(schema, base_dialect.value(), default_id) - : std::optional{std::nullopt}}; - const std::string base{ - sourcemeta::core::URI::canonicalize(identifier.value_or(""))}; + : std::string_view{}}; + const std::string base{sourcemeta::core::URI::canonicalize(identifier)}; assert(frame.locations().contains( {sourcemeta::core::SchemaReferenceType::Static, base})); const auto root_frame_entry{frame.locations().at( @@ -202,7 +205,7 @@ auto compile(const sourcemeta::core::JSON &schema, } const auto label{Evaluator{}.hash( - schema_resource_id(resources, reference.second.base.value_or("")), + schema_resource_id(resources, reference.second.base), reference.second.fragment.value_or(""))}; auto [iterator, inserted] = static_reference_destinations.try_emplace( reference.second.destination, std::make_pair(label, 0)); @@ -250,7 +253,7 @@ auto compile(const sourcemeta::core::JSON &schema, /////////////////////////////////////////////////////////////////// SchemaContext schema_context{ - .relative_pointer = sourcemeta::core::empty_pointer, + .relative_pointer = sourcemeta::core::empty_weak_pointer, .schema = schema, .vocabularies = vocabularies(schema, resolver, root_frame_entry.dialect), .base = sourcemeta::core::URI::canonicalize(root_frame_entry.base), @@ -324,13 +327,17 @@ auto compile(const sourcemeta::core::JSON &schema, auto nested_vocabularies{sourcemeta::core::vocabularies( subschema, context.resolver, entry->second.dialect)}; + const auto nested_relative_pointer{ + entry->second.pointer.slice(entry->second.relative_pointer)}; + // TODO: I think this is hiding a framing bug that we should later + // investigate + const sourcemeta::core::URI nested_base{ + entry->second.base.starts_with('#') ? "" : entry->second.base}; const sourcemeta::blaze::SchemaContext nested_schema_context{ - .relative_pointer = entry->second.relative_pointer, + .relative_pointer = nested_relative_pointer, .schema = std::move(subschema), .vocabularies = std::move(nested_vocabularies), - // TODO: I think this is hiding a framing bug that we should later - // investigate - .base = entry->second.base.starts_with('#') ? "" : entry->second.base, + .base = nested_base, .labels = {}, .is_property_name = schema_context.is_property_name}; static_reference_template.push_back( @@ -340,8 +347,8 @@ auto compile(const sourcemeta::core::JSON &schema, sourcemeta::blaze::compile( context, nested_schema_context, sourcemeta::blaze::relative_dynamic_context(), - sourcemeta::core::empty_pointer, - sourcemeta::core::empty_pointer, entry->first.second))); + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, entry->first.second))); } for (auto &&substep : static_reference_template) { @@ -384,8 +391,8 @@ auto compile(const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, const Compiler &compiler, const Mode mode, - const std::optional &default_dialect, - const std::optional &default_id, + const std::string_view default_dialect, + const std::string_view default_id, const std::optional &tweaks) -> Template { assert(is_schema(schema)); @@ -405,9 +412,9 @@ auto compile(const sourcemeta::core::JSON &schema, auto compile(const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, - const sourcemeta::core::Pointer &schema_suffix, - const sourcemeta::core::Pointer &instance_suffix, - const std::optional &uri) -> Instructions { + const sourcemeta::core::WeakPointer &schema_suffix, + const sourcemeta::core::WeakPointer &instance_suffix, + const std::optional uri) -> Instructions { // Determine URI of the destination after recursion const std::string destination{ uri.has_value() @@ -421,7 +428,7 @@ auto compile(const Context &context, const SchemaContext &schema_context, if (!context.frame.locations().contains( {sourcemeta::core::SchemaReferenceType::Static, destination})) { throw sourcemeta::core::SchemaReferenceError( - destination, schema_context.relative_pointer, + destination, to_pointer(schema_context.relative_pointer), "The target of the reference does not exist in the schema"); } @@ -431,26 +438,29 @@ auto compile(const Context &context, const SchemaContext &schema_context, if (!is_schema(new_schema)) { throw sourcemeta::core::SchemaReferenceError( - destination, schema_context.relative_pointer, + destination, to_pointer(schema_context.relative_pointer), "The target of the reference is not a valid schema"); } - const sourcemeta::core::Pointer destination_pointer{ + const sourcemeta::core::WeakPointer destination_pointer{ dynamic_context.keyword.empty() ? dynamic_context.base_schema_location.concat(schema_suffix) : dynamic_context.base_schema_location - .concat({dynamic_context.keyword}) + .concat(make_weak_pointer(dynamic_context.keyword)) .concat(schema_suffix)}; + const auto new_relative_pointer{entry.pointer.slice(entry.relative_pointer)}; + const sourcemeta::core::URI new_base{ + sourcemeta::core::URI{entry.base}.recompose_without_fragment().value_or( + "")}; + return compile_subschema( context, - {.relative_pointer = entry.relative_pointer, + {.relative_pointer = new_relative_pointer, .schema = new_schema, .vocabularies = vocabularies(new_schema, context.resolver, entry.dialect), - .base = sourcemeta::core::URI{entry.base} - .recompose_without_fragment() - .value_or(""), + .base = new_base, // TODO: This represents a copy .labels = schema_context.labels, .is_property_name = schema_context.is_property_name}, diff --git a/vendor/blaze/src/compiler/compile_helpers.h b/vendor/blaze/src/compiler/compile_helpers.h index 63e45bff..2e06dee8 100644 --- a/vendor/blaze/src/compiler/compile_helpers.h +++ b/vendor/blaze/src/compiler/compile_helpers.h @@ -4,39 +4,67 @@ #include #include -#include // std::ranges::find, std::ranges::any_of -#include // assert -#include // std::distance -#include // std::regex, std::regex_match, std::smatch -#include // std::declval, std::move -#include // std::visit +#include // std::ranges::find, std::ranges::any_of +#include // assert +#include // std::cref +#include // std::distance +#include // std::regex, std::regex_match, std::smatch +#include // std::declval, std::move namespace sourcemeta::blaze { +// Helper to create a single-element WeakPointer from a property name reference +inline auto make_weak_pointer(const std::string &property) + -> sourcemeta::core::WeakPointer { + sourcemeta::core::WeakPointer result; + result.push_back(std::cref(property)); + return result; +} + +// Helper to create a two-element WeakPointer from property name and index +inline auto make_weak_pointer(const std::string &property, + const std::size_t index) + -> sourcemeta::core::WeakPointer { + sourcemeta::core::WeakPointer result; + result.push_back(std::cref(property)); + result.push_back(index); + return result; +} + +// Helper to create a two-element WeakPointer from two property names +inline auto make_weak_pointer(const std::string &property1, + const std::string &property2) + -> sourcemeta::core::WeakPointer { + sourcemeta::core::WeakPointer result; + result.push_back(std::cref(property1)); + result.push_back(std::cref(property2)); + return result; +} + inline auto relative_dynamic_context() -> DynamicContext { return {.keyword = "", - .base_schema_location = sourcemeta::core::empty_pointer, - .base_instance_location = sourcemeta::core::empty_pointer, + .base_schema_location = sourcemeta::core::empty_weak_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer, .property_as_target = false}; } inline auto relative_dynamic_context(const DynamicContext &dynamic_context) -> DynamicContext { return {.keyword = "", - .base_schema_location = sourcemeta::core::empty_pointer, - .base_instance_location = sourcemeta::core::empty_pointer, + .base_schema_location = sourcemeta::core::empty_weak_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer, .property_as_target = dynamic_context.property_as_target}; } inline auto property_relative_dynamic_context() -> DynamicContext { return {.keyword = "", - .base_schema_location = sourcemeta::core::empty_pointer, - .base_instance_location = sourcemeta::core::empty_pointer, + .base_schema_location = sourcemeta::core::empty_weak_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer, .property_as_target = true}; } inline auto schema_resource_id(const std::vector &resources, - const std::string &resource) -> std::size_t { + const std::string_view resource) -> std::size_t { const auto iterator{std::ranges::find( resources, sourcemeta::core::URI::canonicalize(resource))}; if (iterator == resources.cend()) { @@ -55,13 +83,15 @@ inline auto make_with_resource(const InstructionIndex type, const DynamicContext &dynamic_context, const Value &value, const std::string &resource) -> Instruction { + const auto schema_location{ + dynamic_context.keyword.empty() + ? to_pointer(dynamic_context.base_schema_location) + : to_pointer(dynamic_context.base_schema_location) + .concat({dynamic_context.keyword})}; return {.type = type, - .relative_schema_location = - dynamic_context.keyword.empty() - ? dynamic_context.base_schema_location - : dynamic_context.base_schema_location.concat( - {dynamic_context.keyword}), - .relative_instance_location = dynamic_context.base_instance_location, + .relative_schema_location = schema_location, + .relative_instance_location = + to_pointer(dynamic_context.base_instance_location), .keyword_location = to_uri(schema_context.relative_pointer, schema_context.base) .recompose(), @@ -84,13 +114,15 @@ inline auto make(const InstructionIndex type, const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, Value &&value, Instructions &&children) -> Instruction { + const auto schema_location{ + dynamic_context.keyword.empty() + ? to_pointer(dynamic_context.base_schema_location) + : to_pointer(dynamic_context.base_schema_location) + .concat({dynamic_context.keyword})}; return {.type = type, - .relative_schema_location = - dynamic_context.keyword.empty() - ? dynamic_context.base_schema_location - : dynamic_context.base_schema_location.concat( - {dynamic_context.keyword}), - .relative_instance_location = dynamic_context.base_instance_location, + .relative_schema_location = schema_location, + .relative_instance_location = + to_pointer(dynamic_context.base_instance_location), .keyword_location = to_uri(schema_context.relative_pointer, schema_context.base) .recompose(), @@ -101,12 +133,13 @@ inline auto make(const InstructionIndex type, const Context &context, } inline auto unroll(const Instruction &step, - const sourcemeta::core::Pointer &base_instance_location = - sourcemeta::core::empty_pointer) -> Instruction { + const sourcemeta::core::WeakPointer &base_instance_location = + sourcemeta::core::empty_weak_pointer) -> Instruction { return {.type = step.type, .relative_schema_location = step.relative_schema_location, .relative_instance_location = - base_instance_location.concat(step.relative_instance_location), + to_pointer(base_instance_location) + .concat(step.relative_instance_location), .keyword_location = step.keyword_location, .schema_resource = step.schema_resource, .value = step.value, @@ -182,17 +215,20 @@ inline auto find_adjacent(const Context &context, const sourcemeta::core::JSON::Type type) -> auto { std::vector possible_keyword_uris; possible_keyword_uris.push_back( - to_uri(schema_context.relative_pointer.initial().concat({keyword}), + to_uri(schema_context.relative_pointer.initial().concat( + make_weak_pointer(keyword)), schema_context.base) .recompose()); // TODO: Do something similar with `allOf` // Attempt to statically follow references + static const std::string ref_keyword{"$ref"}; if (schema_context.schema.defines("$ref")) { const auto reference_type{sourcemeta::core::SchemaReferenceType::Static}; const auto destination_uri{ - to_uri(schema_context.relative_pointer.initial().concat({"$ref"}), + to_uri(schema_context.relative_pointer.initial().concat( + make_weak_pointer(ref_keyword)), schema_context.base) .recompose()}; assert( @@ -205,9 +241,10 @@ inline auto find_adjacent(const Context &context, context.frame.references().at({reference_type, destination.pointer})}; const auto keyword_uri{ sourcemeta::core::to_uri( - sourcemeta::core::to_pointer(reference.fragment.value_or("")) + sourcemeta::core::to_pointer( + std::string{reference.fragment.value_or("")}) .concat({keyword})) - .resolve_from(reference.base.value_or(""))}; + .resolve_from(sourcemeta::core::URI{reference.base})}; // TODO: When this logic is used by // `unevaluatedProperties`/`unevaluatedItems`, how can we let the @@ -282,7 +319,7 @@ inline auto requires_evaluation(const Context &context, inline auto is_circular(const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::Pointer &reference_origin, + const sourcemeta::core::WeakPointer &reference_origin, const sourcemeta::core::SchemaFrame::ReferencesEntry &reference, std::unordered_set &visited) -> bool { if (visited.contains(reference.destination)) { diff --git a/vendor/blaze/src/compiler/default_compiler_2019_09.h b/vendor/blaze/src/compiler/default_compiler_2019_09.h index d90aa125..d5f665a3 100644 --- a/vendor/blaze/src/compiler/default_compiler_2019_09.h +++ b/vendor/blaze/src/compiler/default_compiler_2019_09.h @@ -46,8 +46,8 @@ auto compiler_2019_09_applicator_dependentschemas( schema_context, relative_dynamic_context(dynamic_context), make_property(dependent), compile(context, schema_context, - relative_dynamic_context(dynamic_context), {dependent}, - sourcemeta::core::empty_pointer))); + relative_dynamic_context(dynamic_context), + sourcemeta::blaze::make_weak_pointer(dependent)))); } } @@ -155,9 +155,10 @@ auto compiler_2019_09_applicator_contains_with_options( return {}; } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (annotate) { children.push_back( @@ -280,9 +281,10 @@ auto compiler_2019_09_applicator_unevaluateditems( // NOLINTEND(bugprone-branch-clone) } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (context.mode == Mode::Exhaustive) { children.push_back( @@ -319,9 +321,10 @@ auto compiler_2019_09_applicator_unevaluatedproperties( return {}; } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (context.mode == Mode::Exhaustive) { children.push_back( @@ -357,10 +360,13 @@ auto compiler_2019_09_applicator_unevaluatedproperties( if (maybe_prefix.has_value()) { filter_prefixes.push_back(maybe_prefix.value()); } else { + static const std::string pattern_properties_keyword{ + "patternProperties"}; filter_regexes.push_back( {parse_regex(property.first, schema_context.base, schema_context.relative_pointer.initial().concat( - {"patternProperties"})), + sourcemeta::blaze::make_weak_pointer( + pattern_properties_keyword))), property.first}); } } diff --git a/vendor/blaze/src/compiler/default_compiler_draft4.h b/vendor/blaze/src/compiler/default_compiler_draft4.h index d6fc357c..90a8efdf 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft4.h +++ b/vendor/blaze/src/compiler/default_compiler_draft4.h @@ -17,13 +17,13 @@ static auto parse_regex(const std::string &pattern, const sourcemeta::core::URI &base, - const sourcemeta::core::Pointer &schema_location) + const sourcemeta::core::WeakPointer &schema_location) -> sourcemeta::core::Regex { const auto result{sourcemeta::core::to_regex(pattern)}; if (!result.has_value()) { std::ostringstream message; message << "Invalid regular expression: " << pattern; - throw sourcemeta::blaze::CompilerError(base, schema_location, + throw sourcemeta::blaze::CompilerError(base, to_pointer(schema_location), message.str()); } @@ -51,7 +51,7 @@ defines_direct_enumeration(const sourcemeta::blaze::Instructions &steps) return std::distance(steps.cbegin(), iterator); } -static auto is_inside_disjunctor(const sourcemeta::core::Pointer &pointer) +static auto is_inside_disjunctor(const sourcemeta::core::WeakPointer &pointer) -> bool { return pointer.size() > 2 && pointer.at(pointer.size() - 2).is_index() && pointer.at(pointer.size() - 3).is_property() && @@ -95,9 +95,11 @@ compile_properties(const sourcemeta::blaze::Context &context, std::vector> properties; for (const auto &entry : schema_context.schema.at("properties").as_object()) { - properties.emplace_back(entry.first, - compile(context, schema_context, dynamic_context, - {entry.first}, {entry.first})); + properties.emplace_back( + entry.first, + compile(context, schema_context, dynamic_context, + sourcemeta::blaze::make_weak_pointer(entry.first), + sourcemeta::blaze::make_weak_pointer(entry.first))); } // In many cases, `properties` have some subschemas that are small @@ -196,12 +198,14 @@ auto compiler_draft4_core_ref(const Context &context, throw sourcemeta::core::SchemaReferenceError( schema_context.schema.at(dynamic_context.keyword).to_string(), - entry.pointer, "The schema location is inside of an unknown keyword"); + to_pointer(entry.pointer), + "The schema location is inside of an unknown keyword"); } const auto &reference{context.frame.references().at({type, entry.pointer})}; - const auto label{Evaluator{}.hash( - schema_resource_id(context.resources, reference.base.value_or("")), - reference.fragment.value_or(""))}; + + const auto label{ + Evaluator{}.hash(schema_resource_id(context.resources, reference.base), + reference.fragment.value_or(""))}; /////////////////////////////////////////////////////////////////// // (2) If we know about such label, then just jump into it @@ -228,8 +232,8 @@ auto compiler_draft4_core_ref(const Context &context, if (is_circular(context.frame, entry.pointer, reference, visited)) { auto children{compile( context, new_schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer, - reference.destination)}; + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, reference.destination)}; return {make(sourcemeta::blaze::InstructionIndex::ControlLabel, context, new_schema_context, dynamic_context, ValueUnsignedInteger{label}, std::move(children))}; @@ -245,16 +249,16 @@ auto compiler_draft4_core_ref(const Context &context, // one of the necessary schema resources to the evaluator !context.uses_dynamic_scopes) { return compile(context, schema_context, dynamic_context, - sourcemeta::core::empty_pointer, - sourcemeta::core::empty_pointer, reference.destination); + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, reference.destination); } else { - return { - make(sourcemeta::blaze::InstructionIndex::LogicalAnd, context, - schema_context, dynamic_context, ValueNone{}, - compile(context, schema_context, - relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, - sourcemeta::core::empty_pointer, reference.destination))}; + return {make(sourcemeta::blaze::InstructionIndex::LogicalAnd, context, + schema_context, dynamic_context, ValueNone{}, + compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, + reference.destination))}; } } @@ -550,10 +554,11 @@ auto compiler_draft4_validation_required(const Context &context, } else if (is_closed_properties_required(schema_context.schema, properties_set)) { if (context.mode == Mode::FastValidation && assume_object) { + static const std::string properties_keyword{"properties"}; const SchemaContext new_schema_context{ .relative_pointer = schema_context.relative_pointer.initial().concat( - {"properties"}), + sourcemeta::blaze::make_weak_pointer(properties_keyword)), .schema = schema_context.schema, .vocabularies = schema_context.vocabularies, .base = schema_context.base, @@ -561,8 +566,8 @@ auto compiler_draft4_validation_required(const Context &context, .is_property_name = schema_context.is_property_name}; const DynamicContext new_dynamic_context{ .keyword = "properties", - .base_schema_location = sourcemeta::core::empty_pointer, - .base_instance_location = sourcemeta::core::empty_pointer, + .base_schema_location = sourcemeta::core::empty_weak_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer, .property_as_target = false}; auto properties{compile_properties(context, new_schema_context, new_dynamic_context, current)}; @@ -1208,8 +1213,8 @@ auto compiler_draft4_applicator_patternproperties_with_options( // For each regular expression and corresponding subschema in the object for (const auto &pattern : patterns) { auto substeps{compile(context, schema_context, - relative_dynamic_context(dynamic_context), {pattern}, - {})}; + relative_dynamic_context(dynamic_context), + sourcemeta::blaze::make_weak_pointer(pattern))}; if (annotate) { substeps.push_back( @@ -1280,9 +1285,10 @@ auto compiler_draft4_applicator_additionalproperties_with_options( return {}; } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (annotate) { children.push_back( @@ -1311,10 +1317,13 @@ auto compiler_draft4_applicator_additionalproperties_with_options( if (maybe_prefix.has_value()) { filter_prefixes.push_back(maybe_prefix.value()); } else { + static const std::string pattern_properties_keyword{ + "patternProperties"}; filter_regexes.push_back( {parse_regex(entry.first, schema_context.base, schema_context.relative_pointer.initial().concat( - {"patternProperties"})), + sourcemeta::blaze::make_weak_pointer( + pattern_properties_keyword))), entry.first}); } } @@ -1530,9 +1539,10 @@ auto compiler_draft4_applicator_not(const Context &context, subschemas += 1; } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; // TODO: Be smarter about how we treat `unevaluatedItems` like how we do for // `unevaluatedProperties` @@ -1663,7 +1673,8 @@ auto compiler_draft4_applicator_items_with_options( if (annotate || track_evaluation) { Instructions subchildren{compile( context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; Instructions children; @@ -1701,9 +1712,10 @@ auto compiler_draft4_applicator_items_with_options( return children; } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (track_evaluation) { children.push_back( make(sourcemeta::blaze::InstructionIndex::ControlEvaluate, context, @@ -1786,9 +1798,10 @@ auto compiler_draft4_applicator_additionalitems_from_cursor( return {}; } - Instructions subchildren{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions subchildren{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; Instructions children; @@ -1890,8 +1903,8 @@ auto compiler_draft4_applicator_dependencies( sourcemeta::blaze::InstructionIndex::LogicalWhenDefines, context, schema_context, dynamic_context, make_property(entry.first), compile(context, schema_context, - relative_dynamic_context(dynamic_context), {entry.first}, - sourcemeta::core::empty_pointer))); + relative_dynamic_context(dynamic_context), + sourcemeta::blaze::make_weak_pointer(entry.first)))); } } else if (entry.second.is_array()) { std::vector properties; diff --git a/vendor/blaze/src/compiler/default_compiler_draft6.h b/vendor/blaze/src/compiler/default_compiler_draft6.h index f8a71661..b21317b4 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft6.h +++ b/vendor/blaze/src/compiler/default_compiler_draft6.h @@ -108,7 +108,7 @@ auto compiler_draft6_validation_type(const Context &context, sourcemeta::blaze::InstructionIndex:: LoopItemsPropertiesExactlyTypeStrictHash3) && current.back().relative_instance_location == - dynamic_context.base_instance_location) { + to_pointer(dynamic_context.base_instance_location)) { return {}; } @@ -392,9 +392,10 @@ auto compiler_draft6_applicator_contains(const Context &context, return {}; } - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (children.empty()) { // We still need to check the instance is not empty @@ -421,9 +422,10 @@ auto compiler_draft6_validation_propertynames( // TODO: How can we avoid this copy? auto nested_schema_context = schema_context; nested_schema_context.is_property_name = true; - Instructions children{compile( - context, nested_schema_context, property_relative_dynamic_context(), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, nested_schema_context, + property_relative_dynamic_context(), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; if (children.empty()) { return {}; diff --git a/vendor/blaze/src/compiler/default_compiler_draft7.h b/vendor/blaze/src/compiler/default_compiler_draft7.h index 3948919a..119d0fdd 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft7.h +++ b/vendor/blaze/src/compiler/default_compiler_draft7.h @@ -14,16 +14,19 @@ auto compiler_draft7_applicator_if(const Context &context, const DynamicContext &dynamic_context, const Instructions &) -> Instructions { // `if` - Instructions children{compile( - context, schema_context, relative_dynamic_context(dynamic_context), - sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer)}; + Instructions children{compile(context, schema_context, + relative_dynamic_context(dynamic_context), + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer)}; // `then` + static const std::string then_keyword{"then"}; std::size_t then_cursor{0}; if (schema_context.schema.defines("then")) { then_cursor = children.size(); const auto destination{ - to_uri(schema_context.relative_pointer.initial().concat({"then"}), + to_uri(schema_context.relative_pointer.initial().concat( + make_weak_pointer(then_keyword)), schema_context.base) .recompose()}; assert(context.frame.locations().contains( @@ -31,11 +34,12 @@ auto compiler_draft7_applicator_if(const Context &context, DynamicContext new_dynamic_context{ .keyword = "then", .base_schema_location = dynamic_context.base_schema_location, - .base_instance_location = sourcemeta::core::empty_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer, .property_as_target = dynamic_context.property_as_target}; - for (auto &&step : compile(context, schema_context, new_dynamic_context, - sourcemeta::core::empty_pointer, - sourcemeta::core::empty_pointer, destination)) { + for (auto &&step : + compile(context, schema_context, new_dynamic_context, + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, destination)) { children.push_back(std::move(step)); } @@ -46,11 +50,13 @@ auto compiler_draft7_applicator_if(const Context &context, } // `else` + static const std::string else_keyword{"else"}; std::size_t else_cursor{0}; if (schema_context.schema.defines("else")) { else_cursor = children.size(); const auto destination{ - to_uri(schema_context.relative_pointer.initial().concat({"else"}), + to_uri(schema_context.relative_pointer.initial().concat( + make_weak_pointer(else_keyword)), schema_context.base) .recompose()}; assert(context.frame.locations().contains( @@ -58,11 +64,12 @@ auto compiler_draft7_applicator_if(const Context &context, DynamicContext new_dynamic_context{ .keyword = "else", .base_schema_location = dynamic_context.base_schema_location, - .base_instance_location = sourcemeta::core::empty_pointer, + .base_instance_location = sourcemeta::core::empty_weak_pointer, .property_as_target = dynamic_context.property_as_target}; - for (auto &&step : compile(context, schema_context, new_dynamic_context, - sourcemeta::core::empty_pointer, - sourcemeta::core::empty_pointer, destination)) { + for (auto &&step : + compile(context, schema_context, new_dynamic_context, + sourcemeta::core::empty_weak_pointer, + sourcemeta::core::empty_weak_pointer, destination)) { children.push_back(std::move(step)); } } diff --git a/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h b/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h index 4460f2e0..b60552ba 100644 --- a/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h +++ b/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler.h @@ -19,6 +19,7 @@ #include // std::function #include // std::optional, std::nullopt #include // std::string +#include // std::string_view #include // std::unordered_set #include // std::vector @@ -34,7 +35,7 @@ namespace sourcemeta::blaze { struct SchemaContext { // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) /// The schema location relative to the base URI - const sourcemeta::core::Pointer &relative_pointer; + const sourcemeta::core::WeakPointer &relative_pointer; /// The current subschema const sourcemeta::core::JSON &schema; /// The schema vocabularies in use @@ -56,9 +57,9 @@ struct DynamicContext { /// The schema keyword const std::string keyword; /// The schema base keyword path - const sourcemeta::core::Pointer &base_schema_location; + const sourcemeta::core::WeakPointer &base_schema_location; /// The base instance location that the keyword must be evaluated to - const sourcemeta::core::Pointer &base_instance_location; + const sourcemeta::core::WeakPointer &base_instance_location; /// Whether the instance location property acts as the target const bool property_as_target; // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) @@ -170,8 +171,7 @@ compile(const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, const Compiler &compiler, const Mode mode = Mode::FastValidation, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", std::string_view default_id = "", const std::optional &tweaks = std::nullopt) -> Template; /// @ingroup compiler @@ -189,8 +189,7 @@ compile(const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, const Compiler &compiler, const sourcemeta::core::SchemaFrame &frame, const Mode mode = Mode::FastValidation, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", std::string_view default_id = "", const std::optional &tweaks = std::nullopt) -> Template; /// @ingroup compiler @@ -204,10 +203,10 @@ compile(const sourcemeta::core::JSON &schema, auto SOURCEMETA_BLAZE_COMPILER_EXPORT compile(const Context &context, const SchemaContext &schema_context, const DynamicContext &dynamic_context, - const sourcemeta::core::Pointer &schema_suffix, - const sourcemeta::core::Pointer &instance_suffix = - sourcemeta::core::empty_pointer, - const std::optional &uri = std::nullopt) -> Instructions; + const sourcemeta::core::WeakPointer &schema_suffix, + const sourcemeta::core::WeakPointer &instance_suffix = + sourcemeta::core::empty_weak_pointer, + std::optional uri = std::nullopt) -> Instructions; /// @ingroup compiler /// Serialise a template as JSON diff --git a/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler_unevaluated.h b/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler_unevaluated.h index bf87ab4c..aca465c4 100644 --- a/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler_unevaluated.h +++ b/vendor/blaze/src/compiler/include/sourcemeta/blaze/compiler_unevaluated.h @@ -20,9 +20,9 @@ namespace sourcemeta::blaze { /// @ingroup compiler struct SchemaUnevaluatedEntry { /// The absolute pointers of the static keyword dependencies - std::set static_dependencies; + std::set static_dependencies; /// The absolute pointers of the static keyword dependencies - std::set dynamic_dependencies; + std::set dynamic_dependencies; /// Whether the entry cannot be fully resolved, which means /// there might be unknown dynamic dependencies bool unresolved{false}; diff --git a/vendor/blaze/src/compiler/unevaluated.cc b/vendor/blaze/src/compiler/unevaluated.cc index 95bd569b..37de5c88 100644 --- a/vendor/blaze/src/compiler/unevaluated.cc +++ b/vendor/blaze/src/compiler/unevaluated.cc @@ -1,7 +1,10 @@ #include +#include "compile_helpers.h" + namespace { using namespace sourcemeta::core; +using namespace sourcemeta::blaze; using Known = Vocabularies::Known; auto find_adjacent_dependencies( @@ -29,7 +32,7 @@ auto find_adjacent_dependencies( continue; } - auto pointer{entry.pointer.concat({property.first})}; + auto pointer{entry.pointer.concat(make_weak_pointer(property.first))}; if (is_static) { result.static_dependencies.emplace(std::move(pointer)); } else { @@ -42,7 +45,8 @@ auto find_adjacent_dependencies( switch (walker(property.first, subschema_vocabularies).type) { // References case SchemaKeywordType::Reference: { - const auto reference{frame.dereference(entry, {property.first})}; + const auto reference{ + frame.dereference(entry, make_weak_pointer(property.first))}; if (reference.first == SchemaReferenceType::Static && reference.second.has_value()) { find_adjacent_dependencies( @@ -60,8 +64,8 @@ auto find_adjacent_dependencies( for (std::size_t index = 0; index < property.second.size(); index++) { find_adjacent_dependencies( current, schema, frame, walker, resolver, keywords, root, - frame.traverse(entry, {property.first, index}), is_static, - result); + frame.traverse(entry, make_weak_pointer(property.first, index)), + is_static, result); } break; @@ -72,7 +76,8 @@ auto find_adjacent_dependencies( for (std::size_t index = 0; index < property.second.size(); index++) { find_adjacent_dependencies( current, schema, frame, walker, resolver, keywords, root, - frame.traverse(entry, {property.first, index}), false, result); + frame.traverse(entry, make_weak_pointer(property.first, index)), + false, result); } } @@ -85,7 +90,8 @@ auto find_adjacent_dependencies( if (is_schema(property.second)) { find_adjacent_dependencies( current, schema, frame, walker, resolver, keywords, root, - frame.traverse(entry, {property.first}), false, result); + frame.traverse(entry, make_weak_pointer(property.first)), false, + result); } break; @@ -94,12 +100,14 @@ auto find_adjacent_dependencies( for (std::size_t index = 0; index < property.second.size(); index++) { find_adjacent_dependencies( current, schema, frame, walker, resolver, keywords, root, - frame.traverse(entry, {property.first, index}), false, result); + frame.traverse(entry, make_weak_pointer(property.first, index)), + false, result); } } else if (is_schema(property.second)) { find_adjacent_dependencies( current, schema, frame, walker, resolver, keywords, root, - frame.traverse(entry, {property.first}), false, result); + frame.traverse(entry, make_weak_pointer(property.first)), false, + result); } break; @@ -108,8 +116,9 @@ auto find_adjacent_dependencies( for (const auto &pair : property.second.as_object()) { find_adjacent_dependencies( current, schema, frame, walker, resolver, keywords, root, - frame.traverse(entry, {property.first, pair.first}), false, - result); + frame.traverse(entry, + make_weak_pointer(property.first, pair.first)), + false, result); } } @@ -152,7 +161,8 @@ auto unevaluated(const JSON &schema, const SchemaFrame &frame, const auto subschema_vocabularies{ frame.vocabularies(entry.second, resolver)}; for (const auto &pair : subschema.as_object()) { - const auto keyword_uri{frame.uri(entry.second, {pair.first})}; + const auto keyword_uri{ + frame.uri(entry.second, make_weak_pointer(pair.first))}; SchemaUnevaluatedEntry unevaluated; if ((subschema_vocabularies.contains( diff --git a/vendor/blaze/src/evaluator/evaluator.cc b/vendor/blaze/src/evaluator/evaluator.cc index 37ad699c..e48b2bf4 100644 --- a/vendor/blaze/src/evaluator/evaluator.cc +++ b/vendor/blaze/src/evaluator/evaluator.cc @@ -108,8 +108,8 @@ const sourcemeta::core::JSON Evaluator::null{nullptr}; const sourcemeta::core::JSON Evaluator::empty_string{""}; auto Evaluator::hash(const std::size_t resource, - const sourcemeta::core::JSON::String &fragment) - const noexcept -> std::size_t { + const std::string_view fragment) const noexcept + -> std::size_t { return resource + this->hasher_(fragment); } diff --git a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h index d103d077..2863a386 100644 --- a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h +++ b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator.h @@ -14,6 +14,7 @@ #include // std::chrono #include // std::uint8_t #include // std::function, std::reference_wrapper +#include // std::string_view #include // std::unordered_map #include // std::vector @@ -157,9 +158,8 @@ class SOURCEMETA_BLAZE_EVALUATOR_EXPORT Evaluator { static const sourcemeta::core::JSON null; static const sourcemeta::core::JSON empty_string; - [[nodiscard]] auto - hash(const std::size_t resource, - const sourcemeta::core::JSON::String &fragment) const noexcept + [[nodiscard]] auto hash(const std::size_t resource, + std::string_view fragment) const noexcept -> std::size_t; auto evaluate(const sourcemeta::core::JSON *target) -> void; @@ -175,7 +175,7 @@ class SOURCEMETA_BLAZE_EVALUATOR_EXPORT Evaluator { sourcemeta::core::WeakPointer evaluate_path; sourcemeta::core::WeakPointer instance_location; // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) - const std::hash hasher_{}; + const std::hash hasher_{}; std::vector resources; // To speed up the labels map diff --git a/vendor/blaze/src/linter/valid_default.cc b/vendor/blaze/src/linter/valid_default.cc index f11592fe..af058c38 100644 --- a/vendor/blaze/src/linter/valid_default.cc +++ b/vendor/blaze/src/linter/valid_default.cc @@ -45,21 +45,20 @@ auto ValidDefault::condition( } } - const auto &root_base_dialect{frame.traverse(location.root.value_or("")) - .value_or(location) - .get() - .base_dialect}; - std::optional default_id{location.base}; - if (sourcemeta::core::identify(root, root_base_dialect).has_value() || - default_id.value().empty()) { + const auto &root_base_dialect{ + frame.traverse(frame.root()).value_or(location).get().base_dialect}; + std::string_view default_id{location.base}; + if (!sourcemeta::core::identify(root, root_base_dialect).empty() || + default_id.empty()) { // We want to only set a default identifier if the root schema does not // have an explicit identifier. Otherwise, we can get into corner case // when wrapping the schema - default_id = std::nullopt; + default_id = ""; } - const auto subschema{sourcemeta::core::wrap(root, location.pointer, resolver, - location.dialect)}; + const auto subschema{sourcemeta::core::wrap( + root, sourcemeta::core::to_pointer(location.pointer), resolver, + location.dialect)}; const auto schema_template{compile(subschema, walker, resolver, this->compiler_, Mode::FastValidation, location.dialect, default_id)}; diff --git a/vendor/blaze/src/linter/valid_examples.cc b/vendor/blaze/src/linter/valid_examples.cc index fe4e967e..6a6a79c9 100644 --- a/vendor/blaze/src/linter/valid_examples.cc +++ b/vendor/blaze/src/linter/valid_examples.cc @@ -49,21 +49,20 @@ auto ValidExamples::condition( } } - const auto &root_base_dialect{frame.traverse(location.root.value_or("")) - .value_or(location) - .get() - .base_dialect}; - std::optional default_id{location.base}; - if (sourcemeta::core::identify(root, root_base_dialect).has_value() || - default_id.value().empty()) { + const auto &root_base_dialect{ + frame.traverse(frame.root()).value_or(location).get().base_dialect}; + std::string_view default_id{location.base}; + if (!sourcemeta::core::identify(root, root_base_dialect).empty() || + default_id.empty()) { // We want to only set a default identifier if the root schema does not // have an explicit identifier. Otherwise, we can get into corner case // when wrapping the schema - default_id = std::nullopt; + default_id = ""; } - const auto subschema{sourcemeta::core::wrap(root, location.pointer, resolver, - location.dialect)}; + const auto subschema{sourcemeta::core::wrap( + root, sourcemeta::core::to_pointer(location.pointer), resolver, + location.dialect)}; const auto schema_template{compile(subschema, walker, resolver, this->compiler_, Mode::FastValidation, location.dialect, default_id)}; diff --git a/vendor/blaze/src/test/include/sourcemeta/blaze/test.h b/vendor/blaze/src/test/include/sourcemeta/blaze/test.h index 13bb39c6..139b7399 100644 --- a/vendor/blaze/src/test/include/sourcemeta/blaze/test.h +++ b/vendor/blaze/src/test/include/sourcemeta/blaze/test.h @@ -201,8 +201,7 @@ struct SOURCEMETA_BLAZE_TEST_EXPORT TestSuite { const std::filesystem::path &base_path, const sourcemeta::core::SchemaResolver &schema_resolver, const sourcemeta::core::SchemaWalker &walker, const Compiler &compiler, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", std::string_view default_id = "", const std::optional &tweaks = std::nullopt) -> TestSuite; }; diff --git a/vendor/blaze/src/test/test_parser.cc b/vendor/blaze/src/test/test_parser.cc index 3103f77d..22ea86b8 100644 --- a/vendor/blaze/src/test/test_parser.cc +++ b/vendor/blaze/src/test/test_parser.cc @@ -4,9 +4,10 @@ #include #include -#include // assert -#include // std::get -#include // std::move +#include // assert +#include // std::string_view +#include // std::get +#include // std::move namespace { inline auto TEST_ERROR_IF( @@ -90,8 +91,8 @@ auto TestSuite::parse(const sourcemeta::core::JSON &document, const sourcemeta::core::SchemaResolver &schema_resolver, const sourcemeta::core::SchemaWalker &walker, const Compiler &compiler, - const std::optional &default_dialect, - const std::optional &default_id, + const std::string_view default_dialect, + const std::string_view default_id, const std::optional &tweaks) -> TestSuite { assert(std::filesystem::is_directory(base_path)); TEST_ERROR_IF(!document.is_object(), tracker, sourcemeta::core::empty_pointer, diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index fc8d88c1..4edf68f0 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -13,6 +13,7 @@ option(SOURCEMETA_CORE_UUID "Build the Sourcemeta Core UUID library" ON) option(SOURCEMETA_CORE_MD5 "Build the Sourcemeta Core MD5 library" ON) option(SOURCEMETA_CORE_REGEX "Build the Sourcemeta Core Regex library" ON) option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) +option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) option(SOURCEMETA_CORE_JSON "Build the Sourcemeta Core JSON library" ON) option(SOURCEMETA_CORE_JSONSCHEMA "Build the Sourcemeta Core JSON Schema library" ON) option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) @@ -100,6 +101,10 @@ if(SOURCEMETA_CORE_URI) add_subdirectory(src/core/uri) endif() +if(SOURCEMETA_CORE_URITEMPLATE) + add_subdirectory(src/core/uritemplate) +endif() + if(SOURCEMETA_CORE_JSON) add_subdirectory(src/core/json) endif() @@ -212,6 +217,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/uri) endif() + if(SOURCEMETA_CORE_URITEMPLATE) + add_subdirectory(test/uritemplate) + endif() + if(SOURCEMETA_CORE_JSON) add_subdirectory(test/json) endif() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 2a663c02..c3d107df 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -14,6 +14,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS md5) list(APPEND SOURCEMETA_CORE_COMPONENTS regex) list(APPEND SOURCEMETA_CORE_COMPONENTS uri) + list(APPEND SOURCEMETA_CORE_COMPONENTS uritemplate) list(APPEND SOURCEMETA_CORE_COMPONENTS json) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonl) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) @@ -52,6 +53,9 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") elseif(component STREQUAL "uri") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") + elseif(component STREQUAL "uritemplate") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uritemplate.cmake") elseif(component STREQUAL "json") find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h index 61da59fc..67f7ea1c 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h @@ -38,8 +38,9 @@ namespace sourcemeta::core { using Pointer = GenericPointer>; /// @ingroup jsonpointer -using WeakPointer = GenericPointer, - PropertyHashJSON>; +using WeakPointer = GenericPointer< + // We use this instead of a string view as the latter occupies more memory + std::reference_wrapper, PropertyHashJSON>; /// @ingroup jsonpointer /// A global constant instance of the empty JSON Pointer. @@ -113,6 +114,28 @@ auto get(const JSON &document, const WeakPointer &pointer) -> const JSON &; // constant reference. auto get(JSON &&document, const WeakPointer &pointer) -> const JSON & = delete; +/// @ingroup jsonpointer +/// Get a value from a JSON document using a JSON WeakPointer (non-`const` +/// overload). For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// #include +/// +/// std::istringstream stream{"[ { \"foo\": 1 }, { \"bar\": 2 } ]"}; +/// auto document{sourcemeta::core::parse_json(stream)}; +/// const sourcemeta::core::Pointer pointer{1, "bar"}; +/// sourcemeta::core::JSON &value{ +/// sourcemeta::core::get(document, +/// sourcemeta::core::to_weak_pointer(pointer))}; +/// value = sourcemeta::core::JSON{3}; +/// assert(document.at(1).at("bar").to_integer() == 3); +/// ``` +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto get(JSON &document, const WeakPointer &pointer) -> JSON &; + /// @ingroup jsonpointer /// Get a value from a JSON document using a Pointer, returning an optional that /// is not set if the path does not exist in the document. For example: @@ -573,34 +596,48 @@ auto to_uri(const Pointer &pointer) -> URI; SOURCEMETA_CORE_JSONPOINTER_EXPORT auto to_uri(const Pointer &pointer, const URI &base) -> URI; -// TODO: Only support this with weak pointers +/// @ingroup jsonpointer +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_uri(const WeakPointer &pointer) -> URI; + +/// @ingroup jsonpointer +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_uri(const WeakPointer &pointer, const URI &base) -> URI; + +/// @ingroup jsonpointer +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto to_uri(const WeakPointer &pointer, const std::string_view base) -> URI; + /// @ingroup jsonpointer /// -/// Walk over every element of a JSON document, top-down, using JSON Pointers. -/// For example: +/// Walk over every element of a JSON document, top-down, using weak pointers. +/// Note that the resulting weak pointers hold references to strings in the JSON +/// document, so the document must outlive the walker and any pointers obtained +/// from it. For example: /// /// ```cpp /// #include /// #include /// #include +/// #include /// #include /// /// const sourcemeta::core::JSON document = /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); -/// std::vector subpointers; +/// std::vector subpointers; /// /// for (const auto &subpointer : /// sourcemeta::core::PointerWalker{document}) { -/// subpointers.push_back(subpointer); +/// subpointers.push_back(sourcemeta::core::to_string(subpointer)); /// } /// /// assert(subpointers.size() == 4); -/// assert(subpointers.at(0) == sourcemeta::core::Pointer{}); -/// assert(subpointers.at(1) == sourcemeta::core::Pointer{0}); -/// assert(subpointers.at(2) == sourcemeta::core::Pointer{1}); -/// assert(subpointers.at(3) == sourcemeta::core::Pointer{2}); +/// assert(subpointers.at(0) == ""); +/// assert(subpointers.at(1) == "/0"); +/// assert(subpointers.at(2) == "/1"); +/// assert(subpointers.at(3) == "/2"); /// ``` -using PointerWalker = GenericPointerWalker; +using PointerWalker = GenericPointerWalker; /// @ingroup jsonpointer /// Serialise a Pointer as JSON diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index a3057ece..0e5931e5 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -433,6 +433,33 @@ template class GenericPointer { return result; } + /// Get a copy of the JSON Pointer starting from a given token index. This + /// method is undefined if the index is greater than the pointer size. For + /// example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo", "bar", "baz"}; + /// const sourcemeta::core::Pointer result{pointer.slice(1)}; + /// assert(result.size() == 2); + /// assert(result.at(0).is_property()); + /// assert(result.at(0).to_property() == "bar"); + /// assert(result.at(1).is_property()); + /// assert(result.at(1).to_property() == "baz"); + /// ``` + [[nodiscard]] auto slice(const std::size_t index) const + -> GenericPointer { + assert(index <= this->size()); + auto new_begin{this->data.cbegin()}; + std::advance(new_begin, index); + GenericPointer result; + result.reserve(this->size() - index); + std::copy(new_begin, this->data.cend(), std::back_inserter(result.data)); + return result; + } + /// Concatenate a JSON Pointer with another JSON Pointer, getting a new /// pointer as a result. For example: /// diff --git a/vendor/core/src/core/jsonpointer/jsonpointer.cc b/vendor/core/src/core/jsonpointer/jsonpointer.cc index 8a645a7f..794e8b46 100644 --- a/vendor/core/src/core/jsonpointer/jsonpointer.cc +++ b/vendor/core/src/core/jsonpointer/jsonpointer.cc @@ -162,6 +162,14 @@ auto get(JSON &document, const Pointer &pointer) -> JSON & { return traverse_all(document, pointer); } +auto get(JSON &document, const WeakPointer &pointer) -> JSON & { + if (pointer.empty()) { + return document; + } + + return traverse_all(document, pointer); +} + auto try_get(const JSON &document, const Pointer &pointer) -> const JSON * { return pointer.empty() ? &document : try_traverse(document, pointer); } @@ -379,4 +387,24 @@ auto to_uri(const Pointer &pointer, const URI &base) -> URI { return to_uri(pointer).resolve_from(base).canonicalize(); } +auto to_uri(const WeakPointer &pointer) -> URI { + std::basic_ostringstream> + result; + stringify(pointer, result); + return URI::from_fragment(result.str()); +} + +auto to_uri(const WeakPointer &pointer, const URI &base) -> URI { + return to_uri(pointer).resolve_from(base).canonicalize(); +} + +auto to_uri(const WeakPointer &pointer, const std::string_view base) -> URI { + if (base.empty()) { + return to_uri(pointer); + } + + return to_uri(pointer).resolve_from(URI{base}).canonicalize(); +} + } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/bundle.cc b/vendor/core/src/core/jsonschema/bundle.cc index c668d664..f9bfa842 100644 --- a/vendor/core/src/core/jsonschema/bundle.cc +++ b/vendor/core/src/core/jsonschema/bundle.cc @@ -10,23 +10,24 @@ namespace { -auto is_official_metaschema_reference(const sourcemeta::core::Pointer &pointer, - const std::string &destination) -> bool { +auto is_official_metaschema_reference( + const sourcemeta::core::WeakPointer &pointer, + const std::string &destination) -> bool { assert(!pointer.empty()); assert(pointer.back().is_property()); return pointer.back().to_property() == "$schema" && sourcemeta::core::schema_resolver(destination).has_value(); } -auto dependencies_internal( - const sourcemeta::core::JSON &schema, - const sourcemeta::core::SchemaWalker &walker, - const sourcemeta::core::SchemaResolver &resolver, - const sourcemeta::core::DependencyCallback &callback, - const std::optional &default_dialect, - const std::optional &default_id, - const sourcemeta::core::SchemaFrame::Paths &paths, - std::unordered_set &visited) -> void { +auto dependencies_internal(const sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver, + const sourcemeta::core::DependencyCallback &callback, + std::string_view default_dialect, + std::string_view default_id, + const sourcemeta::core::SchemaFrame::Paths &paths, + std::unordered_set &visited) + -> void { sourcemeta::core::SchemaFrame frame{ sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id, paths); @@ -34,40 +35,38 @@ auto dependencies_internal( default_dialect, default_id)}; std::vector< - std::tuple>> + std::tuple> found; - for (const auto &[key, reference] : frame.references()) { - if (frame.traverse(reference.destination).has_value() || - - // We don't want to report official schemas, as we can expect - // virtually all implementations to understand them out of the box - is_official_metaschema_reference(key.second, reference.destination)) { - continue; + frame.for_each_unresolved_reference([&](const auto &pointer, + const auto &reference) { + // We don't want to report official schemas, as we can expect + // virtually all implementations to understand them out of the box + if (is_official_metaschema_reference(pointer, reference.destination)) { + return; } - if (!reference.base.has_value()) { + if (reference.base.empty()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, + reference.destination, sourcemeta::core::to_pointer(pointer), "Could not resolve schema reference"); } // To not infinitely loop on circular references - if (visited.contains(reference.base.value())) { - continue; + if (visited.contains(reference.base)) { + return; } // If we can't find the destination but there is a base and we can // find the base, then we are facing an unresolved fragment - if (frame.traverse(reference.base.value()).has_value()) { + if (frame.traverse(reference.base).has_value()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, + reference.destination, sourcemeta::core::to_pointer(pointer), "Could not resolve schema reference"); } - assert(reference.base.has_value()); - const auto &identifier{reference.base.value()}; + assert(!reference.base.empty()); + const auto &identifier{reference.base}; auto remote{resolver(identifier)}; if (!remote.has_value()) { throw sourcemeta::core::SchemaResolutionError( @@ -76,33 +75,34 @@ auto dependencies_internal( if (!sourcemeta::core::is_schema(remote.value())) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, + identifier, sourcemeta::core::to_pointer(pointer), "The JSON document is not a valid JSON Schema"); } - const auto base_dialect{sourcemeta::core::base_dialect( + const auto remote_base_dialect{sourcemeta::core::base_dialect( remote.value(), resolver, default_dialect)}; - if (!base_dialect.has_value()) { + if (!remote_base_dialect.has_value()) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, + identifier, sourcemeta::core::to_pointer(pointer), "The JSON document is not a valid JSON Schema"); } - callback(origin, key.second, identifier, remote.value()); - found.emplace_back(std::move(remote).value(), identifier); + callback(origin, pointer, identifier, remote.value()); + found.emplace_back(std::move(remote).value(), + sourcemeta::core::JSON::String{identifier}); visited.emplace(identifier); - } + }); for (const auto &entry : found) { dependencies_internal(std::get<0>(entry), walker, resolver, callback, - default_dialect, std::get<1>(entry).get(), - {sourcemeta::core::empty_pointer}, visited); + default_dialect, std::get<1>(entry), + {sourcemeta::core::empty_weak_pointer}, visited); } } auto embed_schema(sourcemeta::core::JSON &root, const sourcemeta::core::Pointer &container, - const std::string &identifier, + const std::string_view identifier, sourcemeta::core::JSON &&target) -> void { auto *current{&root}; for (const auto &token : container) { @@ -134,18 +134,17 @@ auto embed_schema(sourcemeta::core::JSON &root, auto bundle_schema(sourcemeta::core::JSON &root, const sourcemeta::core::Pointer &container, const sourcemeta::core::JSON &subschema, - sourcemeta::core::SchemaFrame &frame, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + std::string_view default_dialect, + std::string_view default_id, const sourcemeta::core::SchemaFrame::Paths &paths, + std::unordered_set &bundled, const std::size_t depth = 0) -> void { - // Keep in mind that the resulting frame does miss some information. For - // example, when we recurse to framing embedded schemas, we will frame them - // without keeping their new relationship to their parent (after embedding if - // to the container location). However, that's fine for the purpose of this - // function, given we don't pass the frame back to the caller + // Create a fresh frame for each schema we analyze to avoid key collisions + // between different schemas that have references at the same pointer paths + sourcemeta::core::SchemaFrame frame{ + sourcemeta::core::SchemaFrame::Mode::References}; if (depth == 0) { frame.analyse( subschema, walker, resolver, default_dialect, default_id, @@ -155,41 +154,42 @@ auto bundle_schema(sourcemeta::core::JSON &root, frame.analyse(subschema, walker, resolver, default_dialect, default_id); } - // Otherwise, given recursion, we would be modifying the - // references list *while* looping on it - // TODO: How can we avoid this very expensive copy? - const auto references_copy = frame.references(); - for (const auto &[key, reference] : references_copy) { - if (frame.traverse(reference.destination).has_value() || - - // We don't want to bundle official schemas, as we can expect - // virtually all implementations to understand them out of the box - is_official_metaschema_reference(key.second, reference.destination)) { - continue; + frame.for_each_unresolved_reference([&](const auto &pointer, + const auto &reference) { + // We don't want to bundle official schemas, as we can expect + // virtually all implementations to understand them out of the box + if (is_official_metaschema_reference(pointer, reference.destination)) { + return; } // If we can't find the destination but there is a base and we can // find base, then we are facing an unresolved fragment - if (reference.base.has_value() && - frame.traverse(reference.base.value()).has_value()) { + if (!reference.base.empty() && frame.traverse(reference.base).has_value()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, + reference.destination, sourcemeta::core::to_pointer(pointer), "Could not resolve schema reference"); } - if (!reference.base.has_value()) { + if (reference.base.empty()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, + reference.destination, sourcemeta::core::to_pointer(pointer), "Could not resolve schema reference"); } - assert(reference.base.has_value()); - const auto &identifier{reference.base.value()}; + assert(!reference.base.empty()); + const sourcemeta::core::JSON::String identifier{reference.base}; + + // Skip if already bundled to avoid infinite loops on circular + // references + if (bundled.contains(identifier)) { + return; + } + auto remote{resolver(identifier)}; if (!remote.has_value()) { if (frame.traverse(identifier).has_value()) { throw sourcemeta::core::SchemaReferenceError( - reference.destination, key.second, + reference.destination, sourcemeta::core::to_pointer(pointer), "Could not resolve schema reference"); } @@ -199,29 +199,47 @@ auto bundle_schema(sourcemeta::core::JSON &root, if (!sourcemeta::core::is_schema(remote.value())) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, + identifier, sourcemeta::core::to_pointer(pointer), "The JSON document is not a valid JSON Schema"); } - const auto base_dialect{sourcemeta::core::base_dialect( + const auto remote_base_dialect{sourcemeta::core::base_dialect( remote.value(), resolver, default_dialect)}; - if (!base_dialect.has_value()) { + if (!remote_base_dialect.has_value()) { throw sourcemeta::core::SchemaReferenceError( - identifier, key.second, + identifier, sourcemeta::core::to_pointer(pointer), "The JSON document is not a valid JSON Schema"); } + // If the reference has a fragment, verify it exists in the remote + // schema + if (reference.fragment.has_value()) { + // TODO: The fact that we have to re-frame on each loop pass to check + // for this is probably insanely slow + sourcemeta::core::SchemaFrame remote_frame{ + sourcemeta::core::SchemaFrame::Mode::Locations}; + remote_frame.analyse(remote.value(), walker, resolver, default_dialect, + identifier); + if (!remote_frame.traverse(reference.destination).has_value()) { + throw sourcemeta::core::SchemaReferenceError( + reference.destination, sourcemeta::core::to_pointer(pointer), + "Could not resolve schema reference"); + } + } + if (remote.value().is_object()) { - // Always insert an identifier, as a schema might refer to another schema - // using another URI (i.e. due to relying on HTTP re-directions, etc) + // Always insert an identifier, as a schema might refer to another + // schema using another URI (i.e. due to relying on HTTP + // re-directions, etc) sourcemeta::core::reidentify(remote.value(), identifier, - base_dialect.value()); + remote_base_dialect.value()); } - bundle_schema(root, container, remote.value(), frame, walker, resolver, - default_dialect, identifier, paths, depth + 1); + bundled.emplace(identifier); + bundle_schema(root, container, remote.value(), walker, resolver, + default_dialect, identifier, paths, bundled, depth + 1); embed_schema(root, container, identifier, std::move(remote).value()); - } + }); } } // namespace @@ -231,10 +249,9 @@ namespace sourcemeta::core { auto dependencies(const JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, const DependencyCallback &callback, - const std::optional &default_dialect, - const std::optional &default_id, + std::string_view default_dialect, std::string_view default_id, const SchemaFrame::Paths &paths) -> void { - std::unordered_set visited; + std::unordered_set visited; dependencies_internal(schema, walker, resolver, callback, default_dialect, default_id, paths, visited); } @@ -242,18 +259,24 @@ auto dependencies(const JSON &schema, const SchemaWalker &walker, // TODO: Refactor this function to internally rely on the `.dependencies()` // function auto bundle(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + const SchemaResolver &resolver, std::string_view default_dialect, + std::string_view default_id, const std::optional &default_container, const SchemaFrame::Paths &paths) -> void { - SchemaFrame frame{SchemaFrame::Mode::References}; - + // Pre-scan the schema to find any already-embedded schemas and mark them + // as bundled to avoid re-embedding them. This includes the root schema itself + // and any schemas already embedded within it + std::unordered_set bundled; + SchemaFrame initial_frame{SchemaFrame::Mode::Locations}; + initial_frame.analyse(schema, walker, resolver, default_dialect, default_id, + paths); + initial_frame.for_each_resource_uri( + [&bundled](const auto uri) { bundled.emplace(uri); }); if (default_container.has_value()) { // This is undefined behavior assert(!default_container.value().empty()); - bundle_schema(schema, default_container.value(), schema, frame, walker, - resolver, default_dialect, default_id, paths); + bundle_schema(schema, default_container.value(), schema, walker, resolver, + default_dialect, default_id, paths, bundled); return; } @@ -261,9 +284,9 @@ auto bundle(JSON &schema, const SchemaWalker &walker, // bundled schema. Otherwise, potential relative references based on this // implicit base URI will likely not resolve unless end users happen to // know that this implicit base URI is. - if (default_id.has_value() && - !identify(schema, resolver, default_dialect).has_value()) { - reidentify(schema, default_id.value(), resolver, default_dialect); + if (!default_id.empty() && + identify(schema, resolver, default_dialect).empty()) { + reidentify(schema, default_id, resolver, default_dialect); } const auto vocabularies{ @@ -272,8 +295,8 @@ auto bundle(JSON &schema, const SchemaWalker &walker, sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core) || vocabularies.contains( sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core)) { - bundle_schema(schema, {"$defs"}, schema, frame, walker, resolver, - default_dialect, default_id, paths); + bundle_schema(schema, {"$defs"}, schema, walker, resolver, default_dialect, + default_id, paths, bundled); return; } else if ( vocabularies.contains( @@ -304,8 +327,8 @@ auto bundle(JSON &schema, const SchemaWalker &walker, } } - bundle_schema(schema, {"definitions"}, schema, frame, walker, resolver, - default_dialect, default_id, paths); + bundle_schema(schema, {"definitions"}, schema, walker, resolver, + default_dialect, default_id, paths, bundled); return; } else if ( vocabularies.contains( @@ -324,6 +347,7 @@ auto bundle(JSON &schema, const SchemaWalker &walker, sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0_Hyper) || vocabularies.contains( sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0)) { + SchemaFrame frame{SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id); if (frame.standalone()) { return; @@ -337,9 +361,8 @@ auto bundle(JSON &schema, const SchemaWalker &walker, } auto bundle(const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + const SchemaResolver &resolver, std::string_view default_dialect, + std::string_view default_id, const std::optional &default_container, const SchemaFrame::Paths &paths) -> JSON { JSON copy = schema; diff --git a/vendor/core/src/core/jsonschema/format.cc b/vendor/core/src/core/jsonschema/format.cc index 864904af..266abead 100644 --- a/vendor/core/src/core/jsonschema/format.cc +++ b/vendor/core/src/core/jsonschema/format.cc @@ -2,8 +2,9 @@ #include // std::uint64_t #include // std::numeric_limits -#include // std::string +#include // std::string_view #include // std::unordered_map +#include // std::vector namespace { @@ -136,22 +137,31 @@ auto keyword_compare(const sourcemeta::core::JSON::String &left, namespace sourcemeta::core { auto format(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + const SchemaResolver &resolver, std::string_view default_dialect) + -> void { assert(is_schema(schema)); - SchemaFrame frame{SchemaFrame::Mode::Locations}; - frame.analyse(schema, walker, resolver, default_dialect); - - for (const auto &entry : frame.locations()) { - if (entry.second.type != SchemaFrame::LocationType::Resource && - entry.second.type != SchemaFrame::LocationType::Subschema) { - continue; + std::vector objects_to_reorder; + + { + SchemaFrame frame{SchemaFrame::Mode::Locations}; + frame.analyse(schema, walker, resolver, default_dialect); + + for (const auto &entry : frame.locations()) { + if (entry.second.type != SchemaFrame::LocationType::Resource && + entry.second.type != SchemaFrame::LocationType::Subschema) { + continue; + } + + auto &subschema{get(schema, entry.second.pointer)}; + if (subschema.is_object()) { + objects_to_reorder.push_back(&subschema); + } } + } - auto &value{get(schema, entry.second.pointer)}; - if (value.is_object()) { - value.reorder(keyword_compare); - } + // Now apply the reordering after the frame is destroyed + for (auto *object : objects_to_reorder) { + object->reorder(keyword_compare); } } diff --git a/vendor/core/src/core/jsonschema/frame.cc b/vendor/core/src/core/jsonschema/frame.cc index 9cfb1af5..96cedbd7 100644 --- a/vendor/core/src/core/jsonschema/frame.cc +++ b/vendor/core/src/core/jsonschema/frame.cc @@ -13,12 +13,18 @@ enum class AnchorType : std::uint8_t { Static, Dynamic, All }; +// Static keyword strings for reference pointers +static const std::string KEYWORD_SCHEMA{"$schema"}; +static const std::string KEYWORD_REF{"$ref"}; +static const std::string KEYWORD_RECURSIVE_REF{"$recursiveRef"}; +static const std::string KEYWORD_DYNAMIC_REF{"$dynamicRef"}; + namespace { auto find_anchors(const sourcemeta::core::JSON &schema, const sourcemeta::core::Vocabularies &vocabularies) - -> std::map { - std::map result; + -> std::vector> { + std::vector> result; // 2020-12 if (schema.is_object() && @@ -27,18 +33,24 @@ auto find_anchors(const sourcemeta::core::JSON &schema, if (schema.defines("$dynamicAnchor")) { const auto &anchor{schema.at("$dynamicAnchor")}; if (anchor.is_string()) { - result.insert({anchor.to_string(), AnchorType::Dynamic}); + result.emplace_back(anchor.to_string(), AnchorType::Dynamic); } } if (schema.defines("$anchor")) { const auto &anchor{schema.at("$anchor")}; if (anchor.is_string()) { - const auto anchor_string{anchor.to_string()}; - const auto success = result.insert({anchor_string, AnchorType::Static}); - assert(success.second || result.contains(anchor_string)); - if (!success.second) { - result[anchor_string] = AnchorType::All; + const std::string_view anchor_view{anchor.to_string()}; + bool found = false; + for (auto &entry : result) { + if (entry.first == anchor_view) { + entry.second = AnchorType::All; + found = true; + break; + } + } + if (!found) { + result.emplace_back(anchor_view, AnchorType::Static); } } } @@ -53,18 +65,24 @@ auto find_anchors(const sourcemeta::core::JSON &schema, assert(anchor.is_boolean()); if (anchor.to_boolean()) { // We store a 2019-09 recursive anchor as an empty anchor - result.insert({"", AnchorType::Dynamic}); + result.emplace_back(std::string_view{}, AnchorType::Dynamic); } } if (schema.defines("$anchor")) { const auto &anchor{schema.at("$anchor")}; if (anchor.is_string()) { - const auto anchor_string{anchor.to_string()}; - const auto success = result.insert({anchor_string, AnchorType::Static}); - assert(success.second || result.contains(anchor_string)); - if (!success.second) { - result[anchor_string] = AnchorType::All; + const std::string_view anchor_view{anchor.to_string()}; + bool found = false; + for (auto &entry : result) { + if (entry.first == anchor_view) { + entry.second = AnchorType::All; + found = true; + break; + } + } + if (!found) { + result.emplace_back(anchor_view, AnchorType::Static); } } } @@ -79,14 +97,11 @@ auto find_anchors(const sourcemeta::core::JSON &schema, sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_6))) { if (schema.defines("$id")) { assert(schema.at("$id").is_string()); - const sourcemeta::core::URI identifier(schema.at("$id").to_string()); - if (identifier.is_fragment_only()) { - result.insert( - {sourcemeta::core::JSON::String{ - // Check for optional is happening inside is_fragment_only() - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - identifier.fragment().value()}, - AnchorType::Static}); + const auto &id_string{schema.at("$id").to_string()}; + if (id_string.starts_with('#')) { + // The original string is "#fragment", skip the '#' + result.emplace_back(std::string_view{id_string}.substr(1), + AnchorType::Static); } } } @@ -98,14 +113,11 @@ auto find_anchors(const sourcemeta::core::JSON &schema, sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_4)) { if (schema.defines("id")) { assert(schema.at("id").is_string()); - const sourcemeta::core::URI identifier(schema.at("id").to_string()); - if (identifier.is_fragment_only()) { - result.insert( - {sourcemeta::core::JSON::String{ - // Check for optional is happening inside is_fragment_only() - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - identifier.fragment().value()}, - AnchorType::Static}); + const auto &id_string{schema.at("id").to_string()}; + if (id_string.starts_with('#')) { + // The original string is "#fragment", skip the '#' + result.emplace_back(std::string_view{id_string}.substr(1), + AnchorType::Static); } } } @@ -113,19 +125,19 @@ auto find_anchors(const sourcemeta::core::JSON &schema, return result; } -auto find_nearest_bases( - const std::unordered_map> - &bases, - const sourcemeta::core::Pointer &pointer, - const std::optional &default_base) - -> std::pair, - sourcemeta::core::Pointer> { +template +auto find_nearest_bases_ref( + const std::unordered_map> &bases, + const sourcemeta::core::WeakPointer &pointer) + -> std::optional< + std::pair>, + sourcemeta::core::WeakPointer>> { auto current_pointer{pointer}; while (true) { const auto match{bases.find(current_pointer)}; if (match != bases.cend()) { - return {match->second, current_pointer}; + return std::make_pair(std::cref(match->second), current_pointer); } if (current_pointer.empty()) { @@ -135,22 +147,36 @@ auto find_nearest_bases( current_pointer = current_pointer.initial(); } + return std::nullopt; +} + +template +auto find_nearest_bases( + const std::unordered_map> &bases, + const sourcemeta::core::WeakPointer &pointer, + const std::optional &default_base) + -> std::pair, sourcemeta::core::WeakPointer> { + const auto result{find_nearest_bases_ref(bases, pointer)}; + if (result.has_value()) { + return {result->first.get(), result->second}; + } + if (default_base.has_value()) { - return {{default_base.value()}, sourcemeta::core::empty_pointer}; + return {{StringType{default_base.value()}}, + sourcemeta::core::empty_weak_pointer}; } - return {{}, sourcemeta::core::empty_pointer}; + return {{}, sourcemeta::core::empty_weak_pointer}; } auto find_every_base( - const std::unordered_map> &bases, - const sourcemeta::core::Pointer &pointer) - -> std::vector< - std::pair> { - std::vector< - std::pair> + const sourcemeta::core::WeakPointer &pointer) + -> std::vector> { + std::vector> result; auto current_pointer{pointer}; @@ -158,7 +184,7 @@ auto find_every_base( const auto match{bases.find(current_pointer)}; if (match != bases.cend()) { for (const auto &base : match->second) { - result.emplace_back(base, current_pointer); + result.emplace_back(std::string_view{base}, current_pointer); } } @@ -170,46 +196,77 @@ auto find_every_base( } if (result.empty() || - result.back().second != sourcemeta::core::empty_pointer) { - result.emplace_back("", sourcemeta::core::empty_pointer); + result.back().second != sourcemeta::core::empty_weak_pointer) { + result.emplace_back(std::string_view{}, + sourcemeta::core::empty_weak_pointer); } return result; } // TODO: Why do we have this function both here and on `walker.cc`? -auto ref_overrides_adjacent_keywords(std::string_view base_dialect) -> bool { +auto ref_overrides_adjacent_keywords( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> bool { + using sourcemeta::core::SchemaBaseDialect; // In older drafts, the presence of `$ref` would override any sibling // keywords // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 - return base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#"; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return true; + default: + return false; + } } -auto supports_id_anchors(std::string_view base_dialect) -> bool { - return base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#"; +auto supports_id_anchors(const sourcemeta::core::SchemaBaseDialect base_dialect) + -> bool { + using sourcemeta::core::SchemaBaseDialect; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return true; + default: + return false; + } } -auto fragment_string(const sourcemeta::core::URI &uri) - -> std::optional { - const auto fragment{uri.fragment()}; - if (fragment.has_value()) { - return sourcemeta::core::JSON::String{fragment.value()}; +auto set_base_and_fragment( + sourcemeta::core::SchemaFrame::ReferencesEntry &entry) -> void { + if (entry.destination.empty()) { + entry.base = std::string_view{}; + entry.fragment = std::nullopt; + return; } - return std::nullopt; + const auto hash_position{entry.destination.find('#')}; + if (hash_position != std::string::npos) { + // Has a fragment + if (hash_position == 0) { + // Starts with #, so no base + entry.base = std::string_view{}; + } else { + entry.base = std::string_view{entry.destination}.substr(0, hash_position); + } + entry.fragment = + std::string_view{entry.destination}.substr(hash_position + 1); + } else { + // No fragment + entry.base = std::string_view{entry.destination}; + entry.fragment = std::nullopt; + } } [[noreturn]] @@ -221,31 +278,31 @@ auto throw_already_exists(const sourcemeta::core::JSON::String &uri) -> void { auto store(sourcemeta::core::SchemaFrame::Locations &frame, const sourcemeta::core::SchemaReferenceType type, const sourcemeta::core::SchemaFrame::LocationType entry_type, - const sourcemeta::core::JSON::String &uri, - const std::optional &root_id, - const sourcemeta::core::JSON::String &base_id, - const sourcemeta::core::Pointer &pointer_from_root, - const sourcemeta::core::Pointer &pointer_from_base, - const sourcemeta::core::JSON::String &dialect, - const sourcemeta::core::JSON::String &base_dialect, - const std::optional &parent, + sourcemeta::core::JSON::String uri, const std::string_view base, + const sourcemeta::core::WeakPointer &pointer_from_root, + const std::size_t relative_pointer_offset, + const std::string_view dialect, + const sourcemeta::core::SchemaBaseDialect base_dialect, + const std::optional &parent, const bool ignore_if_present = false, const bool already_canonical = false) -> void { - const auto canonical{ - already_canonical ? uri : sourcemeta::core::URI::canonicalize(uri)}; - const auto inserted{frame - .insert({{type, canonical}, - {.parent = parent, - .type = entry_type, - .root = root_id, - .base = base_id, - .pointer = pointer_from_root, - .relative_pointer = pointer_from_base, - .dialect = dialect, - .base_dialect = base_dialect}}) - .second}; + auto canonical{already_canonical ? std::move(uri) + : sourcemeta::core::URI::canonicalize(uri)}; + auto [iterator, inserted] = + frame.insert({{type, std::move(canonical)}, + {.parent = parent, + .type = entry_type, + .base = base, + .pointer = pointer_from_root, + .relative_pointer = relative_pointer_offset, + .dialect = dialect, + .base_dialect = base_dialect}}); if (!ignore_if_present && !inserted) { - throw_already_exists(canonical); + throw_already_exists(iterator->first.second); + } + + if (inserted && iterator->first.second == base) { + iterator->second.base = iterator->first.second; } } @@ -260,7 +317,7 @@ struct InternalEntry { // NOLINTNEXTLINE(bugprone-exception-escape) struct CacheSubschema { bool orphan{}; - std::optional parent{}; + std::optional parent{}; }; } // namespace @@ -300,27 +357,28 @@ auto SchemaFrame::to_json( sourcemeta::core::to_json(location.second.parent)); entry.assign_assume_new("type", sourcemeta::core::to_json(location.second.type)); - entry.assign_assume_new("root", - sourcemeta::core::to_json(location.second.root)); - entry.assign_assume_new("base", - sourcemeta::core::to_json(location.second.base)); + entry.assign_assume_new("root", this->root_.empty() ? JSON{nullptr} + : JSON{this->root_}); + entry.assign_assume_new("base", JSON{JSON::String{location.second.base}}); entry.assign_assume_new("pointer", sourcemeta::core::to_json(location.second.pointer)); if (tracker.has_value()) { - entry.assign_assume_new( - "position", sourcemeta::core::to_json( - tracker.value().get(location.second.pointer))); + entry.assign_assume_new("position", + sourcemeta::core::to_json(tracker.value().get( + to_pointer(location.second.pointer)))); } else { entry.assign_assume_new("position", sourcemeta::core::to_json(nullptr)); } entry.assign_assume_new( "relativePointer", - sourcemeta::core::to_json(location.second.relative_pointer)); + sourcemeta::core::to_json( + this->relative_instance_location(location.second))); entry.assign_assume_new("dialect", - sourcemeta::core::to_json(location.second.dialect)); + JSON{JSON::String{location.second.dialect}}); entry.assign_assume_new( - "baseDialect", sourcemeta::core::to_json(location.second.base_dialect)); + "baseDialect", + JSON{JSON::String{to_string(location.second.base_dialect)}}); switch (location.first.first) { case SchemaReferenceType::Static: @@ -348,18 +406,24 @@ auto SchemaFrame::to_json( if (tracker.has_value()) { entry.assign_assume_new("position", - sourcemeta::core::to_json( - tracker.value().get(reference.first.second))); + sourcemeta::core::to_json(tracker.value().get( + to_pointer(reference.first.second)))); } else { entry.assign_assume_new("position", sourcemeta::core::to_json(nullptr)); } entry.assign_assume_new( "destination", sourcemeta::core::to_json(reference.second.destination)); - entry.assign_assume_new("base", - sourcemeta::core::to_json(reference.second.base)); entry.assign_assume_new( - "fragment", sourcemeta::core::to_json(reference.second.fragment)); + "base", + !reference.second.base.empty() + ? sourcemeta::core::to_json(JSON::String{reference.second.base}) + : sourcemeta::core::to_json(nullptr)); + entry.assign_assume_new( + "fragment", reference.second.fragment.has_value() + ? sourcemeta::core::to_json( + JSON::String{reference.second.fragment.value()}) + : sourcemeta::core::to_json(nullptr)); root.at("references").push_back(std::move(entry)); } @@ -368,15 +432,16 @@ auto SchemaFrame::to_json( auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id, + std::string_view default_dialect, + std::string_view default_id, const SchemaFrame::Paths &paths) -> void { + this->reset(); + assert(std::unordered_set(paths.cbegin(), paths.cend()).size() == + paths.size()); std::vector subschema_entries; - std::unordered_map subschemas; - std::unordered_map> - base_uris; - std::unordered_map> - base_dialects; + std::unordered_map subschemas; + std::unordered_map> base_uris; + std::unordered_map> base_dialects; for (const auto &path : paths) { // Passing paths that overlap is undefined behavior. No path should @@ -387,38 +452,41 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const auto &schema{get(root, path)}; - const std::optional root_base_dialect{ + const auto root_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; if (!root_base_dialect.has_value()) { throw SchemaUnknownBaseDialectError(); } - std::optional root_id{ - // If we are dealing with nested schemas, then by definition - // the root has no identifier - !path.empty() ? std::nullopt - : sourcemeta::core::identify( - schema, root_base_dialect.value(), default_id)}; - if (root_id.has_value()) { - root_id = URI::canonicalize(root_id.value()); + // If we are dealing with nested schemas, then by definition + // the root has no identifier + std::optional root_id{std::nullopt}; + if (path.empty()) { + const auto maybe_id{sourcemeta::core::identify( + schema, root_base_dialect.value(), default_id)}; + if (!maybe_id.empty()) { + root_id = URI::canonicalize(maybe_id); + this->root_ = root_id.value(); + } } - const std::optional root_dialect{ + const std::string_view root_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - assert(root_dialect.has_value()); + assert(!root_dialect.empty()); // If the top-level schema has a specific identifier but the user // passes a different default identifier, then the schema is by // definition known by two names, and we should handle that accordingly const bool has_explicit_different_id{root_id.has_value() && - default_id.has_value() && - root_id.value() != default_id.value()}; + !default_id.empty() && + root_id.value() != default_id}; if (has_explicit_different_id) { - const auto default_id_canonical{URI::canonicalize(default_id.value())}; + const auto default_id_canonical{URI::canonicalize(default_id)}; + // Use this->root_ as base - it contains root_id.value() and persists store(this->locations_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Resource, default_id_canonical, root_id, - root_id.value(), path, sourcemeta::core::empty_pointer, - root_dialect.value(), root_base_dialect.value(), std::nullopt); + SchemaFrame::LocationType::Resource, default_id_canonical, + this->root_, path, path.size(), root_dialect, + root_base_dialect.value(), std::nullopt); base_uris.insert({path, {root_id.value(), default_id_canonical}}); } @@ -434,16 +502,25 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } // Dialect - assert(entry.dialect.has_value()); - base_dialects.insert({entry.pointer, {entry.dialect.value()}}); + assert(!entry.dialect.empty()); + base_dialects.insert({entry.pointer, {entry.dialect}}); // Base dialect assert(entry.base_dialect.has_value()); // Schema identifier - std::optional id{sourcemeta::core::identify( - entry.subschema.get(), entry.base_dialect.value(), - entry.pointer.empty() ? root_id : std::nullopt)}; + // We need to store the default_id in a local variable to ensure + // it survives the identify() call, as identify() returns a string_view + const std::string default_id_for_entry{ + entry.pointer.empty() && root_id.has_value() ? root_id.value() + : std::string{}}; + const auto maybe_id{sourcemeta::core::identify(entry.subschema.get(), + entry.base_dialect.value(), + default_id_for_entry)}; + std::optional id{ + !maybe_id.empty() + ? std::make_optional(std::string{maybe_id}) + : std::nullopt}; // Store information subschemas.emplace(entry.pointer, CacheSubschema{.orphan = entry.orphan, @@ -455,24 +532,29 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, for (const auto &entry_index : current_subschema_entries) { const auto &entry{subschema_entries[entry_index]}; + const auto &common_pointer_weak{entry.common.pointer}; + const auto &common_parent{entry.common.parent}; if (entry.id.has_value()) { + assert(entry.common.base_dialect.has_value()); const bool ref_overrides = ref_overrides_adjacent_keywords(entry.common.base_dialect.value()); const bool is_pre_2019_09_location_independent_identifier = supports_id_anchors(entry.common.base_dialect.value()) && - sourcemeta::core::URI{entry.id.value()}.is_fragment_only(); + entry.id.value().starts_with('#'); if ((!entry.common.subschema.get().defines("$ref") || !ref_overrides) && // If we are dealing with a pre-2019-09 location independent // identifier, we ignore it as a traditional identifier and take // care of it as an anchor !is_pre_2019_09_location_independent_identifier) { - const auto bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + const auto bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; for (const auto &base_string : bases.first) { // Otherwise we end up pushing the top-level resource twice if (entry_index == 0 && has_explicit_different_id && - default_id.has_value() && default_id.value() == base_string) { + !default_id.empty() && default_id == base_string) { continue; } @@ -496,7 +578,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, const auto maybe_match{ this->locations_.find({SchemaReferenceType::Static, new_id})}; if (maybe_match != this->locations_.cend() && - maybe_match->second.pointer != entry.common.pointer) { + maybe_match->second.pointer != common_pointer_weak) { throw_already_exists(new_id); } @@ -505,14 +587,13 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, assert(entry.common.base_dialect.has_value()); store(this->locations_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Resource, new_id, root_id, - new_id, entry.common.pointer, - sourcemeta::core::empty_pointer, - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent); + SchemaFrame::LocationType::Resource, new_id, new_id, + common_pointer_weak, common_pointer_weak.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); } - auto base_uri_match{base_uris.find(entry.common.pointer)}; + auto base_uri_match{base_uris.find(common_pointer_weak)}; if (base_uri_match != base_uris.cend()) { if (std::find(base_uri_match->second.cbegin(), base_uri_match->second.cend(), @@ -520,7 +601,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, base_uri_match->second.push_back(new_id); } } else { - base_uris.insert({entry.common.pointer, {new_id}}); + base_uris.insert({common_pointer_weak, {new_id}}); } } } @@ -530,33 +611,38 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // Handle metaschema references const auto maybe_metaschema{ sourcemeta::core::dialect(entry.common.subschema.get())}; - if (maybe_metaschema.has_value()) { - sourcemeta::core::URI metaschema{maybe_metaschema.value()}; - const auto nearest_bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + if (!maybe_metaschema.empty()) { + sourcemeta::core::URI metaschema{maybe_metaschema}; + const auto nearest_bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; if (!nearest_bases.first.empty()) { metaschema.resolve_from(nearest_bases.first.front()); } metaschema.canonicalize(); - const JSON::String destination{metaschema.recompose()}; assert(entry.common.subschema.get().defines("$schema")); - this->references_.insert_or_assign( - {SchemaReferenceType::Static, - entry.common.pointer.concat({"$schema"})}, - SchemaFrame::ReferencesEntry{ - .original = maybe_metaschema.value(), - .destination = destination, - .base = metaschema.recompose_without_fragment(), - .fragment = fragment_string(metaschema)}); + auto schema_pointer{common_pointer_weak}; + schema_pointer.push_back(std::cref(KEYWORD_SCHEMA)); + const auto [it, inserted] = this->references_.insert_or_assign( + {SchemaReferenceType::Static, std::move(schema_pointer)}, + SchemaFrame::ReferencesEntry{.original = maybe_metaschema, + .destination = + metaschema.recompose(), + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } } // Handle schema anchors for (const auto &[name, type] : find_anchors(entry.common.subschema.get(), entry.common.vocabularies)) { - const auto bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + const auto bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; if (bases.first.empty()) { const auto anchor_uri{sourcemeta::core::URI::from_fragment(name)}; @@ -564,31 +650,27 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, if (type == AnchorType::Static || type == AnchorType::All) { store(this->locations_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, relative_anchor_uri, - root_id, "", entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent); + SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); } if (type == AnchorType::Dynamic || type == AnchorType::All) { store(this->locations_, SchemaReferenceType::Dynamic, - SchemaFrame::LocationType::Anchor, relative_anchor_uri, - root_id, "", entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent); + SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); // Register a dynamic anchor as a static anchor if possible too if (entry.common.vocabularies.contains( Vocabularies::Known::JSON_Schema_2020_12_Core)) { store(this->locations_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, relative_anchor_uri, - root_id, "", entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent, - true); + SchemaFrame::LocationType::Anchor, relative_anchor_uri, "", + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent, true); } } } else { @@ -604,36 +686,40 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, continue; } + const auto base_entry{this->locations_.find( + {SchemaReferenceType::Static, base_string})}; + + const std::string_view base_view{ + base_entry != this->locations_.cend() + ? std::string_view{base_entry->first.second} + : std::string_view{base_string}}; + if (type == AnchorType::Static || type == AnchorType::All) { store(this->locations_, sourcemeta::core::SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, anchor_uri, root_id, - base_string, entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent); + SchemaFrame::LocationType::Anchor, anchor_uri, base_view, + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); } if (type == AnchorType::Dynamic || type == AnchorType::All) { store(this->locations_, sourcemeta::core::SchemaReferenceType::Dynamic, - SchemaFrame::LocationType::Anchor, anchor_uri, root_id, - base_string, entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent); + SchemaFrame::LocationType::Anchor, anchor_uri, base_view, + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent); // Register a dynamic anchor as a static anchor if possible too if (entry.common.vocabularies.contains( Vocabularies::Known::JSON_Schema_2020_12_Core)) { store(this->locations_, sourcemeta::core::SchemaReferenceType::Static, - SchemaFrame::LocationType::Anchor, anchor_uri, root_id, - base_string, entry.common.pointer, - entry.common.pointer.resolve_from(bases.second), - entry.common.dialect.value(), - entry.common.base_dialect.value(), entry.common.parent, - true); + SchemaFrame::LocationType::Anchor, anchor_uri, base_view, + common_pointer_weak, bases.second.size(), + entry.common.dialect, entry.common.base_dialect.value(), + common_parent, true); } } @@ -645,28 +731,36 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // It is important for the loop that follows to assume a specific ordering // where smaller pointers (by number of tokens) are scanned first. - // TODO: Perform the pointer walking using weak pointers only - const auto pointer_walker{sourcemeta::core::PointerWalker{schema}}; - std::vector pointers{pointer_walker.cbegin(), - pointer_walker.cend()}; + std::vector pointers; + for (const auto &weak_pointer : sourcemeta::core::PointerWalker{schema}) { + pointers.push_back(weak_pointer); + } + std::ranges::sort(pointers, std::less<>()); // Pre-compute every possible pointer to the schema for (const auto &relative_pointer : pointers) { - const auto pointer{path.concat(relative_pointer)}; + const auto pointer_weak{path.concat(relative_pointer)}; - const auto dialects{ - find_nearest_bases(base_dialects, pointer, root_dialect)}; - assert(dialects.first.size() == 1); + const auto dialect_match{ + find_nearest_bases_ref(base_dialects, pointer_weak)}; + const auto &dialect_for_pointer{dialect_match.has_value() + ? dialect_match->first.get().front() + : root_dialect}; - auto every_base_result = find_every_base(base_uris, pointer); + auto every_base_result = find_every_base(base_uris, pointer_weak); + WeakPointer cached_base{}; for (const auto &base : every_base_result) { + const auto resolved{cached_base == base.second + ? pointer_weak.resolve_from(cached_base) + : pointer_weak.resolve_from(base.second)}; + cached_base = base.second; + auto relative_pointer_uri{ base.first.empty() - ? sourcemeta::core::to_uri(pointer.resolve_from(base.second)) - : sourcemeta::core::to_uri(pointer.resolve_from(base.second)) - .resolve_from({base.first})}; + ? sourcemeta::core::to_uri(resolved) + : sourcemeta::core::to_uri(resolved, base.first)}; relative_pointer_uri.canonicalize(); auto result{relative_pointer_uri.recompose()}; @@ -676,33 +770,38 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, if (!contains) { const auto nearest_bases{ - find_nearest_bases(base_uris, pointer, base.first)}; + find_nearest_bases(base_uris, pointer_weak, + std::optional{base.first})}; assert(!nearest_bases.first.empty()); const auto ¤t_base{nearest_bases.first.front()}; - const auto maybe_base_entry{this->locations_.find( + const auto base_entry{this->locations_.find( {SchemaReferenceType::Static, current_base})}; - const auto current_base_dialect{ - maybe_base_entry == this->locations_.cend() - ? root_base_dialect.value() - : maybe_base_entry->second.base_dialect}; + const std::string_view base_view{ + base_entry != this->locations_.cend() + ? std::string_view{base_entry->first.second} + : std::string_view{current_base}}; - const auto subschema{subschemas.find(pointer)}; + const sourcemeta::core::SchemaBaseDialect current_base_dialect{ + base_entry != this->locations_.cend() + ? base_entry->second.base_dialect + : root_base_dialect.value()}; + const auto subschema{subschemas.find(pointer_weak)}; if (subschema != subschemas.cend()) { store(this->locations_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Subschema, result, root_id, - current_base, pointer, - pointer.resolve_from(nearest_bases.second), - dialects.first.front(), current_base_dialect, + SchemaFrame::LocationType::Subschema, std::move(result), + base_view, pointer_weak, nearest_bases.second.size(), + dialect_for_pointer, current_base_dialect, subschema->second.parent, false, true); } else { store(this->locations_, SchemaReferenceType::Static, - SchemaFrame::LocationType::Pointer, result, root_id, - current_base, pointer, - pointer.resolve_from(nearest_bases.second), - dialects.first.front(), current_base_dialect, dialects.second, + SchemaFrame::LocationType::Pointer, std::move(result), + base_view, pointer_weak, nearest_bases.second.size(), + dialect_for_pointer, current_base_dialect, + dialect_match.has_value() ? dialect_match->second + : empty_weak_pointer, false, true); } } @@ -716,9 +815,12 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // Resolve references after all framing was performed for (const auto &entry : subschema_entries) { + const auto &common_pointer_weak{entry.common.pointer}; if (entry.common.subschema.get().is_object()) { - const auto nearest_bases{ - find_nearest_bases(base_uris, entry.common.pointer, entry.id)}; + const auto nearest_bases{find_nearest_bases( + base_uris, common_pointer_weak, + entry.id ? std::optional{*entry.id} + : std::nullopt)}; if (entry.common.subschema.get().defines("$ref")) { if (entry.common.subschema.get().at("$ref").is_string()) { const auto &original{ @@ -729,14 +831,15 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } ref.canonicalize(); - this->references_.insert_or_assign( - {SchemaReferenceType::Static, - entry.common.pointer.concat({"$ref"})}, + auto ref_pointer{common_pointer_weak}; + ref_pointer.push_back(std::cref(KEYWORD_REF)); + const auto [it, inserted] = this->references_.insert_or_assign( + {SchemaReferenceType::Static, std::move(ref_pointer)}, SchemaFrame::ReferencesEntry{.original = original, .destination = ref.recompose(), - .base = - ref.recompose_without_fragment(), - .fragment = fragment_string(ref)}); + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } } @@ -754,7 +857,7 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, if (ref != "#") { throw sourcemeta::core::SchemaReferenceError( entry.id.value_or(""), - entry.common.pointer.concat({"$recursiveRef"}), + to_pointer(common_pointer_weak).concat({"$recursiveRef"}), "Invalid recursive reference"); } @@ -766,13 +869,15 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, ? SchemaReferenceType::Static : SchemaReferenceType::Dynamic}; const sourcemeta::core::URI anchor_uri{anchor_uri_string}; - this->references_.insert_or_assign( - {reference_type, entry.common.pointer.concat({"$recursiveRef"})}, - SchemaFrame::ReferencesEntry{ - .original = ref, - .destination = anchor_uri.recompose(), - .base = anchor_uri.recompose_without_fragment(), - .fragment = fragment_string(anchor_uri)}); + auto recursive_ref_pointer{common_pointer_weak}; + recursive_ref_pointer.push_back(std::cref(KEYWORD_RECURSIVE_REF)); + const auto [it, inserted] = this->references_.insert_or_assign( + {reference_type, std::move(recursive_ref_pointer)}, + SchemaFrame::ReferencesEntry{.original = ref, + .destination = anchor_uri.recompose(), + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } if (entry.common.vocabularies.contains( @@ -802,15 +907,17 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, !has_fragment || (has_fragment && maybe_static_frame != this->locations_.end() && maybe_dynamic_frame == this->locations_.end())}; - this->references_.insert_or_assign( + auto dynamic_ref_pointer{common_pointer_weak}; + dynamic_ref_pointer.push_back(std::cref(KEYWORD_DYNAMIC_REF)); + const auto [it, inserted] = this->references_.insert_or_assign( {behaves_as_static ? SchemaReferenceType::Static : SchemaReferenceType::Dynamic, - entry.common.pointer.concat({"$dynamicRef"})}, + std::move(dynamic_ref_pointer)}, SchemaFrame::ReferencesEntry{.original = original, .destination = std::move(ref_string), - .base = - ref.recompose_without_fragment(), - .fragment = fragment_string(ref)}); + .base = std::string_view{}, + .fragment = std::nullopt}); + set_base_and_fragment(it->second); } } } @@ -819,7 +926,9 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, // A schema is standalone if all references can be resolved within itself if (this->standalone()) { // Find all dynamic anchors - std::map> dynamic_anchors; + // Values are pointers to full URIs in locations_ + std::unordered_map> + dynamic_anchors; for (const auto &entry : this->locations_) { if (entry.first.first != SchemaReferenceType::Dynamic || entry.second.type != SchemaFrame::LocationType::Anchor) { @@ -827,12 +936,9 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } const URI anchor_uri{entry.first.second}; + // Copy the fragment to avoid dangling string_view (anchor_uri is local) const JSON::String fragment{anchor_uri.fragment().value_or("")}; - if (!dynamic_anchors.contains(fragment)) { - dynamic_anchors.emplace(fragment, std::vector{}); - } - - dynamic_anchors[fragment].push_back(entry.first.second); + dynamic_anchors[fragment].push_back(&entry.first.second); } // If there is a dynamic reference that only has one possible @@ -846,7 +952,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, continue; } - const auto match{dynamic_anchors.find(reference.second.fragment.value())}; + const auto match{dynamic_anchors.find( + JSON::String{reference.second.fragment.value()})}; assert(match != dynamic_anchors.cend()); // Otherwise we can assume there is only one possible target for the // dynamic reference @@ -855,14 +962,12 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } to_delete.push_back(reference.first); - const URI new_destination{match->second.front()}; to_insert.emplace_back( SchemaFrame::References::key_type{SchemaReferenceType::Static, reference.first.second}, SchemaFrame::References::mapped_type{ - match->second.front(), match->second.front(), - new_destination.recompose_without_fragment(), - fragment_string(new_destination)}); + reference.second.original, *match->second.front(), + std::string_view{}, std::nullopt}); } // Because we can't mutate a map as we are traversing it @@ -872,7 +977,8 @@ auto SchemaFrame::analyse(const JSON &root, const SchemaWalker &walker, } for (auto &&entry : to_insert) { - this->references_.emplace(std::move(entry)); + const auto [it, inserted] = this->references_.emplace(std::move(entry)); + set_base_and_fragment(it->second); } } } @@ -885,6 +991,17 @@ auto SchemaFrame::references() const noexcept -> const References & { return this->references_; } +auto SchemaFrame::reference(const SchemaReferenceType type, + const WeakPointer &pointer) const + -> std::optional> { + const auto result{this->references_.find({type, pointer})}; + if (result != this->references_.cend()) { + return result->second; + } + + return std::nullopt; +} + auto SchemaFrame::standalone() const -> bool { return std::ranges::all_of(this->references_, [&](const auto &reference) { assert(!reference.first.second.empty()); @@ -899,6 +1016,10 @@ auto SchemaFrame::standalone() const -> bool { }); } +auto SchemaFrame::root() const noexcept -> const JSON::String & { + return this->root_; +} + auto SchemaFrame::vocabularies(const Location &location, const SchemaResolver &resolver) const -> Vocabularies { @@ -907,15 +1028,16 @@ auto SchemaFrame::vocabularies(const Location &location, } auto SchemaFrame::uri(const Location &location, - const Pointer &relative_schema_location) const + const WeakPointer &relative_schema_location) const -> JSON::String { - return to_uri(location.relative_pointer.concat(relative_schema_location), + return to_uri(this->relative_instance_location(location).concat( + relative_schema_location), location.base) .recompose(); } auto SchemaFrame::traverse(const Location &location, - const Pointer &relative_schema_location) const + const WeakPointer &relative_schema_location) const -> const Location & { const auto new_uri{this->uri(location, relative_schema_location)}; const auto static_match{ @@ -930,16 +1052,17 @@ auto SchemaFrame::traverse(const Location &location, return dynamic_match->second; } -auto SchemaFrame::traverse(const JSON::String &uri) const +auto SchemaFrame::traverse(const std::string_view uri) const -> std::optional> { + const JSON::String uri_string{uri}; const auto static_result{ - this->locations_.find({SchemaReferenceType::Static, uri})}; + this->locations_.find({SchemaReferenceType::Static, uri_string})}; if (static_result != this->locations_.cend()) { return static_result->second; } const auto dynamic_result{ - this->locations_.find({SchemaReferenceType::Dynamic, uri})}; + this->locations_.find({SchemaReferenceType::Dynamic, uri_string})}; if (dynamic_result != this->locations_.cend()) { return dynamic_result->second; } @@ -947,10 +1070,21 @@ auto SchemaFrame::traverse(const JSON::String &uri) const return std::nullopt; } -auto SchemaFrame::uri(const Pointer &pointer) const +auto SchemaFrame::traverse(const WeakPointer &pointer) const + -> std::optional> { + // TODO: This is slow. Consider adding a pointer-indexed secondary + // lookup structure to SchemaFrame + for (const auto &entry : this->locations_) { + if (entry.second.pointer == pointer) { + return entry.second; + } + } + + return std::nullopt; +} + +auto SchemaFrame::uri(const WeakPointer &pointer) const -> std::optional> { - // TODO: This is potentially very slow. Traversing by pointer shouldn't - // require an O(N) operation for (const auto &entry : this->locations_) { if (entry.second.pointer == pointer) { return entry.first.second; @@ -961,11 +1095,11 @@ auto SchemaFrame::uri(const Pointer &pointer) const } auto SchemaFrame::dereference(const Location &location, - const Pointer &relative_schema_location) const + const WeakPointer &relative_schema_location) const -> std::pair>> { const auto effective_location{ - location.pointer.concat({relative_schema_location})}; + location.pointer.concat(relative_schema_location)}; const auto maybe_reference_entry{this->references_.find( {SchemaReferenceType::Static, effective_location})}; if (maybe_reference_entry == this->references_.cend()) { @@ -987,13 +1121,26 @@ auto SchemaFrame::dereference(const Location &location, return {SchemaReferenceType::Static, destination->second}; } -auto SchemaFrame::references_to(const Pointer &pointer) const -> std::vector< - std::reference_wrapper> { - std::vector> - result; +auto SchemaFrame::for_each_resource_uri( + const std::function &callback) const -> void { + for (const auto &[key, location] : this->locations_) { + if (location.type == LocationType::Resource) { + callback(key.second); + } + } +} - // TODO: This is currently very slow, as we need to loop on every reference - // to brute force whether it points to the desired entry or not +auto SchemaFrame::for_each_unresolved_reference( + const std::function + &callback) const -> void { + for (const auto &[key, reference] : this->references_) { + if (!this->traverse(reference.destination).has_value()) { + callback(key.second, reference); + } + } +} + +auto SchemaFrame::has_references_to(const WeakPointer &pointer) const -> bool { for (const auto &reference : this->references_) { assert(!reference.first.second.empty()); assert(reference.first.second.back().is_property()); @@ -1003,7 +1150,7 @@ auto SchemaFrame::references_to(const Pointer &pointer) const -> std::vector< {reference.first.first, reference.second.destination})}; if (match != this->locations_.cend() && match->second.pointer == pointer) { - result.emplace_back(reference); + return true; } } else { for (const auto &location : this->locations_) { @@ -1013,14 +1160,92 @@ auto SchemaFrame::references_to(const Pointer &pointer) const -> std::vector< if (!reference.second.fragment.has_value() || URI{location.first.second}.fragment().value_or("") == reference.second.fragment.value()) { - result.emplace_back(reference); + return true; } } } } } - return result; + return false; +} + +auto SchemaFrame::has_references_through(const WeakPointer &pointer) const + -> bool { + for (const auto &reference : this->references_) { + assert(!reference.first.second.empty()); + assert(reference.first.second.back().is_property()); + + if (reference.first.first == SchemaReferenceType::Static) { + const auto match{this->locations_.find( + {reference.first.first, reference.second.destination})}; + if (match != this->locations_.cend() && + match->second.pointer.starts_with(pointer)) { + return true; + } + } else { + for (const auto &location : this->locations_) { + if (location.second.type == LocationType::Anchor && + location.first.first == SchemaReferenceType::Dynamic && + location.second.pointer.starts_with(pointer)) { + if (!reference.second.fragment.has_value() || + URI{location.first.second}.fragment().value_or("") == + reference.second.fragment.value()) { + return true; + } + } + } + } + } + + return false; +} + +auto SchemaFrame::has_references_through(const WeakPointer &pointer, + const WeakPointer::Token &tail) const + -> bool { + for (const auto &reference : this->references_) { + assert(!reference.first.second.empty()); + assert(reference.first.second.back().is_property()); + + if (reference.first.first == SchemaReferenceType::Static) { + const auto match{this->locations_.find( + {reference.first.first, reference.second.destination})}; + if (match != this->locations_.cend() && + match->second.pointer.starts_with(pointer, tail)) { + return true; + } + } else { + for (const auto &location : this->locations_) { + if (location.second.type == LocationType::Anchor && + location.first.first == SchemaReferenceType::Dynamic && + location.second.pointer.starts_with(pointer, tail)) { + if (!reference.second.fragment.has_value() || + URI{location.first.second}.fragment().value_or("") == + reference.second.fragment.value()) { + return true; + } + } + } + } + } + + return false; +} + +auto SchemaFrame::relative_instance_location(const Location &location) const + -> WeakPointer { + return location.pointer.slice(location.relative_pointer); +} + +auto SchemaFrame::empty() const noexcept -> bool { + return this->locations_.empty() && this->references_.empty(); +} + +auto SchemaFrame::reset() -> void { + this->root_.clear(); + this->locations_.clear(); + this->references_.clear(); } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h index 4bd7284b..a18eeada 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h @@ -38,14 +38,26 @@ namespace sourcemeta::core { /// @ingroup jsonschema /// A default resolver that relies on built-in official schemas. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto schema_resolver(std::string_view identifier) -> std::optional; +auto schema_resolver(const std::string_view identifier) -> std::optional; /// @ingroup jsonschema /// A default schema walker with support for a wide range of drafs SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto schema_walker(std::string_view keyword, const Vocabularies &vocabularies) +auto schema_walker(const std::string_view keyword, + const Vocabularies &vocabularies) -> const SchemaWalkerResult &; +/// @ingroup jsonschema +/// Stringify a base dialect to its URI +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto to_string(const SchemaBaseDialect base_dialect) -> std::string_view; + +/// @ingroup jsonschema +/// Parse a base dialect URI to its enum representation +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto to_base_dialect(const std::string_view base_dialect) + -> std::optional; + /// @ingroup jsonschema /// /// Calculate the priority of a keyword that determines the ordering in which a @@ -82,7 +94,7 @@ auto schema_walker(std::string_view keyword, const Vocabularies &vocabularies) /// sourcemeta::core::schema_walker) == 1); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto schema_keyword_priority(std::string_view keyword, +auto schema_keyword_priority(const std::string_view keyword, const Vocabularies &vocabularies, const SchemaWalker &walker) -> std::uint64_t; @@ -120,8 +132,8 @@ auto is_empty_schema(const JSON &schema) -> bool; /// @ingroup jsonschema /// -/// This function returns the URI identifier of the given schema, if any. For -/// example: +/// This function returns the URI identifier of the given schema, or an empty +/// string view if the schema has no identifier. For example: /// /// ```cpp /// #include @@ -134,25 +146,23 @@ auto is_empty_schema(const JSON &schema) -> bool; /// "$id": "https://sourcemeta.com/example-schema" /// })JSON"); /// -/// std::optional id{sourcemeta::core::identify( +/// const auto id{sourcemeta::core::identify( /// document, sourcemeta::core::schema_resolver)}; -/// assert(id.has_value()); -/// assert(id.value() == "https://sourcemeta.com/example-schema"); +/// assert(!id.empty()); +/// assert(id == "https://sourcemeta.com/example-schema"); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto identify(const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) - -> std::optional; + std::string_view default_dialect = "", + std::string_view default_id = "") -> std::string_view; /// @ingroup jsonschema /// /// A shortcut to sourcemeta::core::identify if you know the base dialect /// of the schema. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto identify(const JSON &schema, const std::string &base_dialect, - const std::optional &default_id = std::nullopt) - -> std::optional; +auto identify(const JSON &schema, const SchemaBaseDialect base_dialect, + std::string_view default_id = "") -> std::string_view; /// @ingroup jsonschema /// @@ -173,14 +183,14 @@ auto identify(const JSON &schema, const std::string &base_dialect, /// })JSON"); /// /// sourcemeta::core::anonymize(document, -/// "https://json-schema.org/draft/2020-12/schema"); +/// sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12); /// -/// std::optional id{sourcemeta::core::identify( +/// const auto id{sourcemeta::core::identify( /// document, sourcemeta::core::schema_resolver)}; -/// assert(!id.has_value()); +/// assert(id.empty()); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto anonymize(JSON &schema, const std::string &base_dialect) -> void; +auto anonymize(JSON &schema, const SchemaBaseDialect base_dialect) -> void; /// @ingroup jsonschema /// @@ -202,24 +212,23 @@ auto anonymize(JSON &schema, const std::string &base_dialect) -> void; /// "https://example.com/my-new-id", /// sourcemeta::core::schema_resolver); /// -/// std::optional id{sourcemeta::core::identify( +/// const auto id{sourcemeta::core::identify( /// document, sourcemeta::core::schema_resolver)}; -/// assert(id.has_value()); -/// assert(id.value() == "https://example.com/my-new-id"); +/// assert(!id.empty()); +/// assert(id == "https://example.com/my-new-id"); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto reidentify( - JSON &schema, const std::string &new_identifier, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> void; +auto reidentify(JSON &schema, std::string_view new_identifier, + const SchemaResolver &resolver, + std::string_view default_dialect = "") -> void; /// @ingroup jsonschema /// /// A shortcut to sourcemeta::core::reidentify if you know the base /// dialect of the schema. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto reidentify(JSON &schema, const std::string &new_identifier, - const std::string &base_dialect) -> void; +auto reidentify(JSON &schema, std::string_view new_identifier, + const SchemaBaseDialect base_dialect) -> void; /// @ingroup jsonschema /// @@ -237,16 +246,13 @@ auto reidentify(JSON &schema, const std::string &new_identifier, /// "type": "object" /// })JSON"); /// -/// const std::optional -/// dialect{sourcemeta::core::dialect(document)}; -/// assert(dialect.has_value()); -/// assert(dialect.value() == -/// "https://json-schema.org/draft/2020-12/schema"); +/// const auto dialect{sourcemeta::core::dialect(document)}; +/// assert(!dialect.empty()); +/// assert(dialect == "https://json-schema.org/draft/2020-12/schema"); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto dialect(const JSON &schema, - const std::optional &default_dialect = std::nullopt) - -> std::optional; +auto dialect(const JSON &schema, std::string_view default_dialect = "") + -> std::string_view; /// @ingroup jsonschema /// @@ -273,16 +279,14 @@ auto dialect(const JSON &schema, /// /// This function will throw if the metaschema cannot be determined or resolved. SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto metaschema( - const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> JSON; +auto metaschema(const JSON &schema, const SchemaResolver &resolver, + std::string_view default_dialect = "") -> JSON; /// @ingroup jsonschema /// -/// Get the URI of the base dialect that applies to the given schema. If you set +/// Get the base dialect that applies to the given schema. If you set /// a default dialect URI, this will be used if the given schema does not -/// declare the `$schema` keyword. The result of this function is unset -/// if its base dialect could not be determined. For example: +/// declare the `$schema` keyword. For example: /// /// ```cpp /// #include @@ -295,18 +299,18 @@ auto metaschema( /// "type": "object" /// })JSON"); /// -/// const std::optional base_dialect{ +/// const auto base_dialect{ /// sourcemeta::core::base_dialect( /// document, sourcemeta::core::schema_resolver)}; /// /// assert(base_dialect.has_value()); /// assert(base_dialect.value() == -/// "https://json-schema.org/draft/2020-12/schema"); +/// sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto base_dialect(const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = - std::nullopt) -> std::optional; + std::string_view default_dialect = "") + -> std::optional; /// @ingroup jsonschema /// @@ -342,8 +346,7 @@ auto base_dialect(const JSON &schema, const SchemaResolver &resolver, /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto vocabularies(const JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect = - std::nullopt) -> Vocabularies; + std::string_view default_dialect = "") -> Vocabularies; /// @ingroup jsonschema /// @@ -351,8 +354,8 @@ auto vocabularies(const JSON &schema, const SchemaResolver &resolver, /// dialect and dialect URI. SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto vocabularies(const SchemaResolver &resolver, - const std::string &base_dialect, const std::string &dialect) - -> Vocabularies; + const SchemaBaseDialect base_dialect, + std::string_view dialect) -> Vocabularies; /// @ingroup jsonschema /// @@ -378,8 +381,7 @@ auto vocabularies(const SchemaResolver &resolver, SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto format(JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) - -> void; + std::string_view default_dialect = "") -> void; /// @ingroup jsonschema /// @@ -401,7 +403,7 @@ auto format(JSON &schema, const SchemaWalker &walker, /// std::cerr << "\n"; /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto wrap(const JSON::String &identifier) -> JSON; +auto wrap(std::string_view identifier) -> JSON; /// @ingroup jsonschema /// @@ -429,8 +431,7 @@ auto wrap(const JSON::String &identifier) -> JSON; /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto wrap(const JSON &schema, const Pointer &pointer, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) + const SchemaResolver &resolver, std::string_view default_dialect = "") -> JSON; /// @ingroup jsonschema diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h index fedff5b6..18f4fe64 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_bundle.h @@ -13,20 +13,19 @@ #include // NOLINTEND(misc-include-cleaner) -#include // std::function -#include // std::optional, std::nullopt +#include // std::function +#include // std::string_view namespace sourcemeta::core { /// @ingroup jsonschema /// A callback to get dependency information -/// - Origin URI +/// - Origin URI (empty if none) /// - Pointer (reference keyword from the origin) /// - Target URI /// - Target schema -using DependencyCallback = - std::function &, const Pointer &, - const JSON::String &, const JSON &)>; +using DependencyCallback = std::function; /// @ingroup jsonschema /// @@ -67,12 +66,13 @@ using DependencyCallback = /// }); /// ``` SOURCEMETA_CORE_JSONSCHEMA_EXPORT -auto dependencies( - const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const DependencyCallback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, - const SchemaFrame::Paths &paths = {empty_pointer}) -> void; +auto dependencies(const JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const DependencyCallback &callback, + std::string_view default_dialect = "", + std::string_view default_id = "", + const SchemaFrame::Paths &paths = {empty_weak_pointer}) + -> void; /// @ingroup jsonschema /// @@ -126,10 +126,10 @@ auto dependencies( SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto bundle(JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", + std::string_view default_id = "", const std::optional &default_container = std::nullopt, - const SchemaFrame::Paths &paths = {empty_pointer}) -> void; + const SchemaFrame::Paths &paths = {empty_weak_pointer}) -> void; /// @ingroup jsonschema /// @@ -185,10 +185,10 @@ auto bundle(JSON &schema, const SchemaWalker &walker, SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto bundle(const JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, + std::string_view default_dialect = "", + std::string_view default_id = "", const std::optional &default_container = std::nullopt, - const SchemaFrame::Paths &paths = {empty_pointer}) -> JSON; + const SchemaFrame::Paths &paths = {empty_weak_pointer}) -> JSON; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h index af29c9f8..56eac655 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_error.h @@ -8,9 +8,9 @@ #include #include -#include // std::exception -#include // std::string -#include // std::move +#include // std::exception +#include // std::string +#include // std::string_view namespace sourcemeta::core { @@ -43,12 +43,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaError : public std::exception { class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaResolutionError : public std::exception { public: - SchemaResolutionError(std::string identifier, const char *message) - : identifier_{std::move(identifier)}, message_{message} {} - SchemaResolutionError(std::string identifier, std::string message) = delete; - SchemaResolutionError(std::string identifier, std::string &&message) = delete; - SchemaResolutionError(std::string identifier, - std::string_view message) = delete; + SchemaResolutionError(const std::string_view identifier, const char *message) + : identifier_{identifier}, message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; @@ -70,8 +66,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaResolutionError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaRelativeMetaschemaResolutionError : public SchemaResolutionError { public: - SchemaRelativeMetaschemaResolutionError(std::string identifier) - : SchemaResolutionError{std::move(identifier), + SchemaRelativeMetaschemaResolutionError(const std::string_view identifier) + : SchemaResolutionError{identifier, "Relative meta-schema URIs are not valid " "according to the JSON Schema specification"} {} }; @@ -81,11 +77,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaRelativeMetaschemaResolutionError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaVocabularyError : public std::exception { public: - SchemaVocabularyError(std::string uri, const char *message) - : uri_{std::move(uri)}, message_{message} {} - SchemaVocabularyError(std::string uri, std::string message) = delete; - SchemaVocabularyError(std::string uri, std::string &&message) = delete; - SchemaVocabularyError(std::string uri, std::string_view message) = delete; + SchemaVocabularyError(const std::string_view uri, const char *message) + : uri_{uri}, message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; @@ -105,16 +98,10 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaVocabularyError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceError : public std::exception { public: - SchemaReferenceError(std::string identifier, Pointer schema_location, - const char *message) - : identifier_{std::move(identifier)}, - schema_location_{std::move(schema_location)}, message_{message} {} - SchemaReferenceError(std::string identifier, Pointer schema_location, - std::string message) = delete; - SchemaReferenceError(std::string identifier, Pointer schema_location, - std::string &&message) = delete; - SchemaReferenceError(std::string identifier, Pointer schema_location, - std::string_view message) = delete; + SchemaReferenceError(const std::string_view identifier, + Pointer schema_location, const char *message) + : identifier_{identifier}, schema_location_{std::move(schema_location)}, + message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; @@ -189,18 +176,19 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaUnknownBaseDialectError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRuleProcessedTwiceError : public std::exception { public: - SchemaTransformRuleProcessedTwiceError(std::string name, Pointer location) - : name_{std::move(name)}, location_{std::move(location)} {} + SchemaTransformRuleProcessedTwiceError(const std::string_view name, + Pointer location) + : name_{name}, location_{std::move(location)} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Transformation rules must only be processed once"; } - [[nodiscard]] auto name() const noexcept -> const auto & { + [[nodiscard]] auto name() const noexcept -> std::string_view { return this->name_; } - [[nodiscard]] auto location() const noexcept -> const auto & { + [[nodiscard]] auto location() const noexcept -> const Pointer & { return this->location_; } @@ -215,8 +203,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRuleProcessedTwiceError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceObjectResourceError : public std::exception { public: - SchemaReferenceObjectResourceError(std::string identifier) - : identifier_{std::move(identifier)} {} + SchemaReferenceObjectResourceError(const std::string_view identifier) + : identifier_{identifier} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "A schema with a top-level `$ref` in JSON Schema Draft 7 and older " @@ -225,7 +213,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceObjectResourceError "bundling, are not possible without undefined behavior"; } - [[nodiscard]] auto identifier() const noexcept -> const auto & { + [[nodiscard]] auto identifier() const noexcept -> std::string_view { return this->identifier_; } @@ -238,8 +226,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaReferenceObjectResourceError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaBaseDialectError : public std::exception { public: - SchemaBaseDialectError(std::string base_dialect) - : base_dialect_{std::move(base_dialect)} {} + SchemaBaseDialectError(const std::string_view base_dialect) + : base_dialect_{base_dialect} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Unrecognized base dialect"; @@ -258,11 +246,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaBaseDialectError class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrameError : public std::exception { public: - SchemaFrameError(std::string identifier, const char *message) - : identifier_{std::move(identifier)}, message_{message} {} - SchemaFrameError(std::string identifier, std::string message) = delete; - SchemaFrameError(std::string identifier, std::string &&message) = delete; - SchemaFrameError(std::string identifier, std::string_view message) = delete; + SchemaFrameError(const std::string_view identifier, const char *message) + : identifier_{identifier}, message_{message} {} [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index ed2ac645..9f95dbe8 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -52,53 +52,6 @@ namespace sourcemeta::core { /// frame.analyse(document, /// sourcemeta::core::schema_walker, /// sourcemeta::core::schema_resolver); -/// -/// // IDs -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/foo"})); -/// -/// // Anchors -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#test"})); -/// -/// // Root Pointers -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/$id"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/$schema"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/items"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/items/$id"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/items/type"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/foo"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/foo/$anchor"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/foo/type"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/bar"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/schema#/properties/bar/$ref"})); -/// -/// // Subpointers -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/foo#/$id"})); -/// assert(frame.locations().contains({sourcemeta::core::SchemaReferenceType::Static, -/// "https://www.example.com/foo#/type"})); -/// -/// // References -/// assert(frame.references().contains({sourcemeta::core::SchemaReferenceType::Static, -/// { "properties", "bar", "$ref" }})); -/// assert(frame.references().at({sourcemeta::core::SchemaReferenceType::Static, -/// { "properties", "bar", "$ref" }}).destination == -/// "https://www.example.com/schema#/properties/foo"); /// ``` class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { public: @@ -113,12 +66,13 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// A single entry in a JSON Schema reference map struct ReferencesEntry { - JSON::String original; + std::string_view original; + // TODO: This one is tricky to turn into a view, as there is no + // location entry to point to if it is an external unresolved reference JSON::String destination; - // TODO: This string can be a `string_view` over the `destination` - std::optional base; - // TODO: This string can be a `string_view` over the `destination` - std::optional fragment; + // Empty means no base + std::string_view base; + std::optional fragment; }; /// A JSON Schema reference map is a mapping of a JSON Pointer @@ -129,7 +83,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// have a static and a dynamic reference to the same location /// on the same schema object. using References = - std::map, ReferencesEntry>; + std::map, ReferencesEntry>; #if defined(__GNUC__) #pragma GCC diagnostic push @@ -152,17 +106,13 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// A location entry struct Location { - // TODO: Turn this into a weak pointer - std::optional parent; + std::optional parent; LocationType type; - std::optional root; - JSON::String base; - // TODO: Turn this into a weak pointer - Pointer pointer; - // TODO: Turn this into a weak pointer - Pointer relative_pointer; - JSON::String dialect; - JSON::String base_dialect; + std::string_view base; + WeakPointer pointer; + std::size_t relative_pointer; + std::string_view dialect; + SchemaBaseDialect base_dialect; }; /// A JSON Schema reference frame is a mapping of URIs to schema identifiers, @@ -176,8 +126,8 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { // point to different places. std::map, Location>; - /// A set of paths to frame within a schema wrapper - using Paths = std::set; + /// A list of paths to frame within a schema wrapper + using Paths = std::vector; /// Export the frame entries as JSON [[nodiscard]] auto to_json( @@ -186,12 +136,11 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// Analyse a schema or set of schemas from a given root. Passing /// multiple paths that have any overlap is undefined behaviour - auto - analyse(const JSON &root, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt, - const Paths &paths = {empty_pointer}) -> void; + auto analyse(const JSON &root, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = "", + std::string_view default_id = "", + const Paths &paths = {empty_weak_pointer}) -> void; /// Access the analysed schema locations [[nodiscard]] auto locations() const noexcept -> const Locations &; @@ -199,9 +148,17 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// Access the analysed schema references [[nodiscard]] auto references() const noexcept -> const References &; + /// Get a specific reference entry by type and pointer + [[nodiscard]] auto reference(const SchemaReferenceType type, + const WeakPointer &pointer) const + -> std::optional>; + /// Check whether the analysed schema has no external references [[nodiscard]] auto standalone() const -> bool; + /// Get the root schema identifier (empty if none) + [[nodiscard]] auto root() const noexcept -> const JSON::String &; + /// Get the vocabularies associated with a location entry [[nodiscard]] auto vocabularies(const Location &location, const SchemaResolver &resolver) const @@ -210,32 +167,65 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { /// Get the URI associated with a location entry [[nodiscard]] auto uri(const Location &location, - const Pointer &relative_schema_location = empty_pointer) const + const WeakPointer &relative_schema_location = empty_weak_pointer) const -> JSON::String; /// Get the location associated by traversing a pointer from another location [[nodiscard]] auto traverse(const Location &location, - const Pointer &relative_schema_location) const + const WeakPointer &relative_schema_location) const -> const Location &; /// Get the location associated with a given URI - [[nodiscard]] auto traverse(const JSON::String &uri) const + [[nodiscard]] auto traverse(const std::string_view uri) const + -> std::optional>; + + /// Get the location associated with a given pointer + [[nodiscard]] auto traverse(const WeakPointer &pointer) const -> std::optional>; /// Turn an absolute pointer into a location URI - [[nodiscard]] auto uri(const Pointer &pointer) const + [[nodiscard]] auto uri(const WeakPointer &pointer) const -> std::optional>; /// Try to dereference a reference location into its destination location - [[nodiscard]] auto - dereference(const Location &location, - const Pointer &relative_schema_location = empty_pointer) const + [[nodiscard]] auto dereference( + const Location &location, + const WeakPointer &relative_schema_location = empty_weak_pointer) const -> std::pair>>; - /// Find all references to a given location pointer - [[nodiscard]] auto references_to(const Pointer &pointer) const -> std::vector< - std::reference_wrapper>; + /// Iterate over all resource URIs in the frame + auto for_each_resource_uri( + const std::function &callback) const -> void; + + /// Iterate over all unresolved references (where destination cannot be + /// traversed) + auto for_each_unresolved_reference( + const std::function + &callback) const -> void; + + /// Check if there are any references to a given location pointer + [[nodiscard]] auto has_references_to(const WeakPointer &pointer) const + -> bool; + + /// Check if there are any references that go through a given location pointer + [[nodiscard]] auto has_references_through(const WeakPointer &pointer) const + -> bool; + /// Check if there are any references that go through a given location pointer + /// with a tail token + [[nodiscard]] auto + has_references_through(const WeakPointer &pointer, + const WeakPointer::Token &tail) const -> bool; + + /// Get the relative instance location pointer for a given location entry + [[nodiscard]] auto relative_instance_location(const Location &location) const + -> WeakPointer; + + /// Check if the frame has no analysed data + [[nodiscard]] auto empty() const noexcept -> bool; + + /// Reset the frame, clearing all analysed data + auto reset() -> void; private: Mode mode_; @@ -245,6 +235,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { #if defined(_MSC_VER) #pragma warning(disable : 4251 4275) #endif + JSON::String root_; Locations locations_; References references_; #if defined(_MSC_VER) diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h index eb609061..a3770872 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h @@ -60,7 +60,8 @@ namespace sourcemeta::core { class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { public: /// Create a transformation rule. Each rule must have a unique name. - SchemaTransformRule(std::string &&name, std::string &&message); + SchemaTransformRule(const std::string_view name, + const std::string_view message); // Necessary to wrap rules on smart pointers virtual ~SchemaTransformRule() = default; @@ -75,10 +76,10 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { auto operator==(const SchemaTransformRule &other) const -> bool; /// Fetch the name of a rule - [[nodiscard]] auto name() const -> const std::string &; + [[nodiscard]] auto name() const noexcept -> std::string_view; /// Fetch the message of a rule - [[nodiscard]] auto message() const -> const std::string &; + [[nodiscard]] auto message() const noexcept -> std::string_view; /// The result of evaluating a rule struct Result { @@ -108,13 +109,6 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { std::optional description; }; - /// Apply the rule to a schema - auto apply(JSON &schema, const JSON &root, const Vocabularies &vocabularies, - const SchemaWalker &walker, const SchemaResolver &resolver, - const SchemaFrame &frame, - const SchemaFrame::Location &location) const - -> std::pair; - /// Check if the rule applies to a schema [[nodiscard]] auto check(const JSON &schema, const JSON &root, const Vocabularies &vocabularies, @@ -125,10 +119,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { /// A method to optionally fix any reference location that was affected by the /// transformation. [[nodiscard]] virtual auto - rereference(const std::string &reference, const Pointer &origin, + rereference(const std::string_view reference, const Pointer &origin, const Pointer &target, const Pointer ¤t) const -> Pointer; -private: /// The rule condition [[nodiscard]] virtual auto condition(const JSON &schema, const JSON &root, @@ -140,6 +133,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformRule { /// then the rule condition is considered to not be fixable. virtual auto transform(JSON &schema, const Result &result) const -> void; +private: // Exporting symbols that depends on the standard C++ library is considered // safe. // https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN @@ -237,7 +231,7 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { } /// Remove a rule from the bundle - auto remove(const std::string &name) -> bool; + auto remove(const std::string_view name) -> bool; /// The callback that is called whenever the condition of a rule holds true. /// The arguments are as follows: @@ -251,19 +245,19 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { const SchemaTransformRule::Result &)>; /// Apply the bundle of rules to a schema - [[nodiscard]] auto - apply(JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const Callback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) const + [[nodiscard]] auto apply(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const Callback &callback, + std::string_view default_dialect = "", + std::string_view default_id = "") const -> std::pair; /// Report back the rules from the bundle that need to be applied to a schema - [[nodiscard]] auto - check(const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const Callback &callback, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) const + [[nodiscard]] auto check(const JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const Callback &callback, + std::string_view default_dialect = "", + std::string_view default_id = "") const -> std::pair; [[nodiscard]] auto begin() const -> auto { return this->rules.cbegin(); } diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h index 4267240e..1413d964 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_types.h @@ -35,6 +35,26 @@ using SchemaResolver = std::function(std::string_view)>; /// The reference type enum class SchemaReferenceType : std::uint8_t { Static, Dynamic }; +/// @ingroup jsonschema +/// All the known JSON Schema base dialects +enum class SchemaBaseDialect : std::uint8_t { + JSON_Schema_2020_12, + JSON_Schema_2020_12_Hyper, + JSON_Schema_2019_09, + JSON_Schema_2019_09_Hyper, + JSON_Schema_Draft_7, + JSON_Schema_Draft_7_Hyper, + JSON_Schema_Draft_6, + JSON_Schema_Draft_6_Hyper, + JSON_Schema_Draft_4, + JSON_Schema_Draft_4_Hyper, + JSON_Schema_Draft_3, + JSON_Schema_Draft_3_Hyper, + JSON_Schema_Draft_2_Hyper, + JSON_Schema_Draft_1_Hyper, + JSON_Schema_Draft_0_Hyper +}; + #if defined(__GNUC__) #pragma GCC diagnostic push // For some strange reason, GCC on Debian 11 believes that a member of @@ -191,28 +211,19 @@ struct SchemaWalkerResult { /// For walking purposes, some functions need to understand which JSON Schema /// keywords declare other JSON Schema definitions. To accomplish this in a /// generic and flexible way that does not assume the use any vocabulary other -/// than `core`, these functions take a walker function as argument, of the type -/// sourcemeta::core::SchemaWalker. -/// -/// For convenience, we provide the following default walkers: -/// -/// - sourcemeta::core::schema_walker -/// - sourcemeta::core::schema_walker_none +/// than `core`, these functions take a walker function as argument. using SchemaWalker = std::function; /// @ingroup jsonschema /// An entry of a schema iterator. struct SchemaIteratorEntry { - // TODO: Turn this into a weak pointer - std::optional parent; - // TODO: Turn this into a weak pointer - Pointer pointer; + std::optional parent; + WeakPointer pointer; // TODO: Use "known" enum classes + strings for dialects - std::optional dialect; + std::string_view dialect; Vocabularies vocabularies; - // TODO: Use "known" enum classes for base dialects - std::optional base_dialect; + std::optional base_dialect; std::reference_wrapper subschema; bool orphan; }; diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h index bcbc4360..9333bd65 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_vocabularies.h @@ -13,7 +13,6 @@ #include // std::optional #include // std::ostream #include // std::out_of_range -#include // std::string #include // std::string_view #include // std::unordered_map #include // std::unordered_set diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h index 82790233..7c039ed2 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_walker.h @@ -11,7 +11,6 @@ #include // std::uint64_t #include // std::optional -#include // std::string #include // std::string_view #include // std::vector @@ -61,10 +60,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaIterator { public: using const_iterator = typename internal::const_iterator; - SchemaIterator( - const JSON &input, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt); + SchemaIterator(const JSON &input, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = ""); [[nodiscard]] auto begin() const -> const_iterator; [[nodiscard]] auto end() const -> const_iterator; [[nodiscard]] auto cbegin() const -> const_iterator; @@ -128,10 +126,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaIteratorFlat { public: using const_iterator = typename internal::const_iterator; - SchemaIteratorFlat( - const JSON &input, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt); + SchemaIteratorFlat(const JSON &input, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = ""); [[nodiscard]] auto begin() const -> const_iterator; [[nodiscard]] auto end() const -> const_iterator; [[nodiscard]] auto cbegin() const -> const_iterator; @@ -185,10 +182,9 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaKeywordIterator { public: using const_iterator = typename internal::const_iterator; - SchemaKeywordIterator( - const JSON &input, const SchemaWalker &walker, - const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt); + SchemaKeywordIterator(const JSON &input, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = ""); [[nodiscard]] auto begin() const -> const_iterator; [[nodiscard]] auto end() const -> const_iterator; [[nodiscard]] auto cbegin() const -> const_iterator; diff --git a/vendor/core/src/core/jsonschema/jsonschema.cc b/vendor/core/src/core/jsonschema/jsonschema.cc index b7be25a5..937ee43b 100644 --- a/vendor/core/src/core/jsonschema/jsonschema.cc +++ b/vendor/core/src/core/jsonschema/jsonschema.cc @@ -20,66 +20,144 @@ auto sourcemeta::core::is_empty_schema(const sourcemeta::core::JSON &schema) (schema.is_object() && schema.empty()); } +auto sourcemeta::core::to_string(const SchemaBaseDialect base_dialect) + -> std::string_view { + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + return "https://json-schema.org/draft/2020-12/schema"; + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + return "https://json-schema.org/draft/2020-12/hyper-schema"; + case SchemaBaseDialect::JSON_Schema_2019_09: + return "https://json-schema.org/draft/2019-09/schema"; + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + return "https://json-schema.org/draft/2019-09/hyper-schema"; + case SchemaBaseDialect::JSON_Schema_Draft_7: + return "http://json-schema.org/draft-07/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + return "http://json-schema.org/draft-07/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_6: + return "http://json-schema.org/draft-06/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + return "http://json-schema.org/draft-06/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_4: + return "http://json-schema.org/draft-04/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return "http://json-schema.org/draft-04/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_3: + return "http://json-schema.org/draft-03/schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return "http://json-schema.org/draft-03/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + return "http://json-schema.org/draft-02/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + return "http://json-schema.org/draft-01/hyper-schema#"; + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return "http://json-schema.org/draft-00/hyper-schema#"; + } + + assert(false); + return {}; +} + +auto sourcemeta::core::to_base_dialect(const std::string_view base_dialect) + -> std::optional { + if (base_dialect == "https://json-schema.org/draft/2020-12/schema") { + return SchemaBaseDialect::JSON_Schema_2020_12; + } else if (base_dialect == + "https://json-schema.org/draft/2020-12/hyper-schema") { + return SchemaBaseDialect::JSON_Schema_2020_12_Hyper; + } else if (base_dialect == "https://json-schema.org/draft/2019-09/schema") { + return SchemaBaseDialect::JSON_Schema_2019_09; + } else if (base_dialect == + "https://json-schema.org/draft/2019-09/hyper-schema") { + return SchemaBaseDialect::JSON_Schema_2019_09_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-07/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_7; + } else if (base_dialect == "http://json-schema.org/draft-07/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_7_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-06/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_6; + } else if (base_dialect == "http://json-schema.org/draft-06/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_6_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-04/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_4; + } else if (base_dialect == "http://json-schema.org/draft-04/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_4_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-03/schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_3; + } else if (base_dialect == "http://json-schema.org/draft-03/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_3_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-02/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_2_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-01/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_1_Hyper; + } else if (base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { + return SchemaBaseDialect::JSON_Schema_Draft_0_Hyper; + } + + return std::nullopt; +} + namespace { -static auto id_keyword(const std::string &base_dialect) -> std::string { - if (base_dialect == "https://json-schema.org/draft/2020-12/schema" || - base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema" || - base_dialect == "https://json-schema.org/draft/2019-09/schema" || - base_dialect == "https://json-schema.org/draft/2019-09/hyper-schema" || - base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#") { - return "$id"; - } - - if (base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { - return "id"; - } - - throw sourcemeta::core::SchemaBaseDialectError(base_dialect); +static auto id_keyword(const sourcemeta::core::SchemaBaseDialect base_dialect) + -> std::string_view { + using sourcemeta::core::SchemaBaseDialect; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + case SchemaBaseDialect::JSON_Schema_2019_09: + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + return "$id"; + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return "id"; + } + + assert(false); + return {}; } } // namespace -auto sourcemeta::core::identify( - const sourcemeta::core::JSON &schema, const SchemaResolver &resolver, - const std::optional &default_dialect, - const std::optional &default_id) - -> std::optional { +auto sourcemeta::core::identify(const sourcemeta::core::JSON &schema, + const SchemaResolver &resolver, + std::string_view default_dialect, + std::string_view default_id) + -> std::string_view { try { const auto maybe_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; if (maybe_base_dialect.has_value()) { return identify(schema, maybe_base_dialect.value(), default_id); - } else { - return default_id; } + return default_id; } catch (const SchemaResolutionError &) { - if (default_id.has_value()) { + if (!default_id.empty()) { return default_id; - } else { - throw; } + throw; } } auto sourcemeta::core::identify(const JSON &schema, - const std::string &base_dialect, - const std::optional &default_id) - -> std::optional { + const SchemaBaseDialect base_dialect, + std::string_view default_id) + -> std::string_view { if (!schema.is_object()) { return default_id; } - const auto keyword{id_keyword(base_dialect)}; + const std::string keyword{id_keyword(base_dialect)}; if (!schema.defines(keyword)) { return default_id; @@ -97,90 +175,86 @@ auto sourcemeta::core::identify(const JSON &schema, // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 if (schema.defines("$ref") && - (base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#")) { + (base_dialect == SchemaBaseDialect::JSON_Schema_Draft_7 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_7_Hyper || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_6 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_6_Hyper || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_4 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_4_Hyper || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_3 || + base_dialect == SchemaBaseDialect::JSON_Schema_Draft_3_Hyper)) { return default_id; } return identifier.to_string(); } -auto sourcemeta::core::anonymize(JSON &schema, const std::string &base_dialect) - -> void { +auto sourcemeta::core::anonymize(JSON &schema, + const SchemaBaseDialect base_dialect) -> void { if (schema.is_object()) { - schema.erase(id_keyword(base_dialect)); + schema.erase(std::string{id_keyword(base_dialect)}); } } -auto sourcemeta::core::reidentify( - JSON &schema, const std::string &new_identifier, - const SchemaResolver &resolver, - const std::optional &default_dialect) -> void { - const auto base_dialect{ +auto sourcemeta::core::reidentify(JSON &schema, std::string_view new_identifier, + const SchemaResolver &resolver, + std::string_view default_dialect) -> void { + const auto resolved_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; - if (!base_dialect.has_value()) { + if (!resolved_base_dialect.has_value()) { throw sourcemeta::core::SchemaUnknownBaseDialectError(); } - reidentify(schema, new_identifier, base_dialect.value()); + reidentify(schema, new_identifier, resolved_base_dialect.value()); } -auto sourcemeta::core::reidentify(JSON &schema, - const std::string &new_identifier, - const std::string &base_dialect) -> void { +auto sourcemeta::core::reidentify(JSON &schema, std::string_view new_identifier, + const SchemaBaseDialect base_dialect) + -> void { assert(is_schema(schema)); assert(schema.is_object()); - schema.assign(id_keyword(base_dialect), JSON{new_identifier}); + schema.assign(std::string{id_keyword(base_dialect)}, JSON{new_identifier}); // If we reidentify, and the identifier is still not retrievable, then // we are facing the Draft 7 `$ref` sibling edge case, and we cannot // really continue - if (schema.defines("$ref") && !identify(schema, base_dialect).has_value()) { + if (schema.defines("$ref") && identify(schema, base_dialect).empty()) { throw SchemaReferenceObjectResourceError(new_identifier); } } -auto sourcemeta::core::dialect( - const sourcemeta::core::JSON &schema, - const std::optional &default_dialect) - -> std::optional { +auto sourcemeta::core::dialect(const sourcemeta::core::JSON &schema, + std::string_view default_dialect) + -> std::string_view { assert(sourcemeta::core::is_schema(schema)); if (schema.is_boolean() || !schema.defines("$schema")) { return default_dialect; } - const sourcemeta::core::JSON &dialect{schema.at("$schema")}; - assert(dialect.is_string() && !dialect.empty()); - return dialect.to_string(); + return schema.at("$schema").to_string(); } auto sourcemeta::core::metaschema( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) -> JSON { - const auto maybe_dialect{sourcemeta::core::dialect(schema, default_dialect)}; - if (!maybe_dialect.has_value()) { + std::string_view default_dialect) -> JSON { + const auto effective_dialect{ + sourcemeta::core::dialect(schema, default_dialect)}; + if (effective_dialect.empty()) { throw sourcemeta::core::SchemaUnknownDialectError(); } - const auto maybe_metaschema{resolver(maybe_dialect.value())}; + const auto maybe_metaschema{resolver(effective_dialect)}; if (!maybe_metaschema.has_value()) { // Relative meta-schema references are invalid according to the // JSON Schema specifications. They must be absolute ones - const URI effective_dialect_uri{maybe_dialect.value()}; + const URI effective_dialect_uri{effective_dialect}; if (effective_dialect_uri.is_relative()) { throw sourcemeta::core::SchemaRelativeMetaschemaResolutionError( - maybe_dialect.value()); + effective_dialect); } else { throw sourcemeta::core::SchemaResolutionError( - maybe_dialect.value(), - "Could not resolve the metaschema of the schema"); + effective_dialect, "Could not resolve the metaschema of the schema"); } } @@ -190,53 +264,21 @@ auto sourcemeta::core::metaschema( auto sourcemeta::core::base_dialect( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) - -> std::optional { + std::string_view default_dialect) -> std::optional { assert(sourcemeta::core::is_schema(schema)); - const std::optional dialect{ + const std::string_view effective_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; // There is no metaschema information whatsoever // Nothing we can do at this point - if (!dialect.has_value()) { + if (effective_dialect.empty()) { return std::nullopt; } - const std::string &effective_dialect{dialect.value()}; - - // As a performance optimization shortcut - if (effective_dialect == "https://json-schema.org/draft/2020-12/schema" || - effective_dialect == "https://json-schema.org/draft/2019-09/schema" || - effective_dialect == "http://json-schema.org/draft-07/schema#" || - effective_dialect == "http://json-schema.org/draft-06/schema#") { - return effective_dialect; - } - - // For compatibility with older JSON Schema drafts that didn't support $id nor - // $vocabulary - if ( - // In Draft 0, 1, and 2, the official metaschema is defined on top of - // the official hyper-schema metaschema. See - // http://json-schema.org/draft-00/schema# - effective_dialect == "http://json-schema.org/draft-00/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - - // Draft 3 and 4 have both schema and hyper-schema dialects - effective_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-03/schema#" || - effective_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - effective_dialect == "http://json-schema.org/draft-04/schema#") { - return effective_dialect; - } - - // If we reach the bottom of the metaschema hierarchy, where the schema - // defines itself, then we got to the base dialect - if (schema.is_object() && schema.defines("$id")) { - assert(schema.at("$id").is_string()); - if (schema.at("$id").to_string() == effective_dialect) { - return schema.at("$id").to_string(); - } + // Check for known base dialects + const auto result{to_base_dialect(effective_dialect)}; + if (result.has_value()) { + return result; } // Otherwise, traverse the metaschema hierarchy up @@ -248,32 +290,45 @@ auto sourcemeta::core::base_dialect( const URI effective_dialect_uri{effective_dialect}; if (effective_dialect_uri.is_relative()) { throw sourcemeta::core::SchemaRelativeMetaschemaResolutionError( - effective_dialect); + std::string{effective_dialect}); } else { throw sourcemeta::core::SchemaResolutionError( - effective_dialect, "Could not resolve the metaschema of the schema"); + std::string{effective_dialect}, + "Could not resolve the metaschema of the schema"); } } + // If the metaschema declares the same dialect (self-descriptive), and it's + // not an official dialect, we cannot determine the base dialect + const std::string_view metaschema_dialect{ + dialect(metaschema.value(), effective_dialect)}; + if (metaschema_dialect == effective_dialect) { + throw sourcemeta::core::SchemaUnknownBaseDialectError(); + } + return base_dialect(metaschema.value(), resolver, effective_dialect); } namespace { -auto core_vocabulary_known(std::string_view base_dialect) +auto core_vocabulary_known( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> sourcemeta::core::Vocabularies::Known { - if (base_dialect == "https://json-schema.org/draft/2020-12/schema" || - base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema") { - return sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core; - } else if (base_dialect == "https://json-schema.org/draft/2019-09/schema" || - base_dialect == - "https://json-schema.org/draft/2019-09/hyper-schema") { - return sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core; - } else { - throw sourcemeta::core::SchemaBaseDialectError(std::string{base_dialect}); + using sourcemeta::core::SchemaBaseDialect; + using sourcemeta::core::Vocabularies; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + return Vocabularies::Known::JSON_Schema_2020_12_Core; + case SchemaBaseDialect::JSON_Schema_2019_09: + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + return Vocabularies::Known::JSON_Schema_2019_09_Core; + default: + assert(false); + return Vocabularies::Known::JSON_Schema_2020_12_Core; } } -auto dialect_to_known(std::string_view dialect) +auto dialect_to_known(const std::string_view dialect) -> std::optional { using sourcemeta::core::Vocabularies; if (dialect == "http://json-schema.org/draft-07/schema#") { @@ -320,39 +375,94 @@ auto dialect_to_known(std::string_view dialect) } return std::nullopt; } + +auto base_dialect_to_known(const sourcemeta::core::SchemaBaseDialect dialect) + -> sourcemeta::core::Vocabularies::Known { + using sourcemeta::core::SchemaBaseDialect; + using sourcemeta::core::Vocabularies; + switch (dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + return Vocabularies::Known::JSON_Schema_Draft_7; + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_7_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_6: + return Vocabularies::Known::JSON_Schema_Draft_6; + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_6_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_4: + return Vocabularies::Known::JSON_Schema_Draft_4; + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_4_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_3: + return Vocabularies::Known::JSON_Schema_Draft_3; + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_3_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_2_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_1_Hyper; + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return Vocabularies::Known::JSON_Schema_Draft_0_Hyper; + default: + assert(false); + return Vocabularies::Known::JSON_Schema_Draft_7; + } +} + +auto is_pre_vocabulary_base_dialect( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> bool { + using sourcemeta::core::SchemaBaseDialect; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return true; + default: + return false; + } +} } // namespace auto sourcemeta::core::vocabularies( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) - -> sourcemeta::core::Vocabularies { - const std::optional maybe_base_dialect{ + std::string_view default_dialect) -> sourcemeta::core::Vocabularies { + const auto resolved_base_dialect{ sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; - if (!maybe_base_dialect.has_value()) { + if (!resolved_base_dialect.has_value()) { throw sourcemeta::core::SchemaUnknownBaseDialectError(); } - const std::optional maybe_dialect{ + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - if (!maybe_dialect.has_value()) { + if (resolved_dialect.empty()) { // If the schema has no declared metaschema and the user didn't // provide a explicit default, then we cannot do anything. // Better to abort instead of trying to guess. throw sourcemeta::core::SchemaUnknownDialectError(); } - return vocabularies(resolver, maybe_base_dialect.value(), - maybe_dialect.value()); + return vocabularies(resolver, resolved_base_dialect.value(), + resolved_dialect); } auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, - const std::string &base_dialect, - const std::string &dialect) + const SchemaBaseDialect base_dialect, + std::string_view dialect) -> sourcemeta::core::Vocabularies { + const auto base_dialect_string{to_string(base_dialect)}; + // As a performance optimization shortcut - if (base_dialect == dialect) { - if (dialect == "https://json-schema.org/draft/2020-12/schema") { + if (base_dialect_string == dialect) { + if (base_dialect == SchemaBaseDialect::JSON_Schema_2020_12) { return Vocabularies{ {Vocabularies::Known::JSON_Schema_2020_12_Core, true}, {Vocabularies::Known::JSON_Schema_2020_12_Applicator, true}, @@ -361,7 +471,7 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, {Vocabularies::Known::JSON_Schema_2020_12_Meta_Data, true}, {Vocabularies::Known::JSON_Schema_2020_12_Format_Annotation, true}, {Vocabularies::Known::JSON_Schema_2020_12_Content, true}}; - } else if (dialect == "https://json-schema.org/draft/2019-09/schema") { + } else if (base_dialect == SchemaBaseDialect::JSON_Schema_2019_09) { return Vocabularies{ {Vocabularies::Known::JSON_Schema_2019_09_Core, true}, {Vocabularies::Known::JSON_Schema_2019_09_Applicator, true}, @@ -390,7 +500,7 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, if (known.has_value()) { return Vocabularies{{known.value(), true}}; } - return Vocabularies{{dialect, true}}; + return Vocabularies{{std::string{dialect}, true}}; } /* @@ -398,23 +508,8 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, * base dialect itself is conceptually the only vocabulary */ - // This is an exhaustive list of all base dialects in the pre-vocabulary world - if (base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { - const auto known = dialect_to_known(base_dialect); - if (known.has_value()) { - return Vocabularies{{known.value(), true}}; - } - return Vocabularies{{base_dialect, true}}; + if (is_pre_vocabulary_base_dialect(base_dialect)) { + return Vocabularies{{base_dialect_to_known(base_dialect), true}}; } /* @@ -425,7 +520,7 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, resolver(dialect)}; if (!maybe_schema_dialect.has_value()) { throw sourcemeta::core::SchemaResolutionError( - dialect, "Could not resolve the metaschema of the schema"); + std::string{dialect}, "Could not resolve the metaschema of the schema"); } const sourcemeta::core::JSON &schema_dialect{maybe_schema_dialect.value()}; // At this point we are sure that the dialect is vocabulary aware and the @@ -493,7 +588,7 @@ auto sourcemeta::core::schema_keyword_priority( return std::max(priority_from_dependencies, priority_from_order_dependencies); } -auto sourcemeta::core::wrap(const sourcemeta::core::JSON::String &identifier) +auto sourcemeta::core::wrap(const std::string_view identifier) -> sourcemeta::core::JSON { auto result{JSON::make_object()}; // JSON Schema 2020-12 is the first dialect that truly supports cross-dialect @@ -507,7 +602,7 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON::String &identifier) auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, const sourcemeta::core::Pointer &pointer, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) + std::string_view default_dialect) -> sourcemeta::core::JSON { assert(try_get(schema, pointer)); if (pointer.empty()) { @@ -516,8 +611,8 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, auto copy = schema; const auto effective_dialect{dialect(copy, default_dialect)}; - if (effective_dialect.has_value()) { - copy.assign("$schema", JSON{effective_dialect.value()}); + if (!effective_dialect.empty()) { + copy.assign("$schema", JSON{effective_dialect}); } else { throw SchemaUnknownBaseDialectError(); } @@ -535,9 +630,11 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, // However, note that we use a relative URI so that references to // other schemas whose top-level identifiers are relative URIs don't // get affected. Otherwise, we would cause unintended base resolution. - constexpr auto WRAPPER_IDENTIFIER{"__sourcemeta-core-wrap__"}; - const auto id{ - identify(copy, resolver, default_dialect).value_or(WRAPPER_IDENTIFIER)}; + constexpr std::string_view WRAPPER_IDENTIFIER{"__sourcemeta-core-wrap__"}; + const auto maybe_id{identify(copy, resolver, default_dialect)}; + const auto id{maybe_id.empty() ? WRAPPER_IDENTIFIER : maybe_id}; + + URI uri{id}; try { reidentify(copy, id, resolver, default_dialect); @@ -555,7 +652,6 @@ auto sourcemeta::core::wrap(const sourcemeta::core::JSON &schema, result.at("$defs").assign_assume_new("schema", std::move(copy)); // Add a reference to the schema - URI uri{id}; if (!uri.fragment().has_value() || uri.fragment().value().empty()) { uri.fragment(to_string(pointer)); result.assign_assume_new("$ref", JSON{uri.recompose()}); diff --git a/vendor/core/src/core/jsonschema/known_resolver.in.cc b/vendor/core/src/core/jsonschema/known_resolver.in.cc index 0c6bdcfd..3cf290cc 100644 --- a/vendor/core/src/core/jsonschema/known_resolver.in.cc +++ b/vendor/core/src/core/jsonschema/known_resolver.in.cc @@ -1,6 +1,6 @@ #include -auto sourcemeta::core::schema_resolver(std::string_view identifier) +auto sourcemeta::core::schema_resolver(const std::string_view identifier) -> std::optional { // JSON Schema 2020-12 if (identifier == "https://json-schema.org/draft/2020-12/schema" || diff --git a/vendor/core/src/core/jsonschema/known_walker.cc b/vendor/core/src/core/jsonschema/known_walker.cc index 88e19770..a0157806 100644 --- a/vendor/core/src/core/jsonschema/known_walker.cc +++ b/vendor/core/src/core/jsonschema/known_walker.cc @@ -2194,7 +2194,8 @@ auto handle_example(const Vocabularies &vocabularies) } // anonymous namespace -auto schema_walker(std::string_view keyword, const Vocabularies &vocabularies) +auto schema_walker(const std::string_view keyword, + const Vocabularies &vocabularies) -> const SchemaWalkerResult & { // TODO: Make use of JSON key's perfect hashes, as we mostly run the walker by // checking JSON property names diff --git a/vendor/core/src/core/jsonschema/transformer.cc b/vendor/core/src/core/jsonschema/transformer.cc index 4e297d5c..d044eb56 100644 --- a/vendor/core/src/core/jsonschema/transformer.cc +++ b/vendor/core/src/core/jsonschema/transformer.cc @@ -3,14 +3,26 @@ #include // std::erase_if #include // assert -#include // std::set +#include // std::hash #include // std::ostringstream #include // std::tuple #include // std::unordered_set #include // std::move, std::pair +#include // std::vector namespace { +struct ProcessedRuleHasher { + auto + operator()(const std::tuple &value) const noexcept + -> std::size_t { + return std::hash{}(std::get<0>(value)) ^ + (std::hash{}(std::get<1>(value)) << 1) ^ + (std::hash{}(std::get<2>(value)) << 2); + } +}; + auto calculate_health_percentage(const std::size_t subschemas, const std::size_t failed_subschemas) -> std::uint8_t { @@ -28,20 +40,20 @@ auto calculate_health_percentage(const std::size_t subschemas, namespace sourcemeta::core { -SchemaTransformRule::SchemaTransformRule(std::string &&name, - std::string &&message) - : name_{std::move(name)}, message_{std::move(message)} {} +SchemaTransformRule::SchemaTransformRule(const std::string_view name, + const std::string_view message) + : name_{name}, message_{message} {} auto SchemaTransformRule::operator==(const SchemaTransformRule &other) const -> bool { return this->name() == other.name(); } -auto SchemaTransformRule::name() const -> const std::string & { +auto SchemaTransformRule::name() const noexcept -> std::string_view { return this->name_; } -auto SchemaTransformRule::message() const -> const std::string & { +auto SchemaTransformRule::message() const noexcept -> std::string_view { return this->message_; } @@ -49,47 +61,13 @@ auto SchemaTransformRule::transform(JSON &, const Result &) const -> void { throw SchemaAbortError("This rule cannot be automatically transformed"); } -auto SchemaTransformRule::rereference(const std::string &reference, +auto SchemaTransformRule::rereference(const std::string_view reference, const Pointer &origin, const Pointer &, const Pointer &) const -> Pointer { throw SchemaBrokenReferenceError(reference, origin, "The reference broke after transformation"); } -auto SchemaTransformRule::apply(JSON &schema, const JSON &root, - const Vocabularies &vocabularies, - const SchemaWalker &walker, - const SchemaResolver &resolver, - const SchemaFrame &frame, - const SchemaFrame::Location &location) const - -> std::pair { - auto outcome{this->condition(schema, root, vocabularies, frame, location, - walker, resolver)}; - if (!outcome.applies) { - return {true, std::move(outcome)}; - } - - try { - this->transform(schema, outcome); - } catch (const SchemaAbortError &) { - return {false, std::move(outcome)}; - } - - // The condition must always be false after applying the - // transformation in order to avoid infinite loops - if (this->condition(schema, root, vocabularies, frame, location, walker, - resolver) - .applies) { - // TODO: Throw a better custom error that also highlights the schema - // location - std::ostringstream error; - error << "Rule condition holds after application: " << this->name(); - throw std::runtime_error(error.str()); - } - - return {true, std::move(outcome)}; -} - auto SchemaTransformRule::check(const JSON &schema, const JSON &root, const Vocabularies &vocabularies, const SchemaWalker &walker, @@ -101,18 +79,17 @@ auto SchemaTransformRule::check(const JSON &schema, const JSON &root, resolver); } -auto SchemaTransformer::check( - const JSON &schema, const SchemaWalker &walker, - const SchemaResolver &resolver, const SchemaTransformer::Callback &callback, - const std::optional &default_dialect, - const std::optional &default_id) const +auto SchemaTransformer::check(const JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const SchemaTransformer::Callback &callback, + std::string_view default_dialect, + std::string_view default_id) const -> std::pair { SchemaFrame frame{SchemaFrame::Mode::References}; // If we use the default id when there is already one, framing will duplicate // the locations leading to duplicate check reports - if (sourcemeta::core::identify(schema, resolver, default_dialect) - .has_value()) { + if (!sourcemeta::core::identify(schema, resolver, default_dialect).empty()) { frame.analyse(schema, walker, resolver, default_dialect); } else { frame.analyse(schema, walker, resolver, default_dialect, default_id); @@ -130,13 +107,16 @@ auto SchemaTransformer::check( // Framing may report resource twice or more given default identifiers and // nested resources, risking reporting the same errors twice - if (!visited.insert(entry.second.pointer).second) { + const auto [visited_iterator, inserted] = + visited.insert(to_pointer(entry.second.pointer)); + if (!inserted) { continue; } + const auto &entry_pointer{*visited_iterator}; subschema_count += 1; - const auto ¤t{get(schema, entry.second.pointer)}; + const auto ¤t{get(schema, entry_pointer)}; const auto current_vocabularies{frame.vocabularies(entry.second, resolver)}; bool subresult{true}; for (const auto &rule : this->rules) { @@ -144,7 +124,7 @@ auto SchemaTransformer::check( walker, resolver, frame, entry.second)}; if (outcome.applies) { subresult = false; - callback(entry.second.pointer, rule->name(), rule->message(), outcome); + callback(entry_pointer, rule->name(), rule->message(), outcome); } } @@ -158,28 +138,43 @@ auto SchemaTransformer::check( calculate_health_percentage(subschema_count, subschema_failures)}; } -auto SchemaTransformer::apply( - JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const SchemaTransformer::Callback &callback, - const std::optional &default_dialect, - const std::optional &default_id) const +auto SchemaTransformer::apply(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + const SchemaTransformer::Callback &callback, + std::string_view default_dialect, + std::string_view default_id) const -> std::pair { - // There is no point in applying an empty bundle assert(!this->rules.empty()); - std::set> + std::unordered_set, + ProcessedRuleHasher> processed_rules; bool result{true}; std::size_t subschema_count{0}; std::size_t subschema_failures{0}; + + SchemaFrame frame{SchemaFrame::Mode::References}; + + struct PotentiallyBrokenReference { + Pointer origin; + JSON::String original; + JSON::String destination; + Pointer target_pointer; + std::size_t target_relative_pointer; + }; + + std::vector potentially_broken_references; + while (true) { - SchemaFrame frame{SchemaFrame::Mode::References}; - frame.analyse(schema, walker, resolver, default_dialect, default_id); - std::unordered_set visited; + if (frame.empty()) { + frame.analyse(schema, walker, resolver, default_dialect, default_id); + } + std::unordered_set visited; bool applied{false}; subschema_count = 0; subschema_failures = 0; + for (const auto &entry : frame.locations()) { if (entry.second.type != SchemaFrame::LocationType::Resource && entry.second.type != SchemaFrame::LocationType::Subschema) { @@ -188,82 +183,129 @@ auto SchemaTransformer::apply( // Framing may report resource twice or more given default identifiers and // nested resources, risking reporting the same errors twice - if (!visited.insert(entry.second.pointer).second) { + const auto [visited_iterator, inserted] = + visited.insert(to_pointer(entry.second.pointer)); + if (!inserted) { continue; } + const auto &entry_pointer{*visited_iterator}; subschema_count += 1; - auto ¤t{get(schema, entry.second.pointer)}; + auto ¤t{get(schema, entry_pointer)}; const auto current_vocabularies{ frame.vocabularies(entry.second, resolver)}; bool subschema_failed{false}; for (const auto &rule : this->rules) { - const auto subresult{rule->apply(current, schema, current_vocabularies, - walker, resolver, frame, - entry.second)}; - // This means the rule is fixable - if (subresult.first) { - applied = subresult.second.applies || applied; - } else { - result = false; - subschema_failed = true; - callback(entry.second.pointer, rule->name(), rule->message(), - subresult.second); - } + auto outcome{rule->condition(current, schema, current_vocabularies, + frame, entry.second, walker, resolver)}; - if (!applied) { + if (!outcome.applies) { continue; } - std::tuple mark{ - ¤t, &rule->name(), - // Allow applying the same rule to the same location if the schema - // has changed, which means we are still "making progress". The - // hashing is not perfect, but its enough - current.fast_hash()}; - if (processed_rules.contains(mark)) { - throw SchemaTransformRuleProcessedTwiceError(rule->name(), - entry.second.pointer); - } - - // Identify and try to address broken references, if any + // Collect reference information BEFORE invalidating the frame. + // We need to save this data because after the transform, the old + // frame's views may point to invalid memory, and a new frame won't + // have location entries for paths that no longer exist. + potentially_broken_references.clear(); for (const auto &reference : frame.references()) { const auto destination{frame.traverse(reference.second.destination)}; if (!destination.has_value() || - // We only care about references with JSON Pointer fragments, - // as these are the only cases, by definition, where the target - // is location-dependent. !reference.second.fragment.has_value() || !reference.second.fragment.value().starts_with('/')) { continue; } const auto &target{destination.value().get()}; + potentially_broken_references.push_back( + {to_pointer(reference.first.second), + JSON::String{reference.second.original}, + reference.second.destination, to_pointer(target.pointer), + target.relative_pointer}); + } + + try { + rule->transform(current, outcome); + } catch (const SchemaAbortError &) { + result = false; + subschema_failed = true; + callback(entry_pointer, rule->name(), rule->message(), outcome); + continue; + } + + applied = true; + + frame.analyse(schema, walker, resolver, default_dialect, default_id); + + const auto new_location{frame.traverse(to_weak_pointer(entry_pointer))}; + // The location should still exist after transform + assert(new_location.has_value()); + + // Get vocabularies from the new frame + const auto new_vocabularies{ + frame.vocabularies(new_location.value().get(), resolver)}; + + // The condition must always be false after applying the + // transformation in order to avoid infinite loops + if (rule->condition(current, schema, new_vocabularies, frame, + new_location.value().get(), walker, resolver) + .applies) { + std::ostringstream error; + error << "Rule condition holds after application: " << rule->name(); + throw std::runtime_error(error.str()); + } + + // Identify and fix broken references using the saved data + bool references_fixed{false}; + for (const auto &saved_reference : potentially_broken_references) { // The destination still exists, so we don't have to do anything - if (try_get(schema, target.pointer)) { + if (try_get(schema, saved_reference.target_pointer)) { continue; } // If the source no longer exists, we don't need to fix the reference - if (!try_get(schema, reference.first.second.initial())) { + if (!try_get(schema, saved_reference.origin.initial())) { continue; } const auto new_fragment{rule->rereference( - reference.second.destination, reference.first.second, - target.relative_pointer, entry.second.relative_pointer)}; + saved_reference.destination, saved_reference.origin, + saved_reference.target_pointer.slice( + saved_reference.target_relative_pointer), + entry_pointer.slice( + new_location.value().get().relative_pointer))}; // Note we use the base from the original reference before any // canonicalisation takes place so that we don't overly change // user's references when only fixing up their pointer fragments - URI original{reference.second.original}; + URI original{saved_reference.original}; original.fragment(to_string(new_fragment)); - set(schema, reference.first.second, JSON{original.recompose()}); + set(schema, saved_reference.origin, JSON{original.recompose()}); + references_fixed = true; + } + + std::tuple mark{ + ¤t, rule->name(), + // Allow applying the same rule to the same location if the schema + // has changed, which means we are still "making progress". The + // hashing is not perfect, but its enough + current.fast_hash()}; + if (processed_rules.contains(mark)) { + throw SchemaTransformRuleProcessedTwiceError(rule->name(), + entry_pointer); } processed_rules.emplace(std::move(mark)); + + // If we fixed references, the schema changed again, so we need to + // invalidate the frame. Otherwise, we can reuse it for the next + // iteration. + if (references_fixed) { + frame.reset(); + } + goto core_transformer_start_again; } @@ -282,7 +324,7 @@ auto SchemaTransformer::apply( calculate_health_percentage(subschema_count, subschema_failures)}; } -auto SchemaTransformer::remove(const std::string &name) -> bool { +auto SchemaTransformer::remove(const std::string_view name) -> bool { return std::erase_if(this->rules, [&name](const auto &rule) { return rule->name() == name; }) > 0; diff --git a/vendor/core/src/core/jsonschema/vocabularies.cc b/vendor/core/src/core/jsonschema/vocabularies.cc index 4bc2e2f2..1759ecb7 100644 --- a/vendor/core/src/core/jsonschema/vocabularies.cc +++ b/vendor/core/src/core/jsonschema/vocabularies.cc @@ -71,7 +71,7 @@ X(OpenAPI_3_2_Base, "https://spec.openapis.org/oas/3.2/vocab/base") namespace { -auto uri_to_known_vocabulary(std::string_view uri) +auto uri_to_known_vocabulary(const std::string_view uri) -> std::optional { using sourcemeta::core::Vocabularies; diff --git a/vendor/core/src/core/jsonschema/walker.cc b/vendor/core/src/core/jsonschema/walker.cc index 78706955..1ae0b372 100644 --- a/vendor/core/src/core/jsonschema/walker.cc +++ b/vendor/core/src/core/jsonschema/walker.cc @@ -6,28 +6,36 @@ namespace { enum class SchemaWalkerType_t : std::uint8_t { Deep, Flat }; -auto ref_overrides_adjacent_keywords(const std::string &base_dialect) -> bool { +auto ref_overrides_adjacent_keywords( + const sourcemeta::core::SchemaBaseDialect base_dialect) -> bool { + using sourcemeta::core::SchemaBaseDialect; // In older drafts, the presence of `$ref` would override any sibling // keywords // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 - return base_dialect == "http://json-schema.org/draft-07/schema#" || - base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-06/schema#" || - base_dialect == "http://json-schema.org/draft-06/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#"; + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + return true; + default: + return false; + } } -auto walk(const std::optional &parent, - const sourcemeta::core::Pointer &pointer, +auto walk(const std::optional &parent, + const sourcemeta::core::WeakPointer &pointer, std::vector &subschemas, const sourcemeta::core::JSON &subschema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::string &dialect, const std::string &base_dialect, + const std::string_view dialect, + const sourcemeta::core::SchemaBaseDialect base_dialect, const SchemaWalkerType_t type, const std::size_t level, const bool orphan) -> void { if (!is_schema(subschema)) { @@ -48,28 +56,30 @@ auto walk(const std::optional &parent, // enough information to detect those cases and throw an error if they desire // to be more strict. auto maybe_current_dialect{sourcemeta::core::dialect(subschema, dialect)}; - assert(maybe_current_dialect.has_value()); + assert(!maybe_current_dialect.empty()); // TODO: Note that we determine the identifier here, but the framing does it // all over again. Maybe we should be storing this instead? auto id{ sourcemeta::core::identify(subschema, resolver, maybe_current_dialect)}; - const auto different_parent_dialect{maybe_current_dialect.value() != dialect}; - if (!id.has_value() && different_parent_dialect) { + const auto different_parent_dialect{maybe_current_dialect != dialect}; + if (id.empty() && different_parent_dialect) { id = sourcemeta::core::identify(subschema, base_dialect); - if (id.has_value()) { + if (!id.empty()) { maybe_current_dialect = dialect; } } - const auto is_schema_resource{level == 0 || id.has_value()}; - const auto ¤t_dialect{is_schema_resource ? maybe_current_dialect.value() - : dialect}; - auto current_base_dialect{ + const auto is_schema_resource{level == 0 || !id.empty()}; + const std::string_view current_dialect{ + is_schema_resource ? maybe_current_dialect : dialect}; + const auto maybe_resolved_base_dialect{ is_schema_resource && current_dialect != dialect ? sourcemeta::core::base_dialect(subschema, resolver, current_dialect) - .value_or(base_dialect) - : base_dialect}; + : std::nullopt}; + const auto current_base_dialect{maybe_resolved_base_dialect.has_value() + ? maybe_resolved_base_dialect.value() + : base_dialect}; const auto vocabularies{sourcemeta::core::vocabularies( resolver, current_base_dialect, current_dialect)}; @@ -110,60 +120,60 @@ auto walk(const std::optional &parent, switch (keyword_info.type) { case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseSomeProperty: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseAnyPropertyKey: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseAnyItem: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType:: ApplicatorValueTraverseSomeItem: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueTraverseParent: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueInPlaceOther: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueInPlaceNegate: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; case sourcemeta::core::SchemaKeywordType::ApplicatorValueInPlaceMaybe: { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } break; @@ -171,8 +181,8 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorElementsTraverseItem: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, @@ -185,8 +195,8 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorElementsInPlace: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, @@ -199,8 +209,8 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorElementsInPlaceSome: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, @@ -214,8 +224,8 @@ auto walk(const std::optional &parent, ApplicatorElementsInPlaceSomeNegate: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, @@ -229,9 +239,9 @@ auto walk(const std::optional &parent, ApplicatorMembersTraversePropertyStatic: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); walk(pointer, new_pointer, subschemas, subpair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); @@ -244,9 +254,9 @@ auto walk(const std::optional &parent, ApplicatorMembersTraversePropertyRegex: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); walk(pointer, new_pointer, subschemas, subpair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); @@ -258,9 +268,9 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::ApplicatorMembersInPlaceSome: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); walk(pointer, new_pointer, subschemas, subpair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); @@ -272,9 +282,9 @@ auto walk(const std::optional &parent, case sourcemeta::core::SchemaKeywordType::LocationMembers: if (pair.second.is_object()) { for (auto &subpair : pair.second.as_object()) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); - new_pointer.emplace_back(subpair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); + new_pointer.push_back(std::cref(subpair.first)); walk(pointer, new_pointer, subschemas, subpair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, true); @@ -287,16 +297,16 @@ auto walk(const std::optional &parent, ApplicatorValueOrElementsTraverseAnyItemOrItem: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } } else { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } @@ -307,16 +317,16 @@ auto walk(const std::optional &parent, ApplicatorValueOrElementsInPlace: if (pair.second.is_array()) { for (std::size_t index = 0; index < pair.second.size(); index++) { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); new_pointer.emplace_back(index); walk(pointer, new_pointer, subschemas, pair.second.at(index), walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } } else { - sourcemeta::core::Pointer new_pointer{pointer}; - new_pointer.emplace_back(pair.first); + sourcemeta::core::WeakPointer new_pointer{pointer}; + new_pointer.push_back(std::cref(pair.first)); walk(pointer, new_pointer, subschemas, pair.second, walker, resolver, current_dialect, current_base_dialect, type, level + 1, orphan); } @@ -341,30 +351,30 @@ sourcemeta::core::SchemaIterator::SchemaIterator( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) { - const std::optional dialect{ + std::string_view default_dialect) { + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - sourcemeta::core::Pointer pointer; + sourcemeta::core::WeakPointer pointer; // If the given schema declares no dialect and the user didn't // not pass a default, then there is nothing we can do. We know // the current schema is a subschema, but cannot walk any further. - if (!dialect.has_value()) { + if (resolved_dialect.empty()) { sourcemeta::core::SchemaIteratorEntry entry{.parent = std::nullopt, .pointer = pointer, - .dialect = std::nullopt, + .dialect = "", .vocabularies = {}, .base_dialect = std::nullopt, .subschema = schema, .orphan = false}; this->subschemas.push_back(std::move(entry)); } else { - const auto base_dialect{ - sourcemeta::core::base_dialect(schema, resolver, dialect)}; - assert(base_dialect.has_value()); + const auto resolved_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, resolved_dialect)}; + assert(resolved_base_dialect.has_value()); walk(std::nullopt, pointer, this->subschemas, schema, walker, resolver, - dialect.value(), base_dialect.value(), SchemaWalkerType_t::Deep, 0, - false); + resolved_dialect, resolved_base_dialect.value(), + SchemaWalkerType_t::Deep, 0, false); } } @@ -372,17 +382,17 @@ sourcemeta::core::SchemaIteratorFlat::SchemaIteratorFlat( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) { - const std::optional dialect{ + const std::string_view default_dialect) { + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - if (dialect.has_value()) { - sourcemeta::core::Pointer pointer; - const auto base_dialect{ - sourcemeta::core::base_dialect(schema, resolver, dialect)}; - assert(base_dialect.has_value()); + if (!resolved_dialect.empty()) { + sourcemeta::core::WeakPointer pointer; + const auto resolved_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, resolved_dialect)}; + assert(resolved_base_dialect.has_value()); walk(std::nullopt, pointer, this->subschemas, schema, walker, resolver, - dialect.value(), base_dialect.value(), SchemaWalkerType_t::Flat, 0, - false); + resolved_dialect, resolved_base_dialect.value(), + SchemaWalkerType_t::Flat, 0, false); } } @@ -390,30 +400,32 @@ sourcemeta::core::SchemaKeywordIterator::SchemaKeywordIterator( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) { + const std::string_view default_dialect) { assert(is_schema(schema)); if (schema.is_boolean()) { return; } - const std::optional dialect{ + const std::string_view resolved_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; - const std::optional base_dialect{ - sourcemeta::core::base_dialect(schema, resolver, dialect)}; + const auto maybe_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, resolved_dialect)}; Vocabularies vocabularies{ - base_dialect.has_value() && dialect.has_value() - ? sourcemeta::core::vocabularies(resolver, base_dialect.value(), - dialect.value()) + maybe_base_dialect.has_value() && !resolved_dialect.empty() + ? sourcemeta::core::vocabularies(resolver, maybe_base_dialect.value(), + resolved_dialect) : Vocabularies{}}; for (const auto &entry : schema.as_object()) { + sourcemeta::core::WeakPointer entry_pointer; + entry_pointer.push_back(std::cref(entry.first)); sourcemeta::core::SchemaIteratorEntry subschema_entry{ .parent = std::nullopt, - .pointer = {entry.first}, - .dialect = dialect, + .pointer = std::move(entry_pointer), + .dialect = resolved_dialect, .vocabularies = vocabularies, - .base_dialect = base_dialect, + .base_dialect = maybe_base_dialect, .subschema = entry.second, .orphan = false}; this->entries.push_back(std::move(subschema_entry)); diff --git a/vendor/core/src/core/md5/include/sourcemeta/core/md5.h b/vendor/core/src/core/md5/include/sourcemeta/core/md5.h index c6705c7f..e39796fa 100644 --- a/vendor/core/src/core/md5/include/sourcemeta/core/md5.h +++ b/vendor/core/src/core/md5/include/sourcemeta/core/md5.h @@ -31,7 +31,7 @@ namespace sourcemeta::core { /// sourcemeta::hydra::md5("foo bar", result); /// std::cout << result.str() << "\n"; /// ``` -auto SOURCEMETA_CORE_MD5_EXPORT md5(std::string_view input, +auto SOURCEMETA_CORE_MD5_EXPORT md5(const std::string_view input, std::ostream &output) -> void; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/md5/md5.cc b/vendor/core/src/core/md5/md5.cc index a637dad8..92431c9a 100644 --- a/vendor/core/src/core/md5/md5.cc +++ b/vendor/core/src/core/md5/md5.cc @@ -91,7 +91,7 @@ inline auto md5_process_block(const unsigned char *block, namespace sourcemeta::core { -auto md5(std::string_view input, std::ostream &output) -> void { +auto md5(const std::string_view input, std::ostream &output) -> void { // Initial state as per RFC 1321 std::array state{}; state[0] = 0x67452301U; diff --git a/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h b/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h index 3e41fdb6..b996e60e 100644 --- a/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h +++ b/vendor/core/src/core/punycode/include/sourcemeta/core/punycode.h @@ -75,7 +75,7 @@ auto utf8_to_punycode(std::istream &input, std::ostream &output) -> void; /// "Mnchen-3ya"); /// ``` SOURCEMETA_CORE_PUNYCODE_EXPORT -auto utf8_to_punycode(std::string_view input) -> std::string; +auto utf8_to_punycode(const std::string_view input) -> std::string; /// @ingroup punycode /// Decode Punycode to Unicode code points (UTF-32). For example: @@ -97,7 +97,7 @@ auto utf8_to_punycode(std::string_view input) -> std::string; /// (`std::ctype`) for `std::basic_istream` and /// `std::basic_ostream` to function properly. SOURCEMETA_CORE_PUNYCODE_EXPORT -auto punycode_to_utf32(std::string_view input) -> std::u32string; +auto punycode_to_utf32(const std::string_view input) -> std::u32string; /// @ingroup punycode /// Decode Punycode to UTF-8 using streams. For example: @@ -126,7 +126,7 @@ auto punycode_to_utf8(std::istream &input, std::ostream &output) -> void; /// "M\xC3\xBCnchen"); /// ``` SOURCEMETA_CORE_PUNYCODE_EXPORT -auto punycode_to_utf8(std::string_view input) -> std::string; +auto punycode_to_utf8(const std::string_view input) -> std::string; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/punycode/punycode.cc b/vendor/core/src/core/punycode/punycode.cc index f114577a..835d93b9 100644 --- a/vendor/core/src/core/punycode/punycode.cc +++ b/vendor/core/src/core/punycode/punycode.cc @@ -259,7 +259,7 @@ auto utf32_to_punycode(std::u32string_view input) -> std::string { return result; } -auto punycode_to_utf32(std::string_view input) -> std::u32string { +auto punycode_to_utf32(const std::string_view input) -> std::u32string { std::u32string result; punycode_decode(input, result); return result; @@ -288,7 +288,7 @@ auto punycode_to_utf8(std::istream &input, std::ostream &output) -> void { utf32_to_utf8(decoded, output); } -auto utf8_to_punycode(std::string_view input) -> std::string { +auto utf8_to_punycode(const std::string_view input) -> std::string { std::istringstream input_stream{std::string{input}}; const auto codepoints = utf8_to_utf32(input_stream); if (!codepoints.has_value()) { @@ -300,7 +300,7 @@ auto utf8_to_punycode(std::string_view input) -> std::string { return result; } -auto punycode_to_utf8(std::string_view input) -> std::string { +auto punycode_to_utf8(const std::string_view input) -> std::string { std::u32string decoded; punycode_decode(input, decoded); std::ostringstream output_stream; diff --git a/vendor/core/src/core/regex/preprocess.h b/vendor/core/src/core/regex/preprocess.h index 485aeec7..227b41c4 100644 --- a/vendor/core/src/core/regex/preprocess.h +++ b/vendor/core/src/core/regex/preprocess.h @@ -538,7 +538,7 @@ inline auto expand_char_class(const std::string &content) return result.none() ? "(?!)" : bitset_to_class(result); } -inline auto translate_property(std::string_view name, bool negated) +inline auto translate_property(const std::string_view name, const bool negated) -> std::optional { for (const auto &[prop_name, pcre_name] : unicode_property_map) { if (name == prop_name) { diff --git a/vendor/core/src/core/uri/canonicalize.cc b/vendor/core/src/core/uri/canonicalize.cc index be961d68..8ec0d39c 100644 --- a/vendor/core/src/core/uri/canonicalize.cc +++ b/vendor/core/src/core/uri/canonicalize.cc @@ -9,7 +9,7 @@ namespace { -auto to_lowercase(std::string_view input) -> std::string { +auto to_lowercase(const std::string_view input) -> std::string { std::string result; result.reserve(input.size()); for (const auto character : input) { @@ -62,7 +62,7 @@ auto URI::canonicalize() -> URI & { return *this; } -auto URI::canonicalize(const std::string &input) -> std::string { +auto URI::canonicalize(const std::string_view input) -> std::string { return URI{input}.canonicalize().recompose(); } diff --git a/vendor/core/src/core/uri/escaping.h b/vendor/core/src/core/uri/escaping.h index 697bb1f8..f9d7b554 100644 --- a/vendor/core/src/core/uri/escaping.h +++ b/vendor/core/src/core/uri/escaping.h @@ -177,7 +177,8 @@ inline auto uri_unescape_selective_inplace(std::string &str) -> void { // Full unescaping for URI normalization (copy version for compatibility) // Decodes all percent-encoded sequences -inline auto uri_unescape_selective(std::string_view input) -> std::string { +inline auto uri_unescape_selective(const std::string_view input) + -> std::string { std::string result{input}; uri_unescape_selective_inplace(result); return result; diff --git a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h index 13956038..a4bae290 100644 --- a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h +++ b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h @@ -9,6 +9,7 @@ #include // NOLINTEND(misc-include-cleaner) +#include // std::convertible_to #include // std::uint32_t #include // std::filesystem #include // std::istream @@ -17,6 +18,7 @@ #include // std::span #include // std::string #include // std::string_view +#include // std::is_same_v #include // std::vector /// @defgroup uri URI @@ -48,14 +50,19 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// Move assignment operator auto operator=(URI &&) noexcept -> URI & = default; - /// This constructor creates a URI from a string type. For example: + /// This constructor creates a URI from a string. For example: /// /// ```cpp /// #include /// /// const sourcemeta::core::URI uri{"https://www.sourcemeta.com"}; /// ``` - URI(const std::string &input); + template + requires std::convertible_to && + (!std::is_same_v, URI>) + URI(T &&input) { + this->parse(std::string_view{std::forward(input)}); + } /// This constructor creates a URI from a C++ input stream. For example: /// @@ -291,7 +298,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// assert(uri.fragment().has_value()); /// assert(uri.fragment().value() == "foo"); /// ``` - auto fragment(std::string_view fragment) -> URI &; + auto fragment(const std::string_view fragment) -> URI &; /// Get the non-dissected query part of the URI, if any. For example: /// @@ -434,7 +441,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// sourcemeta::core::URI::from_fragment("foo")}; /// assert(uri.recompose() == "#foo"); /// ``` - static auto from_fragment(std::string_view fragment) -> URI; + static auto from_fragment(const std::string_view fragment) -> URI; /// Create a URI from a file system path. For example: /// @@ -460,10 +467,10 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// sourcemeta::core::URI::canonicalize("hTtP://exAmpLe.com:80/TEST")}; /// assert(result == "http://example.com/TEST"); /// ``` - static auto canonicalize(const std::string &input) -> std::string; + static auto canonicalize(std::string_view input) -> std::string; private: - auto parse(const std::string &input) -> void; + auto parse(std::string_view input) -> void; // Exporting symbols that depends on the standard C++ library is considered // safe. diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index 2bbf4cb9..fa95685b 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -8,14 +8,15 @@ #include // std::uint64_t #include // std::optional #include // std::string, std::stoul +#include // std::string_view namespace { using namespace sourcemeta::core; -auto validate_percent_encoded_utf8(const std::string &input, - std::string::size_type position) - -> std::string::size_type { +auto validate_percent_encoded_utf8(const std::string_view input, + std::string_view::size_type position) + -> std::string_view::size_type { if (input[position] != URI_PERCENT) { return 3; } @@ -85,7 +86,8 @@ auto validate_percent_encoded_utf8(const std::string &input, return 3 * (1 + continuation_count); } -auto parse_scheme(const std::string &input, std::string::size_type &position) +auto parse_scheme(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || !std::isalpha(static_cast(input[position]))) { @@ -100,7 +102,7 @@ auto parse_scheme(const std::string &input, std::string::size_type &position) } if (position < input.size() && input[position] == URI_COLON) { - auto scheme = input.substr(start, position - start); + std::string scheme{input.substr(start, position - start)}; position += 1; return scheme; } @@ -109,7 +111,8 @@ auto parse_scheme(const std::string &input, std::string::size_type &position) return std::nullopt; } -auto parse_port(const std::string &input, std::string::size_type &position) +auto parse_port(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || !std::isdigit(static_cast(input[position]))) { @@ -122,12 +125,12 @@ auto parse_port(const std::string &input, std::string::size_type &position) position += 1; } - const auto port_string = input.substr(start, position - start); + const std::string port_string{input.substr(start, position - start)}; return std::stoul(port_string); } -auto parse_ipv6(const std::string &input, std::string::size_type &position) - -> std::string { +auto parse_ipv6(const std::string_view input, + std::string_view::size_type &position) -> std::string { assert(input[position] == URI_OPEN_BRACKET); const auto start = position; @@ -142,13 +145,13 @@ auto parse_ipv6(const std::string &input, std::string::size_type &position) static_cast(start + 1)}; } - auto ipv6 = input.substr(start + 1, position - start - 1); + std::string ipv6{input.substr(start + 1, position - start - 1)}; position += 1; return ipv6; } -auto parse_host(const std::string &input, std::string::size_type &position) - -> std::string { +auto parse_host(const std::string_view input, + std::string_view::size_type &position) -> std::string { if (position >= input.size()) { return std::string{}; } @@ -180,16 +183,17 @@ auto parse_host(const std::string &input, std::string::size_type &position) return std::string{}; } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } -auto parse_userinfo(const std::string &input, std::string::size_type &position) +auto parse_userinfo(const std::string_view input, + std::string_view::size_type &position) -> std::optional { const auto start = position; while (position < input.size()) { const auto current = input[position]; if (current == URI_AT) { - auto userinfo = input.substr(start, position - start); + std::string userinfo{input.substr(start, position - start)}; position += 1; return userinfo; } @@ -209,7 +213,8 @@ auto parse_userinfo(const std::string &input, std::string::size_type &position) return std::nullopt; } -auto parse_path(const std::string &input, std::string::size_type &position) +auto parse_path(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size()) { return std::nullopt; @@ -238,10 +243,11 @@ auto parse_path(const std::string &input, std::string::size_type &position) } } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } -auto parse_query(const std::string &input, std::string::size_type &position) +auto parse_query(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || input[position] != URI_QUESTION) { return std::nullopt; @@ -268,10 +274,11 @@ auto parse_query(const std::string &input, std::string::size_type &position) } } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } -auto parse_fragment(const std::string &input, std::string::size_type &position) +auto parse_fragment(const std::string_view input, + std::string_view::size_type &position) -> std::optional { if (position >= input.size() || input[position] != URI_HASH) { return std::nullopt; @@ -295,14 +302,15 @@ auto parse_fragment(const std::string &input, std::string::size_type &position) } } - return input.substr(start, position - start); + return std::string{input.substr(start, position - start)}; } } // namespace namespace sourcemeta::core { -auto parse_authority(const std::string &input, std::string::size_type &position, +auto parse_authority(const std::string_view input, + std::string_view::size_type &position, std::optional &userinfo, std::optional &host, std::optional &port) -> void { @@ -332,7 +340,7 @@ auto parse_authority(const std::string &input, std::string::size_type &position, } } -auto URI::parse(const std::string &input) -> void { +auto URI::parse(const std::string_view input) -> void { assert(!this->scheme_.has_value()); assert(!this->userinfo_.has_value()); assert(!this->host_.has_value()); @@ -345,7 +353,7 @@ auto URI::parse(const std::string &input) -> void { return; } - auto position = std::string::size_type{0}; + std::string_view::size_type position{0}; this->scheme_ = parse_scheme(input, position); diff --git a/vendor/core/src/core/uri/setters.cc b/vendor/core/src/core/uri/setters.cc index 637ab360..6ba2984a 100644 --- a/vendor/core/src/core/uri/setters.cc +++ b/vendor/core/src/core/uri/setters.cc @@ -25,7 +25,7 @@ auto apply_leading_slash_transform(std::optional parsed_path, return parsed_path; } -auto normalize_fragment(std::string_view input) -> std::string { +auto normalize_fragment(const std::string_view input) -> std::string { if (input.empty()) { return ""; } @@ -149,7 +149,7 @@ auto URI::extension(std::string &&extension) -> URI & { return *this; } -auto URI::fragment(std::string_view fragment) -> URI & { +auto URI::fragment(const std::string_view fragment) -> URI & { this->fragment_ = normalize_fragment(std::string{fragment}); return *this; } diff --git a/vendor/core/src/core/uri/uri.cc b/vendor/core/src/core/uri/uri.cc index 3c7ec98e..ef61941c 100644 --- a/vendor/core/src/core/uri/uri.cc +++ b/vendor/core/src/core/uri/uri.cc @@ -6,15 +6,13 @@ namespace sourcemeta::core { -URI::URI(const std::string &input) { this->parse(input); } - URI::URI(std::istream &input) { std::ostringstream output; output << input.rdbuf(); this->parse(output.str()); } -auto URI::from_fragment(std::string_view fragment) -> URI { +auto URI::from_fragment(const std::string_view fragment) -> URI { URI result; result.fragment(fragment); return result; diff --git a/vendor/core/src/core/uritemplate/CMakeLists.txt b/vendor/core/src/core/uritemplate/CMakeLists.txt new file mode 100644 index 00000000..427d7955 --- /dev/null +++ b/vendor/core/src/core/uritemplate/CMakeLists.txt @@ -0,0 +1,9 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME uritemplate + PRIVATE_HEADERS error.h token.h router.h + SOURCES helpers.h uritemplate.cc uritemplate_router.cc uritemplate_router_view.cc) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME uritemplate) +endif() + +target_link_libraries(sourcemeta_core_uritemplate PUBLIC sourcemeta::core::io) diff --git a/vendor/core/src/core/uritemplate/helpers.h b/vendor/core/src/core/uritemplate/helpers.h new file mode 100644 index 00000000..5b84b56b --- /dev/null +++ b/vendor/core/src/core/uritemplate/helpers.h @@ -0,0 +1,402 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_HELPERS_H_ +#define SOURCEMETA_CORE_URITEMPLATE_HELPERS_H_ + +#include + +#include // std::array +#include // std::size_t +#include // std::string +#include // std::string_view +#include // std::void_t + +namespace sourcemeta::core { + +// Type traits to detect optional static members +template struct has_op : std::false_type {}; +template +struct has_op> : std::true_type {}; + +template struct has_prefix : std::false_type {}; +template +struct has_prefix> : std::true_type {}; + +template +struct has_empty_suffix : std::false_type {}; +template +struct has_empty_suffix> + : std::true_type {}; + +inline auto is_unreserved(const char character) -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z') || + (character >= '0' && character <= '9') || character == '-' || + character == '.' || character == '_' || character == '~'; +} + +inline auto is_reserved(const char character) -> bool { + return character == ':' || character == '/' || character == '?' || + character == '#' || character == '[' || character == ']' || + character == '@' || character == '!' || character == '$' || + character == '&' || character == '\'' || character == '(' || + character == ')' || character == '*' || character == '+' || + character == ',' || character == ';' || character == '='; +} + +inline auto is_hex(const char character) -> bool { + return (character >= '0' && character <= '9') || + (character >= 'A' && character <= 'F') || + (character >= 'a' && character <= 'f'); +} + +static constexpr std::array HEX_DIGITS = { + {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', + 'F'}}; + +inline auto append_percent_encoded(std::string &output, const char character) + -> void { + const auto byte = static_cast(character); + output += '%'; + output += HEX_DIGITS[byte >> 4]; + output += HEX_DIGITS[byte & 0x0F]; +} + +inline auto percent_encode(std::string &output, const std::string_view input) + -> void { + output.reserve(output.size() + input.size() * 3); + for (const char character : input) { + if (is_unreserved(character)) { + output += character; + } else { + append_percent_encoded(output, character); + } + } +} + +inline auto percent_encode_reserved(std::string &output, + const std::string_view input) -> void { + output.reserve(output.size() + input.size() * 3); + for (std::size_t index = 0; index < input.size(); ++index) { + const char character = input[index]; + if (is_unreserved(character) || is_reserved(character) || + (character == '%' && index + 2 < input.size() && + is_hex(input[index + 1]) && is_hex(input[index + 2]))) { + output += character; + } else { + append_percent_encoded(output, character); + } + } +} + +template +inline auto encode(std::string &output, const std::string_view input) -> void { + if constexpr (T::allow_reserved) { + percent_encode_reserved(output, input); + } else { + percent_encode(output, input); + } +} + +template +inline auto append_name(std::string &result, const std::string_view name, + const bool value_empty, const bool has_more) -> void { + if constexpr (T::named) { + result += name; + if (value_empty && !has_more) { + if constexpr (has_empty_suffix::value) { + result += T::empty_suffix; + } + } else { + result += '='; + } + } +} + +// RFC 6570 Section 2.3: varchar = ALPHA / DIGIT / "_" +inline auto is_varchar(const char character) noexcept -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z') || + (character >= '0' && character <= '9') || character == '_'; +} + +// Variable name character including dot for dotted names like "foo.bar" +inline auto is_varname_char(const char character) noexcept -> bool { + return is_varchar(character) || character == '.'; +} + +// RFC 6570 Section 2.2: operator = op-level2 / op-level3 / op-reserve +inline auto is_operator(const char character) noexcept -> bool { + return character == '+' || character == '#' || character == '.' || + character == '/' || character == ';' || character == '?' || + character == '&'; +} + +// RFC 6570 Section 2.2: op-reserve = "=" / "," / "!" / "@" / "|" +inline auto is_reserved_operator(const char character) noexcept -> bool { + return character == '=' || character == ',' || character == '!' || + character == '@' || character == '|'; +} + +// RFC 6570 Section 2.4: modifier = prefix / explode +inline auto is_modifier(const char character) noexcept -> bool { + return character == ':' || character == '*'; +} + +inline auto parse_varname(const std::string_view input, std::size_t position) + -> std::size_t { + if (position >= input.size() || + (!is_varchar(input[position]) && input[position] != '%')) { + throw URITemplateParseError(position + 1); + } + + while (position < input.size() && input[position] != '}' && + input[position] != ',' && input[position] != ':' && + input[position] != '*') { + const char character = input[position]; + + if (is_varchar(character)) { + position++; + } else if (character == '.') { + position++; + if (position >= input.size() || + (!is_varchar(input[position]) && input[position] != '%')) { + throw URITemplateParseError(position + 1); + } + } else if (character == '%') { + if (position + 2 >= input.size()) { + throw URITemplateParseError(position + 1); + } + if (!is_hex(input[position + 1]) || !is_hex(input[position + 2])) { + throw URITemplateParseError(position + 1); + } + position += 3; + } else { + throw URITemplateParseError(position + 1); + } + } + + return position; +} + +inline auto +parse_variable_list(const std::string_view input, std::size_t position, + std::vector &variables) + -> std::size_t { + while (true) { + const auto start = position; + position = parse_varname(input, position); + + if (position == start) { + throw URITemplateParseError(position + 1); + } + + const auto name = input.substr(start, position - start); + std::uint16_t length = 0; + bool explode = false; + + if (position >= input.size()) { + throw URITemplateParseError(1); + } + + if (input[position] == ':') { + position++; + if (position >= input.size() || input[position] < '1' || + input[position] > '9') { + throw URITemplateParseError(position + 1); + } + + const auto prefix_start = position; + while (position < input.size() && input[position] >= '0' && + input[position] <= '9') { + position++; + if (position - prefix_start > 4) { + throw URITemplateParseError(position); + } + } + + const auto prefix_str = + input.substr(prefix_start, position - prefix_start); + std::uint16_t value = 0; + for (const char character : prefix_str) { + value = static_cast( + value * 10 + static_cast(character - '0')); + } + + if (value > 9999 || value == 0) { + throw URITemplateParseError(prefix_start + 1); + } + + length = value; + } else if (input[position] == '*') { + explode = true; + position++; + } + + variables.push_back(URITemplateVariableSpecification{ + .name = name, .length = length, .explode = explode}); + + if (position >= input.size()) { + throw URITemplateParseError(1); + } + + if (input[position] == '}') { + break; + } + + if (input[position] == ',') { + position++; + } + } + + return position; +} + +template +auto parse_expression(const std::string_view input) -> URITemplateParseResult { + if constexpr (std::is_same_v) { + if (input.empty() || input[0] == '{') { + return std::nullopt; + } + + if (input[0] == '}') { + throw URITemplateParseError(1); + } + + std::size_t position = 1; + while (position < input.size()) { + if (input[position] == '{') { + break; + } + if (input[position] == '}') { + throw URITemplateParseError(position + 1); + } + position++; + } + + return std::make_pair( + URITemplateToken{URITemplateTokenLiteral{input.substr(0, position)}}, + position); + } else { + if (input.empty() || input[0] != '{') { + return std::nullopt; + } + + std::size_t var_start; + if constexpr (has_op::value) { + if (input.size() < 3 || input[1] != T::op) { + return std::nullopt; + } + var_start = 2; + } else { + if (input.size() < 2) { + throw URITemplateParseError(1); + } + // Not a simple variable if it has an operator + if (is_operator(input[1])) { + return std::nullopt; + } + var_start = 1; + } + + std::vector variables; + const auto end_position = parse_variable_list(input, var_start, variables); + return std::make_pair(URITemplateToken{T{std::move(variables)}}, + end_position + 1); + } +} + +template +auto expand_expression( + std::string &result, + const std::vector &variables, + const std::function &callback) -> void { + bool first_var = true; + + for (const auto &variable : variables) { + auto response = callback(variable.name); + if (!response.has_value()) { + continue; + } + + bool first_value = true; + + while (true) { + const auto &[value, object_key, has_more] = response.value(); + + if (variable.length > 0 && + (has_more || object_key.has_value() || !first_value)) { + throw URITemplateExpansionError{ + "Prefix modifier cannot be applied to composite values"}; + } + + auto actual_value = value; + if (variable.length > 0) { + actual_value = actual_value.substr(0, variable.length); + } + + if (variable.explode) { + if (first_var && first_value) { + if constexpr (has_prefix::value) { + result += T::prefix; + } + first_var = false; + } else { + result += T::separator; + } + + if (object_key.has_value()) { + encode(result, object_key.value()); + result += '='; + encode(result, actual_value); + } else if constexpr (T::named) { + result += variable.name; + if (actual_value.empty()) { + if constexpr (has_empty_suffix::value) { + result += T::empty_suffix; + } + } else { + result += '='; + encode(result, actual_value); + } + } else { + encode(result, actual_value); + } + } else { + if (first_var && first_value) { + if constexpr (has_prefix::value) { + result += T::prefix; + } + first_var = false; + append_name(result, variable.name, actual_value.empty(), has_more); + } else if (first_value) { + result += T::separator; + append_name(result, variable.name, actual_value.empty(), has_more); + } else { + result += ','; + } + + if (!first_value || !actual_value.empty() || has_more) { + if (object_key.has_value()) { + encode(result, object_key.value()); + result += ','; + } + encode(result, actual_value); + } + } + + first_value = false; + + if (!has_more) { + break; + } + + response = callback(variable.name); + if (!response.has_value()) { + break; + } + } + } +} + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate.h new file mode 100644 index 00000000..4f5540ff --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate.h @@ -0,0 +1,118 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_H_ +#define SOURCEMETA_CORE_URITEMPLATE_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +// NOLINTBEGIN(misc-include-cleaner) +#include +#include +#include +// NOLINTEND(misc-include-cleaner) + +#include // std::size_t +#include // std::uint64_t +#include // std::function +#include // std::optional +#include // std::string +#include // std::string_view +#include // std::tuple +#include // std::void_t +#include // std::vector + +/// @defgroup uritemplate URI Template +/// @brief A strict RFC 6570 URI Template implementation. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup uritemplate +/// The return type for URI Template variable callbacks (value, key?, has_more) +using URITemplateValue = std::optional< + std::tuple, bool>>; + +/// @ingroup uritemplate +/// The result of parsing a token: the token and how many characters were +/// consumed +using URITemplateParseResult = + std::optional>; + +/// @ingroup uritemplate +/// A parsed URI Template per RFC 6570. This class behaves like a view. The +/// source string must outlive the template +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplate { +public: + /// Parse a URI Template from a string view. For example: + /// + /// ```cpp + /// #include + /// + /// const std::string source{"http://example.com/~{username}/"}; + /// const sourcemeta::core::URITemplate uri_template{source}; + /// ``` + URITemplate(const std::string_view source); + + /// Get the number of tokens in the template + [[nodiscard]] auto size() const noexcept -> std::uint64_t; + + /// Check if the template is empty + [[nodiscard]] auto empty() const noexcept -> bool; + + /// Get the token at the given index + [[nodiscard]] auto at(std::size_t index) const & -> const URITemplateToken &; + + /// Get the token at the given index (move overload) + [[nodiscard]] auto at(std::size_t index) && -> URITemplateToken; + + /// Iterator to the beginning of the tokens + [[nodiscard]] auto begin() const noexcept + -> std::vector::const_iterator; + + /// Iterator to the end of the tokens + [[nodiscard]] auto end() const noexcept + -> std::vector::const_iterator; + + /// Expand the template by looking up variable values via a callback. + /// The callback is called repeatedly for composite values + [[nodiscard]] auto expand( + const std::function &callback) const + -> std::string; + + /// Expand the template using an associative container (string values only) + template > + [[nodiscard]] auto expand(const Container &variables) const -> std::string { + return this->expand([&variables]( + const std::string_view name) -> URITemplateValue { + const auto iterator{variables.find(typename Container::key_type{name})}; + if (iterator == variables.end()) { + return std::nullopt; + } else { + return std::make_tuple(std::string_view{iterator->second}, std::nullopt, + false); + } + }); + } + +private: +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + std::vector tokens_; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h new file mode 100644 index 00000000..0898a39c --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h @@ -0,0 +1,132 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_ERROR_H_ +#define SOURCEMETA_CORE_URITEMPLATE_ERROR_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +#include // std::uint64_t +#include // std::exception +#include // std::filesystem::path +#include // std::runtime_error +#include // std::string +#include // std::string_view +#include // std::move + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup uritemplate +/// An error that represents a URI Template parsing failure +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateParseError + : public std::exception { +public: + URITemplateParseError(const std::uint64_t column) : column_{column} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "The input is not a valid URI Template"; + } + + /// Get the column number of the error + [[nodiscard]] auto column() const noexcept -> std::uint64_t { + return this->column_; + } + +private: + std::uint64_t column_; +}; + +/// @ingroup uritemplate +/// An error that represents a URI Template expansion failure +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateExpansionError + : public std::runtime_error { +public: + URITemplateExpansionError(const std::string &message) + : std::runtime_error{message} {} +}; + +/// @ingroup uritemplate +/// An error that represents a variable name mismatch when adding routes +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterVariableMismatchError + : public std::exception { +public: + URITemplateRouterVariableMismatchError(const std::string_view left, + const std::string_view right) + : left_{left}, right_{right} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "Variable name mismatch when adding route"; + } + + /// Get the existing variable name + [[nodiscard]] auto left() const noexcept -> const std::string & { + return this->left_; + } + + /// Get the conflicting variable name + [[nodiscard]] auto right() const noexcept -> const std::string & { + return this->right_; + } + +private: + std::string left_; + std::string right_; +}; + +/// @ingroup uritemplate +/// An error for invalid segments when adding routes +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterInvalidSegmentError + : public std::exception { +public: + URITemplateRouterInvalidSegmentError(const char *message, + const std::string_view segment) + : message_{message}, segment_{segment} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_; + } + + /// Get the offending segment + [[nodiscard]] auto segment() const noexcept -> const std::string & { + return this->segment_; + } + +private: + const char *message_; + std::string segment_; +}; + +/// @ingroup uritemplate +/// An error that represents a failure to save the router to disk +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterSaveError + : public std::exception { +public: + URITemplateRouterSaveError(std::filesystem::path path, const char *message) + : path_{std::move(path)}, message_{message} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_; + } + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; + const char *message_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h new file mode 100644 index 00000000..81d80103 --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h @@ -0,0 +1,139 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_ROUTER_H_ +#define SOURCEMETA_CORE_URITEMPLATE_ROUTER_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +#include + +#include // std::uint16_t, std::uint32_t, std::uint8_t +#include // std::filesystem::path +#include // std::function +#include // std::unique_ptr +#include // std::string_view +#include // std::vector + +namespace sourcemeta::core { + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif + +/// @ingroup uritemplate +/// A URI Template path router. Keep in mind that the URI Template specification +/// DOES NOT define expansion. So this is an opinionated non-standard adaptation +/// of URI Template for path routing purposes +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouter { +public: + /// A handler identifier 0 means "no handler" + using Identifier = std::uint16_t; + + /// The variable index type + using Index = std::uint8_t; + + /// The match callback (index, name, value) + using Callback = + std::function; + + /// The type of a node in the router trie + enum class NodeType : std::uint8_t { + Root = 0, + Literal = 1, + Variable = 2, + Expansion = 3 + }; + + /// A node in the router trie + struct Node { + Identifier identifier{0}; + NodeType type{NodeType::Root}; + std::string_view value; + + // This children distinction enforces that there can only be one non-literal + // child at the type level. Also allows us to more efficiently search on + // literals + std::vector> literals; + std::unique_ptr variable; + }; + + /// Construct an empty router + URITemplateRouter() = default; + + // To avoid mistakes + URITemplateRouter(const URITemplateRouter &) = delete; + URITemplateRouter(URITemplateRouter &&) = delete; + auto operator=(const URITemplateRouter &) -> URITemplateRouter & = delete; + auto operator=(URITemplateRouter &&) -> URITemplateRouter & = delete; + + /// Add a route to the router. Make sure the string lifetime survives the + /// router + auto add(const std::string_view uri_template, const Identifier identifier) + -> void; + + /// Match a path against the router. Note the callback might fire for + /// initial matches even though the entire match might still fail + [[nodiscard]] auto match(const std::string_view path, + const Callback &callback) const -> Identifier; + + /// Access the root node of the trie + [[nodiscard]] auto root() const noexcept -> const Node &; + +private: + Node root_; +}; + +/// @ingroup uritemplate +/// A read-only memory-mapped view of a serialized URI Template router +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterView { +public: + /// A serialized node in the binary format +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4324) +#endif + struct alignas(8) Node { + std::uint32_t string_offset; + std::uint32_t string_length; + std::uint32_t first_literal_child; + std::uint32_t literal_child_count; + std::uint32_t variable_child; + URITemplateRouter::NodeType type; + std::uint8_t padding; + URITemplateRouter::Identifier identifier; + }; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + + /// Save a router to a binary file + static auto save(const URITemplateRouter &router, + const std::filesystem::path &path) -> void; + + URITemplateRouterView(const std::filesystem::path &path); + + // To avoid mistakes + URITemplateRouterView(const URITemplateRouterView &) = delete; + URITemplateRouterView(URITemplateRouterView &&) = delete; + auto operator=(const URITemplateRouterView &) + -> URITemplateRouterView & = delete; + auto operator=(URITemplateRouterView &&) -> URITemplateRouterView & = delete; + + /// Match a path against the router. Note the callback might fire for + /// initial matches even though the entire match might still fail + [[nodiscard]] auto match(const std::string_view path, + const URITemplateRouter::Callback &callback) const + -> URITemplateRouter::Identifier; + +private: + FileView file_view_; +}; + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_token.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_token.h new file mode 100644 index 00000000..5b7f19bc --- /dev/null +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_token.h @@ -0,0 +1,142 @@ +#ifndef SOURCEMETA_CORE_URITEMPLATE_TOKEN_H_ +#define SOURCEMETA_CORE_URITEMPLATE_TOKEN_H_ + +#ifndef SOURCEMETA_CORE_URITEMPLATE_EXPORT +#include +#endif + +#include // std::uint16_t +#include // std::string_view +#include // std::variant +#include // std::vector + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif + +/// @ingroup uritemplate +/// A literal string segment in a URI Template +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenLiteral { + std::string_view value; +}; + +/// @ingroup uritemplate +/// A variable specification within a URI Template expression +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateVariableSpecification { + std::string_view name; + // As per the RFC, the range is 1-9999. 0 means "no prefix length" + std::uint16_t length{0}; + bool explode{false}; +}; + +/// @ingroup uritemplate +/// A simple string variable expansion {var} in a URI Template (Level 1) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenVariable { + std::vector variables; + static constexpr char separator = ','; + static constexpr bool named = false; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A reserved expansion {+var} in a URI Template (Level 2) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenReservedExpansion { + std::vector variables; + static constexpr char op = '+'; + static constexpr char separator = ','; + static constexpr bool named = false; + static constexpr bool allow_reserved = true; +}; + +/// @ingroup uritemplate +/// A fragment expansion {#var} in a URI Template (Level 2) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenFragmentExpansion { + std::vector variables; + static constexpr char op = '#'; + static constexpr char separator = ','; + static constexpr char prefix = '#'; + static constexpr bool named = false; + static constexpr bool allow_reserved = true; +}; + +/// @ingroup uritemplate +/// A label expansion {.var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenLabelExpansion { + std::vector variables; + static constexpr char op = '.'; + static constexpr char separator = '.'; + static constexpr char prefix = '.'; + static constexpr bool named = false; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A path expansion {/var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenPathExpansion { + std::vector variables; + static constexpr char op = '/'; + static constexpr char separator = '/'; + static constexpr char prefix = '/'; + static constexpr bool named = false; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A path parameter expansion {;var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT + URITemplateTokenPathParameterExpansion { + std::vector variables; + static constexpr char op = ';'; + static constexpr char separator = ';'; + static constexpr char prefix = ';'; + static constexpr bool named = true; + static constexpr bool allow_reserved = false; +}; + +/// @ingroup uritemplate +/// A query expansion {?var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateTokenQueryExpansion { + std::vector variables; + static constexpr char op = '?'; + static constexpr char separator = '&'; + static constexpr char prefix = '?'; + static constexpr bool named = true; + static constexpr bool allow_reserved = false; + static constexpr char empty_suffix = '='; +}; + +/// @ingroup uritemplate +/// A query continuation expansion {&var} in a URI Template (Level 3) +struct SOURCEMETA_CORE_URITEMPLATE_EXPORT + URITemplateTokenQueryContinuationExpansion { + std::vector variables; + static constexpr char op = '&'; + static constexpr char separator = '&'; + static constexpr char prefix = '&'; + static constexpr bool named = true; + static constexpr bool allow_reserved = false; + static constexpr char empty_suffix = '='; +}; + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +/// @ingroup uritemplate +/// A token in a parsed URI Template +using URITemplateToken = std::variant< + URITemplateTokenLiteral, URITemplateTokenVariable, + URITemplateTokenReservedExpansion, URITemplateTokenFragmentExpansion, + URITemplateTokenLabelExpansion, URITemplateTokenPathExpansion, + URITemplateTokenPathParameterExpansion, URITemplateTokenQueryExpansion, + URITemplateTokenQueryContinuationExpansion>; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/uritemplate/uritemplate.cc b/vendor/core/src/core/uritemplate/uritemplate.cc new file mode 100644 index 00000000..16ab6763 --- /dev/null +++ b/vendor/core/src/core/uritemplate/uritemplate.cc @@ -0,0 +1,103 @@ +#include + +#include "helpers.h" + +#include // assert +#include // std::pair +#include // std::vector + +namespace sourcemeta::core { + +template +static auto try_parse(std::string_view &remaining, std::size_t &offset, + std::vector &tokens) -> bool { + if (auto result = parse_expression(remaining)) { + tokens.emplace_back(std::move(result->first)); + remaining.remove_prefix(result->second); + offset += result->second; + return true; + } + + return false; +} + +template +static auto try_parse_any(std::string_view &remaining, std::size_t &offset, + std::vector &tokens) -> bool { + return (try_parse(remaining, offset, tokens) || ...); +} + +URITemplate::URITemplate(const std::string_view source) { + std::string_view remaining{source}; + std::size_t offset = 0; + + while (!remaining.empty()) { + try { + if (!try_parse_any( + remaining, offset, this->tokens_)) { + break; + } + } catch (URITemplateParseError &error) { + throw URITemplateParseError(offset + error.column()); + } + } +} + +auto URITemplate::size() const noexcept -> std::uint64_t { + return static_cast(this->tokens_.size()); +} + +auto URITemplate::empty() const noexcept -> bool { + return this->tokens_.empty(); +} + +auto URITemplate::at(const std::size_t index) const & -> const + URITemplateToken & { + assert(index < this->tokens_.size()); + return this->tokens_[index]; +} + +auto URITemplate::at(const std::size_t index) && -> URITemplateToken { + assert(index < this->tokens_.size()); + return std::move(this->tokens_[index]); +} + +auto URITemplate::begin() const noexcept + -> std::vector::const_iterator { + return this->tokens_.cbegin(); +} + +auto URITemplate::end() const noexcept + -> std::vector::const_iterator { + return this->tokens_.cend(); +} + +auto URITemplate::expand( + const std::function &callback) + const -> std::string { + std::string result; + + for (const auto &token : this->tokens_) { + std::visit( + [&result, &callback](const auto &expansion) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + result += expansion.value; + } else { + expand_expression(result, expansion.variables, callback); + } + }, + token); + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uritemplate/uritemplate_router.cc b/vendor/core/src/core/uritemplate/uritemplate_router.cc new file mode 100644 index 00000000..2fedf142 --- /dev/null +++ b/vendor/core/src/core/uritemplate/uritemplate_router.cc @@ -0,0 +1,343 @@ +#include + +#include "helpers.h" + +#include // std::ranges::lower_bound +#include // assert +#include // std::numeric_limits + +namespace sourcemeta::core { + +namespace { + +using Node = URITemplateRouter::Node; +using NodeType = URITemplateRouter::NodeType; + +constexpr auto node_value = + [](const std::unique_ptr &child) -> decltype(auto) { + return child->value; +}; + +auto find_literal_child(const std::vector> &literals, + const std::string_view segment) -> Node * { + const auto iterator = + std::ranges::lower_bound(literals, segment, {}, node_value); + if (iterator != literals.end() && (*iterator)->value == segment) { + return iterator->get(); + } + return nullptr; +} + +auto find_or_create_literal_child(std::vector> &literals, + const std::string_view value) -> Node & { + auto iterator = std::ranges::lower_bound(literals, value, {}, node_value); + if (iterator != literals.end() && (*iterator)->value == value) { + return **iterator; + } + + auto child = std::make_unique(); + child->type = NodeType::Literal; + child->value = value; + auto &result = *child; + literals.insert(iterator, std::move(child)); + return result; +} + +auto find_or_create_variable_child(std::unique_ptr &variable, + const std::string_view name, + const NodeType type) -> Node * { + if (!variable) { + variable = std::make_unique(); + variable->type = type; + variable->value = name; + return variable.get(); + } + + if (variable->value != name) { + throw URITemplateRouterVariableMismatchError{variable->value, name}; + } + + if (type == NodeType::Expansion) { + if (variable->type == NodeType::Variable) { + variable->type = NodeType::Expansion; + return variable.get(); + } + } else if (variable->type == NodeType::Expansion) { + return nullptr; + } + + return variable.get(); +} + +// Find the end of a brace expression (including the closing brace) +inline auto find_expression_end(const char *start, const char *end) -> const + char * { + const char *position = start + 1; + while (position < end && *position != '}') { + ++position; + } + if (position < end) { + ++position; // include the '}' + } + return position; +} + +// Extract the current segment (from segment start to next / or end) +inline auto extract_segment(const char *start, const char *end) + -> std::string_view { + const char *position = start; + while (position < end && *position != '/') { + ++position; + } + return {start, static_cast(position - start)}; +} + +} // namespace + +auto URITemplateRouter::add(const std::string_view uri_template, + const Identifier identifier) -> void { + assert(identifier > 0); + + if (uri_template.empty()) { + this->root_.identifier = identifier; + return; + } + + Node *current = nullptr; + bool absorbed = false; + const char *position = uri_template.data(); + const char *const end = position + uri_template.size(); + + while (position < end && !absorbed) { + while (position < end && *position == '/') { + ++position; + } + + if (position >= end) { + break; + } + + const char *segment_start = position; + + if (*position == '}') { + throw URITemplateRouterInvalidSegmentError{ + "Unmatched closing brace", extract_segment(segment_start, end)}; + } + + if (*position == '{') { + const char *expression_start = position; + const char *expression_end = find_expression_end(position, end); + std::string_view expression{ + expression_start, + static_cast(expression_end - expression_start)}; + + ++position; + + if (position >= end) { + throw URITemplateRouterInvalidSegmentError{"Unclosed brace", + expression}; + } + + NodeType type = NodeType::Variable; + if (*position == '+') { + type = NodeType::Expansion; + ++position; + if (position >= end || *position == '}') { + throw URITemplateRouterInvalidSegmentError{"Empty variable name", + expression}; + } + } else if (is_operator(*position) && *position != '+') { + throw URITemplateRouterInvalidSegmentError{ + "Unsupported URI Template operator", expression}; + } else if (is_reserved_operator(*position)) { + throw URITemplateRouterInvalidSegmentError{ + "Reserved URI Template operator", expression}; + } else if (*position == '{') { + throw URITemplateRouterInvalidSegmentError{ + "Nested opening brace", extract_segment(expression_start, end)}; + } else if (*position == ' ') { + throw URITemplateRouterInvalidSegmentError{"Space before variable name", + expression}; + } else if (*position == '}') { + throw URITemplateRouterInvalidSegmentError{"Empty variable name", + expression}; + } + + const char *varname_start = position; + while (position < end && *position != '}' && *position != ' ' && + !is_modifier(*position) && *position != ',') { + if (!is_varname_char(*position)) { + throw URITemplateRouterInvalidSegmentError{ + "Invalid character in variable name", expression}; + } + ++position; + } + + if (position >= end) { + throw URITemplateRouterInvalidSegmentError{"Unclosed brace", + expression}; + } + + if (*position == ' ') { + throw URITemplateRouterInvalidSegmentError{ + "Space in variable expression", expression}; + } + + if (*position == ':') { + throw URITemplateRouterInvalidSegmentError{ + "Prefix modifier not supported", expression}; + } + + if (*position == '*') { + throw URITemplateRouterInvalidSegmentError{ + "Explode modifier not supported", expression}; + } + + if (*position == ',') { + throw URITemplateRouterInvalidSegmentError{ + "Multiple variables not supported", expression}; + } + + const std::string_view varname{ + varname_start, static_cast(position - varname_start)}; + + ++position; // skip '}' + + if (position < end && *position != '/') { + throw URITemplateRouterInvalidSegmentError{ + "Path segment cannot mix literals and variables", + extract_segment(expression_start, end)}; + } + + if (type == NodeType::Expansion && position < end) { + throw URITemplateRouterInvalidSegmentError{ + "Reserved expansion must be the last segment", expression}; + } + + auto &variable = current ? current->variable : this->root_.variable; + auto *result = find_or_create_variable_child(variable, varname, type); + if (result == nullptr) { + absorbed = true; + } else { + current = result; + } + } else { + while (position < end && *position != '/' && *position != '{') { + if (*position == '}') { + throw URITemplateRouterInvalidSegmentError{ + "Unmatched closing brace", extract_segment(segment_start, end)}; + } + ++position; + } + + if (position < end && *position == '{') { + const char *expr_end = find_expression_end(position, end); + const char *seg_end = expr_end; + while (seg_end < end && *seg_end != '/') { + ++seg_end; + } + throw URITemplateRouterInvalidSegmentError{ + "Path segment cannot mix literals and variables", + std::string_view{segment_start, static_cast( + seg_end - segment_start)}}; + } + + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + + auto &literals = current ? current->literals : this->root_.literals; + current = &find_or_create_literal_child(literals, segment); + } + } + + if (current == nullptr && uri_template.size() == 1 && + uri_template[0] == '/') { + current = &find_or_create_literal_child(this->root_.literals, ""); + } + + if (!absorbed && current != nullptr) { + current->identifier = identifier; + } +} + +auto URITemplateRouter::root() const noexcept -> const Node & { + return this->root_; +} + +auto URITemplateRouter::match(const std::string_view path, + const Callback &callback) const -> Identifier { + if (path.empty()) { + return this->root_.identifier; + } + + if (path.size() == 1 && path[0] == '/') { + if (auto *child = find_literal_child(this->root_.literals, "")) { + return child->identifier; + } + return 0; + } + + const Node *current = nullptr; + const char *position = path.data(); + const char *const path_end = position + path.size(); + + const std::vector> *literal_children = + &this->root_.literals; + const std::unique_ptr *variable_child = &this->root_.variable; + + std::size_t variable_index = 0; + + // Skip leading slash + if (position < path_end && *position == '/') { + ++position; + } + + while (true) { + const char *segment_start = position; + while (position < path_end && *position != '/') { + ++position; + } + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + + // Empty segment (from double slash or trailing slash) doesn't match + if (segment.empty()) { + return 0; + } + + if (auto *literal_match = find_literal_child(*literal_children, segment)) { + current = literal_match; + } else if (*variable_child) { + assert(variable_index <= + std::numeric_limits::max()); + if ((*variable_child)->type == NodeType::Expansion) { + const std::string_view remaining{ + segment_start, static_cast(path_end - segment_start)}; + callback(static_cast(variable_index), + (*variable_child)->value, remaining); + return (*variable_child)->identifier; + } + callback(static_cast(variable_index), + (*variable_child)->value, segment); + ++variable_index; + current = variable_child->get(); + } else { + return 0; + } + + literal_children = ¤t->literals; + variable_child = ¤t->variable; + + // Check if there's more path + if (position >= path_end) { + break; + } + + // Skip the slash and continue to next segment + ++position; + } + + return current ? current->identifier : this->root_.identifier; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc new file mode 100644 index 00000000..425e0f34 --- /dev/null +++ b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc @@ -0,0 +1,289 @@ +#include + +#include // assert +#include // std::memcmp +#include // std::ofstream +#include // std::numeric_limits +#include // std::queue +#include // std::string +#include // std::unordered_map +#include // std::vector + +namespace sourcemeta::core { + +namespace { + +constexpr std::uint32_t ROUTER_MAGIC = 0x52544552; // "RTER" +constexpr std::uint32_t ROUTER_VERSION = 1; +constexpr std::uint32_t NO_CHILD = std::numeric_limits::max(); + +struct RouterHeader { + std::uint32_t magic; + std::uint32_t version; + std::uint32_t node_count; + std::uint32_t string_table_offset; +}; + +// Binary search for a literal child matching the given segment +inline auto binary_search_literal_children( + const URITemplateRouterView::Node *nodes, const char *string_table, + const std::uint32_t first_child, const std::uint32_t child_count, + const char *segment, const std::uint32_t segment_length) noexcept + -> std::uint32_t { + std::uint32_t low = 0; + std::uint32_t high = child_count; + + while (low < high) { + const auto middle = low + (high - low) / 2; + const auto child_index = first_child + middle; + const auto &child = nodes[child_index]; + + // Compare segments lexicographically (content first, then length) + const auto min_length = segment_length < child.string_length + ? segment_length + : child.string_length; + const auto content_comparison = + std::memcmp(segment, string_table + child.string_offset, min_length); + const auto comparison = content_comparison != 0 + ? content_comparison + : static_cast(segment_length) - + static_cast(child.string_length); + + if (comparison < 0) { + high = middle; + } else if (comparison > 0) { + low = middle + 1; + } else { + return child_index; + } + } + + return NO_CHILD; +} + +} // namespace + +auto URITemplateRouterView::save(const URITemplateRouter &router, + const std::filesystem::path &path) -> void { + std::vector nodes; + std::string string_table; + std::queue queue; + std::unordered_map + node_indices; + + const auto &root = router.root(); + + Node root_serialized{}; + root_serialized.string_offset = 0; + root_serialized.string_length = 0; + root_serialized.type = URITemplateRouter::NodeType::Root; + root_serialized.padding = 0; + root_serialized.identifier = root.identifier; + + if (root.literals.empty()) { + root_serialized.first_literal_child = NO_CHILD; + root_serialized.literal_child_count = 0; + } else { + root_serialized.first_literal_child = 1; + root_serialized.literal_child_count = + static_cast(root.literals.size()); + for (const auto &child : root.literals) { + node_indices[child.get()] = static_cast(queue.size() + 1); + queue.push(child.get()); + } + } + + if (root.variable) { + root_serialized.variable_child = + static_cast(queue.size() + 1); + node_indices[root.variable.get()] = root_serialized.variable_child; + queue.push(root.variable.get()); + } else { + root_serialized.variable_child = NO_CHILD; + } + + nodes.push_back(root_serialized); + + while (!queue.empty()) { + const auto *node = queue.front(); + queue.pop(); + + Node serialized{}; + serialized.string_offset = static_cast(string_table.size()); + serialized.type = node->type; + serialized.string_length = static_cast(node->value.size()); + string_table += node->value; + + serialized.padding = 0; + serialized.identifier = node->identifier; + + const auto first_child_index = + static_cast(nodes.size() + queue.size() + 1); + + if (!node->literals.empty()) { + serialized.first_literal_child = first_child_index; + serialized.literal_child_count = + static_cast(node->literals.size()); + for (const auto &child : node->literals) { + node_indices[child.get()] = + static_cast(nodes.size() + queue.size() + 1); + queue.push(child.get()); + } + } else { + serialized.first_literal_child = NO_CHILD; + serialized.literal_child_count = 0; + } + + if (node->variable) { + serialized.variable_child = + static_cast(nodes.size() + queue.size() + 1); + node_indices[node->variable.get()] = serialized.variable_child; + queue.push(node->variable.get()); + } else { + serialized.variable_child = NO_CHILD; + } + + nodes.push_back(serialized); + } + + RouterHeader header{}; + header.magic = ROUTER_MAGIC; + header.version = ROUTER_VERSION; + header.node_count = static_cast(nodes.size()); + header.string_table_offset = static_cast( + sizeof(RouterHeader) + nodes.size() * sizeof(Node)); + + std::ofstream file(path, std::ios::binary); + if (!file) { + throw URITemplateRouterSaveError{path, "Failed to open file for writing"}; + } + + file.write(reinterpret_cast(&header), sizeof(header)); + file.write(reinterpret_cast(nodes.data()), + static_cast(nodes.size() * sizeof(Node))); + file.write(string_table.data(), + static_cast(string_table.size())); + + if (!file) { + throw URITemplateRouterSaveError{path, + "Failed to write router data to file"}; + } +} + +URITemplateRouterView::URITemplateRouterView(const std::filesystem::path &path) + : file_view_{path} {} + +auto URITemplateRouterView::match(const std::string_view path, + const URITemplateRouter::Callback &callback) + const -> URITemplateRouter::Identifier { + const auto *header = this->file_view_.as(); + assert(header->magic == ROUTER_MAGIC); + assert(header->version == ROUTER_VERSION); + + const auto *nodes = this->file_view_.as(sizeof(RouterHeader)); + const auto *string_table = + header->string_table_offset < this->file_view_.size() + ? this->file_view_.as(header->string_table_offset) + : nullptr; + + // Empty path matches empty template + if (path.empty()) { + return nodes[0].identifier; + } + + // Root path "/" is stored as an empty literal segment + if (path.size() == 1 && path[0] == '/') { + const auto &root = nodes[0]; + if (root.first_literal_child == NO_CHILD) { + return 0; + } + + const auto match = binary_search_literal_children( + nodes, string_table, root.first_literal_child, root.literal_child_count, + "", 0); + return match != NO_CHILD ? nodes[match].identifier : 0; + } + + // Walk the trie, matching each path segment + std::uint32_t current_node = 0; + const char *position = path.data(); + const char *const path_end = position + path.size(); + + std::size_t variable_index = 0; + + // Skip leading slash + if (position < path_end && *position == '/') { + ++position; + } + + while (true) { + // Extract segment + const char *segment_start = position; + while (position < path_end && *position != '/') { + ++position; + } + + const auto segment_length = + static_cast(position - segment_start); + + // Empty segment (from double slash or trailing slash) doesn't match + if (segment_length == 0) { + return 0; + } + + const auto &node = nodes[current_node]; + + // Try literal children first + if (node.first_literal_child != NO_CHILD) { + const auto literal_match = binary_search_literal_children( + nodes, string_table, node.first_literal_child, + node.literal_child_count, segment_start, segment_length); + if (literal_match != NO_CHILD) { + current_node = literal_match; + if (position >= path_end) { + break; + } + ++position; + continue; + } + } + + // Fall back to variable child + if (node.variable_child != NO_CHILD) { + assert(variable_index <= + std::numeric_limits::max()); + const auto &variable_node = nodes[node.variable_child]; + + // Check if this is an expansion (catch-all) + if (variable_node.type == URITemplateRouter::NodeType::Expansion) { + const auto remaining_length = + static_cast(path_end - segment_start); + callback(static_cast(variable_index), + {string_table + variable_node.string_offset, + variable_node.string_length}, + {segment_start, remaining_length}); + return variable_node.identifier; + } + + // Regular variable - match single segment + callback(static_cast(variable_index), + {string_table + variable_node.string_offset, + variable_node.string_length}, + {segment_start, segment_length}); + ++variable_index; + current_node = node.variable_child; + if (position >= path_end) { + break; + } + ++position; + continue; + } + + // No match + return 0; + } + + return nodes[current_node].identifier; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h index f630fa02..5ac5ff46 100644 --- a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h +++ b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h @@ -58,11 +58,11 @@ class SOURCEMETA_CORE_YAML_EXPORT YAMLParseError : public std::exception { class SOURCEMETA_CORE_YAML_EXPORT YAMLUnknownAnchorError : public YAMLParseError { public: - YAMLUnknownAnchorError(std::string anchor_name) + YAMLUnknownAnchorError(const std::string_view anchor_name) : YAMLParseError{"YAML alias references undefined anchor"}, - anchor_name_{std::move(anchor_name)} {} + anchor_name_{anchor_name} {} - [[nodiscard]] auto anchor() const noexcept -> const std::string & { + [[nodiscard]] auto anchor() const noexcept -> std::string_view { return this->anchor_name_; } diff --git a/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h b/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h index f6d627b2..fce5a85e 100644 --- a/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h +++ b/vendor/core/src/extension/alterschema/common/content_schema_without_media_type.h @@ -1,4 +1,7 @@ class ContentSchemaWithoutMediaType final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"contentSchema"}; + public: ContentSchemaWithoutMediaType() : SchemaTransformRule{ @@ -10,20 +13,22 @@ class ContentSchemaWithoutMediaType final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { ONLY_CONTINUE_IF(vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2020_12_Content, Vocabularies::Known::JSON_Schema_2019_09_Content}) && - schema.is_object() && schema.defines("contentSchema") && + schema.is_object() && schema.defines(KEYWORD) && !schema.defines("contentMediaType")); - return APPLIES_TO_KEYWORDS("contentSchema"); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("contentSchema"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h b/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h index 687a8835..cf39f146 100644 --- a/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h +++ b/vendor/core/src/extension/alterschema/common/duplicate_allof_branches.h @@ -56,7 +56,7 @@ class DuplicateAllOfBranches final : public SchemaTransformRule { schema.assign("allOf", std::move(result)); } - [[nodiscard]] auto rereference(const std::string &, const Pointer &, + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, const Pointer &target, const Pointer ¤t) const -> Pointer override { diff --git a/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h b/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h index c0959146..1465628b 100644 --- a/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h +++ b/vendor/core/src/extension/alterschema/common/duplicate_anyof_branches.h @@ -56,7 +56,7 @@ class DuplicateAnyOfBranches final : public SchemaTransformRule { schema.assign("anyOf", std::move(result)); } - [[nodiscard]] auto rereference(const std::string &, const Pointer &, + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, const Pointer &target, const Pointer ¤t) const -> Pointer override { diff --git a/vendor/core/src/extension/alterschema/common/else_empty.h b/vendor/core/src/extension/alterschema/common/else_empty.h index 1dfb6e7b..fe883ad5 100644 --- a/vendor/core/src/extension/alterschema/common/else_empty.h +++ b/vendor/core/src/extension/alterschema/common/else_empty.h @@ -1,4 +1,7 @@ class ElseEmpty final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"else"}; + public: ElseEmpty() : SchemaTransformRule{"else_empty", @@ -7,7 +10,7 @@ class ElseEmpty final : public SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF( @@ -15,15 +18,17 @@ class ElseEmpty final : public SchemaTransformRule { {Vocabularies::Known::JSON_Schema_2020_12_Applicator, Vocabularies::Known::JSON_Schema_2019_09_Applicator, Vocabularies::Known::JSON_Schema_Draft_7}) && - schema.is_object() && schema.defines("else") && - is_schema(schema.at("else")) && is_empty_schema(schema.at("else")) && - (schema.at("else").is_object() || + schema.is_object() && schema.defines(KEYWORD) && + is_schema(schema.at(KEYWORD)) && is_empty_schema(schema.at(KEYWORD)) && + (schema.at(KEYWORD).is_object() || (!schema.defines("if") || !(schema.at("if").is_boolean() && schema.at("if").to_boolean())))); - return APPLIES_TO_KEYWORDS("else"); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("else"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/else_without_if.h b/vendor/core/src/extension/alterschema/common/else_without_if.h index da3349c5..7b871dd0 100644 --- a/vendor/core/src/extension/alterschema/common/else_without_if.h +++ b/vendor/core/src/extension/alterschema/common/else_without_if.h @@ -1,4 +1,7 @@ class ElseWithoutIf final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"else"}; + public: ElseWithoutIf() : SchemaTransformRule{"else_without_if", @@ -9,8 +12,8 @@ class ElseWithoutIf final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -18,12 +21,14 @@ class ElseWithoutIf final : public SchemaTransformRule { {Vocabularies::Known::JSON_Schema_2020_12_Applicator, Vocabularies::Known::JSON_Schema_2019_09_Applicator, Vocabularies::Known::JSON_Schema_Draft_7}) && - schema.is_object() && schema.defines("else") && + schema.is_object() && schema.defines(KEYWORD) && !schema.defines("if")); - return APPLIES_TO_KEYWORDS("else"); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("else"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/if_without_then_else.h b/vendor/core/src/extension/alterschema/common/if_without_then_else.h index a767054e..3f19961c 100644 --- a/vendor/core/src/extension/alterschema/common/if_without_then_else.h +++ b/vendor/core/src/extension/alterschema/common/if_without_then_else.h @@ -1,4 +1,7 @@ class IfWithoutThenElse final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"if"}; + public: IfWithoutThenElse() : SchemaTransformRule{ @@ -10,8 +13,8 @@ class IfWithoutThenElse final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -19,12 +22,14 @@ class IfWithoutThenElse final : public SchemaTransformRule { {Vocabularies::Known::JSON_Schema_2020_12_Applicator, Vocabularies::Known::JSON_Schema_2019_09_Applicator, Vocabularies::Known::JSON_Schema_Draft_7}) && - schema.is_object() && schema.defines("if") && + schema.is_object() && schema.defines(KEYWORD) && !schema.defines("then") && !schema.defines("else")); - return APPLIES_TO_KEYWORDS("if"); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("if"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/ignored_metaschema.h b/vendor/core/src/extension/alterschema/common/ignored_metaschema.h index 8831cff1..4fa07a18 100644 --- a/vendor/core/src/extension/alterschema/common/ignored_metaschema.h +++ b/vendor/core/src/extension/alterschema/common/ignored_metaschema.h @@ -18,8 +18,8 @@ class IgnoredMetaschema final : public SchemaTransformRule { ONLY_CONTINUE_IF(schema.is_object() && schema.defines("$schema") && schema.at("$schema").is_string()); const auto dialect{sourcemeta::core::dialect(schema)}; - ONLY_CONTINUE_IF(dialect.has_value()); - ONLY_CONTINUE_IF(dialect.value() != location.dialect); + ONLY_CONTINUE_IF(!dialect.empty()); + ONLY_CONTINUE_IF(dialect != location.dialect); return APPLIES_TO_KEYWORDS("$schema"); } diff --git a/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h b/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h index 84317b21..6b7f39e2 100644 --- a/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h +++ b/vendor/core/src/extension/alterschema/common/non_applicable_additional_items.h @@ -1,4 +1,7 @@ class NonApplicableAdditionalItems final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"additionalItems"}; + public: NonApplicableAdditionalItems() : SchemaTransformRule{ @@ -10,8 +13,8 @@ class NonApplicableAdditionalItems final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -21,18 +24,20 @@ class NonApplicableAdditionalItems final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_6, Vocabularies::Known::JSON_Schema_Draft_4, Vocabularies::Known::JSON_Schema_Draft_3}) && - schema.is_object() && schema.defines("additionalItems")); + schema.is_object() && schema.defines(KEYWORD)); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); if (schema.defines("items") && is_schema(schema.at("items"))) { - return APPLIES_TO_KEYWORDS("additionalItems", "items"); + return APPLIES_TO_KEYWORDS(KEYWORD, "items"); } else if (!schema.defines("items")) { - return APPLIES_TO_KEYWORDS("additionalItems"); + return APPLIES_TO_KEYWORDS(KEYWORD); } else { return false; } } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("additionalItems"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h b/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h index 4d1445ac..98bebe19 100644 --- a/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h +++ b/vendor/core/src/extension/alterschema/common/non_applicable_type_specific_keywords.h @@ -9,8 +9,8 @@ class NonApplicableTypeSpecificKeywords final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -73,6 +73,12 @@ class NonApplicableTypeSpecificKeywords final : public SchemaTransformRule { // If none of the types that the keyword applies to is a valid // type for the current schema, then by definition we can remove it if ((metadata.instances & current_types).none()) { + // Skip keywords that have references pointing to them + if (frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(entry.first)})) { + continue; + } + positions.push_back(Pointer{entry.first}); } } diff --git a/vendor/core/src/extension/alterschema/common/not_false.h b/vendor/core/src/extension/alterschema/common/not_false.h index 5c021943..f9016890 100644 --- a/vendor/core/src/extension/alterschema/common/not_false.h +++ b/vendor/core/src/extension/alterschema/common/not_false.h @@ -1,4 +1,7 @@ class NotFalse final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"not"}; + public: NotFalse() : SchemaTransformRule{"not_false", @@ -8,7 +11,7 @@ class NotFalse final : public SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF(vocabularies.contains_any( @@ -17,13 +20,15 @@ class NotFalse final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_7, Vocabularies::Known::JSON_Schema_Draft_6, Vocabularies::Known::JSON_Schema_Draft_4}) && - schema.is_object() && schema.defines("not") && - schema.at("not").is_boolean() && - !schema.at("not").to_boolean()); - return APPLIES_TO_KEYWORDS("not"); + schema.is_object() && schema.defines(KEYWORD) && + schema.at(KEYWORD).is_boolean() && + !schema.at(KEYWORD).to_boolean()); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("not"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/orphan_definitions.h b/vendor/core/src/extension/alterschema/common/orphan_definitions.h index 0ff2e3e6..426cd562 100644 --- a/vendor/core/src/extension/alterschema/common/orphan_definitions.h +++ b/vendor/core/src/extension/alterschema/common/orphan_definitions.h @@ -28,7 +28,6 @@ class OrphanDefinitions final : public SchemaTransformRule { schema.defines("definitions")}; ONLY_CONTINUE_IF(has_defs || has_definitions); - const auto prefix_size{location.pointer.size()}; bool has_external_to_defs{false}; bool has_external_to_definitions{false}; std::unordered_set outside_referenced_defs; @@ -37,16 +36,16 @@ class OrphanDefinitions final : public SchemaTransformRule { for (const auto &[key, reference] : frame.references()) { const auto destination_location{frame.traverse(reference.destination)}; if (destination_location.has_value()) { + const auto &destination_pointer{destination_location->get().pointer}; if (has_defs) { - process_reference(key.second, destination_location->get().pointer, - location.pointer, prefix_size, "$defs", - has_external_to_defs, outside_referenced_defs); + process_reference(key.second, destination_pointer, location.pointer, + "$defs", has_external_to_defs, + outside_referenced_defs); } if (has_definitions) { - process_reference(key.second, destination_location->get().pointer, - location.pointer, prefix_size, "definitions", - has_external_to_definitions, + process_reference(key.second, destination_pointer, location.pointer, + "definitions", has_external_to_definitions, outside_referenced_definitions); } } @@ -77,18 +76,16 @@ class OrphanDefinitions final : public SchemaTransformRule { } private: - static auto - process_reference(const Pointer &source_pointer, - const Pointer &destination_pointer, const Pointer &prefix, - const std::size_t prefix_size, std::string_view container, - bool &has_external, - std::unordered_set &referenced) -> void { + static auto process_reference( + const WeakPointer &source_pointer, const WeakPointer &destination_pointer, + const WeakPointer &prefix, std::string_view container, bool &has_external, + std::unordered_set &referenced) -> void { if (!destination_pointer.starts_with(prefix, container) || - destination_pointer.size() <= prefix_size + 1) { + destination_pointer.size() <= prefix.size() + 1) { return; } - const auto &entry_token{destination_pointer.at(prefix_size + 1)}; + const auto &entry_token{destination_pointer.at(prefix.size() + 1)}; if (entry_token.is_property()) { const auto &entry_name{entry_token.to_property()}; if (!source_pointer.starts_with(prefix, container)) { diff --git a/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h b/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h index 7be8acca..8cddcb46 100644 --- a/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h +++ b/vendor/core/src/extension/alterschema/common/required_properties_in_properties.h @@ -77,11 +77,12 @@ class RequiredPropertiesInProperties final : public SchemaTransformRule { const SchemaResolver &resolver, const JSON::String &property) const -> bool { if (location.parent.has_value()) { + const auto &parent_pointer{location.parent.value()}; const auto relative_pointer{ - location.pointer.resolve_from(location.parent.value())}; + location.pointer.resolve_from(parent_pointer)}; assert(!relative_pointer.empty() && relative_pointer.at(0).is_property()); const auto parent{ - frame.traverse(frame.uri(location.parent.value()).value().get())}; + frame.traverse(frame.uri(parent_pointer).value().get())}; assert(parent.has_value()); const auto type{walker(relative_pointer.at(0).to_property(), frame.vocabularies(parent.value().get(), resolver)) @@ -91,8 +92,8 @@ class RequiredPropertiesInProperties final : public SchemaTransformRule { type == SchemaKeywordType::ApplicatorValueInPlaceMaybe || type == SchemaKeywordType::ApplicatorValueInPlaceNegate || type == SchemaKeywordType::ApplicatorValueInPlaceOther) { - return this->defined_in_properties_sibling( - get(root, location.parent.value()), property); + return this->defined_in_properties_sibling(get(root, parent_pointer), + property); } } diff --git a/vendor/core/src/extension/alterschema/common/then_empty.h b/vendor/core/src/extension/alterschema/common/then_empty.h index 77baf134..36c1d220 100644 --- a/vendor/core/src/extension/alterschema/common/then_empty.h +++ b/vendor/core/src/extension/alterschema/common/then_empty.h @@ -1,4 +1,7 @@ class ThenEmpty final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"then"}; + public: ThenEmpty() : SchemaTransformRule{"then_empty", @@ -7,7 +10,7 @@ class ThenEmpty final : public SchemaTransformRule { [[nodiscard]] auto condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, - const SchemaFrame &, const SchemaFrame::Location &, + const SchemaFrame &frame, const SchemaFrame::Location &location, const SchemaWalker &, const SchemaResolver &) const -> SchemaTransformRule::Result override { ONLY_CONTINUE_IF( @@ -15,15 +18,17 @@ class ThenEmpty final : public SchemaTransformRule { {Vocabularies::Known::JSON_Schema_2020_12_Applicator, Vocabularies::Known::JSON_Schema_2019_09_Applicator, Vocabularies::Known::JSON_Schema_Draft_7}) && - schema.is_object() && schema.defines("then") && - is_schema(schema.at("then")) && is_empty_schema(schema.at("then")) && - (schema.at("then").is_object() || + schema.is_object() && schema.defines(KEYWORD) && + is_schema(schema.at(KEYWORD)) && is_empty_schema(schema.at(KEYWORD)) && + (schema.at(KEYWORD).is_object() || (!schema.defines("if") || !(schema.at("if").is_boolean() && schema.at("if").to_boolean())))); - return APPLIES_TO_KEYWORDS("then"); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("then"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/then_without_if.h b/vendor/core/src/extension/alterschema/common/then_without_if.h index b1e3c5ab..ed557568 100644 --- a/vendor/core/src/extension/alterschema/common/then_without_if.h +++ b/vendor/core/src/extension/alterschema/common/then_without_if.h @@ -1,4 +1,7 @@ class ThenWithoutIf final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"then"}; + public: ThenWithoutIf() : SchemaTransformRule{"then_without_if", @@ -9,8 +12,8 @@ class ThenWithoutIf final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -18,12 +21,14 @@ class ThenWithoutIf final : public SchemaTransformRule { {Vocabularies::Known::JSON_Schema_2020_12_Applicator, Vocabularies::Known::JSON_Schema_2019_09_Applicator, Vocabularies::Known::JSON_Schema_Draft_7}) && - schema.is_object() && schema.defines("then") && + schema.is_object() && schema.defines(KEYWORD) && !schema.defines("if")); - return APPLIES_TO_KEYWORDS("then"); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("then"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/unknown_local_ref.h b/vendor/core/src/extension/alterschema/common/unknown_local_ref.h index fea9ab97..c204a319 100644 --- a/vendor/core/src/extension/alterschema/common/unknown_local_ref.h +++ b/vendor/core/src/extension/alterschema/common/unknown_local_ref.h @@ -1,4 +1,7 @@ class UnknownLocalRef final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"$ref"}; + public: UnknownLocalRef() : SchemaTransformRule{ @@ -22,37 +25,35 @@ class UnknownLocalRef final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_6, Vocabularies::Known::JSON_Schema_Draft_4, Vocabularies::Known::JSON_Schema_Draft_3})); - ONLY_CONTINUE_IF(schema.is_object() && schema.defines("$ref") && - schema.at("$ref").is_string()); + ONLY_CONTINUE_IF(schema.is_object() && schema.defines(KEYWORD) && + schema.at(KEYWORD).is_string()); // Find the keyword location entry - const auto absolute_ref_pointer{location.pointer.concat({"$ref"})}; - const auto reference_entry{frame.references().find( - {SchemaReferenceType::Static, absolute_ref_pointer})}; - ONLY_CONTINUE_IF(reference_entry != frame.references().end()); + auto keyword_pointer{location.pointer}; + keyword_pointer.push_back(std::cref(KEYWORD)); + const auto reference_entry{ + frame.reference(SchemaReferenceType::Static, keyword_pointer)}; + ONLY_CONTINUE_IF(reference_entry.has_value()); // If the keyword has no fragment, continue - const auto &reference_fragment{reference_entry->second.fragment}; + const auto &reference_fragment{reference_entry->get().fragment}; ONLY_CONTINUE_IF(reference_fragment.has_value()); // Only continue if the reference target does not exist - const auto target_location{frame.locations().find( - {SchemaReferenceType::Static, reference_entry->second.destination})}; - ONLY_CONTINUE_IF(target_location == frame.locations().end()); + ONLY_CONTINUE_IF( + !frame.traverse(reference_entry->get().destination).has_value()); // If there is a base beyond the fragment, the base must exist. // Otherwise it is likely an external reference? - const auto &reference_base{reference_entry->second.base}; - if (reference_base.has_value()) { - const auto base_location{frame.locations().find( - {SchemaReferenceType::Static, reference_base.value()})}; - ONLY_CONTINUE_IF(base_location != frame.locations().end()); + const auto &reference_base{reference_entry->get().base}; + if (!reference_base.empty()) { + ONLY_CONTINUE_IF(frame.traverse(reference_base).has_value()); } - return APPLIES_TO_KEYWORDS("$ref"); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("$ref"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h b/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h index 4541a864..0ab990ba 100644 --- a/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h +++ b/vendor/core/src/extension/alterschema/common/unnecessary_allof_wrapper.h @@ -1,4 +1,7 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"allOf"}; + public: UnnecessaryAllOfWrapper() : SchemaTransformRule{"unnecessary_allof_wrapper", @@ -16,9 +19,9 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_7, Vocabularies::Known::JSON_Schema_Draft_6, Vocabularies::Known::JSON_Schema_Draft_4})); - ONLY_CONTINUE_IF(schema.is_object() && schema.defines("allOf") && - schema.at("allOf").is_array() && - !schema.at("allOf").empty()); + ONLY_CONTINUE_IF(schema.is_object() && schema.defines(KEYWORD) && + schema.at(KEYWORD).is_array() && + !schema.at(KEYWORD).empty()); std::unordered_set dependency_blocked; for (const auto &entry : schema.as_object()) { @@ -39,7 +42,7 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { ? parse_schema_type(schema.at("type")) : JSON::TypeSet{}}; - const auto &all_of{schema.at("allOf")}; + const auto &all_of{schema.at(KEYWORD)}; std::vector locations; std::unordered_set elevated; @@ -52,9 +55,10 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { } // Skip entries that have direct references pointing to them - const auto entry_pointer{ - location.relative_pointer.concat({"allOf", index - 1})}; - if (!frame.references_to(entry_pointer).empty()) { + auto entry_pointer{location.pointer}; + entry_pointer.push_back(std::cref(KEYWORD)); + entry_pointer.push_back(index - 1); + if (frame.has_references_to(entry_pointer)) { continue; } @@ -92,7 +96,7 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { continue; } - locations.push_back(Pointer{"allOf", index - 1, keyword}); + locations.push_back(Pointer{KEYWORD, index - 1, keyword}); elevated.emplace(keyword); for (const auto &dependency : metadata.dependencies) { @@ -113,17 +117,17 @@ class UnnecessaryAllOfWrapper final : public SchemaTransformRule { const auto allof_index{location.at(1).to_index()}; const auto &keyword{location.at(2).to_property()}; schema.try_assign_before( - keyword, schema.at("allOf").at(allof_index).at(keyword), "allOf"); - schema.at("allOf").at(allof_index).erase(keyword); + keyword, schema.at(KEYWORD).at(allof_index).at(keyword), KEYWORD); + schema.at(KEYWORD).at(allof_index).erase(keyword); } } - [[nodiscard]] auto rereference(const std::string &, const Pointer &, + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, const Pointer &target, const Pointer ¤t) const -> Pointer override { // The rule moves keywords from /allOf// to / - const auto allof_prefix{current.concat({"allOf"})}; + const auto allof_prefix{current.concat({KEYWORD})}; const auto relative{target.resolve_from(allof_prefix)}; const auto &keyword{relative.at(1).to_property()}; const Pointer old_prefix{allof_prefix.concat({relative.at(0), keyword})}; diff --git a/vendor/core/src/extension/alterschema/linter/additional_properties_default.h b/vendor/core/src/extension/alterschema/linter/additional_properties_default.h index ce81205f..0e1eed09 100644 --- a/vendor/core/src/extension/alterschema/linter/additional_properties_default.h +++ b/vendor/core/src/extension/alterschema/linter/additional_properties_default.h @@ -1,4 +1,7 @@ class AdditionalPropertiesDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"additionalProperties"}; + public: AdditionalPropertiesDefault() : SchemaTransformRule{ @@ -10,32 +13,32 @@ class AdditionalPropertiesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { - ONLY_CONTINUE_IF(vocabularies.contains_any( - {Vocabularies::Known::JSON_Schema_2020_12_Applicator, - Vocabularies::Known::JSON_Schema_2019_09_Applicator, - Vocabularies::Known::JSON_Schema_Draft_7, - Vocabularies::Known::JSON_Schema_Draft_6, - Vocabularies::Known::JSON_Schema_Draft_4, - Vocabularies::Known::JSON_Schema_Draft_3, - Vocabularies::Known::JSON_Schema_Draft_2, - Vocabularies::Known::JSON_Schema_Draft_2_Hyper, - Vocabularies::Known::JSON_Schema_Draft_1, - Vocabularies::Known::JSON_Schema_Draft_1_Hyper}) && - schema.is_object() && - schema.defines("additionalProperties") && - ((schema.at("additionalProperties").is_boolean() && - schema.at("additionalProperties").to_boolean()) || - (schema.at("additionalProperties").is_object() && - schema.at("additionalProperties").empty()))); - return APPLIES_TO_KEYWORDS("additionalProperties"); + ONLY_CONTINUE_IF( + vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Applicator, + Vocabularies::Known::JSON_Schema_2019_09_Applicator, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_2, + Vocabularies::Known::JSON_Schema_Draft_2_Hyper, + Vocabularies::Known::JSON_Schema_Draft_1, + Vocabularies::Known::JSON_Schema_Draft_1_Hyper}) && + schema.is_object() && schema.defines(KEYWORD) && + ((schema.at(KEYWORD).is_boolean() && schema.at(KEYWORD).to_boolean()) || + (schema.at(KEYWORD).is_object() && schema.at(KEYWORD).empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("additionalProperties"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/linter/content_schema_default.h b/vendor/core/src/extension/alterschema/linter/content_schema_default.h index 536acbf7..b4827230 100644 --- a/vendor/core/src/extension/alterschema/linter/content_schema_default.h +++ b/vendor/core/src/extension/alterschema/linter/content_schema_default.h @@ -1,4 +1,7 @@ class ContentSchemaDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"contentSchema"}; + public: ContentSchemaDefault() : SchemaTransformRule{ @@ -10,23 +13,24 @@ class ContentSchemaDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { - ONLY_CONTINUE_IF(vocabularies.contains_any( - {Vocabularies::Known::JSON_Schema_2020_12_Content, - Vocabularies::Known::JSON_Schema_2019_09_Content}) && - schema.is_object() && schema.defines("contentSchema") && - ((schema.at("contentSchema").is_boolean() && - schema.at("contentSchema").to_boolean()) || - (schema.at("contentSchema").is_object() && - schema.at("contentSchema").empty()))); - return APPLIES_TO_KEYWORDS("contentSchema"); + ONLY_CONTINUE_IF( + vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Content, + Vocabularies::Known::JSON_Schema_2019_09_Content}) && + schema.is_object() && schema.defines(KEYWORD) && + ((schema.at(KEYWORD).is_boolean() && schema.at(KEYWORD).to_boolean()) || + (schema.at(KEYWORD).is_object() && schema.at(KEYWORD).empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("contentSchema"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h b/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h index 4571b7ea..9efe13e3 100644 --- a/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h +++ b/vendor/core/src/extension/alterschema/linter/definitions_to_defs.h @@ -26,7 +26,7 @@ class DefinitionsToDefs final : public SchemaTransformRule { schema.rename("definitions", "$defs"); } - [[nodiscard]] auto rereference(const std::string &, const Pointer &, + [[nodiscard]] auto rereference(const std::string_view, const Pointer &, const Pointer &target, const Pointer ¤t) const -> Pointer override { diff --git a/vendor/core/src/extension/alterschema/linter/dependencies_default.h b/vendor/core/src/extension/alterschema/linter/dependencies_default.h index ec5cb10b..cb84cfe3 100644 --- a/vendor/core/src/extension/alterschema/linter/dependencies_default.h +++ b/vendor/core/src/extension/alterschema/linter/dependencies_default.h @@ -1,4 +1,7 @@ class DependenciesDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"dependencies"}; + public: DependenciesDefault() : SchemaTransformRule{ @@ -10,8 +13,8 @@ class DependenciesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -20,13 +23,14 @@ class DependenciesDefault final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_6, Vocabularies::Known::JSON_Schema_Draft_4, Vocabularies::Known::JSON_Schema_Draft_3}) && - schema.is_object() && schema.defines("dependencies") && - schema.at("dependencies").is_object() && - schema.at("dependencies").empty()); - return APPLIES_TO_KEYWORDS("dependencies"); + schema.is_object() && schema.defines(KEYWORD) && + schema.at(KEYWORD).is_object() && schema.at(KEYWORD).empty()); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("dependencies"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/linter/items_schema_default.h b/vendor/core/src/extension/alterschema/linter/items_schema_default.h index 1425ab79..8d77ee8a 100644 --- a/vendor/core/src/extension/alterschema/linter/items_schema_default.h +++ b/vendor/core/src/extension/alterschema/linter/items_schema_default.h @@ -1,4 +1,7 @@ class ItemsSchemaDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"items"}; + public: ItemsSchemaDefault() : SchemaTransformRule{"items_schema_default", @@ -9,8 +12,8 @@ class ItemsSchemaDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -26,13 +29,15 @@ class ItemsSchemaDefault final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_Draft_2_Hyper, Vocabularies::Known::JSON_Schema_Draft_1, Vocabularies::Known::JSON_Schema_Draft_1_Hyper}) && - schema.is_object() && schema.defines("items") && - ((schema.at("items").is_boolean() && schema.at("items").to_boolean()) || - (schema.at("items").is_object() && schema.at("items").empty()))); - return APPLIES_TO_KEYWORDS("items"); + schema.is_object() && schema.defines(KEYWORD) && + ((schema.at(KEYWORD).is_boolean() && schema.at(KEYWORD).to_boolean()) || + (schema.at(KEYWORD).is_object() && schema.at(KEYWORD).empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("items"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/linter/property_names_default.h b/vendor/core/src/extension/alterschema/linter/property_names_default.h index 0bb4fcc8..49cd48f0 100644 --- a/vendor/core/src/extension/alterschema/linter/property_names_default.h +++ b/vendor/core/src/extension/alterschema/linter/property_names_default.h @@ -1,4 +1,7 @@ class PropertyNamesDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"propertyNames"}; + public: PropertyNamesDefault() : SchemaTransformRule{ @@ -10,8 +13,8 @@ class PropertyNamesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -20,13 +23,15 @@ class PropertyNamesDefault final : public SchemaTransformRule { Vocabularies::Known::JSON_Schema_2019_09_Applicator, Vocabularies::Known::JSON_Schema_Draft_7, Vocabularies::Known::JSON_Schema_Draft_6}) && - schema.is_object() && schema.defines("propertyNames") && - schema.at("propertyNames").is_object() && - schema.at("propertyNames").empty()); - return APPLIES_TO_KEYWORDS("propertyNames"); + schema.is_object() && schema.defines(KEYWORD) && + schema.at(KEYWORD).is_object() && + schema.at(KEYWORD).empty()); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("propertyNames"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h b/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h index e4e36519..07cf73ef 100644 --- a/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h +++ b/vendor/core/src/extension/alterschema/linter/simple_properties_identifiers.h @@ -46,8 +46,10 @@ class SimplePropertiesIdentifiers final : public SchemaTransformRule { } } else { // Skip pre-vocabulary meta-schemas + JSON::String base_with_hash{location.base}; + base_with_hash += '#'; ONLY_CONTINUE_IF(location.base != location.dialect && - (location.base + "#") != location.dialect); + base_with_hash != location.dialect); } std::vector offenders; diff --git a/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h b/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h index 23bdebc9..7139b483 100644 --- a/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h +++ b/vendor/core/src/extension/alterschema/linter/unevaluated_items_default.h @@ -1,4 +1,7 @@ class UnevaluatedItemsDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"unevaluatedItems"}; + public: UnevaluatedItemsDefault() : SchemaTransformRule{ @@ -10,8 +13,8 @@ class UnevaluatedItemsDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -19,15 +22,15 @@ class UnevaluatedItemsDefault final : public SchemaTransformRule { vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2020_12_Unevaluated, Vocabularies::Known::JSON_Schema_2019_09_Applicator}) && - schema.is_object() && schema.defines("unevaluatedItems") && - ((schema.at("unevaluatedItems").is_boolean() && - schema.at("unevaluatedItems").to_boolean()) || - (schema.at("unevaluatedItems").is_object() && - schema.at("unevaluatedItems").empty()))); - return APPLIES_TO_KEYWORDS("unevaluatedItems"); + schema.is_object() && schema.defines(KEYWORD) && + ((schema.at(KEYWORD).is_boolean() && schema.at(KEYWORD).to_boolean()) || + (schema.at(KEYWORD).is_object() && schema.at(KEYWORD).empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("unevaluatedItems"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h b/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h index b9dc5bf1..f241c66f 100644 --- a/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h +++ b/vendor/core/src/extension/alterschema/linter/unevaluated_properties_default.h @@ -1,4 +1,7 @@ class UnevaluatedPropertiesDefault final : public SchemaTransformRule { +private: + static inline const std::string KEYWORD{"unevaluatedProperties"}; + public: UnevaluatedPropertiesDefault() : SchemaTransformRule{ @@ -10,8 +13,8 @@ class UnevaluatedPropertiesDefault final : public SchemaTransformRule { condition(const sourcemeta::core::JSON &schema, const sourcemeta::core::JSON &, const sourcemeta::core::Vocabularies &vocabularies, - const sourcemeta::core::SchemaFrame &, - const sourcemeta::core::SchemaFrame::Location &, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { @@ -19,15 +22,15 @@ class UnevaluatedPropertiesDefault final : public SchemaTransformRule { vocabularies.contains_any( {Vocabularies::Known::JSON_Schema_2020_12_Unevaluated, Vocabularies::Known::JSON_Schema_2019_09_Applicator}) && - schema.is_object() && schema.defines("unevaluatedProperties") && - ((schema.at("unevaluatedProperties").is_boolean() && - schema.at("unevaluatedProperties").to_boolean()) || - (schema.at("unevaluatedProperties").is_object() && - schema.at("unevaluatedProperties").empty()))); - return APPLIES_TO_KEYWORDS("unevaluatedProperties"); + schema.is_object() && schema.defines(KEYWORD) && + ((schema.at(KEYWORD).is_boolean() && schema.at(KEYWORD).to_boolean()) || + (schema.at(KEYWORD).is_object() && schema.at(KEYWORD).empty()))); + ONLY_CONTINUE_IF(!frame.has_references_through( + location.pointer, WeakPointer::Token{std::cref(KEYWORD)})); + return APPLIES_TO_KEYWORDS(KEYWORD); } auto transform(JSON &schema, const Result &) const -> void override { - schema.erase("unevaluatedProperties"); + schema.erase(KEYWORD); } }; diff --git a/vendor/core/src/extension/editorschema/editorschema.cc b/vendor/core/src/extension/editorschema/editorschema.cc index 81ac6894..4080abaf 100644 --- a/vendor/core/src/extension/editorschema/editorschema.cc +++ b/vendor/core/src/extension/editorschema/editorschema.cc @@ -10,10 +10,11 @@ namespace { // See https://arxiv.org/abs/2503.11288 for an academic study of this topic auto top_dynamic_anchor_location( const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::Pointer ¤t, - const sourcemeta::core::JSON::String &fragment, + const sourcemeta::core::WeakPointer ¤t, + const std::string_view fragment, const sourcemeta::core::JSON::String &default_uri) - -> std::optional { + -> std::optional< + std::reference_wrapper> { // Get the location object of where we are at the moment const auto uri{frame.uri(current)}; assert(uri.has_value()); @@ -23,7 +24,9 @@ auto top_dynamic_anchor_location( // Try to locate an anchor with the given name on the current base assert(!fragment.starts_with('#')); - const auto anchor_uri{location.base + "#" + fragment}; + sourcemeta::core::JSON::String anchor_uri{location.base}; + anchor_uri += '#'; + anchor_uri += fragment; const auto anchor{frame.traverse(anchor_uri)}; if (location.parent.has_value()) { @@ -36,14 +39,14 @@ auto top_dynamic_anchor_location( // If we are at the top of the schema and it declares the dynamic anchor, we // should use that } else if (anchor.has_value()) { - return anchor.value().get().pointer; + return std::cref(anchor.value().get().pointer); // Otherwise, if we are at the top and the dynamic anchor is not there, use // the default we have so far } else { const auto default_location{frame.traverse(default_uri)}; assert(default_location.has_value()); - return default_location.value().get().pointer; + return std::cref(default_location.value().get().pointer); } } @@ -51,94 +54,137 @@ auto top_dynamic_anchor_location( namespace sourcemeta::core { +// Collected information about a reference to modify +struct ReferenceChange { + Pointer pointer; + JSON::String new_value; + JSON::String keyword; + bool rename_to_ref; +}; + +// Collected information about a subschema to modify +struct SubschemaChange { + Pointer pointer; + SchemaBaseDialect base_dialect; + bool add_schema_declaration; + bool erase_2020_12_keywords; + bool erase_2019_09_keywords; +}; + auto for_editor(JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + std::string_view default_dialect) -> void { // (1) Bring in all of the references bundle(schema, walker, resolver, default_dialect); - // (2) Re-frame before changing anything - SchemaFrame frame{SchemaFrame::Mode::References}; - frame.analyse(schema, walker, resolver, default_dialect); + // (2) Frame the schema and collect all changes we need to make + std::vector reference_changes; + std::vector subschema_changes; + + { + SchemaFrame frame{SchemaFrame::Mode::References}; + frame.analyse(schema, walker, resolver, default_dialect); + + // Collect reference changes + for (const auto &[key, reference] : frame.references()) { + assert(!key.second.empty()); + assert(key.second.back().is_property()); + const auto &keyword{key.second.back().to_property()}; + + if (key.first == SchemaReferenceType::Dynamic) { + if (reference.fragment.has_value()) { + const auto destination{top_dynamic_anchor_location( + frame, key.second, reference.fragment.value(), + reference.destination)}; + if (!destination.has_value()) { + continue; + } + + reference_changes.push_back( + {to_pointer(key.second), + to_uri(destination.value().get()).recompose(), keyword, true}); + } else { + reference_changes.push_back( + {to_pointer(key.second), "", keyword, true}); + } + } else { + if (keyword == "$schema") { + const auto uri{frame.uri(key.second)}; + assert(uri.has_value()); + const auto origin{frame.traverse(uri.value().get())}; + assert(origin.has_value()); + reference_changes.push_back( + {to_pointer(key.second), + JSON::String{to_string(origin.value().get().base_dialect)}, + keyword, false}); + continue; + } - // (3) Pre-process all subschemas - for (const auto &entry : frame.locations()) { - if (entry.second.type != SchemaFrame::LocationType::Resource && - entry.second.type != SchemaFrame::LocationType::Subschema) { - continue; + const auto result{frame.traverse(reference.destination)}; + if (result.has_value()) { + const bool should_rename = + keyword == "$dynamicRef" || keyword == "$recursiveRef"; + reference_changes.push_back( + {to_pointer(key.second), + to_uri(result.value().get().pointer).recompose(), keyword, + should_rename}); + } else { + reference_changes.push_back( + {to_pointer(key.second), reference.destination, keyword, false}); + } + } } - auto &subschema{get(schema, entry.second.pointer)}; - if (subschema.is_boolean()) { - continue; - } + // Collect subschema changes + for (const auto &entry : frame.locations()) { + if (entry.second.type != SchemaFrame::LocationType::Resource && + entry.second.type != SchemaFrame::LocationType::Subschema) { + continue; + } - // Make sure that the top-level schema ALWAYS has a `$schema` declaration - if (entry.second.pointer.empty() && !subschema.defines("$schema")) { - subschema.assign_assume_new("$schema", JSON{entry.second.base_dialect}); - } + const auto &subschema{get(schema, entry.second.pointer)}; + if (subschema.is_boolean()) { + continue; + } - // Get rid of the keywords we don't want anymore - anonymize(subschema, entry.second.base_dialect); - const auto vocabularies{frame.vocabularies(entry.second, resolver)}; - if (vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core)) { - subschema.erase_keys({"$vocabulary", "$anchor", "$dynamicAnchor"}); - } else if (vocabularies.contains( - Vocabularies::Known::JSON_Schema_2019_09_Core)) { - subschema.erase_keys({"$vocabulary", "$anchor", "$recursiveAnchor"}); + const bool add_schema = + entry.second.pointer.empty() && !subschema.defines("$schema"); + const auto vocabularies{frame.vocabularies(entry.second, resolver)}; + + subschema_changes.push_back( + {to_pointer(entry.second.pointer), entry.second.base_dialect, + add_schema, + vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core), + vocabularies.contains( + Vocabularies::Known::JSON_Schema_2019_09_Core)}); } } - // (4) Fix-up static and dynamic references - for (const auto &[key, reference] : frame.references()) { - assert(!key.second.empty()); - assert(key.second.back().is_property()); - const auto &keyword{key.second.back().to_property()}; - - if (key.first == SchemaReferenceType::Dynamic) { - if (reference.fragment.has_value()) { - auto destination{top_dynamic_anchor_location(frame, key.second, - reference.fragment.value(), - reference.destination)}; - if (!destination.has_value()) { - continue; - } + // (3) Apply reference changes + for (const auto &change : reference_changes) { + if (!change.new_value.empty()) { + set(schema, change.pointer, JSON{change.new_value}); + } + if (change.rename_to_ref) { + get(schema, change.pointer.initial()).rename(change.keyword, "$ref"); + } + } - set(schema, key.second, - JSON{to_uri(std::move(destination).value()).recompose()}); - } + // (4) Apply subschema changes + for (const auto &change : subschema_changes) { + auto &subschema{get(schema, change.pointer)}; - get(schema, key.second.initial()).rename(keyword, "$ref"); - } else { - // The `$schema` keyword is not allowed to take relative URIs (for - // example, pointers going from the root). Because we remove identifiers, - // the only sane thing we can do here is default it to the base dialect, - // which editors will likely understand - if (keyword == "$schema") { - const auto uri{frame.uri(key.second)}; - assert(uri.has_value()); - const auto origin{frame.traverse(uri.value().get())}; - assert(origin.has_value()); - set(schema, key.second, JSON{origin.value().get().base_dialect}); - continue; - } + if (change.add_schema_declaration) { + subschema.assign_assume_new( + "$schema", JSON{JSON::String{to_string(change.base_dialect)}}); + } - // As we get rid of identifiers, we rephrase every reference to be the URI - // representation of the JSON Pointer to the destination from the root - const auto result{frame.traverse(reference.destination)}; - if (result.has_value()) { - set(schema, key.second, - JSON{to_uri(result.value().get().pointer).recompose()}); - - // If we have a dynamic reference to a static location, - // we can just rename the keyword - if (keyword == "$dynamicRef" || keyword == "$recursiveRef") { - get(schema, key.second.initial()).rename(keyword, "$ref"); - } + anonymize(subschema, change.base_dialect); - } else { - set(schema, key.second, JSON{reference.destination}); - } + if (change.erase_2020_12_keywords) { + subschema.erase_keys({"$vocabulary", "$anchor", "$dynamicAnchor"}); + } else if (change.erase_2019_09_keywords) { + subschema.erase_keys({"$vocabulary", "$anchor", "$recursiveAnchor"}); } } } diff --git a/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h b/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h index 21de72f6..716c17a1 100644 --- a/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h +++ b/vendor/core/src/extension/editorschema/include/sourcemeta/core/editorschema.h @@ -17,8 +17,7 @@ #include #include -#include // std::optional, std::nullopt -#include // std::string +#include // std::string_view namespace sourcemeta::core { @@ -47,9 +46,9 @@ namespace sourcemeta::core { /// sourcemeta::core::schema_resolver); /// ``` SOURCEMETA_CORE_EDITORSCHEMA_EXPORT -auto for_editor( - JSON &schema, const SchemaWalker &walker, const SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> void; +auto for_editor(JSON &schema, const SchemaWalker &walker, + const SchemaResolver &resolver, + std::string_view default_dialect = "") -> void; } // namespace sourcemeta::core diff --git a/vendor/core/src/extension/options/include/sourcemeta/core/options.h b/vendor/core/src/extension/options/include/sourcemeta/core/options.h index 76aeb9c1..5880f75e 100644 --- a/vendor/core/src/extension/options/include/sourcemeta/core/options.h +++ b/vendor/core/src/extension/options/include/sourcemeta/core/options.h @@ -85,11 +85,11 @@ class SOURCEMETA_CORE_OPTIONS_EXPORT Options { -> void; /// Access the values (if any) set for an option or flag, by its main name - [[nodiscard]] auto at(std::string_view name) const + [[nodiscard]] auto at(const std::string_view name) const -> const std::vector &; /// Check if an option or flag was set, by its main name - [[nodiscard]] auto contains(std::string_view name) const -> bool; + [[nodiscard]] auto contains(const std::string_view name) const -> bool; /// Access the positional arguments, if any [[nodiscard]] auto positional() const diff --git a/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h b/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h index 65367a94..fd8f1a8c 100644 --- a/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h +++ b/vendor/core/src/extension/options/include/sourcemeta/core/options_error.h @@ -8,7 +8,6 @@ #include // std::exception #include // std::string #include // std::string_view -#include // std::move namespace sourcemeta::core { @@ -40,9 +39,11 @@ class SOURCEMETA_CORE_OPTIONS_EXPORT OptionsError : public std::exception { /// This class represents a unknown option error struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnknownOptionError : public OptionsError { - explicit OptionsUnknownOptionError(std::string option) - : OptionsError{"Unknown option"}, option_{std::move(option)} {} - [[nodiscard]] auto option() const -> const auto & { return this->option_; } + explicit OptionsUnknownOptionError(const std::string_view option) + : OptionsError{"Unknown option"}, option_{option} {} + [[nodiscard]] auto option() const noexcept -> std::string_view { + return this->option_; + } private: std::string option_; @@ -52,10 +53,11 @@ struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnknownOptionError /// This class represents a value being passed to a flag struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnexpectedValueFlagError : public OptionsError { - explicit OptionsUnexpectedValueFlagError(std::string option) - : OptionsError{"This flag cannot take a value"}, - option_{std::move(option)} {} - [[nodiscard]] auto option() const -> const auto & { return this->option_; } + explicit OptionsUnexpectedValueFlagError(const std::string_view option) + : OptionsError{"This flag cannot take a value"}, option_{option} {} + [[nodiscard]] auto option() const noexcept -> std::string_view { + return this->option_; + } private: std::string option_; @@ -65,10 +67,11 @@ struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsUnexpectedValueFlagError /// This class represents a missing value from an option struct SOURCEMETA_CORE_OPTIONS_EXPORT OptionsMissingOptionValueError : public OptionsError { - explicit OptionsMissingOptionValueError(std::string option) - : OptionsError{"This option must take a value"}, - option_{std::move(option)} {} - [[nodiscard]] auto option() const -> const auto & { return this->option_; } + explicit OptionsMissingOptionValueError(const std::string_view option) + : OptionsError{"This option must take a value"}, option_{option} {} + [[nodiscard]] auto option() const noexcept -> std::string_view { + return this->option_; + } private: std::string option_; diff --git a/vendor/core/src/extension/options/options.cc b/vendor/core/src/extension/options/options.cc index 12c0650e..b754701c 100644 --- a/vendor/core/src/extension/options/options.cc +++ b/vendor/core/src/extension/options/options.cc @@ -17,7 +17,7 @@ auto find_canonical_name(const T &aliases, const typename T::key_type &alias) -> const typename T::mapped_type & { const auto iterator{aliases.find(alias)}; if (iterator == aliases.cend()) { - throw sourcemeta::core::OptionsUnknownOptionError(std::string{alias}); + throw sourcemeta::core::OptionsUnknownOptionError(alias); } else { return iterator->second; } @@ -57,14 +57,14 @@ auto Options::flag(std::string &&name, this->flags.emplace(view); } -auto Options::at(std::string_view name) const +auto Options::at(const std::string_view name) const -> const std::vector & { assert(!name.empty()); const auto iterator{this->options_.find(name)}; return iterator == this->options_.cend() ? Options::EMPTY : iterator->second; } -auto Options::contains(std::string_view name) const -> bool { +auto Options::contains(const std::string_view name) const -> bool { return this->options_.contains(name); } @@ -104,7 +104,7 @@ auto Options::parse(const int argc, if (eq == std::string_view::npos) { this->options_[canonical].push_back(token.substr(2)); } else { - throw OptionsUnexpectedValueFlagError(std::string{name}); + throw OptionsUnexpectedValueFlagError(name); } } else if (eq != std::string_view::npos) { this->options_[canonical].push_back(token.substr(eq + 1)); @@ -112,7 +112,7 @@ auto Options::parse(const int argc, this->options_[canonical].emplace_back(next); index += 1; } else { - throw OptionsMissingOptionValueError(std::string{name}); + throw OptionsMissingOptionValueError(name); } // Parse short options @@ -132,7 +132,7 @@ auto Options::parse(const int argc, index += 1; break; } else { - throw OptionsMissingOptionValueError(std::string{name}); + throw OptionsMissingOptionValueError(name); } } diff --git a/vendor/core/src/lang/io/CMakeLists.txt b/vendor/core/src/lang/io/CMakeLists.txt index 1911c620..a890265e 100644 --- a/vendor/core/src/lang/io/CMakeLists.txt +++ b/vendor/core/src/lang/io/CMakeLists.txt @@ -1,4 +1,6 @@ -sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME io SOURCES io.cc) +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME io + PRIVATE_HEADERS error.h fileview.h + SOURCES io.cc io_fileview.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME io) diff --git a/vendor/core/src/lang/io/include/sourcemeta/core/io.h b/vendor/core/src/lang/io/include/sourcemeta/core/io.h index 821a09f3..ae03a233 100644 --- a/vendor/core/src/lang/io/include/sourcemeta/core/io.h +++ b/vendor/core/src/lang/io/include/sourcemeta/core/io.h @@ -5,6 +5,11 @@ #include #endif +// NOLINTBEGIN(misc-include-cleaner) +#include +#include +// NOLINTEND(misc-include-cleaner) + #include // assert #include // std::filesystem #include // std::basic_ifstream diff --git a/vendor/core/src/lang/io/include/sourcemeta/core/io_error.h b/vendor/core/src/lang/io/include/sourcemeta/core/io_error.h new file mode 100644 index 00000000..2d4fe5d1 --- /dev/null +++ b/vendor/core/src/lang/io/include/sourcemeta/core/io_error.h @@ -0,0 +1,52 @@ +#ifndef SOURCEMETA_CORE_IO_ERROR_H_ +#define SOURCEMETA_CORE_IO_ERROR_H_ + +#ifndef SOURCEMETA_CORE_IO_EXPORT +#include +#endif + +#include // std::exception +#include // std::filesystem::path +#include // std::string +#include // std::string_view +#include // std::move + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup io +/// An error that represents a failure to memory-map a file +class SOURCEMETA_CORE_IO_EXPORT FileViewError : public std::exception { +public: + FileViewError(std::filesystem::path path, const char *message) + : path_{std::move(path)}, message_{message} {} + FileViewError(std::filesystem::path path, std::string message) = delete; + FileViewError(std::filesystem::path path, std::string &&message) = delete; + FileViewError(std::filesystem::path path, std::string_view message) = delete; + + [[nodiscard]] auto what() const noexcept -> const char * override { + return this->message_; + } + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; + const char *message_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/lang/io/include/sourcemeta/core/io_fileview.h b/vendor/core/src/lang/io/include/sourcemeta/core/io_fileview.h new file mode 100644 index 00000000..4cddf041 --- /dev/null +++ b/vendor/core/src/lang/io/include/sourcemeta/core/io_fileview.h @@ -0,0 +1,67 @@ +#ifndef SOURCEMETA_CORE_IO_FILEVIEW_H_ +#define SOURCEMETA_CORE_IO_FILEVIEW_H_ + +#ifndef SOURCEMETA_CORE_IO_EXPORT +#include +#endif + +#include // assert +#include // std::size_t +#include // std::uint8_t +#include // std::filesystem::path + +namespace sourcemeta::core { + +/// @ingroup io +/// A read-only memory-mapped file. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// struct Header { +/// std::uint32_t magic; +/// std::uint32_t version; +/// }; +/// +/// sourcemeta::core::FileView view{"/path/to/file.bin"}; +/// const auto *header = view.as
(); +/// assert(header->magic == 0x12345678); +/// ``` +class SOURCEMETA_CORE_IO_EXPORT FileView { +public: + FileView(const std::filesystem::path &path); + ~FileView(); + + // Disable copying and moving + FileView(const FileView &) = delete; + FileView(FileView &&) = delete; + auto operator=(const FileView &) -> FileView & = delete; + auto operator=(FileView &&) -> FileView & = delete; + + /// The size of the memory-mapped data in bytes + [[nodiscard]] auto size() const noexcept -> std::size_t; + + /// Interpret the memory-mapped data as a pointer to T at the given offset. + template + [[nodiscard]] auto as(const std::size_t offset = 0) const noexcept + -> const T * { + assert(offset + sizeof(T) <= this->size_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return reinterpret_cast(this->data_ + offset); + } + +private: + const std::uint8_t *data_{nullptr}; + std::size_t size_{0}; +#if defined(_WIN32) + void *file_handle_{nullptr}; + void *mapping_handle_{nullptr}; +#else + int file_descriptor_{-1}; +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/lang/io/io_fileview.cc b/vendor/core/src/lang/io/io_fileview.cc new file mode 100644 index 00000000..20905721 --- /dev/null +++ b/vendor/core/src/lang/io/io_fileview.cc @@ -0,0 +1,105 @@ +#include +#include + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include +#else +#include // open, O_RDONLY +#include // mmap, munmap +#include // fstat +#include // close +#endif + +namespace sourcemeta::core { + +#if defined(_WIN32) + +FileView::FileView(const std::filesystem::path &path) { + this->file_handle_ = + CreateFileW(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (this->file_handle_ == INVALID_HANDLE_VALUE) { + throw FileViewError(path, "Could not open the file"); + } + + LARGE_INTEGER file_size; + if (GetFileSizeEx(this->file_handle_, &file_size) == 0) { + CloseHandle(this->file_handle_); + throw FileViewError(path, "Could not determine the file size"); + } + this->size_ = static_cast(file_size.QuadPart); + + this->mapping_handle_ = CreateFileMappingW(this->file_handle_, nullptr, + PAGE_READONLY, 0, 0, nullptr); + if (this->mapping_handle_ == nullptr) { + CloseHandle(this->file_handle_); + throw FileViewError(path, "Could not create a file mapping"); + } + + this->data_ = static_cast( + MapViewOfFile(this->mapping_handle_, FILE_MAP_READ, 0, 0, 0)); + if (this->data_ == nullptr) { + CloseHandle(this->mapping_handle_); + CloseHandle(this->file_handle_); + throw FileViewError(path, "Could not map the file into memory"); + } +} + +FileView::~FileView() { + if (this->data_ != nullptr) { + UnmapViewOfFile(this->data_); + } + + if (this->mapping_handle_ != nullptr) { + CloseHandle(this->mapping_handle_); + } + + if (this->file_handle_ != nullptr && + this->file_handle_ != INVALID_HANDLE_VALUE) { + CloseHandle(this->file_handle_); + } +} + +#else + +FileView::FileView(const std::filesystem::path &path) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + this->file_descriptor_ = open(path.c_str(), O_RDONLY); + if (this->file_descriptor_ == -1) { + throw FileViewError(path, "Could not open the file"); + } + + struct stat file_stat; + if (fstat(this->file_descriptor_, &file_stat) != 0) { + close(this->file_descriptor_); + throw FileViewError(path, "Could not determine the file size"); + } + this->size_ = static_cast(file_stat.st_size); + + void *mapped = mmap(nullptr, this->size_, PROT_READ, MAP_PRIVATE, + this->file_descriptor_, 0); + if (mapped == MAP_FAILED) { + close(this->file_descriptor_); + throw FileViewError(path, "Could not map the file into memory"); + } + + this->data_ = static_cast(mapped); +} + +FileView::~FileView() { + if (this->data_ != nullptr && this->size_ > 0) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + munmap(const_cast(this->data_), this->size_); + } + + if (this->file_descriptor_ != -1) { + close(this->file_descriptor_); + } +} + +#endif + +auto FileView::size() const noexcept -> std::size_t { return this->size_; } + +} // namespace sourcemeta::core diff --git a/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h b/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h index c06ba237..5de68d01 100644 --- a/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h +++ b/vendor/core/src/lang/process/include/sourcemeta/core/process_error.h @@ -27,8 +27,8 @@ namespace sourcemeta::core { class SOURCEMETA_CORE_PROCESS_EXPORT ProcessProgramNotNotFoundError : public std::exception { public: - ProcessProgramNotNotFoundError(std::string program) - : program_{std::move(program)} {} + ProcessProgramNotNotFoundError(const std::string_view program) + : program_{program} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Could not locate the requested program"; @@ -46,15 +46,13 @@ class SOURCEMETA_CORE_PROCESS_EXPORT ProcessProgramNotNotFoundError /// A spawned process terminated abnormally class SOURCEMETA_CORE_PROCESS_EXPORT ProcessSpawnError : public std::exception { public: - ProcessSpawnError(std::string program, + ProcessSpawnError(const std::string_view program, std::initializer_list arguments) - : program_{std::move(program)}, - arguments_{arguments.begin(), arguments.end()} {} + : program_{program}, arguments_{arguments.begin(), arguments.end()} {} - ProcessSpawnError(std::string program, + ProcessSpawnError(const std::string_view program, std::span arguments) - : program_{std::move(program)}, - arguments_{arguments.begin(), arguments.end()} {} + : program_{program}, arguments_{arguments.begin(), arguments.end()} {} [[nodiscard]] auto what() const noexcept -> const char * override { return "Process terminated abnormally"; diff --git a/vendor/jsonbinpack/src/compiler/compiler.cc b/vendor/jsonbinpack/src/compiler/compiler.cc index e8e60ca9..c53742d8 100644 --- a/vendor/jsonbinpack/src/compiler/compiler.cc +++ b/vendor/jsonbinpack/src/compiler/compiler.cc @@ -23,7 +23,7 @@ namespace sourcemeta::jsonbinpack { auto canonicalize(sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + const std::string_view default_dialect) -> void { sourcemeta::core::SchemaTransformer canonicalizer; sourcemeta::core::add(canonicalizer, sourcemeta::core::AlterSchemaMode::Canonicalizer); @@ -61,7 +61,7 @@ auto make_encoding(sourcemeta::core::JSON &document, auto compile(sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect) -> void { + const std::string_view default_dialect) -> void { canonicalize(schema, walker, resolver, default_dialect); sourcemeta::core::SchemaTransformer mapper; @@ -94,7 +94,7 @@ auto compile(sourcemeta::core::JSON &schema, // The "any" encoding is always the last resort const auto dialect{sourcemeta::core::dialect(schema)}; - if (!dialect.has_value() || dialect.value() != ENCODING_V1) { + if (dialect.empty() || dialect != ENCODING_V1) { make_encoding(schema, "ANY_PACKED_TYPE_TAG_BYTE_PREFIX", sourcemeta::core::JSON::make_object()); } diff --git a/vendor/jsonbinpack/src/compiler/include/sourcemeta/jsonbinpack/compiler.h b/vendor/jsonbinpack/src/compiler/include/sourcemeta/jsonbinpack/compiler.h index c976dedd..c1ec85a9 100644 --- a/vendor/jsonbinpack/src/compiler/include/sourcemeta/jsonbinpack/compiler.h +++ b/vendor/jsonbinpack/src/compiler/include/sourcemeta/jsonbinpack/compiler.h @@ -17,8 +17,7 @@ #include #include -#include // std::optional -#include // std::string +#include // std::string_view namespace sourcemeta::jsonbinpack { @@ -50,8 +49,7 @@ SOURCEMETA_JSONBINPACK_COMPILER_EXPORT auto compile(sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) - -> void; + std::string_view default_dialect = "") -> void; /// @ingroup compiler /// @@ -80,11 +78,10 @@ auto compile(sourcemeta::core::JSON &schema, /// std::cout << std::endl; /// ``` SOURCEMETA_JSONBINPACK_COMPILER_EXPORT -auto canonicalize( - sourcemeta::core::JSON &schema, - const sourcemeta::core::SchemaWalker &walker, - const sourcemeta::core::SchemaResolver &resolver, - const std::optional &default_dialect = std::nullopt) -> void; +auto canonicalize(sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaWalker &walker, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect = "") -> void; } // namespace sourcemeta::jsonbinpack