Skip to content

Commit b9d603a

Browse files
committed
perf: optimize vocabulary lookup with bitset
Signed-off-by: Azeez Syed <syedazeez337@gmail.com>
1 parent 182df37 commit b9d603a

File tree

11 files changed

+562
-57
lines changed

11 files changed

+562
-57
lines changed

benchmark/e2e/runner.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
#include <sourcemeta/blaze/compiler.h>
1313
#include <sourcemeta/blaze/evaluator.h>
1414

15+
// clang-format off
1516
#define REGISTER_E2E_COMPILER(name, directory_name) \
16-
static auto E2E_Compiler_##name(benchmark::State &state)->void { \
17+
static auto E2E_Compiler_##name(benchmark::State &state) -> void { \
1718
const std::filesystem::path directory{CURRENT_DIRECTORY \
1819
"/e2e/" directory_name}; \
1920
const auto schema{sourcemeta::core::read_json(directory / "schema.json")}; \
@@ -30,7 +31,7 @@
3031
BENCHMARK(E2E_Compiler_##name)
3132

3233
#define REGISTER_E2E_EVALUATOR(name, directory_name) \
33-
static auto E2E_Evaluator_##name(benchmark::State &state)->void { \
34+
static auto E2E_Evaluator_##name(benchmark::State &state) -> void { \
3435
const std::filesystem::path directory{CURRENT_DIRECTORY \
3536
"/e2e/" directory_name}; \
3637
const auto schema{sourcemeta::core::read_json(directory / "schema.json")}; \
@@ -56,6 +57,7 @@
5657
} \
5758
} \
5859
BENCHMARK(E2E_Evaluator_##name)
60+
// clang-format on
5961

6062
REGISTER_E2E_COMPILER(adaptivecard, "adaptivecard");
6163
REGISTER_E2E_COMPILER(ansible_meta, "ansible-meta");

src/compiler/compile.cc

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <utility> // std::move, std::pair
1111

1212
#include "compile_helpers.h"
13+
#include "vocabulary_lookup.h"
1314

1415
namespace {
1516

@@ -33,6 +34,12 @@ auto compile_subschema(const sourcemeta::blaze::Context &context,
3334
}
3435

3536
Instructions steps;
37+
// Reserve capacity to avoid reallocations - typical schemas have 3-10
38+
// keywords
39+
if (schema_context.schema.is_object()) {
40+
steps.reserve(schema_context.schema.size());
41+
}
42+
3643
for (const auto &entry : sourcemeta::core::SchemaKeywordIterator{
3744
schema_context.schema, context.walker, context.resolver,
3845
default_dialect}) {
@@ -285,11 +292,14 @@ auto compile(const sourcemeta::core::JSON &schema,
285292
///////////////////////////////////////////////////////////////////
286293

287294
Instructions compiler_template;
295+
// Reserve initial capacity for the template
296+
compiler_template.reserve(32);
297+
288298
if (uses_dynamic_scopes &&
289-
(schema_context.vocabularies.contains(
290-
"https://json-schema.org/draft/2019-09/vocab/core") ||
291-
schema_context.vocabularies.contains(
292-
"https://json-schema.org/draft/2020-12/vocab/core"))) {
299+
(has_vocabulary(schema_context.vocabularies,
300+
"https://json-schema.org/draft/2019-09/vocab/core") ||
301+
has_vocabulary(schema_context.vocabularies,
302+
"https://json-schema.org/draft/2020-12/vocab/core"))) {
293303
for (const auto &entry : context.frame.locations()) {
294304
// We are only trying to find dynamic anchors
295305
if (entry.second.type !=

src/compiler/compile_helpers.h

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,30 @@
1111
#include <utility> // std::declval, std::move
1212
#include <variant> // std::visit
1313

14+
#include "vocabulary_lookup.h"
15+
1416
namespace sourcemeta::blaze {
1517

18+
// Static empty string for DynamicContext with no keyword; returned by reference to a function-local static so the reference remains valid after the call returns
19+
inline const std::string &empty_keyword() {
20+
static const std::string value{""};
21+
return value;
22+
}
23+
1624
inline auto relative_dynamic_context() -> DynamicContext {
17-
return {"", sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer,
18-
false};
25+
return {empty_keyword(), sourcemeta::core::empty_pointer,
26+
sourcemeta::core::empty_pointer, false};
1927
}
2028

2129
inline auto relative_dynamic_context(const DynamicContext &dynamic_context)
2230
-> DynamicContext {
23-
return {"", sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer,
24-
dynamic_context.property_as_target};
31+
return {empty_keyword(), sourcemeta::core::empty_pointer,
32+
sourcemeta::core::empty_pointer, dynamic_context.property_as_target};
2533
}
2634

2735
inline auto property_relative_dynamic_context() -> DynamicContext {
28-
return {"", sourcemeta::core::empty_pointer, sourcemeta::core::empty_pointer,
29-
true};
36+
return {empty_keyword(), sourcemeta::core::empty_pointer,
37+
sourcemeta::core::empty_pointer, true};
3038
}
3139

3240
inline auto schema_resource_id(const std::vector<std::string> &resources,
@@ -222,7 +230,7 @@ inline auto find_adjacent(const Context &context,
222230

223231
if (std::any_of(vocabularies.cbegin(), vocabularies.cend(),
224232
[&subschema_vocabularies](const auto &vocabulary) {
225-
return subschema_vocabularies.contains(vocabulary);
233+
return has_vocabulary(subschema_vocabularies, vocabulary);
226234
}) &&
227235
subschema.type() == type) {
228236
result.emplace_back(subschema);

src/compiler/default_compiler.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "default_compiler_draft4.h"
66
#include "default_compiler_draft6.h"
77
#include "default_compiler_draft7.h"
8+
#include "vocabulary_lookup.h"
89

910
#include <cassert> // assert
1011
#include <string> // std::string
@@ -39,7 +40,8 @@ auto sourcemeta::blaze::default_schema_compiler(
3940
"http://json-schema.org/draft-06/hyper-schema#",
4041
"http://json-schema.org/draft-04/schema#",
4142
"http://json-schema.org/draft-04/hyper-schema#"};
42-
for (const auto &vocabulary : schema_context.vocabularies) {
43+
for (const auto &vocabulary :
44+
schema_context.vocabularies.all_vocabularies()) {
4345
if (!SUPPORTED_VOCABULARIES.contains(vocabulary.first) &&
4446
vocabulary.second) {
4547
throw sourcemeta::core::SchemaVocabularyError(
@@ -50,22 +52,22 @@ auto sourcemeta::blaze::default_schema_compiler(
5052
using namespace sourcemeta::blaze;
5153

5254
#define COMPILE(vocabulary, _keyword, handler) \
53-
if (schema_context.vocabularies.contains(vocabulary) && \
55+
if (has_vocabulary(schema_context.vocabularies, vocabulary) && \
5456
dynamic_context.keyword == (_keyword)) { \
5557
return internal::handler(context, schema_context, dynamic_context, \
5658
current); \
5759
}
5860

5961
#define COMPILE_ANY(vocabulary_1, vocabulary_2, _keyword, handler) \
60-
if ((schema_context.vocabularies.contains(vocabulary_1) || \
61-
schema_context.vocabularies.contains(vocabulary_2)) && \
62+
if ((has_vocabulary(schema_context.vocabularies, vocabulary_1) || \
63+
has_vocabulary(schema_context.vocabularies, vocabulary_2)) && \
6264
dynamic_context.keyword == (_keyword)) { \
6365
return internal::handler(context, schema_context, dynamic_context, \
6466
current); \
6567
}
6668

6769
#define STOP_IF_SIBLING_KEYWORD(vocabulary, _keyword) \
68-
if (schema_context.vocabularies.contains(vocabulary) && \
70+
if (has_vocabulary(schema_context.vocabularies, vocabulary) && \
6971
schema_context.schema.is_object() && \
7072
schema_context.schema.defines(_keyword)) { \
7173
return {}; \

src/compiler/include/sourcemeta/blaze/compiler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ struct SchemaContext {
5151
/// disposal to implement a keyword
5252
struct DynamicContext {
5353
/// The schema keyword
54-
const std::string keyword;
54+
const std::string &keyword;
5555
/// The schema base keyword path
5656
const sourcemeta::core::Pointer &base_schema_location;
5757
/// The base instance location that the keyword must be evaluated to

src/compiler/unevaluated.cc

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#include <sourcemeta/blaze/compiler.h>
22

3+
#include "vocabulary_lookup.h"
4+
35
namespace {
46
using namespace sourcemeta::core;
7+
using sourcemeta::blaze::has_vocabulary;
58

69
auto find_adjacent_dependencies(
710
const JSON::String &current, const JSON &schema, const SchemaFrame &frame,
@@ -22,7 +25,8 @@ auto find_adjacent_dependencies(
2225
continue;
2326
} else if (keywords.contains(property.first)) {
2427
// In 2019-09, `additionalItems` takes no effect without `items`
25-
if (subschema_vocabularies.contains(
28+
if (has_vocabulary(
29+
subschema_vocabularies,
2630
"https://json-schema.org/draft/2019-09/vocab/applicator") &&
2731
property.first == "additionalItems" && !subschema.defines("items")) {
2832
continue;
@@ -154,9 +158,11 @@ auto unevaluated(const JSON &schema, const SchemaFrame &frame,
154158
const auto keyword_uri{frame.uri(entry.second, {pair.first})};
155159
SchemaUnevaluatedEntry unevaluated;
156160

157-
if ((subschema_vocabularies.contains(
161+
if ((has_vocabulary(
162+
subschema_vocabularies,
158163
"https://json-schema.org/draft/2020-12/vocab/unevaluated") &&
159-
subschema_vocabularies.contains(
164+
has_vocabulary(
165+
subschema_vocabularies,
160166
"https://json-schema.org/draft/2020-12/vocab/applicator")) &&
161167
// NOLINTNEXTLINE(bugprone-branch-clone)
162168
pair.first == "unevaluatedProperties") {
@@ -167,29 +173,31 @@ auto unevaluated(const JSON &schema, const SchemaFrame &frame,
167173
entry.second, entry.second, true, unevaluated);
168174
result.emplace(keyword_uri, std::move(unevaluated));
169175
} else if (
170-
(subschema_vocabularies.contains(
176+
(has_vocabulary(
177+
subschema_vocabularies,
171178
"https://json-schema.org/draft/2020-12/vocab/unevaluated") &&
172-
subschema_vocabularies.contains(
179+
has_vocabulary(
180+
subschema_vocabularies,
173181
"https://json-schema.org/draft/2020-12/vocab/applicator")) &&
174182
pair.first == "unevaluatedItems") {
175183
find_adjacent_dependencies(
176184
pair.first, schema, frame, walker, resolver,
177185
{"prefixItems", "items", "contains", "unevaluatedItems"},
178186
entry.second, entry.second, true, unevaluated);
179187
result.emplace(keyword_uri, std::move(unevaluated));
180-
} else if (subschema_vocabularies.contains(
181-
"https://json-schema.org/draft/2019-09/vocab/"
182-
"applicator") &&
188+
} else if (has_vocabulary(subschema_vocabularies,
189+
"https://json-schema.org/draft/2019-09/vocab/"
190+
"applicator") &&
183191
pair.first == "unevaluatedProperties") {
184192
find_adjacent_dependencies(
185193
pair.first, schema, frame, walker, resolver,
186194
{"properties", "patternProperties", "additionalProperties",
187195
"unevaluatedProperties"},
188196
entry.second, entry.second, true, unevaluated);
189197
result.emplace(keyword_uri, std::move(unevaluated));
190-
} else if (subschema_vocabularies.contains(
191-
"https://json-schema.org/draft/2019-09/vocab/"
192-
"applicator") &&
198+
} else if (has_vocabulary(subschema_vocabularies,
199+
"https://json-schema.org/draft/2019-09/vocab/"
200+
"applicator") &&
193201
pair.first == "unevaluatedItems") {
194202
find_adjacent_dependencies(
195203
pair.first, schema, frame, walker, resolver,

src/compiler/vocabulary_lookup.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef SOURCEMETA_BLAZE_COMPILER_VOCABULARY_LOOKUP_H_
2+
#define SOURCEMETA_BLAZE_COMPILER_VOCABULARY_LOOKUP_H_
3+
4+
#include <sourcemeta/core/jsonschema.h>
5+
6+
#include <string_view> // std::string_view
7+
8+
namespace sourcemeta::blaze {
9+
10+
// Vocabulary lookup helper: returns whether `vocabularies` contains `uri`
11+
// This forwards directly to the Vocabularies::contains() method, which is
12+
// expected to perform O(1) bitwise AND operations for known vocabularies (implemented in sourcemeta::core, not shown here)
13+
inline auto has_vocabulary(const sourcemeta::core::Vocabularies &vocabularies,
14+
std::string_view uri) -> bool {
15+
return vocabularies.contains(uri);
16+
}
17+
18+
} // namespace sourcemeta::blaze
19+
20+
#endif

src/evaluator/evaluator.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,18 @@ auto Evaluator::validate(const Template &schema,
6565
assert(this->instance_location.empty());
6666
assert(this->resources.empty());
6767
this->labels.clear();
68+
// Reserve capacity to avoid rehashing during evaluation
69+
this->labels.reserve(16);
6870

69-
if (schema.track && schema.dynamic) {
71+
if (schema.track && schema.dynamic) [[unlikely]] {
7072
this->evaluated_.clear();
7173
return complete::evaluate(instance, *this, schema, nullptr);
72-
} else if (schema.track) {
74+
} else if (schema.track) [[unlikely]] {
7375
this->evaluated_.clear();
7476
return track::evaluate(instance, *this, schema);
75-
} else if (schema.dynamic) {
77+
} else if (schema.dynamic) [[unlikely]] {
7678
return dynamic::evaluate(instance, *this, schema);
77-
} else {
79+
} else [[likely]] {
7880
return fast::evaluate(instance, *this, schema);
7981
}
8082
}
@@ -87,6 +89,7 @@ auto Evaluator::validate(const Template &schema,
8789
assert(this->instance_location.empty());
8890
assert(this->resources.empty());
8991
this->labels.clear();
92+
this->labels.reserve(16);
9093
this->evaluated_.clear();
9194

9295
return complete::evaluate(instance, *this, schema, callback);

0 commit comments

Comments
 (0)