Skip to content

Commit eef7fcf

Browse files
authored
RNET-1131: optimize many OR'd terms on UUID/ObjectId queries (#7582)
* optimize many OR'd terms on UUID/ObjectId queries * use a type_index rather than hash * lint * optimize parsed IN queries * code review changes
1 parent 4d13b9a commit eef7fcf

File tree

9 files changed

+501
-24
lines changed

9 files changed

+501
-24
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
* Add `SyncClientConfig::security_access_group` which allows specifying the access group to use for the sync metadata Realm's encryption key. Setting this is required when sharing the metadata Realm between apps on Apple platforms ([#7552](https://github.com/realm/realm-core/pull/7552)).
66
* When connecting to multiple server apps, a unique encryption key is used for each of the metadata Realms rather than sharing one between them ([#7552](https://github.com/realm/realm-core/pull/7552)).
77
* Introduce the new `SyncUser` interface which can be implemented by SDKs to use sync without the core App Services implementation (or just for greater control over user behavior in tests). ([PR #7300](https://github.com/realm/realm-core/pull/7300)).
8+
* Improve performance of "chained OR equality" queries for UUID/ObjectId types and RQL parsed "IN" queries on string/int/uuid/objectid types. ([.Net #3566](https://github.com/realm/realm-dotnet/issues/3566), since the introduction of these types)
9+
* Introducing `Query::in()` which allows SDKs to take advantage of improved performance when building equality conditions against many constants. ([#7582](https://github.com/realm/realm-core/pull/7582))
810

911
### Fixed
1012
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)

src/realm/parser/driver.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,19 @@ Query EqualityNode::visit(ParserDriver* drv)
537537
}
538538
}
539539

540+
if (op == CompareType::IN || op == CompareType::EQUAL) {
541+
if (auto mixed_list = dynamic_cast<ConstantMixedList*>(right.get());
542+
mixed_list && mixed_list->size() &&
543+
mixed_list->get_comparison_type().value_or(ExpressionComparisonType::Any) ==
544+
ExpressionComparisonType::Any) {
545+
if (auto lhs = dynamic_cast<ObjPropertyBase*>(left.get());
546+
lhs && lhs->column_key() && !lhs->column_key().is_collection() && !lhs->links_exist() &&
547+
lhs->column_key().get_type() != col_type_Mixed) {
548+
return drv->m_base_table->where().in(lhs->column_key(), mixed_list->begin(), mixed_list->end());
549+
}
550+
}
551+
}
552+
540553
if (left_type == type_Link && left_type == right_type && right->has_constant_evaluation()) {
541554
if (auto link_column = dynamic_cast<const Columns<Link>*>(left.get())) {
542555
if (link_column->link_map().get_nb_hops() == 1 &&

src/realm/query.cpp

Lines changed: 98 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ struct MakeConditionNode {
278278
return std::unique_ptr<ParentNode>{new Node(null{}, col_key)};
279279
}
280280

281+
// Builds a condition node of type Node from a Mixed constant by extracting the
// payload as the node's own TConditionValue type.
// NOTE(review): what value.get<T>() does when the Mixed holds a different type
// (or null) is not visible in this hunk — confirm callers type-check first.
static std::unique_ptr<ParentNode> make(ColKey col_key, Mixed value)
282+
{
283+
return std::unique_ptr<ParentNode>{new Node(value.get<typename Node::TConditionValue>(), col_key)};
284+
}
285+
286+
// overload for optional types
281287
template <class T = typename Node::TConditionValue>
282288
static typename std::enable_if<!std::is_same<typename util::RemoveOptional<T>::type, T>::value,
283289
std::unique_ptr<ParentNode>>::type
@@ -326,6 +332,35 @@ struct MakeConditionNode<StringNode<Cond>> {
326332
}
327333
};
328334

335+
// Specialization of MakeConditionNode for timestamp columns. It provides one
// `make` overload per accepted argument kind (Timestamp, Mixed, null) and a
// catch-all overload that reports a type mismatch for anything else.
template <class Cond>
336+
struct MakeConditionNode<TimestampNode<Cond>> {
337+
// Direct Timestamp argument: the value is moved into the new node.
static std::unique_ptr<ParentNode> make(ColKey col_key, Timestamp value)
338+
{
339+
return std::unique_ptr<ParentNode>{new TimestampNode<Cond>(std::move(value), col_key)};
340+
}
341+
342+
// only enable certain template conditions of supported timestamp operations
// (Equal, NotEqual, Greater, Less, GreaterEqual, LessEqual). For any other
// Cond this overload is removed from overload resolution by enable_if_t.
// The SubstitutionEnabler pack turns this into a template so the enable_if
// can apply; the static_assert below rejects callers that pass explicit
// template arguments.
343+
template <typename... SubstitutionEnabler, typename U = Cond>
344+
static std::enable_if_t<is_any_v<U, Equal, NotEqual, Greater, Less, GreaterEqual, LessEqual>,
345+
std::unique_ptr<ParentNode>>
346+
make(ColKey col_key, Mixed value)
347+
{
348+
static_assert(sizeof...(SubstitutionEnabler) == 0, "Do not specify template arguments");
349+
// The Mixed payload is read as a Timestamp.
// NOTE(review): behaviour of get<Timestamp>() on a non-timestamp Mixed is not shown here — confirm.
return std::unique_ptr<ParentNode>{new TimestampNode<Cond>(value.get<Timestamp>(), col_key)};
350+
}
351+
352+
// Null argument: builds a node comparing against null.
static std::unique_ptr<ParentNode> make(ColKey col_key, null)
353+
{
354+
return std::unique_ptr<ParentNode>{new TimestampNode<Cond>(null{}, col_key)};
355+
}
356+
357+
// Catch-all for every other argument type: never builds a node, only calls
// throw_type_mismatch_error().
template <class T>
358+
REALM_FORCEINLINE static std::unique_ptr<ParentNode> make(ColKey, T&&)
359+
{
360+
throw_type_mismatch_error();
361+
}
362+
};
363+
329364
template <class Cond>
330365
struct MakeConditionNode<MixedNode<Cond>> {
331366
template <class T>
@@ -851,6 +886,66 @@ Query& Query::like(ColKey column_key, Mixed value, bool case_sensitive)
851886
add_condition<LikeIns>(column_key, value);
852887
return *this;
853888
}
889+
// Adds an "column IN [begin, end)" condition to this query and returns *this.
//
// column_key  - column to test; asserted not to be a collection column.
// begin, end  - half-open range of Mixed constants to compare against.
//
// An empty range produces a condition built from FalseExpression. UUID,
// ObjectId, String and Int columns get a single Equal node constructed from
// the whole range; every other column type falls back to a chain of OR'd
// Equal conditions. An InvalidArgument thrown while building the node is
// caught and replaced by a FalseExpression condition.
Query& Query::in(ColKey column_key, const Mixed* begin, const Mixed* end)
890+
{
891+
REALM_ASSERT(!column_key.is_collection());
892+
ColumnType col_type = column_key.get_type();
893+
std::unique_ptr<ParentNode> node;
894+
try {
895+
// No constants at all: nothing can be equal to "one of none".
if (begin == end) {
896+
node = std::make_unique<ExpressionNode>(std::make_unique<FalseExpression>());
897+
}
898+
// Column types whose Equal node has a constructor taking the full range.
else if (col_type == col_type_UUID) {
899+
node = std::make_unique<UUIDNode<Equal>>(column_key, begin, end);
900+
}
901+
else if (col_type == col_type_ObjectId) {
902+
node = std::make_unique<ObjectIdNode<Equal>>(column_key, begin, end);
903+
}
904+
else if (col_type == col_type_String) {
905+
node = std::make_unique<StringNode<Equal>>(column_key, begin, end);
906+
}
907+
else if (col_type == col_type_Int) {
908+
// The integer node type is selected by column nullability.
if (column_key.is_nullable()) {
909+
node = std::make_unique<IntegerNode<ArrayIntNull, Equal>>(column_key, begin, end);
910+
}
911+
else {
912+
node = std::make_unique<IntegerNode<ArrayInteger, Equal>>(column_key, begin, end);
913+
}
914+
}
915+
else {
916+
// general path for nodes that don't have this optimization yet
// Conditions are collected in a separate query on the same table and
// merged into this one with and_query(); Or() is called after every
// added node, including the last one.
917+
Query cond = this->m_table->where();
918+
// Mixed columns accept every constant as-is.
if (col_type == col_type_Mixed) {
919+
for (const Mixed* it = begin; it != end; ++it) {
920+
cond.add_node(make_condition_node<Equal>(*m_table, column_key, *it));
921+
cond.Or();
922+
}
923+
}
924+
else {
925+
// Typed columns: only constants of the column's own type are used,
// plus null when the column is nullable. Everything else is skipped.
for (const Mixed* it = begin; it != end; ++it) {
926+
if (it->is_type(DataType(col_type))) {
927+
cond.add_node(make_condition_node<Equal>(*m_table, column_key, *it));
928+
cond.Or();
929+
}
930+
else if (it->is_null() && column_key.is_nullable()) {
931+
cond.add_node(make_condition_node<Equal>(*m_table, column_key, realm::null()));
932+
cond.Or();
933+
}
934+
}
935+
}
936+
// NOTE(review): if no constant passed the filter above, `cond` has received
// no conditions at this point; confirm that and_query() with such a query
// still yields the intended "match nothing" result rather than no restriction.
this->and_query(cond);
937+
// The generic path returns here and never reaches add_node() below.
return *this;
938+
}
939+
}
940+
catch (const InvalidArgument&) {
941+
// if none of the arguments matched the right type we'd end up with an
942+
// empty condition node which won't evaluate correctly. The right behaviour
943+
// is to match nothing, so make a false condition
944+
node = std::make_unique<ExpressionNode>(std::make_unique<FalseExpression>());
945+
}
946+
add_node(std::move(node));
947+
return *this;
948+
}
854949

855950
// ------------- size
856951
Query& Query::size_equal(ColKey column_key, int64_t value)
@@ -1017,9 +1112,7 @@ void Query::aggregate(QueryStateBase& st, ColKey column_key) const
10171112
auto pn = root_node();
10181113
auto best = find_best_node(pn);
10191114
auto node = pn->m_children[best];
1020-
if (node->has_search_index()) {
1021-
auto keys = node->index_based_keys();
1022-
REALM_ASSERT(keys);
1115+
if (auto keys = node->index_based_keys()) {
10231116
// The node having the search index can be removed from the query as we know that
10241117
// all the objects will match this condition
10251118
pn->m_children[best] = pn->m_children.back();
@@ -1344,10 +1437,7 @@ void Query::do_find_all(QueryStateBase& st) const
13441437
auto pn = root_node();
13451438
auto best = find_best_node(pn);
13461439
auto node = pn->m_children[best];
1347-
if (node->has_search_index()) {
1348-
auto keys = node->index_based_keys();
1349-
REALM_ASSERT(keys);
1350-
1440+
if (auto keys = node->index_based_keys()) {
13511441
// The node having the search index can be removed from the query as we know that
13521442
// all the objects will match this condition
13531443
pn->m_children[best] = pn->m_children.back();
@@ -1463,9 +1553,7 @@ size_t Query::do_count(size_t limit) const
14631553
auto pn = root_node();
14641554
auto best = find_best_node(pn);
14651555
auto node = pn->m_children[best];
1466-
if (node->has_search_index()) {
1467-
auto keys = node->index_based_keys();
1468-
REALM_ASSERT(keys);
1556+
if (auto keys = node->index_based_keys()) {
14691557
if (pn->m_children.size() > 1) {
14701558
// The node having the search index can be removed from the query as we know that
14711559
// all the objects will match this condition

src/realm/query.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ class Query final {
188188
Query& ends_with(ColKey column_key, Mixed value, bool case_sensitive = true);
189189
Query& contains(ColKey column_key, Mixed value, bool case_sensitive = true);
190190
Query& like(ColKey column_key, Mixed value, bool case_sensitive = true);
191+
Query& in(ColKey column_key, const Mixed* begin, const Mixed* end);
191192

192193
// Conditions: size
193194
Query& size_equal(ColKey column_key, int64_t value);

src/realm/query_engine.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,28 @@ size_t IndexEvaluator::do_search_index(const Cluster* cluster, size_t start, siz
380380
return not_found;
381381
}
382382

383+
// Constructs an equality node that matches any of the string (or null)
// constants in [begin, end).
//
// col         - column the node is evaluated against.
// begin, end  - half-open range of Mixed constants; entries that are neither
//               null nor a string are ignored.
//
// Throws InvalidArgument when no entry contributed a needle.
StringNode<Equal>::StringNode(ColKey col, const Mixed* begin, const Mixed* end)
384+
: StringNodeEqualBase(StringData(), col)
385+
{
386+
// Don't use the search index if present since we're in a scenario where
387+
// it'd be slower
388+
m_index_evaluator.reset();
389+
390+
for (const Mixed* it = begin; it != end; ++it) {
391+
// A null constant is recorded as a default-constructed needle.
if (it->is_null()) {
392+
m_needles.emplace();
393+
}
394+
else if (const StringData* str = it->get_if<StringData>()) {
395+
// Copy the characters into a buffer owned by this node and store a
// StringData that refers to that copy, not to the caller's memory.
m_needle_storage.push_back(std::make_unique<char[]>(str->size()));
396+
std::copy(str->data(), str->data() + str->size(), m_needle_storage.back().get());
397+
m_needles.insert(StringData(m_needle_storage.back().get(), str->size()));
398+
}
399+
}
400+
// Nothing usable in the range: signal it to the caller via InvalidArgument.
if (m_needles.empty()) {
401+
throw InvalidArgument("No string arguments in query");
402+
}
403+
}
404+
383405
void StringNode<Equal>::_search_index_init()
384406
{
385407
REALM_ASSERT(bool(m_index_evaluator));

0 commit comments

Comments
 (0)