Skip to content

Commit ac249ef

Browse files
committed
Improve FlexCompressor::find_all<Cond>
1 parent 27d73a4 commit ac249ef

File tree

3 files changed

+141
-118
lines changed

3 files changed

+141
-118
lines changed

src/realm/integer_flex_compressor.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,18 @@ void FlexCompressor::copy_data(const Array& arr, const std::vector<int64_t>& val
7070
bool FlexCompressor::find_all_match(size_t start, size_t end, size_t baseindex, QueryStateBase* state)
7171
{
7272
REALM_ASSERT_DEBUG(state->match_count() < state->limit());
73-
const auto process = state->limit() - state->match_count();
74-
const auto end2 = end - start > process ? start + process : end;
75-
for (; start < end2; start++)
73+
while (start < end) {
7674
if (!state->match(start + baseindex))
7775
return false;
76+
start++;
77+
}
7878
return true;
7979
}
80+
81+
size_t FlexCompressor::lower_bound(size_t size, int64_t value, uint64_t mask, BfIterator& data_iterator) noexcept
82+
{
83+
return impl::lower_bound(nullptr, 0, size, value, [&](auto, size_t ndx) {
84+
data_iterator.move(ndx);
85+
return sign_extend_field_by_mask(mask, *data_iterator);
86+
});
87+
}

src/realm/integer_flex_compressor.hpp

Lines changed: 103 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,7 @@ class FlexCompressor {
5050

5151
private:
5252
static bool find_all_match(size_t, size_t, size_t, QueryStateBase*);
53-
54-
template <typename Cond>
55-
static bool find_linear(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
56-
57-
template <typename VectorCond1, typename VectorCond2>
58-
static bool find_parallel(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
59-
60-
template <typename LinearCond, typename VectorCond1, typename VectorCond2>
61-
static bool do_find_all(const Array&, int64_t, size_t, size_t, size_t, QueryStateBase*);
62-
63-
template <typename Cond>
64-
static bool run_parallel_subscan(size_t, size_t, size_t);
53+
static size_t lower_bound(size_t, int64_t, uint64_t, BfIterator&) noexcept;
6554
};
6655

6756
inline int64_t FlexCompressor::get(const IntegerCompressor& c, size_t ndx)
@@ -164,17 +153,32 @@ inline void FlexCompressor::set_direct(const IntegerCompressor& c, size_t ndx, i
164153
data_iterator.set_value(value);
165154
}
166155

156+
template <typename T>
157+
class IndexCond {
158+
public:
159+
using type = T;
160+
};
161+
162+
template <>
163+
class IndexCond<Greater> {
164+
public:
165+
using type = GreaterEqual;
166+
};
167+
167168
template <typename Cond>
168169
inline bool FlexCompressor::find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
169170
QueryStateBase* state)
170171
{
172+
static constexpr size_t RANGE_LIMIT = 20;
173+
static constexpr size_t WIDTH_LIMIT = 16;
174+
171175
REALM_ASSERT_DEBUG(start <= arr.m_size && (end <= arr.m_size || end == size_t(-1)) && start <= end);
172176
Cond c;
173177

174178
if (end == npos)
175179
end = arr.m_size;
176180

177-
if (!(arr.m_size > start && start < end))
181+
if (start >= arr.m_size || start >= end)
178182
return true;
179183

180184
const auto lbound = arr.m_lbound;
@@ -189,116 +193,105 @@ inline bool FlexCompressor::find_all(const Array& arr, int64_t value, size_t sta
189193

190194
REALM_ASSERT_DEBUG(arr.m_width != 0);
191195

192-
if constexpr (std::is_same_v<Equal, Cond>) {
193-
return do_find_all<Equal, Equal, Equal>(arr, value, start, end, baseindex, state);
196+
const auto& compressor = arr.integer_compressor();
197+
const auto v_width = arr.m_width;
198+
const auto v_size = compressor.v_size();
199+
const auto mask = compressor.v_mask();
200+
uint64_t* data = (uint64_t*)arr.m_data;
201+
size_t v_start = realm::not_found;
202+
203+
/**************** Search the values ****************/
204+
205+
int64_t modified_value = value;
206+
if constexpr (std::is_same_v<Cond, Greater>) {
207+
modified_value++; // We use GreaterEqual below, so this will effectively be Greater
194208
}
195-
else if constexpr (std::is_same_v<NotEqual, Cond>) {
196-
return do_find_all<NotEqual, Equal, NotEqual>(arr, value, start, end, baseindex, state);
209+
210+
if (v_size >= RANGE_LIMIT) {
211+
if (v_width <= WIDTH_LIMIT) {
212+
auto search_vector = populate(v_width, modified_value);
213+
v_start = parallel_subword_find(find_all_fields<GreaterEqual>, data, 0, v_width, compressor.msb(),
214+
search_vector, 0, v_size);
215+
}
216+
else {
217+
BfIterator data_iterator{data, 0, v_width, v_width, 0};
218+
v_start = lower_bound(v_size, modified_value, mask, data_iterator);
219+
}
197220
}
198-
else if constexpr (std::is_same_v<Less, Cond>) {
199-
return do_find_all<Less, GreaterEqual, Less>(arr, value, start, end, baseindex, state);
221+
else {
222+
BfIterator data_iterator{data, 0, v_width, v_width, 0};
223+
size_t idx = 0;
224+
while (idx < v_size) {
225+
if (sign_extend_field_by_mask(mask, *data_iterator) >= modified_value) {
226+
break;
227+
}
228+
data_iterator.move(++idx);
229+
}
230+
v_start = idx;
200231
}
201-
else if constexpr (std::is_same_v<Greater, Cond>) {
202-
return do_find_all<Greater, Greater, GreaterEqual>(arr, value, start, end, baseindex, state);
232+
233+
if constexpr (realm::is_any_v<Cond, Equal, NotEqual>) {
234+
// Check for equality.
235+
if (v_start < v_size) {
236+
BfIterator it{data, 0, v_width, v_width, v_start};
237+
if (sign_extend_field_by_mask(mask, *it) > value) {
238+
v_start = v_size; // Mark as not found
239+
}
240+
}
203241
}
204-
return true;
205-
}
206242

207-
template <typename LinearCond, typename VectorCond1, typename VectorCond2>
208-
inline bool FlexCompressor::do_find_all(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
209-
QueryStateBase* state)
210-
{
211-
const auto v_width = arr.m_width;
212-
const auto v_range = arr.integer_compressor().v_size();
213-
const auto ndx_range = end - start;
214-
if (!run_parallel_subscan<LinearCond>(v_width, v_range, ndx_range))
215-
return find_linear<LinearCond>(arr, value, start, end, baseindex, state);
216-
return find_parallel<VectorCond1, VectorCond2>(arr, value, start, end, baseindex, state);
217-
}
243+
/***************** Some early outs *****************/
218244

219-
template <typename Cond>
220-
inline bool FlexCompressor::find_linear(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
221-
QueryStateBase* state)
222-
{
223-
const auto cmp = [](int64_t item, int64_t key) {
224-
if constexpr (std::is_same_v<Cond, Equal>)
225-
return item == key;
226-
if constexpr (std::is_same_v<Cond, NotEqual>)
227-
return item != key;
228-
if constexpr (std::is_same_v<Cond, Less>)
229-
return item < key;
230-
if constexpr (std::is_same_v<Cond, Greater>)
231-
return item > key;
232-
REALM_UNREACHABLE();
233-
};
234-
235-
const auto& c = arr.integer_compressor();
236-
const auto offset = c.v_width() * c.v_size();
237-
const auto ndx_w = c.ndx_width();
238-
const auto v_w = c.v_width();
239-
const auto data = c.data();
240-
const auto mask = c.v_mask();
241-
BfIterator ndx_iterator{data, offset, ndx_w, ndx_w, start};
242-
BfIterator data_iterator{data, 0, v_w, v_w, static_cast<size_t>(*ndx_iterator)};
243-
while (start < end) {
244-
const auto sv = sign_extend_field_by_mask(mask, *data_iterator);
245-
if (cmp(sv, value) && !state->match(start + baseindex))
246-
return false;
247-
ndx_iterator.move(++start);
248-
data_iterator.move(static_cast<size_t>(*ndx_iterator));
245+
if (v_start == v_size) {
246+
if constexpr (realm::is_any_v<Cond, Equal, Greater>) {
247+
return true; // No Matches
248+
}
249+
if constexpr (realm::is_any_v<Cond, NotEqual, Less>) {
250+
return find_all_match(start, end, baseindex, state); // All matches
251+
}
252+
}
253+
else if (v_start == 0) {
254+
if constexpr (std::is_same_v<Cond, Less>) {
255+
// No index is less than 0
256+
return true; // No Matches
257+
}
258+
if constexpr (std::is_same_v<Cond, Greater>) {
259+
// Every index is greater than or equal to 0
260+
return find_all_match(start, end, baseindex, state);
261+
}
249262
}
250-
return true;
251-
}
252263

253-
template <typename VectorCond1, typename VectorCond2>
254-
inline bool FlexCompressor::find_parallel(const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
255-
QueryStateBase* state)
256-
{
257-
//
258-
// algorithm idea: first try to find in the array of values (should be shorter in size but more bits) using
259-
// VectorCond1.
260-
// Then match the index found in the array of indices using VectorCond2
261-
//
264+
/*************** Search the indexes ****************/
262265

263-
const auto& compressor = arr.integer_compressor();
264-
const auto v_width = compressor.v_width();
265-
const auto v_size = compressor.v_size();
266+
using U = typename IndexCond<Cond>::type;
267+
const auto ndx_range = end - start;
266268
const auto ndx_width = compressor.ndx_width();
267-
const auto offset = v_size * v_width;
268-
uint64_t* data = (uint64_t*)arr.m_data;
269-
270-
auto MSBs = compressor.msb();
271-
auto search_vector = populate(v_width, value);
272-
auto v_start =
273-
parallel_subword_find(find_all_fields<VectorCond1>, data, 0, v_width, MSBs, search_vector, 0, v_size);
274-
275-
if constexpr (!std::is_same_v<VectorCond2, NotEqual>) {
276-
if (start == v_size)
277-
return true;
269+
const auto v_offset = v_size * v_width;
270+
if (ndx_range >= RANGE_LIMIT) {
271+
auto search_vector = populate(ndx_width, v_start);
272+
while (start < end) {
273+
start = parallel_subword_find(find_all_fields_unsigned<U>, data, v_offset, ndx_width,
274+
compressor.ndx_msb(), search_vector, start, end);
275+
if (start < end) {
276+
if (!state->match(start + baseindex))
277+
return false;
278+
}
279+
++start;
280+
}
278281
}
279-
280-
MSBs = compressor.ndx_msb();
281-
search_vector = populate(ndx_width, v_start);
282-
while (start < end) {
283-
start = parallel_subword_find(find_all_fields_unsigned<VectorCond2>, data, offset, ndx_width, MSBs,
284-
search_vector, start, end);
285-
286-
if (start < end && !state->match(start + baseindex))
287-
return false;
288-
289-
++start;
282+
else {
283+
U index_c;
284+
BfIterator ndx_iterator{data, v_offset, ndx_width, ndx_width, start};
285+
while (start < end) {
286+
if (index_c(int64_t(*ndx_iterator), int64_t(v_start))) {
287+
if (!state->match(start + baseindex))
288+
return false;
289+
}
290+
ndx_iterator.move(++start);
291+
}
290292
}
291-
return true;
292-
}
293293

294-
template <typename Cond>
295-
inline bool FlexCompressor::run_parallel_subscan(size_t v_width, size_t v_range, size_t ndx_range)
296-
{
297-
if constexpr (std::is_same_v<Cond, Equal> || std::is_same_v<Cond, NotEqual>) {
298-
return v_width < 32 && v_range >= 20 && ndx_range >= 20;
299-
}
300-
// > and < need looks slower in parallel scan for large values
301-
return v_width <= 16 && v_range >= 20 && ndx_range >= 20;
294+
return true;
302295
}
303296

304297
} // namespace realm

src/realm/query_conditions.hpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,10 @@ struct Equal {
302302
{
303303
return (v == 0 && ubound == 0 && lbound == 0);
304304
}
305+
bool operator()(int64_t v1, int64_t v2) const
306+
{
307+
return v1 == v2;
308+
}
305309

306310
static std::string description()
307311
{
@@ -344,6 +348,10 @@ struct NotEqual {
344348
{
345349
return (v > ubound || v < lbound);
346350
}
351+
bool operator()(int64_t v1, int64_t v2) const
352+
{
353+
return v1 != v2;
354+
}
347355

348356
template <class A, class B, class C, class D>
349357
bool operator()(A, B, C, D) const = delete;
@@ -816,6 +824,10 @@ struct Greater {
816824
static_cast<void>(ubound);
817825
return lbound > v;
818826
}
827+
bool operator()(int64_t v1, int64_t v2) const
828+
{
829+
return v1 > v2;
830+
}
819831

820832
static std::string description()
821833
{
@@ -890,7 +902,6 @@ struct NotNull {
890902
}
891903
};
892904

893-
894905
struct Less {
895906
static const int avx = 0x11; // _CMP_LT_OQ
896907
template <class T>
@@ -907,21 +918,24 @@ struct Less {
907918
return Mixed::types_are_comparable(m1, m2) && (m1 < m2);
908919
}
909920

921+
bool operator()(int64_t v1, int64_t v2) const
922+
{
923+
return v1 < v2;
924+
}
925+
910926
template <class A, class B, class C, class D>
911927
bool operator()(A, B, C, D) const
912928
{
913929
REALM_ASSERT(false);
914930
return false;
915931
}
916932
static const int condition = cond_Less;
917-
bool can_match(int64_t v, int64_t lbound, int64_t ubound)
933+
bool can_match(int64_t v, int64_t lbound, int64_t)
918934
{
919-
static_cast<void>(ubound);
920935
return lbound < v;
921936
}
922-
bool will_match(int64_t v, int64_t lbound, int64_t ubound)
937+
bool will_match(int64_t v, int64_t, int64_t ubound)
923938
{
924-
static_cast<void>(lbound);
925939
return ubound < v;
926940
}
927941
static std::string description()
@@ -952,6 +966,10 @@ struct LessEqual : public HackClass {
952966
{
953967
return (m1.is_null() && m2.is_null()) || (Mixed::types_are_comparable(m1, m2) && (m1 <= m2));
954968
}
969+
bool operator()(int64_t v1, int64_t v2) const
970+
{
971+
return v1 <= v2;
972+
}
955973

956974
template <class A, class B, class C, class D>
957975
bool operator()(A, B, C, D) const
@@ -988,6 +1006,10 @@ struct GreaterEqual : public HackClass {
9881006
{
9891007
return (m1.is_null() && m2.is_null()) || (Mixed::types_are_comparable(m1, m2) && (m1 >= m2));
9901008
}
1009+
bool operator()(int64_t v1, int64_t v2) const
1010+
{
1011+
return v1 >= v2;
1012+
}
9911013

9921014
template <class A, class B, class C, class D>
9931015
bool operator()(A, B, C, D) const

0 commit comments

Comments
 (0)