@@ -50,18 +50,7 @@ class FlexCompressor {
5050
5151private:
5252 static bool find_all_match (size_t , size_t , size_t , QueryStateBase*);
53-
54- template <typename Cond>
55- static bool find_linear (const Array&, int64_t , size_t , size_t , size_t , QueryStateBase*);
56-
57- template <typename VectorCond1, typename VectorCond2>
58- static bool find_parallel (const Array&, int64_t , size_t , size_t , size_t , QueryStateBase*);
59-
60- template <typename LinearCond, typename VectorCond1, typename VectorCond2>
61- static bool do_find_all (const Array&, int64_t , size_t , size_t , size_t , QueryStateBase*);
62-
63- template <typename Cond>
64- static bool run_parallel_subscan (size_t , size_t , size_t );
53+ static size_t lower_bound (size_t , int64_t , uint64_t , BfIterator&) noexcept ;
6554};
6655
6756inline int64_t FlexCompressor::get (const IntegerCompressor& c, size_t ndx)
@@ -164,17 +153,32 @@ inline void FlexCompressor::set_direct(const IntegerCompressor& c, size_t ndx, i
164153 data_iterator.set_value (value);
165154}
166155
156+ template <typename T> // Trait used by find_all: maps the query condition T to the condition applied when scanning the indexes.
157+ class IndexCond {
158+ public:
159+ using type = T; // Default: the index scan uses the query condition unchanged.
160+ };
161+
162+ template <> // Greater is the one remapped case: find_all searches the values for value + 1, so indexes are compared with GreaterEqual.
163+ class IndexCond <Greater> {
164+ public:
165+ using type = GreaterEqual; // Index positions at or after the found value position are the matches.
166+ };
167+
167168template <typename Cond> // Cond is the query condition; the visible branches special-case Equal, NotEqual, Less and Greater.
168169inline bool FlexCompressor::find_all (const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex, // Calls state->match(pos + baseindex) for each matching pos in [start, end).
169170 QueryStateBase* state) // Returns false as soon as state->match() returns false, otherwise true (or the result of find_all_match on the all-match paths).
170171{
172+ static constexpr size_t RANGE_LIMIT = 20 ; // Sections shorter than this are scanned linearly instead of with parallel_subword_find/lower_bound.
173+ static constexpr size_t WIDTH_LIMIT = 16 ; // Value widths up to this use parallel_subword_find; wider values use lower_bound.
174+
171175 REALM_ASSERT_DEBUG (start <= arr.m_size && (end <= arr.m_size || end == size_t (-1 )) && start <= end);
172176 Cond c;
173177
174178 if (end == npos) // npos means "up to the end of the array".
175179 end = arr.m_size ;
176180
177- if (!( arr.m_size > start && start < end) )
181+ if (start >= arr.m_size || start >= end) // Empty range: nothing to report.
178182 return true ;
179183
180184 const auto lbound = arr.m_lbound ;
@@ -189,116 +193,105 @@ inline bool FlexCompressor::find_all(const Array& arr, int64_t value, size_t sta
189193
190194 REALM_ASSERT_DEBUG (arr.m_width != 0 );
191195
192- if constexpr (std::is_same_v<Equal, Cond>) {
193- return do_find_all<Equal, Equal, Equal>(arr, value, start, end, baseindex, state);
196+ const auto & compressor = arr.integer_compressor ();
197+ const auto v_width = arr.m_width ;
198+ const auto v_size = compressor.v_size ();
199+ const auto mask = compressor.v_mask ();
200+ uint64_t * data = (uint64_t *)arr.m_data ;
201+ size_t v_start = realm::not_found; // Set below to the position of the first stored value >= modified_value; v_size when there is none (as the linear scan computes it).
202+
203+ /* *************** Search the values ****************/
204+
205+ int64_t modified_value = value; // NOTE(review): the search below presumes the value section is sorted ascending - confirm against the code that writes it.
206+ if constexpr (std::is_same_v<Cond, Greater>) {
207+ modified_value++; // We use GreaterEqual below, so this will effectively be Greater. NOTE(review): overflows if value == INT64_MAX unless an earlier check (not visible in this hunk) has already returned - confirm.
194208 }
195- else if constexpr (std::is_same_v<NotEqual, Cond>) {
196- return do_find_all<NotEqual, Equal, NotEqual>(arr, value, start, end, baseindex, state);
209+
210+ if (v_size >= RANGE_LIMIT) { // Enough values to make a non-linear search worthwhile.
211+ if (v_width <= WIDTH_LIMIT) { // Narrow values: parallel subword scan for the first value >= modified_value.
212+ auto search_vector = populate (v_width, modified_value);
213+ v_start = parallel_subword_find (find_all_fields<GreaterEqual>, data, 0 , v_width, compressor.msb (),
214+ search_vector, 0 , v_size);
215+ }
216+ else { // Wide values: use lower_bound (its definition is not visible here; presumably a binary search - confirm).
217+ BfIterator data_iterator{data, 0 , v_width, v_width, 0 };
218+ v_start = lower_bound (v_size, modified_value, mask, data_iterator);
219+ }
197220 }
198- else if constexpr (std::is_same_v<Less, Cond>) {
199- return do_find_all<Less, GreaterEqual, Less>(arr, value, start, end, baseindex, state);
221+ else { // Few values: plain linear scan for the first value >= modified_value.
222+ BfIterator data_iterator{data, 0 , v_width, v_width, 0 };
223+ size_t idx = 0 ;
224+ while (idx < v_size) {
225+ if (sign_extend_field_by_mask (mask, *data_iterator) >= modified_value) {
226+ break ;
227+ }
228+ data_iterator.move (++idx);
229+ }
230+ v_start = idx; // Equals v_size when the loop ran off the end without a hit.
200231 }
201- else if constexpr (std::is_same_v<Greater, Cond>) {
202- return do_find_all<Greater, Greater, GreaterEqual>(arr, value, start, end, baseindex, state);
232+
233+ if constexpr (realm::is_any_v<Cond, Equal, NotEqual>) {
234+ // Check for equality: v_start is the first value >= value, so value is stored only if that value is not greater.
235+ if (v_start < v_size) {
236+ BfIterator it{data, 0 , v_width, v_width, v_start};
237+ if (sign_extend_field_by_mask (mask, *it) > value) {
238+ v_start = v_size; // Mark as not found
239+ }
240+ }
203241 }
204- return true ;
205- }
206242
207- template <typename LinearCond, typename VectorCond1, typename VectorCond2>
208- inline bool FlexCompressor::do_find_all (const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
209- QueryStateBase* state)
210- {
211- const auto v_width = arr.m_width ;
212- const auto v_range = arr.integer_compressor ().v_size ();
213- const auto ndx_range = end - start;
214- if (!run_parallel_subscan<LinearCond>(v_width, v_range, ndx_range))
215- return find_linear<LinearCond>(arr, value, start, end, baseindex, state);
216- return find_parallel<VectorCond1, VectorCond2>(arr, value, start, end, baseindex, state);
217- }
243+ /* **************** Some early outs *****************/
218244
219- template <typename Cond>
220- inline bool FlexCompressor::find_linear (const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
221- QueryStateBase* state)
222- {
223- const auto cmp = [](int64_t item, int64_t key) {
224- if constexpr (std::is_same_v<Cond, Equal>)
225- return item == key;
226- if constexpr (std::is_same_v<Cond, NotEqual>)
227- return item != key;
228- if constexpr (std::is_same_v<Cond, Less>)
229- return item < key;
230- if constexpr (std::is_same_v<Cond, Greater>)
231- return item > key;
232- REALM_UNREACHABLE ();
233- };
234-
235- const auto & c = arr.integer_compressor ();
236- const auto offset = c.v_width () * c.v_size ();
237- const auto ndx_w = c.ndx_width ();
238- const auto v_w = c.v_width ();
239- const auto data = c.data ();
240- const auto mask = c.v_mask ();
241- BfIterator ndx_iterator{data, offset, ndx_w, ndx_w, start};
242- BfIterator data_iterator{data, 0 , v_w, v_w, static_cast <size_t >(*ndx_iterator)};
243- while (start < end) {
244- const auto sv = sign_extend_field_by_mask (mask, *data_iterator);
245- if (cmp (sv, value) && !state->match (start + baseindex))
246- return false ;
247- ndx_iterator.move (++start);
248- data_iterator.move (static_cast <size_t >(*ndx_iterator));
245+ if (v_start == v_size) { // No stored value qualified in the search above (or, for Equal/NotEqual, value itself is not stored).
246+ if constexpr (realm::is_any_v<Cond, Equal, Greater>) {
247+ return true ; // No Matches
248+ }
249+ if constexpr (realm::is_any_v<Cond, NotEqual, Less>) {
250+ return find_all_match (start, end, baseindex, state); // All matches
251+ }
252+ }
253+ else if (v_start == 0 ) { // The very first stored value already qualifies.
254+ if constexpr (std::is_same_v<Cond, Less>) {
255+ // No index is less than 0
256+ return true ; // No Matches
257+ }
258+ if constexpr (std::is_same_v<Cond, Greater>) {
259+ // Every index is greater than or equal to 0
260+ return find_all_match (start, end, baseindex, state);
261+ }
249262 }
250- return true ;
251- }
252263
253- template <typename VectorCond1, typename VectorCond2>
254- inline bool FlexCompressor::find_parallel (const Array& arr, int64_t value, size_t start, size_t end, size_t baseindex,
255- QueryStateBase* state)
256- {
257- //
258- // algorithm idea: first try to find in the array of values (should be shorter in size but more bits) using
259- // VectorCond1.
260- // Then match the index found in the array of indices using VectorCond2
261- //
264+ /* ************** Search the indexes ****************/
262265
263- const auto & compressor = arr.integer_compressor ();
264- const auto v_width = compressor.v_width ();
265- const auto v_size = compressor.v_size ();
266+ using U = typename IndexCond<Cond>::type; // Condition applied to the indexes: Cond itself, or GreaterEqual when Cond is Greater.
267+ const auto ndx_range = end - start;
266268 const auto ndx_width = compressor.ndx_width ();
267- const auto offset = v_size * v_width;
268- uint64_t * data = (uint64_t *)arr.m_data ;
269-
270- auto MSBs = compressor.msb ();
271- auto search_vector = populate (v_width, value);
272- auto v_start =
273- parallel_subword_find (find_all_fields<VectorCond1>, data, 0 , v_width, MSBs, search_vector, 0 , v_size);
274-
275- if constexpr (!std::is_same_v<VectorCond2, NotEqual>) {
276- if (start == v_size)
277- return true ;
269+ const auto v_offset = v_size * v_width; // The indexes are read starting at this offset, right after the value section.
270+ if (ndx_range >= RANGE_LIMIT) { // Long range: parallel subword scan over the indexes, comparing each against v_start.
271+ auto search_vector = populate (ndx_width, v_start);
272+ while (start < end) {
273+ start = parallel_subword_find (find_all_fields_unsigned<U>, data, v_offset, ndx_width,
274+ compressor.ndx_msb (), search_vector, start, end);
275+ if (start < end) { // A hit inside the range; a result >= end means no further hit.
276+ if (!state->match (start + baseindex))
277+ return false ;
278+ }
279+ ++start; // Resume the scan after the position just reported.
280+ }
278281 }
279-
280- MSBs = compressor.ndx_msb ();
281- search_vector = populate (ndx_width, v_start);
282- while (start < end) {
283- start = parallel_subword_find (find_all_fields_unsigned<VectorCond2>, data, offset, ndx_width, MSBs,
284- search_vector, start, end);
285-
286- if (start < end && !state->match (start + baseindex))
287- return false ;
288-
289- ++start;
282+ else { // Short range: test each index individually with U.
283+ U index_c;
284+ BfIterator ndx_iterator{data, v_offset, ndx_width, ndx_width, start};
285+ while (start < end) {
286+ if (index_c (int64_t (*ndx_iterator), int64_t (v_start))) {
287+ if (!state->match (start + baseindex))
288+ return false ;
289+ }
290+ ndx_iterator.move (++start);
291+ }
290292 }
291- return true ;
292- }
293293
294- template <typename Cond>
295- inline bool FlexCompressor::run_parallel_subscan (size_t v_width, size_t v_range, size_t ndx_range)
296- {
297- if constexpr (std::is_same_v<Cond, Equal> || std::is_same_v<Cond, NotEqual>) {
298- return v_width < 32 && v_range >= 20 && ndx_range >= 20 ;
299- }
300- // > and < need looks slower in parallel scan for large values
301- return v_width <= 16 && v_range >= 20 && ndx_range >= 20 ;
294+ return true ;
302295}
303296
304297} // namespace realm
0 commit comments