Skip to content

Commit 81d263c

Browse files
authored
Merge pull request ClickHouse#79124 from ClickHouse/ahmadov/avoid-index-condition-rpn-eval-duplication
avoid duplication in the MergeTreeIndexCondition::alwaysUnknownOrTrue impls
2 parents 60579e2 + f9b6ccc commit 81d263c

File tree

5 files changed

+150
-134
lines changed

5 files changed

+150
-134
lines changed

src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp

Lines changed: 9 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -205,49 +205,15 @@ MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter(
205205

206206
bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const
207207
{
208-
std::vector<bool> rpn_stack;
209-
210-
for (const auto & element : rpn)
211-
{
212-
if (element.function == RPNElement::FUNCTION_UNKNOWN
213-
|| element.function == RPNElement::ALWAYS_TRUE)
214-
{
215-
rpn_stack.push_back(true);
216-
}
217-
else if (element.function == RPNElement::FUNCTION_EQUALS
218-
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
219-
|| element.function == RPNElement::FUNCTION_HAS
220-
|| element.function == RPNElement::FUNCTION_HAS_ANY
221-
|| element.function == RPNElement::FUNCTION_HAS_ALL
222-
|| element.function == RPNElement::FUNCTION_IN
223-
|| element.function == RPNElement::FUNCTION_NOT_IN
224-
|| element.function == RPNElement::ALWAYS_FALSE)
225-
{
226-
rpn_stack.push_back(false);
227-
}
228-
else if (element.function == RPNElement::FUNCTION_NOT)
229-
{
230-
// do nothing
231-
}
232-
else if (element.function == RPNElement::FUNCTION_AND)
233-
{
234-
auto arg1 = rpn_stack.back();
235-
rpn_stack.pop_back();
236-
auto arg2 = rpn_stack.back();
237-
rpn_stack.back() = arg1 && arg2;
238-
}
239-
else if (element.function == RPNElement::FUNCTION_OR)
240-
{
241-
auto arg1 = rpn_stack.back();
242-
rpn_stack.pop_back();
243-
auto arg2 = rpn_stack.back();
244-
rpn_stack.back() = arg1 || arg2;
245-
}
246-
else
247-
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement");
248-
}
249-
250-
return rpn_stack[0];
208+
return rpnEvaluatesAlwaysUnknownOrTrue(
209+
rpn,
210+
{RPNElement::FUNCTION_EQUALS,
211+
RPNElement::FUNCTION_NOT_EQUALS,
212+
RPNElement::FUNCTION_HAS,
213+
RPNElement::FUNCTION_HAS_ANY,
214+
RPNElement::FUNCTION_HAS_ALL,
215+
RPNElement::FUNCTION_IN,
216+
RPNElement::FUNCTION_NOT_IN});
251217
}
252218

253219
bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const

src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp

Lines changed: 12 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -171,52 +171,18 @@ MergeTreeConditionBloomFilterText::MergeTreeConditionBloomFilterText(
171171
/// Keep in-sync with MergeTreeIndexConditionGin::alwaysUnknownOrTrue
172172
bool MergeTreeConditionBloomFilterText::alwaysUnknownOrTrue() const
173173
{
174-
/// Check like in KeyCondition.
175-
std::vector<bool> rpn_stack;
176-
177-
for (const auto & element : rpn)
178-
{
179-
if (element.function == RPNElement::FUNCTION_UNKNOWN
180-
|| element.function == RPNElement::ALWAYS_TRUE)
181-
{
182-
rpn_stack.push_back(true);
183-
}
184-
else if (element.function == RPNElement::FUNCTION_EQUALS
185-
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
186-
|| element.function == RPNElement::FUNCTION_HAS
187-
|| element.function == RPNElement::FUNCTION_IN
188-
|| element.function == RPNElement::FUNCTION_NOT_IN
189-
|| element.function == RPNElement::FUNCTION_MULTI_SEARCH
190-
|| element.function == RPNElement::FUNCTION_MATCH
191-
|| element.function == RPNElement::FUNCTION_HAS_ANY
192-
|| element.function == RPNElement::FUNCTION_HAS_ALL
193-
|| element.function == RPNElement::ALWAYS_FALSE)
194-
{
195-
rpn_stack.push_back(false);
196-
}
197-
else if (element.function == RPNElement::FUNCTION_NOT)
198-
{
199-
// do nothing
200-
}
201-
else if (element.function == RPNElement::FUNCTION_AND)
202-
{
203-
auto arg1 = rpn_stack.back();
204-
rpn_stack.pop_back();
205-
auto arg2 = rpn_stack.back();
206-
rpn_stack.back() = arg1 && arg2;
207-
}
208-
else if (element.function == RPNElement::FUNCTION_OR)
209-
{
210-
auto arg1 = rpn_stack.back();
211-
rpn_stack.pop_back();
212-
auto arg2 = rpn_stack.back();
213-
rpn_stack.back() = arg1 || arg2;
214-
}
215-
else
216-
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement");
217-
}
218-
219-
return rpn_stack[0];
174+
return rpnEvaluatesAlwaysUnknownOrTrue(
175+
rpn,
176+
{RPNElement::FUNCTION_EQUALS,
177+
RPNElement::FUNCTION_NOT_EQUALS,
178+
RPNElement::FUNCTION_HAS,
179+
RPNElement::FUNCTION_IN,
180+
RPNElement::FUNCTION_NOT_IN,
181+
RPNElement::FUNCTION_MULTI_SEARCH,
182+
RPNElement::FUNCTION_MATCH,
183+
RPNElement::FUNCTION_HAS_ANY,
184+
RPNElement::FUNCTION_HAS_ALL,
185+
RPNElement::ALWAYS_FALSE});
220186
}
221187

222188
/// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnGranuleInPart

src/Storages/MergeTree/MergeTreeIndexGin.cpp

Lines changed: 9 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -216,50 +216,15 @@ MergeTreeIndexConditionGin::MergeTreeIndexConditionGin(
216216
/// Keep in-sync with MergeTreeConditionBloomFilterText::alwaysUnknownOrTrue
217217
bool MergeTreeIndexConditionGin::alwaysUnknownOrTrue() const
218218
{
219-
/// Check like in KeyCondition.
220-
std::vector<bool> rpn_stack;
221-
222-
for (const auto & element : rpn)
223-
{
224-
if (element.function == RPNElement::FUNCTION_UNKNOWN
225-
|| element.function == RPNElement::ALWAYS_TRUE)
226-
{
227-
rpn_stack.push_back(true);
228-
}
229-
else if (element.function == RPNElement::FUNCTION_EQUALS
230-
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
231-
|| element.function == RPNElement::FUNCTION_HAS
232-
|| element.function == RPNElement::FUNCTION_IN
233-
|| element.function == RPNElement::FUNCTION_NOT_IN
234-
|| element.function == RPNElement::FUNCTION_MULTI_SEARCH
235-
|| element.function == RPNElement::FUNCTION_MATCH
236-
|| element.function == RPNElement::ALWAYS_FALSE)
237-
{
238-
rpn_stack.push_back(false);
239-
}
240-
else if (element.function == RPNElement::FUNCTION_NOT)
241-
{
242-
// do nothing
243-
}
244-
else if (element.function == RPNElement::FUNCTION_AND)
245-
{
246-
auto arg1 = rpn_stack.back();
247-
rpn_stack.pop_back();
248-
auto arg2 = rpn_stack.back();
249-
rpn_stack.back() = arg1 && arg2;
250-
}
251-
else if (element.function == RPNElement::FUNCTION_OR)
252-
{
253-
auto arg1 = rpn_stack.back();
254-
rpn_stack.pop_back();
255-
auto arg2 = rpn_stack.back();
256-
rpn_stack.back() = arg1 || arg2;
257-
}
258-
else
259-
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement");
260-
}
261-
262-
return rpn_stack[0];
219+
return rpnEvaluatesAlwaysUnknownOrTrue(
220+
rpn,
221+
{RPNElement::FUNCTION_EQUALS,
222+
RPNElement::FUNCTION_NOT_EQUALS,
223+
RPNElement::FUNCTION_HAS,
224+
RPNElement::FUNCTION_IN,
225+
RPNElement::FUNCTION_NOT_IN,
226+
RPNElement::FUNCTION_MULTI_SEARCH,
227+
RPNElement::FUNCTION_MATCH});
263228
}
264229

265230
bool MergeTreeIndexConditionGin::mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule,[[maybe_unused]] PostingsCacheForStore & cache_store) const

src/Storages/MergeTree/MergeTreeIndexMinMax.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,17 @@ MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax(
177177

178178
bool MergeTreeIndexConditionMinMax::alwaysUnknownOrTrue() const
179179
{
180-
return condition.alwaysUnknownOrTrue();
180+
return rpnEvaluatesAlwaysUnknownOrTrue(
181+
condition.getRPN(),
182+
{KeyCondition::RPNElement::FUNCTION_NOT_IN_RANGE,
183+
KeyCondition::RPNElement::FUNCTION_IN_RANGE,
184+
KeyCondition::RPNElement::FUNCTION_IN_SET,
185+
KeyCondition::RPNElement::FUNCTION_NOT_IN_SET,
186+
KeyCondition::RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE,
187+
KeyCondition::RPNElement::FUNCTION_POINT_IN_POLYGON,
188+
KeyCondition::RPNElement::FUNCTION_IS_NULL,
189+
KeyCondition::RPNElement::FUNCTION_IS_NOT_NULL,
190+
KeyCondition::RPNElement::ALWAYS_FALSE});
181191
}
182192

183193
bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const

src/Storages/MergeTree/MergeTreeIndices.h

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,60 @@ constexpr auto INDEX_FILE_PREFIX = "skp_idx_";
1414
namespace DB
1515
{
1616

17+
namespace Internal
18+
{
19+
20+
enum class RPNEvaluationIndexUsefulnessState : uint8_t
21+
{
22+
// the following states indicate if the index might be useful
23+
TRUE,
24+
FALSE,
25+
// the following states indicate RPN always evaluates to TRUE or FALSE, they are used for short-circuit.
26+
ALWAYS_TRUE,
27+
ALWAYS_FALSE
28+
};
29+
30+
[[nodiscard]] inline RPNEvaluationIndexUsefulnessState
31+
evalAndRpnIndexStates(RPNEvaluationIndexUsefulnessState lhs, RPNEvaluationIndexUsefulnessState rhs)
32+
{
33+
if (lhs == RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE || rhs == RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE)
34+
{
35+
// short circuit
36+
return RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE;
37+
}
38+
else if (lhs == RPNEvaluationIndexUsefulnessState::TRUE || rhs == RPNEvaluationIndexUsefulnessState::TRUE)
39+
{
40+
return RPNEvaluationIndexUsefulnessState::TRUE;
41+
}
42+
else if (lhs == RPNEvaluationIndexUsefulnessState::FALSE || rhs == RPNEvaluationIndexUsefulnessState::FALSE)
43+
{
44+
return RPNEvaluationIndexUsefulnessState::FALSE;
45+
}
46+
chassert(lhs == RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE && rhs == RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE);
47+
return RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE;
48+
}
49+
50+
[[nodiscard]] inline RPNEvaluationIndexUsefulnessState
51+
evalOrRpnIndexStates(RPNEvaluationIndexUsefulnessState lhs, RPNEvaluationIndexUsefulnessState rhs)
52+
{
53+
if (lhs == RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE || rhs == RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE)
54+
{
55+
// short circuit
56+
return RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE;
57+
}
58+
else if (lhs == RPNEvaluationIndexUsefulnessState::TRUE || rhs == RPNEvaluationIndexUsefulnessState::TRUE)
59+
{
60+
return RPNEvaluationIndexUsefulnessState::TRUE;
61+
}
62+
else if (lhs == RPNEvaluationIndexUsefulnessState::FALSE || rhs == RPNEvaluationIndexUsefulnessState::FALSE)
63+
{
64+
return RPNEvaluationIndexUsefulnessState::FALSE;
65+
}
66+
chassert(lhs == RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE && rhs == RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE);
67+
return RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE;
68+
}
69+
}
70+
1771
class ActionsDAG;
1872
class Block;
1973
class IDataPartStorage;
@@ -134,6 +188,61 @@ class IMergeTreeIndexCondition
134188
{
135189
throw Exception(ErrorCodes::LOGICAL_ERROR, "calculateApproximateNearestNeighbors is not implemented for non-vector-similarity indexes");
136190
}
191+
192+
template <typename RPNElement>
193+
bool rpnEvaluatesAlwaysUnknownOrTrue(
194+
const std::vector<RPNElement> & rpn, const std::unordered_set<typename RPNElement::Function> & matchingFunctions) const
195+
{
196+
std::vector<Internal::RPNEvaluationIndexUsefulnessState> rpn_stack;
197+
rpn_stack.reserve(rpn.size() - 1);
198+
199+
for (const auto & element : rpn)
200+
{
201+
if (element.function == RPNElement::ALWAYS_TRUE)
202+
{
203+
rpn_stack.emplace_back(Internal::RPNEvaluationIndexUsefulnessState::ALWAYS_TRUE);
204+
}
205+
else if (element.function == RPNElement::ALWAYS_FALSE)
206+
{
207+
rpn_stack.emplace_back(Internal::RPNEvaluationIndexUsefulnessState::ALWAYS_FALSE);
208+
}
209+
else if (element.function == RPNElement::FUNCTION_UNKNOWN)
210+
{
211+
rpn_stack.emplace_back(Internal::RPNEvaluationIndexUsefulnessState::FALSE);
212+
}
213+
else if (matchingFunctions.contains(element.function))
214+
{
215+
rpn_stack.push_back(Internal::RPNEvaluationIndexUsefulnessState::TRUE);
216+
}
217+
else if (element.function == RPNElement::FUNCTION_NOT)
218+
{
219+
// do nothing
220+
}
221+
else if (element.function == RPNElement::FUNCTION_AND)
222+
{
223+
auto lhs = rpn_stack.back();
224+
rpn_stack.pop_back();
225+
auto rhs = rpn_stack.back();
226+
rpn_stack.back() = evalAndRpnIndexStates(lhs, rhs);
227+
}
228+
else if (element.function == RPNElement::FUNCTION_OR)
229+
{
230+
auto lhs = rpn_stack.back();
231+
rpn_stack.pop_back();
232+
auto rhs = rpn_stack.back();
233+
rpn_stack.back() = evalOrRpnIndexStates(lhs, rhs);
234+
}
235+
else
236+
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in RPNElement");
237+
}
238+
239+
chassert(rpn_stack.size() == 1);
240+
/*
241+
* In case the result is `ALWAYS_TRUE`, it means we don't need any indices at all, it might be a constant result.
242+
* Thus, we only check against the `TRUE` to determine the usefulness of the index condition.
243+
*/
244+
return rpn_stack.front() != Internal::RPNEvaluationIndexUsefulnessState::TRUE;
245+
}
137246
};
138247

139248
using MergeTreeIndexConditionPtr = std::shared_ptr<IMergeTreeIndexCondition>;

0 commit comments

Comments
 (0)