Skip to content

Commit 9b6e23c

Browse files
committed
Fix exact count optimization for non-perfect string prefix
1 parent 5776b89 commit 9b6e23c

File tree

4 files changed

+53
-17
lines changed

4 files changed

+53
-17
lines changed

src/Storages/MergeTree/KeyCondition.cpp

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ extern const int BAD_TYPE_OF_FIELD;
6666
/// - (1) the pattern has a wildcard
6767
/// - (2) the first wildcard is '%' and is only followed by nothing or other '%'
6868
/// e.g. 'test%' or 'test%%' have the perfect prefix 'test'; 'test%x', 'test%_' or 'test_' have no perfect prefix.
69-
String extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix)
69+
std::tuple<String, bool> extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix)
7070
{
7171
String fixed_prefix;
7272
fixed_prefix.reserve(like_pattern.size());
@@ -79,27 +79,39 @@ String extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool req
7979
{
8080
case '%':
8181
case '_':
82+
{
83+
bool is_perfect_prefix = std::all_of(pos, end, [](auto c) { return c == '%'; });
8284
if (requires_perfect_prefix)
8385
{
84-
bool is_prefect_prefix = std::all_of(pos, end, [](auto c) { return c == '%'; });
85-
return is_prefect_prefix ? fixed_prefix : "";
86+
if (is_perfect_prefix)
87+
return {fixed_prefix, true};
88+
else
89+
return {"", false};
90+
}
91+
else
92+
{
93+
return {fixed_prefix, is_perfect_prefix};
8694
}
87-
return fixed_prefix;
95+
}
8896
case '\\':
97+
{
8998
++pos;
90-
if (pos == end)
91-
break;
92-
[[fallthrough]];
99+
if (pos == end)
100+
break;
101+
[[fallthrough]];
102+
}
93103
default:
104+
{
94105
fixed_prefix += *pos;
106+
}
95107
}
96108

97109
++pos;
98110
}
99111
/// If we can reach this code, it means there was no wildcard found in the pattern, so it is not a perfect prefix
100112
if (requires_perfect_prefix)
101-
return "";
102-
return fixed_prefix;
113+
return {"", false};
114+
return {fixed_prefix, false};
103115
}
104116

105117
/// for "^prefix..." string it returns "prefix"
@@ -361,10 +373,13 @@ const KeyCondition::AtomMap KeyCondition::atom_map
361373
if (value.getType() != Field::Types::String)
362374
return false;
363375

364-
String prefix = extractFixedPrefixFromLikePattern(value.safeGet<String>(), /*requires_perfect_prefix*/ false);
376+
auto [prefix, is_perfect] = extractFixedPrefixFromLikePattern(value.safeGet<String>(), /*requires_perfect_prefix*/ false);
365377
if (prefix.empty())
366378
return false;
367379

380+
if (!is_perfect)
381+
out.relaxed = true;
382+
368383
String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
369384

370385
out.function = RPNElement::FUNCTION_IN_RANGE;
@@ -382,10 +397,12 @@ const KeyCondition::AtomMap KeyCondition::atom_map
382397
if (value.getType() != Field::Types::String)
383398
return false;
384399

385-
String prefix = extractFixedPrefixFromLikePattern(value.safeGet<String>(), /*requires_perfect_prefix*/ true);
400+
auto [prefix, is_perfect] = extractFixedPrefixFromLikePattern(value.safeGet<String>(), /*requires_perfect_prefix*/ true);
386401
if (prefix.empty())
387402
return false;
388403

404+
chassert(is_perfect);
405+
389406
String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
390407

391408
out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
@@ -441,6 +458,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
441458
out.range = !right_bound.empty()
442459
? Range(prefix, true, right_bound, false)
443460
: Range::createLeftBounded(prefix, true);
461+
out.relaxed = true;
444462

445463
return true;
446464
}
@@ -477,7 +495,6 @@ const KeyCondition::AtomMap KeyCondition::atom_map
477495
}
478496
};
479497

480-
static const std::set<std::string_view> always_relaxed_atom_functions = {"match"};
481498
static const std::set<KeyCondition::RPNElement::Function> always_relaxed_atom_elements
482499
= {KeyCondition::RPNElement::FUNCTION_UNKNOWN, KeyCondition::RPNElement::FUNCTION_ARGS_IN_HYPERRECTANGLE, KeyCondition::RPNElement::FUNCTION_POINT_IN_POLYGON};
483500

@@ -2073,9 +2090,6 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
20732090
return atom_it->second(out, const_value);
20742091
};
20752092

2076-
if (always_relaxed_atom_functions.contains(func_name))
2077-
relaxed = true;
2078-
20792093
bool allow_constant_transformation = !no_relaxed_atom_functions.contains(func_name);
20802094
if (num_args == 1)
20812095
{
@@ -2291,7 +2305,10 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
22912305
out.monotonic_functions_chain = std::move(chain);
22922306
out.argument_num_of_space_filling_curve = argument_num_of_space_filling_curve;
22932307

2294-
return atom_it->second(out, const_value);
2308+
bool valid_atom = atom_it->second(out, const_value);
2309+
if (valid_atom && out.relaxed)
2310+
relaxed = true;
2311+
return valid_atom;
22952312
}
22962313
if (node.tryGetConstant(const_value, const_type))
22972314
{

src/Storages/MergeTree/KeyCondition.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ class KeyCondition
218218

219219
Function function = FUNCTION_UNKNOWN;
220220

221+
/// Whether to relax the key condition (e.g., for LIKE queries without a perfect prefix).
222+
bool relaxed = false;
223+
221224
/// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE.
222225
Range range = Range::createWholeUniverse();
223226
size_t key_column = 0;
@@ -481,6 +484,6 @@ class KeyCondition
481484
bool relaxed = false;
482485
};
483486

484-
String extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix);
487+
std::tuple<String, bool> extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix);
485488

486489
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- { echo ON }
2+
3+
DROP TABLE IF EXISTS t;
4+
CREATE TABLE t(k String) ORDER BY k as select 'dst_'||number from numbers(1e6);
5+
SELECT count(*) FROM t WHERE k LIKE 'dst_kkkk_1111%';
6+
0
7+
DROP TABLE t;
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- { echo ON }
2+
3+
DROP TABLE IF EXISTS t;
4+
5+
CREATE TABLE t(k String) ORDER BY k as select 'dst_'||number from numbers(1e6);
6+
7+
SELECT count(*) FROM t WHERE k LIKE 'dst_kkkk_1111%';
8+
9+
DROP TABLE t;

0 commit comments

Comments
 (0)