Skip to content

Commit 3db67e4

Browse files
authored
Merge pull request ClickHouse#88436 from ClickHouse/backport/25.8/87987
Backport ClickHouse#87987 to 25.8: Fix index analysis with session_timezone specified
2 parents 18203e8 + e3979ed commit 3db67e4

File tree

4 files changed

+178
-24
lines changed

4 files changed

+178
-24
lines changed

src/Storages/MergeTree/KeyCondition.cpp

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ namespace Setting
5353
{
5454
extern const SettingsBool analyze_index_with_space_filling_curves;
5555
extern const SettingsDateTimeOverflowBehavior date_time_overflow_behavior;
56+
extern const SettingsTimezone session_timezone;
5657
}
5758

5859
namespace ErrorCodes
@@ -1093,7 +1094,7 @@ bool applyFunctionChainToColumn(
10931094
}
10941095

10951096
// And cast it to the argument type of the first function in the chain
1096-
auto in_argument_type = getArgumentTypeOfMonotonicFunction(*functions[0]);
1097+
auto in_argument_type = removeLowCardinality(getArgumentTypeOfMonotonicFunction(*functions[0]));
10971098
if (canBeSafelyCast(result_type, in_argument_type))
10981099
{
10991100
result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type);
@@ -1122,13 +1123,13 @@ bool applyFunctionChainToColumn(
11221123
if (func->getArgumentTypes().empty())
11231124
return false;
11241125

1125-
auto argument_type = getArgumentTypeOfMonotonicFunction(*func);
1126+
auto argument_type = removeLowCardinality(getArgumentTypeOfMonotonicFunction(*func));
11261127
if (!canBeSafelyCast(result_type, argument_type))
11271128
return false;
11281129

11291130
result_column = castColumnAccurate({result_column, result_type, ""}, argument_type);
1130-
result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size(), /* dry_run = */ false);
1131-
result_type = func->getResultType();
1131+
result_type = removeLowCardinality(func->getResultType());
1132+
result_column = func->execute({{result_column, argument_type, ""}}, result_type, result_column->size(), /* dry_run = */ false);
11321133

11331134
// Transforming nullable columns to the nested ones, in case no nulls found
11341135
if (result_column->isNullable())
@@ -1141,7 +1142,7 @@ bool applyFunctionChainToColumn(
11411142
return false;
11421143
}
11431144
result_column = result_column_nullable.getNestedColumnPtr();
1144-
result_type = removeNullable(func->getResultType());
1145+
result_type = removeNullable(result_type);
11451146
}
11461147
}
11471148
out_column = result_column;
@@ -1903,48 +1904,57 @@ bool KeyCondition::extractMonotonicFunctionsChainFromKey(
19031904
auto func_name = func->function_base->getName();
19041905
auto func_base = func->function_base;
19051906

1906-
ColumnsWithTypeAndName arguments;
19071907
ColumnWithTypeAndName const_arg;
19081908
FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST;
19091909

19101910
if (date_time_parsing_functions.contains(func_name))
19111911
{
1912-
const auto & arg_types = func_base->getArgumentTypes();
1913-
if (!arg_types.empty() && isStringOrFixedString(arg_types[0]))
1914-
func_name = func_name + "OrNull";
1915-
}
1912+
const auto & func_arg_types = func_base->getArgumentTypes();
19161913

1917-
auto func_builder = FunctionFactory::instance().tryGet(func_name, context);
1914+
const bool has_string_argument = !func_arg_types.empty() && isStringOrFixedString(func_arg_types[0]);
1915+
const bool has_session_timezone = !context->getSettingsRef()[Setting::session_timezone].value.empty();
19181916

1919-
if (func->children.size() == 1)
1920-
{
1921-
arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""});
1917+
// Skipping analysis in case it requires parsing datetime from string
1918+
// with `session_timezone` specified
1919+
if (has_string_argument && has_session_timezone)
1920+
return false;
1921+
1922+
// Otherwise, in case when datetime parsing is required, rebuilding the function,
1923+
// to get its "-OrNull" version required for safe parsing, and not failing on
1924+
// values with incorrect format
1925+
if (has_string_argument)
1926+
{
1927+
ColumnsWithTypeAndName new_args;
1928+
for (const auto & type : func->function_base->getArgumentTypes())
1929+
new_args.push_back({nullptr, type, ""});
1930+
1931+
const auto func_builder = FunctionFactory::instance().tryGet(func_name + "OrNull", context);
1932+
func_base = func_builder->build(new_args);
1933+
}
19221934
}
1923-
else if (func->children.size() == 2)
1935+
1936+
// For single argument functions, the input may be used as-is; for binary functions,
1937+
// we'll produce a partially applied version of `func` with the reduced arity
1938+
if (func->children.size() == 2)
19241939
{
19251940
const auto * left = func->children[0];
19261941
const auto * right = func->children[1];
19271942
if (left->column && isColumnConst(*left->column))
19281943
{
19291944
const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""};
1930-
arguments.push_back(const_arg);
1931-
arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""});
19321945
kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST;
19331946
}
19341947
else
19351948
{
19361949
const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""};
1937-
arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""});
1938-
arguments.push_back(const_arg);
19391950
kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST;
19401951
}
19411952
}
19421953

1943-
auto out_func = func_builder->build(arguments);
19441954
if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST)
1945-
out_functions_chain.push_back(out_func);
1955+
out_functions_chain.push_back(func_base);
19461956
else
1947-
out_functions_chain.push_back(std::make_shared<FunctionWithOptionalConstArg>(out_func, const_arg, kind));
1957+
out_functions_chain.push_back(std::make_shared<FunctionWithOptionalConstArg>(func_base, const_arg, kind));
19481958
}
19491959

19501960
out_key_column_num = it->second;

tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
-- Tags: no-msan
1+
-- Tags: no-msan, long
22
-- msan: too slow
33

44
SELECT '-- Single partition by function';
@@ -234,7 +234,7 @@ SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50)
234234
UNION ALL
235235
SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50);
236236

237-
SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing';
237+
SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing', session_timezone = '';
238238
SYSTEM FLUSH LOGS query_log;
239239
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing';
240240
SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('not a date');
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
-- PK UTC timezone
2+
1
3+
Condition: (toStartOfDay(ts) in [1756857600, 1756857600])
4+
Parts: 1/1
5+
Granules: 1/1
6+
7+
-- PK EST timezone
8+
1
9+
Condition: (toStartOfDay(ts) in [1756857600, 1756857600])
10+
Parts: 1/1
11+
Granules: 1/1
12+
13+
-- Partitions UTC timezone
14+
1
15+
Condition: (ts in [1756882680, 1756882680])
16+
Parts: 1/1
17+
Granules: 1/1
18+
Condition: (toStartOfDay(ts) in [1756857600, 1756857600])
19+
Parts: 1/1
20+
Granules: 1/1
21+
22+
-- Partitions EST timezone
23+
1
24+
Condition: (ts in [1756882680, 1756882680])
25+
Parts: 1/1
26+
Granules: 1/1
27+
Condition: (toStartOfDay(ts) in [1756857600, 1756857600])
28+
Parts: 1/1
29+
Granules: 1/1
30+
31+
-- Partitions UTC timezone
32+
1
33+
Condition: true
34+
Parts: 1/1
35+
Granules: 1/1
36+
37+
-- Partitions EST timezone
38+
1
39+
Condition: true
40+
Parts: 1/1
41+
Granules: 1/1
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
SET session_timezone = 'UTC';
2+
-- For explain with indexes and key condition values verification
3+
SET parallel_replicas_local_plan = 1;
4+
5+
DROP TABLE IF EXISTS 03636_data_pk, 03636_data_partitions, 03636_data_parsed;
6+
7+
CREATE TABLE 03636_data_pk (ts DateTime) ENGINE = MergeTree ORDER BY toStartOfDay(ts)
8+
AS
9+
SELECT 1756882680;
10+
11+
SELECT '-- PK UTC timezone';
12+
13+
SELECT count() FROM 03636_data_pk WHERE ts = 1756882680;
14+
15+
SELECT trim(explain)
16+
FROM (
17+
EXPLAIN indexes = 1 SELECT count() FROM 03636_data_pk WHERE ts = 1756882680
18+
)
19+
WHERE trim(explain) ilike 'condition: %'
20+
OR trim(explain) ilike 'parts: %'
21+
OR trim(explain) ilike 'granules: %';
22+
23+
SELECT '';
24+
SELECT '-- PK EST timezone';
25+
26+
SELECT count() FROM 03636_data_pk WHERE ts = 1756882680 SETTINGS session_timezone = 'EST';
27+
28+
SELECT trim(explain)
29+
FROM (
30+
EXPLAIN indexes = 1 SELECT count() FROM 03636_data_pk WHERE ts = 1756882680
31+
)
32+
WHERE trim(explain) ilike 'condition: %'
33+
OR trim(explain) ilike 'parts: %'
34+
OR trim(explain) ilike 'granules: %'
35+
SETTINGS session_timezone = 'EST';
36+
37+
DROP TABLE 03636_data_pk;
38+
39+
CREATE TABLE 03636_data_partitions (ts DateTime) ENGINE = MergeTree ORDER BY tuple() PARTITION BY toStartOfDay(ts)
40+
AS
41+
SELECT 1756882680;
42+
43+
SELECT '';
44+
SELECT '-- Partitions UTC timezone';
45+
46+
SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680;
47+
48+
SELECT trim(explain)
49+
FROM (
50+
EXPLAIN indexes = 1 SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680
51+
)
52+
WHERE trim(explain) ilike 'condition: %'
53+
OR trim(explain) ilike 'parts: %'
54+
OR trim(explain) ilike 'granules: %';
55+
56+
SELECT '';
57+
SELECT '-- Partitions EST timezone';
58+
59+
SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680 SETTINGS session_timezone = 'EST';
60+
61+
SELECT trim(explain)
62+
FROM (
63+
EXPLAIN indexes = 1 SELECT count() FROM 03636_data_partitions WHERE ts = 1756882680
64+
)
65+
WHERE trim(explain) ilike 'condition: %'
66+
OR trim(explain) ilike 'parts: %'
67+
OR trim(explain) ilike 'granules: %'
68+
SETTINGS session_timezone = 'EST';
69+
70+
DROP TABLE 03636_data_partitions;
71+
72+
CREATE TABLE 03636_data_parsed (ts String) ENGINE = MergeTree ORDER BY toStartOfDay(toDateTime(ts))
73+
AS
74+
SELECT '2025-09-02 19:00:00';
75+
76+
SELECT '';
77+
SELECT '-- Partitions UTC timezone';
78+
79+
SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00';
80+
81+
SELECT trim(explain)
82+
FROM (
83+
EXPLAIN indexes = 1 SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00'
84+
)
85+
WHERE trim(explain) ilike 'condition: %'
86+
OR trim(explain) ilike 'parts: %'
87+
OR trim(explain) ilike 'granules: %';
88+
89+
SELECT '';
90+
SELECT '-- Partitions EST timezone';
91+
92+
SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00' SETTINGS session_timezone = 'EST';
93+
94+
SELECT trim(explain)
95+
FROM (
96+
EXPLAIN indexes = 1 SELECT count() FROM 03636_data_parsed WHERE ts = '2025-09-02 19:00:00'
97+
)
98+
WHERE trim(explain) ilike 'condition: %'
99+
OR trim(explain) ilike 'parts: %'
100+
OR trim(explain) ilike 'granules: %'
101+
SETTINGS session_timezone = 'EST';
102+
103+
DROP TABLE 03636_data_parsed;

0 commit comments

Comments
 (0)