Skip to content

Commit 2ae42a3

Browse files
Backport ClickHouse#83844 to 25.6: Support references to unknown tables in remote table functions
1 parent d68039d commit 2ae42a3

File tree

5 files changed

+107
-7
lines changed

5 files changed

+107
-7
lines changed

src/Analyzer/Resolve/QueryAnalyzer.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4892,7 +4892,26 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
48924892
table_function_node_to_resolve_typed->getArgumentsNode() = table_function_argument_function->getArgumentsNode();
48934893

48944894
QueryTreeNodePtr table_function_node_to_resolve = std::move(table_function_node_to_resolve_typed);
4895-
resolveTableFunction(table_function_node_to_resolve, scope, expressions_visitor, true /*nested_table_function*/);
4895+
if (table_function_argument_function_name == "view")
4896+
{
4897+
/// Subquery in view() table function can reference tables that don't exist on the initiator.
4898+
/// In the following example, the `users` table may not be available on the initiator:
4899+
/// SELECT *
4900+
/// FROM remoteSecure(<address>, view(
4901+
/// SELECT
4902+
/// t1.age,
4903+
/// t1.name,
4904+
/// t2.name
4905+
/// FROM users AS t1
4906+
/// INNER JOIN users AS t2 ON t1.uid = t2.uid
4907+
/// ), <user>, <password>)
4908+
/// SETTINGS prefer_localhost_replica = 0
4909+
skip_analysis_arguments_indexes.push_back(table_function_argument_index);
4910+
}
4911+
else
4912+
{
4913+
resolveTableFunction(table_function_node_to_resolve, scope, expressions_visitor, true /*nested_table_function*/);
4914+
}
48964915

48974916
result_table_function_arguments.push_back(std::move(table_function_node_to_resolve));
48984917
continue;

src/Storages/StorageDistributed.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -912,9 +912,28 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
912912
if (table_expression_modifiers)
913913
table_function_node->setTableExpressionModifiers(*table_expression_modifiers);
914914

915-
QueryAnalysisPass query_analysis_pass;
916-
QueryTreeNodePtr node = table_function_node;
917-
query_analysis_pass.run(node, query_context);
915+
/// Subquery in table function `view` may reference tables that don't exist on the initiator.
916+
if (table_function_node->getTableFunctionName() == "view")
917+
{
918+
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
919+
auto column_names_and_types = distributed_storage_snapshot->getColumns(get_column_options);
920+
921+
StorageID fake_storage_id = StorageID::createEmpty();
922+
if (auto * table_node = query_info.table_expression->as<TableNode>())
923+
fake_storage_id = table_node->getStorage()->getStorageID();
924+
else if (auto * original_table_function_node = query_info.table_expression->as<TableFunctionNode>())
925+
fake_storage_id = original_table_function_node->getStorage()->getStorageID();
926+
927+
auto storage = std::make_shared<StorageDummy>(fake_storage_id, ColumnsDescription{column_names_and_types});
928+
929+
table_function_node->resolve({}, std::move(storage), query_context, /*unresolved_arguments_indexes_=*/{ 0 });
930+
}
931+
else
932+
{
933+
QueryAnalysisPass query_analysis_pass;
934+
QueryTreeNodePtr node = table_function_node;
935+
query_analysis_pass.run(node, query_context);
936+
}
918937

919938
replacement_table_expression = std::move(table_function_node);
920939
}
@@ -976,9 +995,7 @@ void StorageDistributed::read(
976995

977996
if (settings[Setting::allow_experimental_analyzer])
978997
{
979-
StorageID remote_storage_id = StorageID::createEmpty();
980-
if (!remote_table_function_ptr)
981-
remote_storage_id = StorageID{remote_database, remote_table};
998+
StorageID remote_storage_id = StorageID{remote_database, remote_table};
982999

9831000
auto query_tree_distributed = buildQueryTreeDistributed(modified_query_info,
9841001
query_info.initial_storage_snapshot ? query_info.initial_storage_snapshot : storage_snapshot,

tests/integration/test_remote_function_view/__init__.py

Whitespace-only changes.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<clickhouse>
2+
<remote_servers>
3+
<cluster>
4+
<shard>
5+
<replica>
6+
<host>node1</host>
7+
<port>9000</port>
8+
</replica>
9+
<replica>
10+
<host>node2</host>
11+
<port>9000</port>
12+
</replica>
13+
</shard>
14+
</cluster>
15+
</remote_servers>
16+
</clickhouse>
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import pytest
2+
3+
from helpers.cluster import ClickHouseCluster
4+
5+
cluster = ClickHouseCluster(__file__)
6+
node1 = cluster.add_instance("node1", main_configs=["configs/clusters.xml"])
7+
node2 = cluster.add_instance("node2", main_configs=["configs/clusters.xml"])
8+
9+
10+
@pytest.fixture(scope="module")
11+
def start_cluster():
12+
try:
13+
cluster.start()
14+
15+
node2.query(
16+
"""
17+
CREATE TABLE test_table(
18+
APIKey UInt32,
19+
CustomAttributeId UInt64,
20+
ProfileIDHash UInt64,
21+
DeviceIDHash UInt64,
22+
Data String)
23+
ENGINE = SummingMergeTree()
24+
ORDER BY (APIKey, CustomAttributeId, ProfileIDHash, DeviceIDHash, intHash32(DeviceIDHash))
25+
"""
26+
)
27+
yield cluster
28+
29+
finally:
30+
cluster.shutdown()
31+
32+
33+
def test_remote(start_cluster):
34+
assert (
35+
node1.query(
36+
"SELECT 1 FROM remote('node2', view(SELECT * FROM default.test_table)) WHERE (APIKey = 137715) AND (CustomAttributeId IN (45, 66)) AND (ProfileIDHash != 0) LIMIT 1"
37+
)
38+
== ""
39+
)
40+
41+
42+
def test_remote_fail(start_cluster):
43+
assert (
44+
"Unknown table expression identifier 'default.table_not_exists'"
45+
in node1.query_and_get_error(
46+
"SELECT 1 FROM remote('node2', view(SELECT * FROM default.table_not_exists)) WHERE (APIKey = 137715) AND (CustomAttributeId IN (45, 66)) AND (ProfileIDHash != 0) LIMIT 1"
47+
)
48+
)

0 commit comments

Comments
 (0)