Skip to content

Commit 94047f5

Browse files
robot-clickhouse ianton-ru
authored and committed
Backport ClickHouse#83844 to 24.8: Support references to unknown tables in remote table functions
1 parent 9a17638 commit 94047f5

File tree

5 files changed

+107
-7
lines changed

5 files changed

+107
-7
lines changed

src/Analyzer/Resolve/QueryAnalyzer.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4629,7 +4629,26 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
46294629
table_function_node_to_resolve_typed->getArgumentsNode() = table_function_argument_function->getArgumentsNode();
46304630

46314631
QueryTreeNodePtr table_function_node_to_resolve = std::move(table_function_node_to_resolve_typed);
4632-
resolveTableFunction(table_function_node_to_resolve, scope, expressions_visitor, true /*nested_table_function*/);
4632+
if (table_function_argument_function_name == "view")
4633+
{
4634+
/// Subquery in view() table function can reference tables that don't exist on the initiator.
4635+
/// In the following example, the `users` table may not be available on the initiator:
4636+
/// SELECT *
4637+
/// FROM remoteSecure(<address>, view(
4638+
/// SELECT
4639+
/// t1.age,
4640+
/// t1.name,
4641+
/// t2.name
4642+
/// FROM users AS t1
4643+
/// INNER JOIN users AS t2 ON t1.uid = t2.uid
4644+
/// ), <user>, <password>)
4645+
/// SETTINGS prefer_localhost_replica = 0
4646+
skip_analysis_arguments_indexes.push_back(table_function_argument_index);
4647+
}
4648+
else
4649+
{
4650+
resolveTableFunction(table_function_node_to_resolve, scope, expressions_visitor, true /*nested_table_function*/);
4651+
}
46334652

46344653
result_table_function_arguments.push_back(std::move(table_function_node_to_resolve));
46354654
continue;

src/Storages/StorageDistributed.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -759,9 +759,28 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
759759
if (table_expression_modifiers)
760760
table_function_node->setTableExpressionModifiers(*table_expression_modifiers);
761761

762-
QueryAnalysisPass query_analysis_pass;
763-
QueryTreeNodePtr node = table_function_node;
764-
query_analysis_pass.run(node, query_context);
762+
/// Subquery in table function `view` may reference tables that don't exist on the initiator.
763+
if (table_function_node->getTableFunctionName() == "view")
764+
{
765+
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
766+
auto column_names_and_types = distributed_storage_snapshot->getColumns(get_column_options);
767+
768+
StorageID fake_storage_id = StorageID::createEmpty();
769+
if (auto * table_node = query_info.table_expression->as<TableNode>())
770+
fake_storage_id = table_node->getStorage()->getStorageID();
771+
else if (auto * original_table_function_node = query_info.table_expression->as<TableFunctionNode>())
772+
fake_storage_id = original_table_function_node->getStorage()->getStorageID();
773+
774+
auto storage = std::make_shared<StorageDummy>(fake_storage_id, ColumnsDescription{column_names_and_types});
775+
776+
table_function_node->resolve({}, std::move(storage), query_context, /*unresolved_arguments_indexes_=*/{ 0 });
777+
}
778+
else
779+
{
780+
QueryAnalysisPass query_analysis_pass;
781+
QueryTreeNodePtr node = table_function_node;
782+
query_analysis_pass.run(node, query_context);
783+
}
765784

766785
replacement_table_expression = std::move(table_function_node);
767786
}
@@ -809,9 +828,7 @@ void StorageDistributed::read(
809828

810829
if (settings.allow_experimental_analyzer)
811830
{
812-
StorageID remote_storage_id = StorageID::createEmpty();
813-
if (!remote_table_function_ptr)
814-
remote_storage_id = StorageID{remote_database, remote_table};
831+
StorageID remote_storage_id = StorageID{remote_database, remote_table};
815832

816833
auto query_tree_distributed = buildQueryTreeDistributed(modified_query_info,
817834
query_info.merge_storage_snapshot ? query_info.merge_storage_snapshot : storage_snapshot,

tests/integration/test_remote_function_view/__init__.py

Whitespace-only changes.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<clickhouse>
2+
<remote_servers>
3+
<cluster>
4+
<shard>
5+
<replica>
6+
<host>node1</host>
7+
<port>9000</port>
8+
</replica>
9+
<replica>
10+
<host>node2</host>
11+
<port>9000</port>
12+
</replica>
13+
</shard>
14+
</cluster>
15+
</remote_servers>
16+
</clickhouse>
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import pytest
2+
3+
from helpers.cluster import ClickHouseCluster
4+
5+
cluster = ClickHouseCluster(__file__)
6+
node1 = cluster.add_instance("node1", main_configs=["configs/clusters.xml"])
7+
node2 = cluster.add_instance("node2", main_configs=["configs/clusters.xml"])
8+
9+
10+
@pytest.fixture(scope="module")
11+
def start_cluster():
12+
try:
13+
cluster.start()
14+
15+
node2.query(
16+
"""
17+
CREATE TABLE test_table(
18+
APIKey UInt32,
19+
CustomAttributeId UInt64,
20+
ProfileIDHash UInt64,
21+
DeviceIDHash UInt64,
22+
Data String)
23+
ENGINE = SummingMergeTree()
24+
ORDER BY (APIKey, CustomAttributeId, ProfileIDHash, DeviceIDHash, intHash32(DeviceIDHash))
25+
"""
26+
)
27+
yield cluster
28+
29+
finally:
30+
cluster.shutdown()
31+
32+
33+
def test_remote(start_cluster):
34+
assert (
35+
node1.query(
36+
"SELECT 1 FROM remote('node2', view(SELECT * FROM default.test_table)) WHERE (APIKey = 137715) AND (CustomAttributeId IN (45, 66)) AND (ProfileIDHash != 0) LIMIT 1"
37+
)
38+
== ""
39+
)
40+
41+
42+
def test_remote_fail(start_cluster):
43+
assert (
44+
"Unknown table expression identifier 'default.table_not_exists'"
45+
in node1.query_and_get_error(
46+
"SELECT 1 FROM remote('node2', view(SELECT * FROM default.table_not_exists)) WHERE (APIKey = 137715) AND (CustomAttributeId IN (45, 66)) AND (ProfileIDHash != 0) LIMIT 1"
47+
)
48+
)

0 commit comments

Comments
 (0)