Skip to content

Commit 918aa32

Browse files
committed
adjust shuffle join
1 parent dddcffc commit 918aa32

File tree

9 files changed

+26
-9
lines changed

9 files changed

+26
-9
lines changed

programs/server/Server.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
689689
}
690690
}
691691

692-
registerAllStorageDistributedTaskBuilderMakers();
693692
Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));
694693
std::mutex servers_lock;
695694
std::vector<ProtocolServerAdapter> servers;

src/Core/Settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
600600
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
601601
M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \
602602
M(String, use_cluster_for_distributed_shuffle, "", "If you want to run the join and group by in distributed shuffle mode, set it as one of the available cluster.", 0) \
603+
M(Bool, enable_distribute_shuffle, false, "Enable shuffle join", 0) \
603604
M(UInt64, shuffle_storage_session_timeout, 1800, "How long a session can be alive before expired by timeout", 0) \
604605
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
605606
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \

src/Interpreters/ASTRewriters/StageQueryDistributedJoinRewriteAction.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,10 @@ bool StageQueryDistributedJoinRewriteAnalyzer::isApplicableJoinType()
354354
{
355355
const auto * join_tables = from_query->join();
356356
auto * table_join = join_tables->table_join->as<ASTTableJoin>();
357-
if (table_join->kind == ASTTableJoin::Kind::Cross)
357+
358+
if (table_join->kind != ASTTableJoin::Kind::Left && table_join->kind != ASTTableJoin::Kind::Inner)
359+
return false;
360+
if (table_join->strictness == ASTTableJoin::Strictness::Asof)
358361
return false;
359362

360363
// TODO: if the right table is a dictionary (dict) or another special storage, return false.

src/Interpreters/InterpreterStageQuery.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ BlockIO InterpreterStageQuery::execute(const QueryBlockIO & output_io, const Que
104104
auto pipeline_builder = query_plan.buildQueryPipeline(
105105
QueryPlanOptimizationSettings::fromContext(context),
106106
BuildQueryPipelineSettings::fromContext(context));
107-
pipeline_builder->addInterpreterContext(context);
108107
BlockIO res;
109108
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder));
110109
return res;

src/Interpreters/StorageDistributedTasksBuilder.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <mutex>
12
#include <Interpreters/StorageDistributedTasksBuilder.h>
23
#include <Common/ErrorCodes.h>
34

@@ -7,9 +8,13 @@ namespace ErrorCodes
78
{
89
extern const int LOGICAL_ERROR;
910
}
11+
12+
static std::once_flag init_builder_flag;
13+
void registerAllStorageDistributedTaskBuilderMakers();
1014
StorageDistributedTaskBuilderFactory & StorageDistributedTaskBuilderFactory::getInstance()
1115
{
1216
static StorageDistributedTaskBuilderFactory instance;
17+
std::call_once(init_builder_flag, [](){ registerAllStorageDistributedTaskBuilderMakers(instance); });
1318
return instance;
1419
}
1520

@@ -31,7 +36,9 @@ StorageDistributedTaskBuilderPtr StorageDistributedTaskBuilderFactory::getBuilde
3136
return iter->second();
3237
}
3338

34-
void registerAllStorageDistributedTaskBuilderMakers()
39+
void registerHiveClusterTasksBuilder(StorageDistributedTaskBuilderFactory & instance);
40+
void registerAllStorageDistributedTaskBuilderMakers(StorageDistributedTaskBuilderFactory & instance)
3541
{
42+
registerHiveClusterTasksBuilder(instance);
3643
}
3744
}

src/Interpreters/StorageDistributedTasksBuilder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ class StorageDistributedTaskBuilderFactory : boost::noncopyable
3434

3535
};
3636

37-
void registerAllStorageDistributedTaskBuilderMakers();
37+
void registerAllStorageDistributedTaskBuilderMakers(StorageDistributedTaskBuilderFactory & instance);
3838
}

src/Interpreters/executeQuery.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
641641
select_with_union->set_of_modes.size(),
642642
select_with_union->list_of_selects->getID());
643643
}
644-
if (!context->getSettings().use_cluster_for_distributed_shuffle.value.empty())
644+
if (!context->getSettingsRef().use_cluster_for_distributed_shuffle.value.empty() && context->getSettingsRef().enable_distribute_shuffle)
645645
{
646646
MakeFunctionColumnAliasAction function_alias_action;
647647
ASTDepthFirstVisitor<MakeFunctionColumnAliasAction> function_alias_visitor(function_alias_action, ast);
@@ -666,6 +666,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
666666
ast = add_finish_event_result;
667667
}
668668
}
669+
669670
interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal));
670671

671672
if (context->getCurrentTransaction() && !interpreter->supportsTransactions() &&

src/Storages/DistributedShuffle/StorageShuffle.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include <Processors/Chunk.h>
2323
#include <Processors/Sinks/SinkToStorage.h>
2424
#include <Processors/Sources/RemoteSource.h>
25-
#include <Processors/Sources/SourceWithProgress.h>
25+
#include <Processors/ISource.h>
2626
#include <QueryPipeline/RemoteInserter.h>
2727
#include <QueryPipeline/RemoteQueryExecutor.h>
2828
#include <Storages/DistributedShuffle/ShuffleBlockTable.h>
@@ -45,11 +45,11 @@ namespace ErrorCodes
4545
{
4646
extern const int LOGICAL_ERROR;
4747
}
48-
class StorageShuffleSource : public SourceWithProgress, WithContext
48+
class StorageShuffleSource : public ISource, WithContext
4949
{
5050
public:
5151
StorageShuffleSource(ContextPtr context_, const String & session_id_, const String & table_id_, const Block & header_)
52-
: SourceWithProgress(header_), WithContext(context_), session_id(session_id_), table_id(table_id_), header(header_)
52+
: ISource(header_), WithContext(context_), session_id(session_id_), table_id(table_id_), header(header_)
5353
{
5454
}
5555

src/Storages/Hive/StorageHiveCluster.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pragma once
2+
#include <memory>
23
#include <Common/config.h>
34
#if USE_HIVE
45
#include <Interpreters/Context.h>
@@ -66,6 +67,12 @@ class StorageHiveCluster : public IStorage, WithContext
6667
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override;
6768
void alter(const AlterCommands & params, ContextPtr local_context, AlterLockHolder & alter_lock_holder) override;
6869

70+
std::shared_ptr<HiveSettings> getStorageHiveSettings() { return storage_settings; }
71+
const String & getHiveMetastoreURL() const { return hive_metastore_url; }
72+
const String & getHiveDatabase() const { return hive_database; }
73+
const String & getHiveTableName() const { return hive_table; }
74+
ASTPtr getPartitionByAst() const { return partition_by_ast; }
75+
6976
private:
7077
String cluster_name;
7178
String hive_metastore_url;

0 commit comments

Comments
 (0)