Skip to content

Commit a57ec6d

Browse files
authored
Revert "Introduce mpp query level processListEntry (#7644)" (#7688)
ref #7687
1 parent 7aafea8 commit a57ec6d

17 files changed

+56
-132
lines changed

dbms/src/Common/TiFlashMetrics.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,7 @@ namespace DB
7777
M(tiflash_coprocessor_request_memory_usage, "Bucketed histogram of request memory usage", Histogram, \
7878
F(type_cop, {{"type", "cop"}}, ExpBuckets{1024 * 1024, 2, 16}), \
7979
F(type_batch, {{"type", "batch"}}, ExpBuckets{1024 * 1024, 2, 20}), \
80-
F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 20}), \
81-
F(type_run_mpp_query, {{"type", "run_mpp_query"}}, ExpBuckets{1024 * 1024, 2, 20})) \
80+
F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 20})) \
8281
M(tiflash_coprocessor_request_error, "Total number of request error", Counter, F(reason_meet_lock, {"reason", "meet_lock"}), \
8382
F(reason_region_not_found, {"reason", "region_not_found"}), F(reason_epoch_not_match, {"reason", "epoch_not_match"}), \
8483
F(reason_kv_client_error, {"reason", "kv_client_error"}), F(reason_internal_error, {"reason", "internal_error"}), \

dbms/src/Debug/dbgQueryExecutor.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ void setTipbRegionInfo(coprocessor::RegionInfo * tipb_region_info, const std::pa
3939
range->set_end(RecordKVFormat::genRawKey(table_id, handle_range.second.handle_id));
4040
}
4141

42-
BlockInputStreamPtr constructRootExchangeReceiverStream(Context & context, tipb::ExchangeReceiver & tipb_exchange_receiver, const DAGProperties & properties, DAGSchema & root_task_schema, const String & root_addr)
42+
BlockInputStreamPtr constructExchangeReceiverStream(Context & context, tipb::ExchangeReceiver & tipb_exchange_receiver, const DAGProperties & properties, DAGSchema & root_task_schema, const String & root_addr, bool enable_local_tunnel)
4343
{
4444
for (auto & field : root_task_schema)
4545
{
@@ -65,7 +65,7 @@ BlockInputStreamPtr constructRootExchangeReceiverStream(Context & context, tipb:
6565
root_tm,
6666
context.getTMTContext().getKVCluster(),
6767
context.getTMTContext().getMPPTaskManager(),
68-
false,
68+
enable_local_tunnel,
6969
context.getSettingsRef().enable_async_grpc_client),
7070
tipb_exchange_receiver.encoded_task_meta_size(),
7171
10,
@@ -77,7 +77,7 @@ BlockInputStreamPtr constructRootExchangeReceiverStream(Context & context, tipb:
7777
return ret;
7878
}
7979

80-
BlockInputStreamPtr prepareRootExchangeReceiver(Context & context, const DAGProperties & properties, std::vector<Int64> & root_task_ids, DAGSchema & root_task_schema)
80+
BlockInputStreamPtr prepareRootExchangeReceiver(Context & context, const DAGProperties & properties, std::vector<Int64> & root_task_ids, DAGSchema & root_task_schema, bool enable_local_tunnel)
8181
{
8282
tipb::ExchangeReceiver tipb_exchange_receiver;
8383
for (const auto root_task_id : root_task_ids)
@@ -93,7 +93,7 @@ BlockInputStreamPtr prepareRootExchangeReceiver(Context & context, const DAGProp
9393
auto * tm_string = tipb_exchange_receiver.add_encoded_task_meta();
9494
tm.AppendToString(tm_string);
9595
}
96-
return constructRootExchangeReceiverStream(context, tipb_exchange_receiver, properties, root_task_schema, Debug::LOCAL_HOST);
96+
return constructExchangeReceiverStream(context, tipb_exchange_receiver, properties, root_task_schema, Debug::LOCAL_HOST, enable_local_tunnel);
9797
}
9898

9999
void prepareExchangeReceiverMetaWithMultipleContext(tipb::ExchangeReceiver & tipb_exchange_receiver, const DAGProperties & properties, Int64 task_id, String & addr)
@@ -116,7 +116,7 @@ BlockInputStreamPtr prepareRootExchangeReceiverWithMultipleContext(Context & con
116116

117117
prepareExchangeReceiverMetaWithMultipleContext(tipb_exchange_receiver, properties, task_id, addr);
118118

119-
return constructRootExchangeReceiverStream(context, tipb_exchange_receiver, properties, root_task_schema, root_addr);
119+
return constructExchangeReceiverStream(context, tipb_exchange_receiver, properties, root_task_schema, root_addr, true);
120120
}
121121

122122
void prepareDispatchTaskRequest(QueryTask & task, std::shared_ptr<mpp::DispatchTaskRequest> req, const DAGProperties & properties, std::vector<Int64> & root_task_ids, DAGSchema & root_task_schema, String & addr)
@@ -222,7 +222,7 @@ BlockInputStreamPtr executeMPPQuery(Context & context, const DAGProperties & pro
222222
if (call.getResp()->has_error())
223223
throw Exception("Meet error while dispatch mpp task: " + call.getResp()->error().msg());
224224
}
225-
return prepareRootExchangeReceiver(context, properties, root_task_ids, root_task_schema);
225+
return prepareRootExchangeReceiver(context, properties, root_task_ids, root_task_schema, context.getSettingsRef().enable_local_tunnel);
226226
}
227227

228228
BlockInputStreamPtr executeNonMPPQuery(Context & context, RegionID region_id, const DAGProperties & properties, QueryTasks & query_tasks, MakeResOutputStream & func_wrap_output_stream)

dbms/src/Flash/Mpp/ExchangeReceiver.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ void ExchangeReceiverBase<RPCContext>::setUpLocalConnections(std::vector<Request
562562
LoggerPtr local_log = Logger::get(fmt::format("{} {}", exc_log->identifier(), req_info));
563563

564564
LocalRequestHandler local_request_handler(
565+
getMemoryTracker(),
565566
[this, log = local_log](bool meet_error, const String & local_err_msg) {
566567
this->connectionDone(meet_error, local_err_msg, log);
567568
},

dbms/src/Flash/Mpp/LocalRequestHandler.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ namespace DB
2222
struct LocalRequestHandler
2323
{
2424
LocalRequestHandler(
25+
MemoryTracker * recv_mem_tracker_,
2526
std::function<void(bool, const String &)> && notify_write_done_,
2627
std::function<void()> && notify_close_,
2728
std::function<void()> && add_local_conn_num_,
2829
ReceiverChannelWriter && channel_writer_)
29-
: notify_write_done(std::move(notify_write_done_))
30+
: recv_mem_tracker(recv_mem_tracker_)
31+
, notify_write_done(std::move(notify_write_done_))
3032
, notify_close(std::move(notify_close_))
3133
, add_local_conn_num(std::move(add_local_conn_num_))
3234
, channel_writer(std::move(channel_writer_))
@@ -73,6 +75,7 @@ struct LocalRequestHandler
7375
return waiting_task_time;
7476
}
7577

78+
MemoryTracker * recv_mem_tracker;
7679
std::function<void(bool, const String &)> notify_write_done;
7780
std::function<void()> notify_close;
7881
std::function<void()> add_local_conn_num;

dbms/src/Flash/Mpp/MPPTask.cpp

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,7 @@ void MPPTaskMonitorHelper::initAndAddself(MPPTaskManager * manager_, const Strin
9999
MPPTaskMonitorHelper::~MPPTaskMonitorHelper()
100100
{
101101
if (initialized)
102-
{
103102
manager->removeMonitoredTask(task_unique_id);
104-
}
105103
}
106104

107105
MPPTask::MPPTask(const mpp::TaskMeta & meta_, const ContextPtr & context_)
@@ -121,9 +119,8 @@ MPPTask::~MPPTask()
121119
{
122120
/// MPPTask maybe destructed by different thread, set the query memory_tracker
123121
/// to current_memory_tracker in the destructor
124-
auto * query_memory_tracker = getMemoryTracker();
125-
if (query_memory_tracker != nullptr && current_memory_tracker != query_memory_tracker)
126-
current_memory_tracker = query_memory_tracker;
122+
if (process_list_entry != nullptr && current_memory_tracker != process_list_entry->get().getMemoryTrackerPtr().get())
123+
current_memory_tracker = process_list_entry->get().getMemoryTrackerPtr().get();
127124
abortTunnels("", true);
128125
LOG_INFO(log, "finish MPPTask: {}", id.toString());
129126
}
@@ -190,8 +187,7 @@ void MPPTask::registerTunnels(const mpp::DispatchTaskRequest & task_request)
190187
if (unlikely(!task_meta.ParseFromString(exchange_sender.encoded_task_meta(i))))
191188
throw TiFlashException("Failed to decode task meta info in ExchangeSender", Errors::Coprocessor::BadRequest);
192189

193-
/// when the receiver task is root task, it should never be local tunnel
194-
bool is_local = context->getSettingsRef().enable_local_tunnel && task_meta.task_id() != -1 && meta.address() == task_meta.address();
190+
bool is_local = context->getSettingsRef().enable_local_tunnel && meta.address() == task_meta.address();
195191
bool is_async = !is_local && context->getSettingsRef().enable_async_server;
196192
MPPTunnelPtr tunnel = std::make_shared<MPPTunnel>(
197193
task_meta,
@@ -297,13 +293,6 @@ void MPPTask::setErrString(const String & message)
297293
err_string = message;
298294
}
299295

300-
MemoryTracker * MPPTask::getMemoryTracker() const
301-
{
302-
if (process_list_entry_holder.process_list_entry != nullptr)
303-
return process_list_entry_holder.process_list_entry->get().getMemoryTrackerPtr().get();
304-
return nullptr;
305-
}
306-
307296
void MPPTask::unregisterTask()
308297
{
309298
auto [result, reason] = manager->unregisterTask(id);
@@ -313,19 +302,6 @@ void MPPTask::unregisterTask()
313302
LOG_WARNING(log, "task failed to unregister, reason: {}", reason);
314303
}
315304

316-
void MPPTask::initProcessListEntry(MPPTaskManagerPtr & task_manager)
317-
{
318-
/// all the mpp tasks of the same mpp query shares the same process list entry
319-
auto [query_process_list_entry, aborted_reason] = task_manager->getOrCreateQueryProcessListEntry(id.query_id, context);
320-
if (!aborted_reason.empty())
321-
throw TiFlashException(fmt::format("MPP query is already aborted, aborted reason: {}", aborted_reason), Errors::Coprocessor::Internal);
322-
assert(query_process_list_entry != nullptr);
323-
process_list_entry_holder.process_list_entry = query_process_list_entry;
324-
dag_context->setProcessListEntry(query_process_list_entry);
325-
context->setProcessListElement(&query_process_list_entry->get());
326-
current_memory_tracker = getMemoryTracker();
327-
}
328-
329305
void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request)
330306
{
331307
dag_req = getDAGRequestFromStringWithRetry(task_request.encoded_plan());
@@ -382,13 +358,13 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request)
382358
dag_context->tidb_host = context->getClientInfo().current_address.toString();
383359

384360
context->setDAGContext(dag_context.get());
385-
386-
auto task_manager = tmt_context.getMPPTaskManager();
387-
initProcessListEntry(task_manager);
361+
process_list_entry = setProcessListElement(*context, dag_context->dummy_query_string, dag_context->dummy_ast.get());
362+
dag_context->setProcessListEntry(process_list_entry);
388363

389364
injectFailPointBeforeRegisterTunnel(dag_context->isRootMPPTask());
390365
registerTunnels(task_request);
391366

367+
auto task_manager = tmt_context.getMPPTaskManager();
392368
LOG_DEBUG(log, "begin to register the task {}", id.toString());
393369

394370
injectFailPointBeforeRegisterMPPTask(dag_context->isRootMPPTask());
@@ -429,7 +405,7 @@ void MPPTask::preprocess()
429405
void MPPTask::runImpl()
430406
{
431407
CPUAffinityManager::getInstance().bindSelfQueryThread();
432-
RUNTIME_ASSERT(current_memory_tracker == getMemoryTracker(), log, "The current memory tracker is not set correctly for MPPTask::runImpl");
408+
RUNTIME_ASSERT(current_memory_tracker == process_list_entry->get().getMemoryTrackerPtr().get(), log, "The current memory tracker is not set correctly for MPPTask::runImpl");
433409
if (!switchStatus(INITIALIZING, RUNNING))
434410
{
435411
LOG_WARNING(log, "task not in initializing state, skip running");
@@ -532,9 +508,9 @@ void MPPTask::runImpl()
532508
// todo when error happens, should try to update the metrics if it is available
533509
if (auto throughput = dag_context->getTableScanThroughput(); throughput.first)
534510
GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second);
535-
/// note that memory_tracker is shared by all the mpp tasks, the peak memory usage is not accurate
536-
/// todo log executor level peak memory usage instead
537-
auto peak_memory = getMemoryTracker()->getPeak();
511+
auto process_info = context->getProcessListElement()->getInfo();
512+
auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0;
513+
GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory);
538514
mpp_task_statistics.setMemoryPeak(peak_memory);
539515
}
540516
}

dbms/src/Flash/Mpp/MPPTask.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
namespace DB
3838
{
3939
class MPPTaskManager;
40-
using MPPTaskManagerPtr = std::shared_ptr<MPPTaskManager>;
4140
class DAGContext;
4241
class ProcessListEntry;
4342

@@ -124,27 +123,12 @@ class MPPTask : public std::enable_shared_from_this<MPPTask>
124123

125124
void registerTunnels(const mpp::DispatchTaskRequest & task_request);
126125

127-
void initProcessListEntry(MPPTaskManagerPtr & task_manager);
128-
129126
void initExchangeReceivers();
130127

131128
String getErrString() const;
132129
void setErrString(const String & message);
133130

134-
MemoryTracker * getMemoryTracker() const;
135-
136131
private:
137-
struct ProcessListEntryHolder
138-
{
139-
std::shared_ptr<ProcessListEntry> process_list_entry;
140-
~ProcessListEntryHolder()
141-
{
142-
/// Because MemoryTracker is now saved in `MPPQueryTaskSet` and shared by all the mpp tasks belongs to the same mpp query,
143-
/// it may not be destructed when MPPTask is destructed, so need to manually reset current_memory_tracker to nullptr at the
144-
/// end of the destructor of MPPTask, otherwise, current_memory_tracker may point to a invalid memory tracker
145-
current_memory_tracker = nullptr;
146-
}
147-
};
148132
// We must ensure this member variable is put at this place to be destructed at proper time
149133
MPPTaskMonitorHelper mpp_task_monitor_helper;
150134

@@ -160,11 +144,12 @@ class MPPTask : public std::enable_shared_from_this<MPPTask>
160144

161145
MPPTaskScheduleEntry schedule_entry;
162146

163-
ProcessListEntryHolder process_list_entry_holder;
164147
// `dag_context` holds inputstreams which could hold ref to `context` so it should be destructed
165148
// before `context`.
166149
std::unique_ptr<DAGContext> dag_context;
167150

151+
std::shared_ptr<ProcessListEntry> process_list_entry;
152+
168153
QueryExecutorHolder query_executor_holder;
169154

170155
std::atomic<TaskStatus> status{INITIALIZING};

dbms/src/Flash/Mpp/MPPTaskManager.cpp

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,13 @@
1515
#include <Common/FailPoint.h>
1616
#include <Common/FmtUtils.h>
1717
#include <Common/TiFlashMetrics.h>
18-
#include <Flash/Coprocessor/DAGContext.h>
1918
#include <Flash/Mpp/MPPTask.h>
2019
#include <Flash/Mpp/MPPTaskManager.h>
21-
#include <Interpreters/Context.h>
22-
#include <Interpreters/ProcessList.h>
23-
#include <Interpreters/executeQuery.h>
2420
#include <fmt/core.h>
2521

2622
#include <magic_enum.hpp>
2723
#include <memory>
24+
#include <mutex>
2825
#include <string>
2926
#include <unordered_map>
3027
#include <utility>
@@ -37,15 +34,6 @@ extern const char random_task_manager_find_task_failure_failpoint[];
3734
extern const char pause_before_register_non_root_mpp_task[];
3835
} // namespace FailPoints
3936

40-
MPPQueryTaskSet::~MPPQueryTaskSet()
41-
{
42-
if likely (process_list_entry != nullptr)
43-
{
44-
auto peak_memory = process_list_entry->get().getMemoryTrackerPtr()->getPeak();
45-
GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_query).Observe(peak_memory);
46-
}
47-
}
48-
4937
MPPTaskManager::MPPTaskManager(MPPTaskSchedulerPtr scheduler_)
5038
: scheduler(std::move(scheduler_))
5139
, aborted_query_gather_cache(ABORTED_MPPGATHER_CACHE_SIZE)
@@ -247,13 +235,14 @@ std::pair<bool, String> MPPTaskManager::registerTask(MPPTaskPtr task)
247235
{
248236
return {false, fmt::format("query is being aborted, error message = {}", error_msg)};
249237
}
250-
/// query_set must not be nullptr if the current query is not aborted since MPPTask::initProcessListEntry
251-
/// will always create the query_set
252-
RUNTIME_CHECK_MSG(query_set != nullptr, "query set must not be null when register task");
253-
if (query_set->task_map.find(task->id) != query_set->task_map.end())
238+
if (query_set != nullptr && query_set->task_map.find(task->id) != query_set->task_map.end())
254239
{
255240
return {false, "task has been registered"};
256241
}
242+
if (query_set == nullptr) /// the first one
243+
{
244+
query_set = addMPPQueryTaskSet(task->id.query_id);
245+
}
257246
query_set->task_map.emplace(task->id, task);
258247
/// cancel all the alarm waiting on this task
259248
auto alarm_it = query_set->alarms.find(task->id.task_id);
@@ -304,25 +293,6 @@ String MPPTaskManager::toString()
304293
return res + ")";
305294
}
306295

307-
std::pair<std::shared_ptr<ProcessListEntry>, String> MPPTaskManager::getOrCreateQueryProcessListEntry(const MPPQueryId & query_id, const ContextPtr & context)
308-
{
309-
std::lock_guard lock(mu);
310-
auto [query_set, abort_reason] = getQueryTaskSetWithoutLock(query_id);
311-
if (!abort_reason.empty())
312-
return {nullptr, abort_reason};
313-
if (query_set == nullptr)
314-
query_set = addMPPQueryTaskSet(query_id);
315-
if (query_set->process_list_entry == nullptr)
316-
{
317-
query_set->process_list_entry = setProcessListElement(
318-
*context,
319-
context->getDAGContext()->dummy_query_string,
320-
context->getDAGContext()->dummy_ast.get(),
321-
true);
322-
}
323-
return {query_set->process_list_entry, ""};
324-
}
325-
326296
std::pair<MPPQueryTaskSetPtr, String> MPPTaskManager::getQueryTaskSetWithoutLock(const MPPQueryId & query_id)
327297
{
328298
auto it = mpp_query_map.find(query_id);

dbms/src/Flash/Mpp/MPPTaskManager.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ struct MPPQueryTaskSet
4040
/// task can only be registered state is Normal
4141
State state = Normal;
4242
String error_message;
43-
std::shared_ptr<ProcessListEntry> process_list_entry;
4443
MPPTaskMap task_map;
4544
std::unordered_map<Int64, std::unordered_map<Int64, grpc::Alarm>> alarms;
4645
/// only used in scheduler
@@ -53,7 +52,6 @@ struct MPPQueryTaskSet
5352
{
5453
return state == Normal || state == Aborted;
5554
}
56-
~MPPQueryTaskSet();
5755
};
5856

5957
/// A simple thread unsafe FIFO cache used to fix the "lost cancel" issues
@@ -195,8 +193,6 @@ class MPPTaskManager : private boost::noncopyable
195193

196194
void abortMPPQuery(const MPPQueryId & query_id, const String & reason, AbortType abort_type);
197195

198-
std::pair<std::shared_ptr<ProcessListEntry>, String> getOrCreateQueryProcessListEntry(const MPPQueryId & query_id, const ContextPtr & context);
199-
200196
String toString();
201197

202198
private:

dbms/src/Flash/Mpp/MPPTunnel.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,9 @@ std::shared_ptr<DB::TrackedMppDataPacket> LocalTunnelSenderV1::readForLocal()
459459
if (result == MPMCQueueResult::OK)
460460
{
461461
MPPTunnelMetric::subDataSizeMetric(*data_size_in_queue, res->getPacket().ByteSizeLong());
462+
463+
// switch tunnel's memory tracker into receiver's
464+
res->switchMemTracker(current_memory_tracker);
462465
return res;
463466
}
464467
else if (result == MPMCQueueResult::CANCELLED)

dbms/src/Flash/Mpp/MPPTunnel.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,11 @@ class LocalTunnelSenderV2 : public TunnelSender
354354
if (unlikely(checkPacketErr(data)))
355355
return false;
356356

357+
// receiver_mem_tracker pointer will always be valid because ExchangeReceiverBase won't be destructed
358+
// before all local tunnels are destructed so that the MPPTask which contains ExchangeReceiverBase and
359+
// is responsible for deleting receiver_mem_tracker must be destroyed after these local tunnels.
360+
data->switchMemTracker(local_request_handler.recv_mem_tracker);
361+
357362
// When ExchangeReceiver receives data from local and remote tiflash, number of local tunnel threads
358363
// is very large and causes the time of transfering data by grpc threads becomes longer, because
359364
// grpc thread is hard to get chance to push data into MPMCQueue in ExchangeReceiver.

0 commit comments

Comments
 (0)