Skip to content

Commit 9e41e5f

Browse files
authored
[Chore](thrift) add reopen thrift connection on RuntimeQueryStatisticsMgr::report_runtime_query_statistics (apache#56744)
try to avoid asan check fail: ```cpp OpenJDK 64-Bit Server VM warning: Option CriticalJNINatives was deprecated in version 16.0 and will likely be removed in a future release. Listening for transport dt_socket at address: 8871 WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance. start BE in local mode thrift error, reason=write() send(): Broken pipe================================================================= AddressSanitizer: CHECK failed: asan_thread.cpp:369 "((ptr[0] == kCurrentStackFrameMagic)) != (0)" (0x0, 0x0) (tid=13525) #0 0x557a503468e1 in __asan::CheckUnwind() (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f2a08e1) #1 0x557a50361182 in __sanitizer::CheckFailed(char const*, int, char const*, unsigned long long, unsigned long long) (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f2bb182) #2 0x557a5034a97b in __asan::AsanThread::GetStackFrameAccessByAddr(unsigned long, __asan::AsanThread::StackFrameAccess*) (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f2a497b) #3 0x557a502a4a30 in __asan::AddressDescription::AddressDescription(unsigned long, unsigned long, bool) (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f1fea30) #4 0x557a502a67a3 in __asan::ErrorGeneric::ErrorGeneric(unsigned int, unsigned long, unsigned long, unsigned long, unsigned long, bool, unsigned long) (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f2007a3) #5 0x557a50343878 in __asan::ReportGenericError(unsigned long, unsigned long, unsigned long, unsigned long, bool, unsigned long, unsigned int, bool) (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f29d878) #6 0x557a50344f75 in __asan_report_load8 (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f29ef75) #7 0x557a5039791e in std::__cxx11::basic_string, std::allocator>::_M_create(unsigned long&, unsigned long) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/basic_string.tcc:148:11 #8 0x557a503984db in void std::__cxx11::basic_string, std::allocator>::_M_construct(char const*, unsigned long) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/basic_string.tcc:291:12 #9 0x557a503984db in std::__cxx11::basic_string, std::allocator>::basic_string(std::__cxx11::basic_string, std::allocator> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/basic_string.h:617:2 #10 0x557a55b4d77d in std::pair, std::allocator> const, doris::TQueryStatistics>::pair(std::pair, std::allocator> const, doris::TQueryStatistics> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_pair.h:312:17 #11 0x557a55b4d77d in _ZSt12construct_atISt4pairIKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN5doris16TQueryStatisticsEEJRKSA_EQaant20is_unbounded_array_vIT_ErqXgsnwcvPvLi0E_SD_pispclsr3stdE7declvalIT0_EEEEEPSD_SG_DpOSF_ /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_construct.h:110:22 #12 0x557a55b4d77d in void std::allocator_traits, std::allocator> const, doris::TQueryStatistics>>>>::construct, std::allocator> const, doris::TQueryStatistics>, std::pair, std::allocator> const, doris::TQueryStatistics> const&>(std::allocator, std::allocator> const, doris::TQueryStatistics>>>&, std::pair, std::allocator> const, doris::TQueryStatistics>*, std::pair, std::allocator> const, doris::TQueryStatistics> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/alloc_traits.h:676:4 #13 0x557a55b4d77d in void std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_construct_node, std::allocator> const, doris::TQueryStatistics> const&>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::pair, std::allocator> const, doris::TQueryStatistics> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:1237:8 #14 0x557a55b4cfe3 in std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node::operator(), std::allocator> const, doris::TQueryStatistics> const&>(std::pair, std::allocator> const, doris::TQueryStatistics> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h #15 0x557a55b4cbe6 in std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_clone_node, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:1287:8 #16 0x557a55b4c6f6 in std::_Rb_tree_node_base* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_copy, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::_Rb_tree_node_base*, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:2546:20 #17 0x557a55b4c7b6 in std::_Rb_tree_node_base* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_copy, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::_Rb_tree_node_base*, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:2554:3 #18 0x557a55b4c7b6 in std::_Rb_tree_node_base* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_copy, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::_Rb_tree_node_base*, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:2554:3 #19 0x557a55b4c7b6 in std::_Rb_tree_node_base* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_copy, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::_Rb_tree_node_base*, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:2554:3 #20 0x557a55b4c7b6 in std::_Rb_tree_node_base* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_copy, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree_node, std::allocator> const, doris::TQueryStatistics>>*, std::_Rb_tree_node_base*, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:2554:3 apache#21 0x557a55b4c27a in std::_Rb_tree_node_base* std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_M_copy, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node>(std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>> const&, std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::_Reuse_or_alloc_node&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:1523:6 apache#22 0x557a55740d4f in std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::operator=(std::_Rb_tree, std::allocator>, std::pair, std::allocator> const, doris::TQueryStatistics>, std::_Select1st, std::allocator> const, doris::TQueryStatistics>>, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_tree.h:2453:18 apache#23 0x557a55740d4f in std::map, std::allocator>, doris::TQueryStatistics, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>>::operator=(std::map, std::allocator>, doris::TQueryStatistics, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>> const&) /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_map.h:354:37 apache#24 0x557a55740d4f in doris::TReportWorkloadRuntimeStatusParams::__set_query_statistics_map(std::map, std::allocator>, doris::TQueryStatistics, std::less, std::allocator>>, std::allocator, std::allocator> const, doris::TQueryStatistics>>> const&) /home/zcp/repo_center/doris_branch-4.0/doris/gensrc/build/gen_cpp/FrontendService_types.cpp:4391:30 apache#25 0x557a5447e13a in doris::RuntimeQueryStatisticsMgr::report_runtime_query_statistics() /home/zcp/repo_center/doris_branch-4.0/doris/be/src/runtime/runtime_query_statistics_mgr.cpp:403:31 apache#26 0x557a504f1192 in doris::Daemon::report_runtime_query_statistics_thread() /home/zcp/repo_center/doris_branch-4.0/doris/be/src/common/daemon.cpp:457:65 apache#27 0x557a54bf2eb6 in std::function::operator()() const /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593:9 apache#28 0x557a54bf2eb6 in doris::Thread::supervise_thread(void*) /home/zcp/repo_center/doris_branch-4.0/doris/be/src/util/thread.cpp:460:5 apache#29 0x557a50336d26 in asan_thread_start(void*) (/mnt/hdd01/ci/doris-deploy-branch-4.0-local/be/lib/doris_be+0x1f290d26) apache#30 0x7f12a399eac2 in start_thread nptl/pthread_create.c:442:8 apache#31 0x7f12a3a3084f in __closefrom_fallback misc/../sysdeps/unix/sysv/linux/closefrom_fallback.c:45:19 172.20.59.34 last coredump sql: last SQL query not found ```
1 parent e486af8 commit 9e41e5f

File tree

1 file changed

+21
-14
lines changed

1 file changed

+21
-14
lines changed

be/src/runtime/runtime_query_statistics_mgr.cpp

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,13 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() {
397397
continue;
398398
}
399399

400+
auto reopen_coord = [&coord]() -> Status {
401+
std::this_thread::sleep_for(
402+
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
403+
// just reopen to disable this connection
404+
return coord.reopen(config::thrift_rpc_timeout_ms);
405+
};
406+
400407
// 2.2 send report
401408
TReportWorkloadRuntimeStatusParams report_runtime_params;
402409
report_runtime_params.__set_backend_id(be_id);
@@ -413,44 +420,44 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() {
413420
coord->reportExecStatus(res, params);
414421
rpc_result[addr] = true;
415422
} catch (apache::thrift::transport::TTransportException& e) {
423+
rpc_status = reopen_coord();
416424
#ifndef ADDRESS_SANITIZER
417425
LOG_WARNING(
418426
"[report_query_statistics] report to fe {} failed, reason:{}, try reopen.",
419427
add_str, e.what());
420-
rpc_status = coord.reopen(config::thrift_rpc_timeout_ms);
421-
if (!rpc_status.ok()) {
422-
LOG_WARNING(
423-
"[report_query_statistics]reopen thrift client failed when report "
424-
"workload runtime statistics to {}, reason: {}",
425-
add_str, rpc_status.to_string());
426-
} else {
427-
coord->reportExecStatus(res, params);
428-
rpc_result[addr] = true;
429-
}
430428
#else
431429
std::cerr << "thrift error, reason=" << e.what();
432430
#endif
431+
if (rpc_status.ok()) {
432+
coord->reportExecStatus(res, params);
433+
rpc_result[addr] = true;
434+
}
433435
}
434436
} catch (apache::thrift::TApplicationException& e) {
435437
LOG_WARNING(
436438
"[report_query_statistics]fe {} throw exception when report statistics, "
437439
"reason:{}, you can see fe log for details.",
438440
add_str, e.what());
441+
rpc_status = reopen_coord();
439442
} catch (apache::thrift::TException& e) {
440443
LOG_WARNING(
441444
"[report_query_statistics]report workload runtime statistics to {} failed, "
442445
"reason: {}",
443446
add_str, e.what());
444-
std::this_thread::sleep_for(
445-
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
446-
// just reopen to disable this connection
447-
static_cast<void>(coord.reopen(config::thrift_rpc_timeout_ms));
447+
rpc_status = reopen_coord();
448448
} catch (std::exception& e) {
449449
LOG_WARNING(
450450
"[report_query_statistics]unknown exception when report workload runtime "
451451
"statistics to {}, reason:{}. ",
452452
add_str, e.what());
453453
}
454+
455+
if (!rpc_status.ok()) {
456+
LOG_WARNING(
457+
"[report_query_statistics]reopen thrift client failed when report "
458+
"workload runtime statistics to {}, reason: {}",
459+
add_str, rpc_status.to_string());
460+
}
454461
}
455462

456463
// 3 when query is finished and (last rpc is send success), remove finished query statistics

0 commit comments

Comments
 (0)