Skip to content

Commit 8ffc642

Browse files
authored
Merge pull request #731 from KernelMaker/more_logging_in_ndbcluster_init-22.10
BACKPORT: Add more logging to ndbcluster_init() to improve observabil…
2 parents 639566c + 20052fc commit 8ffc642

File tree

5 files changed

+64
-7
lines changed

5 files changed

+64
-7
lines changed

storage/ndb/plugin/ha_ndb_index_stat.cc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2194,6 +2194,7 @@ void Ndb_index_stat_thread::do_run() {
21942194
Wait for cluster to start
21952195
*/
21962196
while (!ndb_connection_is_ready(g_ndb_cluster_connection, 1)) {
2197+
log_info("Retrying: checking ndb_connection_is_ready");
21972198
/* NDB not connected yet */
21982199
if (is_stop_requested()) {
21992200
/* Terminated with a stop_request */
@@ -2287,13 +2288,24 @@ void Ndb_index_stat_thread::do_run() {
22872288

22882289
// the Ndb object is needed first
22892290
if (pr.ndb == nullptr) {
2290-
if (create_ndb(&pr, g_ndb_cluster_connection) == -1) break;
2291+
if (create_ndb(&pr, g_ndb_cluster_connection) == -1) {
2292+
log_info("Retrying: create_ndb failed");
2293+
break;
2294+
}
22912295
}
22922296

22932297
// sys objects
22942298
if (check_sys) {
22952299
// at enable check or create stats tables and events
2296-
if (check_systables(pr) == -1 || check_sysevents(pr) == -1) break;
2300+
int check_table_ret = 0;
2301+
int check_event_ret = 0;
2302+
if ((check_table_ret = check_systables(pr)) == -1 ||
2303+
(check_event_ret = check_sysevents(pr)) == -1) {
2304+
log_info("Retrying: checking both check_systable and check_sysevent, "
2305+
"last check result: [%d, %d]",
2306+
check_table_ret, check_event_ret);
2307+
break;
2308+
}
22972309
}
22982310

22992311
// listener is not critical but error means something is wrong

storage/ndb/plugin/ha_ndbcluster.cc

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12232,7 +12232,7 @@ extern THD *ndb_create_thd(char *stackptr);
1223212232
// Functionality used for delaying MySQL Server startup until
1223312233
// connection to NDB and setup (of index stat plus binlog) has completed
1223412234
//
12235-
static bool wait_setup_completed(ulong max_wait_seconds) {
12235+
static bool wait_setup_completed(ulong max_wait_seconds, const std::string info_tag) {
1223612236
DBUG_TRACE;
1223712237

1223812238
const auto timeout_time =
@@ -12241,8 +12241,15 @@ static bool wait_setup_completed(ulong max_wait_seconds) {
1224112241
while (std::chrono::steady_clock::now() < timeout_time) {
1224212242
if (ndb_binlog_is_initialized() &&
1224312243
ndb_index_stat_thread.is_setup_complete()) {
12244+
ndb_log_info("[TRACE][%s][wait_setup_completed] Done!",
12245+
info_tag.c_str());
1224412246
return true;
1224512247
}
12248+
ndb_log_info("[TRACE][%s][wait_setup_completed], "
12249+
"ndb_binlog_is_ready: %u ndb_index_stat_allow_flag: %u",
12250+
info_tag.c_str(),
12251+
ndb_binlog_is_initialized(),
12252+
ndb_index_stat_thread.is_setup_complete());
1224612253
ndb_milli_sleep(100);
1224712254
}
1224812255

@@ -12266,7 +12273,7 @@ static int ndb_wait_setup_server_startup(void *) {
1226612273
ndb_metadata_change_monitor_thread.set_server_started();
1226712274

1226812275
// Wait for connection to NDB and thread(s) setup
12269-
if (wait_setup_completed(opt_ndb_wait_setup) == false) {
12276+
if (wait_setup_completed(opt_ndb_wait_setup, "main") == false) {
1227012277
ndb_log_error(
1227112278
"Tables not available after %lu seconds. Consider "
1227212279
"increasing --ndb-wait-setup value",
@@ -12535,6 +12542,7 @@ static int ndbcluster_init(void *handlerton_ptr) {
1253512542
return 0; // Return before init will disable ndbcluster-SE.
1253612543
}
1253712544

12545+
ndb_log_info("[TRACE][main][ndbcluster_init][13 steps expected for successful completion]");
1253812546
/* Check const alignment */
1253912547
static_assert(DependencyTracker::InvalidTransactionId ==
1254012548
Ndb_binlog_extra_row_info::InvalidTransactionId);
@@ -12549,7 +12557,7 @@ static int ndbcluster_init(void *handlerton_ptr) {
1254912557
}
1255012558

1255112559
std::function<bool()> start_channel_func = []() -> bool {
12552-
if (!wait_setup_completed(opt_ndb_wait_setup)) {
12560+
if (!wait_setup_completed(opt_ndb_wait_setup, "channel")) {
1255312561
ndb_log_error(
1255412562
"Replica: Connection to NDB not ready after %lu seconds. "
1255512563
"Consider increasing --ndb-wait-setup value",
@@ -12559,15 +12567,18 @@ static int ndbcluster_init(void *handlerton_ptr) {
1255912567
return true;
1256012568
};
1256112569

12570+
ndb_log_info("[TRACE][main][ndbcluster_init][1] Calling Ndb_replica::init");
1256212571
if (Ndb_replica::init(start_channel_func, &g_default_channel_stats)) {
1256312572
return ndbcluster_init_abort("Failed to initialize NDB Replica");
1256412573
}
1256512574

12575+
ndb_log_info("[TRACE][main][ndbcluster_init][2] Calling Ndb_index_stat_thread::init");
1256612576
if (ndb_index_stat_thread.init() ||
1256712577
DBUG_EVALUATE_IF("ndbcluster_init_fail1", true, false)) {
1256812578
return ndbcluster_init_abort("Failed to initialize NDB Index Stat");
1256912579
}
1257012580

12581+
ndb_log_info("[TRACE][main][ndbcluster_init][3] Calling Ndb_metadata_change_monitor::init");
1257112582
if (ndb_metadata_change_monitor_thread.init()) {
1257212583
return ndbcluster_init_abort(
1257312584
"Failed to initialize NDB Metadata Change Monitor");
@@ -12591,6 +12602,7 @@ static int ndbcluster_init(void *handlerton_ptr) {
1259112602
hton->get_tablespace_statistics =
1259212603
ndbcluster_get_tablespace_statistics; /* Provide data to I_S */
1259312604
hton->partition_flags = ndbcluster_partition_flags; /* Partition flags */
12605+
ndb_log_info("[TRACE][main][ndbcluster_init][4] Calling ndbcluster_binlog_init");
1259412606
if (!ndbcluster_binlog_init(hton))
1259512607
return ndbcluster_init_abort("Failed to initialize NDB Binlog");
1259612608
hton->flags = HTON_TEMPORARY_NOT_SUPPORTED | HTON_NO_BINLOG_ROW_OPT |
@@ -12620,20 +12632,24 @@ static int ndbcluster_init(void *handlerton_ptr) {
1262012632
// SO, that GSL will not be held unnecessary for non-NDB tables.
1262112633
hton->post_ddl = ndbcluster_post_ddl;
1262212634

12635+
ndb_log_info("[TRACE][main][ndbcluster_init][5] Calling ndb_init_internal");
1262312636
// Initialize NdbApi
1262412637
ndb_init_internal(1);
1262512638

12639+
ndb_log_info("[TRACE][main][ndbcluster_init][6] Calling Ndb_server_hooks::register_server_hooks");
1262612640
if (!ndb_server_hooks.register_server_hooks(ndb_wait_setup_server_startup,
1262712641
ndb_dd_upgrade_hook)) {
1262812642
return ndbcluster_init_abort("Failed to register server start hook");
1262912643
}
1263012644

12645+
ndb_log_info("[TRACE][main][ndbcluster_init][7] Calling NDB_SHARE::initialize");
1263112646
// Initialize NDB_SHARE factory
1263212647
NDB_SHARE::initialize(table_alias_charset);
1263312648

1263412649
/* allocate connection resources and connect to cluster */
1263512650
const uint global_opti_node_select =
1263612651
THDVAR(nullptr, optimized_node_selection);
12652+
ndb_log_info("[TRACE][main][ndbcluster_init][8] Calling ndbcluster_connect");
1263712653
if (ndbcluster_connect(
1263812654
opt_ndb_wait_connected, opt_ndb_cluster_connection_pool,
1263912655
opt_connection_pool_nodeids_str, (global_opti_node_select & 1),
@@ -12653,22 +12669,26 @@ static int ndbcluster_init(void *handlerton_ptr) {
1265312669
}
1265412670
}
1265512671

12672+
ndb_log_info("[TRACE][main][ndbcluster_init][9] Calling ndbcluster_binlog_start");
1265612673
/* start the ndb injector thread */
1265712674
if (ndbcluster_binlog_start()) {
1265812675
return ndbcluster_init_abort("Failed to start NDB Binlog");
1265912676
}
1266012677

12678+
ndb_log_info("[TRACE][main][ndbcluster_init][10] Calling Ndb_index_stat_thread::start");
1266112679
// Create index statistics thread
1266212680
if (ndb_index_stat_thread.start() ||
1266312681
DBUG_EVALUATE_IF("ndbcluster_init_fail2", true, false)) {
1266412682
return ndbcluster_init_abort("Failed to start NDB Index Stat");
1266512683
}
1266612684

12685+
ndb_log_info("[TRACE][main][ndbcluster_init][11] Calling Ndb_metadata_change_monitor::start");
1266712686
// Create metadata change monitor thread
1266812687
if (ndb_metadata_change_monitor_thread.start()) {
1266912688
return ndbcluster_init_abort("Failed to start NDB Metadata Change Monitor");
1267012689
}
1267112690

12691+
ndb_log_info("[TRACE][main][ndbcluster_init][12] Calling ndb_pfs_init");
1267212692
if (ndb_pfs_init()) {
1267312693
return ndbcluster_init_abort("Failed to init pfs");
1267412694
}
@@ -12694,6 +12714,7 @@ static int ndbcluster_init(void *handlerton_ptr) {
1269412714
opt_ndb_slave_conflict_role = opt_ndb_applier_conflict_role;
1269512715
}
1269612716

12717+
ndb_log_info("[TRACE][main][ndbcluster_init][13] Done!");
1269712718
ndbcluster_inited = 1;
1269812719

1269912720
return 0; // OK

storage/ndb/plugin/ha_ndbcluster_binlog.cc

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,8 @@ class Ndb_binlog_setup {
816816
ndb_log_info(" <- sleep");
817817
}
818818

819+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][11 steps expected for successful completion]");
820+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][1] Calling Ndb_global_schema_lock_guard");
819821
// Protect the schema synchronization with GSL(Global Schema Lock)
820822
Ndb_global_schema_lock_guard global_schema_lock_guard(m_thd);
821823
if (global_schema_lock_guard.lock()) {
@@ -831,6 +833,7 @@ class Ndb_binlog_setup {
831833
}
832834
});
833835

836+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][2] Calling Ndb_binlog_setup::detect_initial_restart");
834837
// Check if this is a initial restart/start
835838
bool initial_system_restart = false;
836839
if (!detect_initial_restart(thd_ndb, &initial_system_restart)) {
@@ -851,11 +854,13 @@ class Ndb_binlog_setup {
851854

852855
Ndb_dd_sync dd_sync(m_thd, thd_ndb);
853856
if (initial_system_restart) {
857+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][3] Calling Ndb_dd_sync::remove_all_metadata");
854858
// Remove all NDB metadata from DD since this is an initial restart
855859
if (!dd_sync.remove_all_metadata()) {
856860
return false;
857861
}
858862
} else {
863+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][3] Calling Ndb_dd_sync::remove_deleted_tables");
859864
/*
860865
Not an initial restart. Delete DD table definitions corresponding to NDB
861866
tables that no longer exist in NDB Dictionary. This is to ensure that
@@ -872,6 +877,7 @@ class Ndb_binlog_setup {
872877
thd_ndb_options.set(Thd_ndb::ALLOW_BINLOG_SETUP);
873878

874879
const bool ndb_schema_dist_upgrade_allowed = ndb_allow_ndb_schema_upgrade();
880+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][4] Calling Ndb_schema_dist_table::create_or_upgrade");
875881
Ndb_schema_dist_table schema_dist_table(thd_ndb);
876882
if (!schema_dist_table.create_or_upgrade(m_thd,
877883
ndb_schema_dist_upgrade_allowed))
@@ -901,6 +907,7 @@ class Ndb_binlog_setup {
901907
assert(false);
902908
}
903909

910+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][5] Calling Ndb_schema_result_table::create_or_upgrade");
904911
Ndb_schema_result_table schema_result_table(thd_ndb);
905912
if (!schema_result_table.create_or_upgrade(m_thd,
906913
ndb_schema_dist_upgrade_allowed))
@@ -916,9 +923,11 @@ class Ndb_binlog_setup {
916923
return false;
917924
}
918925

926+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][6] Calling Ndb_index_stat_head_table::create_or_upgrade");
919927
Ndb_index_stat_head_table index_stat_head_table(thd_ndb);
920928
if (!index_stat_head_table.create_or_upgrade(m_thd, true)) return false;
921929

930+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][7] Calling Ndb_index_stat_sample_table::create_or_upgrade");
922931
Ndb_index_stat_sample_table index_stat_sample_table(thd_ndb);
923932
if (!index_stat_sample_table.create_or_upgrade(m_thd, true)) return false;
924933

@@ -929,14 +938,17 @@ class Ndb_binlog_setup {
929938
ndb_index_stat_restart();
930939
}
931940

941+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][8] Calling Ndb_apply_status_table::create_or_upgrade");
932942
Ndb_apply_status_table apply_status_table(thd_ndb);
933943
if (!apply_status_table.create_or_upgrade(m_thd, true)) return false;
934944

945+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][9] Calling Ndb_dd_sync::synchronize");
935946
if (!dd_sync.synchronize()) {
936947
ndb_log_verbose(9, "Failed to synchronize DD with NDB");
937948
return false;
938949
}
939950

951+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][10] Calling Ndb_dd_sync::Ndb_stored_grants::setup");
940952
if (!Ndb_stored_grants::setup(m_thd, thd_ndb)) {
941953
ndb_log_warning("Failed to setup synchronized privileges");
942954
return false;
@@ -949,6 +961,7 @@ class Ndb_binlog_setup {
949961
// nodes run a version that has support for the Data Dictionary.
950962
Ndb_schema_dist_client::block_ddl(!ndb_all_nodes_support_mysql_dd());
951963

964+
ndb_log_info("[TRACE][binlog][Ndb_binlog_setup][11] Done!");
952965
return true; // Setup completed OK
953966
}
954967
};
@@ -7407,8 +7420,13 @@ void Ndb_binlog_thread::do_run() {
74077420

74087421
assert(m_apply_status_share == nullptr);
74097422

7410-
while (!ndb_connection_is_ready(thd_ndb->connection, 1) ||
7411-
!binlog_setup.setup(thd_ndb)) {
7423+
bool ret_conn_is_ready = false;
7424+
bool ret_binlog_setup = false;
7425+
while (!(ret_conn_is_ready = ndb_connection_is_ready(thd_ndb->connection, 1)) ||
7426+
!(ret_binlog_setup = binlog_setup.setup(thd_ndb))) {
7427+
log_info("Retrying: checking both ndb_connection_is_ready "
7428+
"and Ndb_binlog_setup::setup, last check result: [%d, %d]",
7429+
ret_conn_is_ready, ret_binlog_setup);
74127430
// Failed to complete binlog_setup, remove all existing event
74137431
// operations from potential partial setup
74147432
remove_all_event_operations(s_ndb, i_ndb);

storage/ndb/plugin/ndb_metadata_change_monitor.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ void Ndb_metadata_change_monitor::do_run() {
509509
// Outer loop to ensure that if the connection to NDB is lost, a fresh
510510
// connection is established before the thread continues its processing
511511
while (!ndb_connection_is_ready(thd_ndb->connection, 1)) {
512+
log_info("Retrying: checking ndb_connection_is_ready");
512513
// No connection to NDB yet. Retry until connection is established while
513514
// checking if stop has been requested at 1 second intervals
514515
if (is_stop_requested()) {

storage/ndb/plugin/ndb_stored_grants.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,23 +996,28 @@ bool Ndb_stored_grants::setup(THD *thd, Thd_ndb *thd_ndb) {
996996
ndb_log_info("Ndb_stored_grants::setup() -- normal setup");
997997
}
998998

999+
ndb_log_info("[TRACE][binlog][Ndb_stored_grants::setup][4 steps expected for successful completion]");
9991000
/* Create or upgrade the ndb_sql_metadata table.
10001001
If this fails, create_or_upgrade() will log an error message,
10011002
and we return false, which will cause the whole binlog setup
10021003
routine to be retried.
10031004
*/
10041005
Ndb_sql_metadata_table sql_metadata_table(thd_ndb);
1006+
ndb_log_info("[TRACE][binlog][Ndb_stored_grants::setup][1] Calling Ndb_sql_metadata_table::create_or_upgrade");
10051007
if (!sql_metadata_table.create_or_upgrade(thd, true)) return false;
10061008

1009+
ndb_log_info("[TRACE][binlog][Ndb_stored_grants::setup][2] Calling Ndb_sql_metadata_table::setup");
10071010
metadata_table.setup(thd_ndb->ndb->getDictionary(),
10081011
sql_metadata_table.get_table());
1012+
ndb_log_info("[TRACE][binlog][Ndb_stored_grants::setup][3] Calling Ndb_sql_metadata_table::initializeSnapshotLock");
10091013
const NdbError &err = metadata_table.initializeSnapshotLock(thd_ndb->ndb);
10101014
if (err.status != NdbError::Success) {
10111015
ndb_log_error("ndb_stored_grants initalizeSnapshotLock failure: %d %s",
10121016
err.code, err.message);
10131017
return false;
10141018
}
10151019

1020+
ndb_log_info("[TRACE][binlog][Ndb_stored_grants::setup][4] Done!");
10161021
return true;
10171022
}
10181023

0 commit comments

Comments
 (0)