Bug#37518267 Improve data node thread watchdog shutdown handling

frazerclement · frazerclement · commit 4c61d4ec638f · 2025-03-07T18:33:13.000Z
Backport to 7.6

Two changes:
 1.  Have node error handling set thread watchdog
     state prior to attempting to serialise or log
     error details to files.
     This helps users understand whether Watchdog
     logs indicate a detected overload, or whether
     they indicate a delay in shutting down a
     data node.

 2.  Have the Watchdog thread treat 'slow logging'
     as a special case.
     If a registered thread exceeds its time allowance
     in a shutdown logging state then the watchdog directly
     calls NdbShutdown(), which is more likely to lead
     to an immediate process exit.
     This improves the system's ability to force a
     timely process failure (and subsequent restart)
     potentially at the expense of some logging.

Test coverage by testNodeRestart -n WatchdogSlowShutdown
is enhanced to cover another case.
Error injection coverage of data node shutdown is refactored
to enable future extensions.

Change-Id: I57eabbdb04423409d0aae1b6e548013a7088f4d0
diff --git a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2003, 2024, Oracle and/or its affiliates.
+   Copyright (c) 2003, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -74,11 +74,6 @@
 
 // Used here only to print event reports on stdout/console.
 extern EventLogger * g_eventLogger;
-extern int simulate_error_during_shutdown;
-
-#ifdef ERROR_INSERT
-extern int simulate_error_during_error_reporting;
-#endif
 
 // Index pages used by ACC instances
 Uint32 g_acc_pages_used[1 + MAX_NDBMT_LQH_WORKERS];
@@ -219,17 +214,20 @@ void Cmvmi::execNDB_TAMPER(Signal* signal)
     ndbrequire(false);
   }
 
+#ifdef ERROR_INSERT
 #ifndef NDB_WIN32
   if(ERROR_INSERTED(9996)){
-    simulate_error_during_shutdown= SIGSEGV;
+    globalEmulatorData.theConfiguration->setShutdownHandlingFault(
+        Configuration::SHF_UNIX_SIGNAL, SIGSEGV);
     ndbrequire(false);
   }
 
   if(ERROR_INSERTED(9995)){
-    simulate_error_during_shutdown= SIGSEGV;
+    globalEmulatorData.theConfiguration->setShutdownHandlingFault(
+        Configuration::SHF_UNIX_SIGNAL, SIGSEGV);
     kill(getpid(), SIGABRT);
   }
-  
+#endif
 #endif
 
 }  // execNDB_TAMPER()
@@ -1982,13 +1980,17 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
   if (arg == DumpStateOrd::CmvmiSetErrorHandlingError)
   {
     Uint32 val = 0;
+    Uint32 extra = 0;
     if (signal->length() >= 2)
     {
       val = signal->theData[1];
+      if (signal->length() >= 3) {
+        extra = signal->theData[2];
+      }
     }
-    g_eventLogger->info("Cmvmi : Setting ErrorHandlingError to %u",
-                        val);
-    simulate_error_during_error_reporting = val;
+    g_eventLogger->info("Cmvmi : Setting ShutdownErrorHandling to %u %u", val,
+                        extra);
+    globalEmulatorData.theConfiguration->setShutdownHandlingFault(val, extra);
   }
 #endif
 
diff --git a/storage/ndb/src/kernel/error/ErrorReporter.cpp b/storage/ndb/src/kernel/error/ErrorReporter.cpp
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2003, 2022, Oracle and/or its affiliates.
+   Copyright (c) 2003, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -65,10 +65,6 @@ static void dumpJam(FILE* jamStream,
 
 const char * ndb_basename(const char *path);
 
-#ifdef ERROR_INSERT
-int simulate_error_during_error_reporting = 0;
-#endif
-
 static
 const char*
 formatTimeStampString(char* theDateTimeString, size_t len){
@@ -435,13 +431,33 @@ WriteMessage(int thrdMessageID,
   fflush(stream);
   fclose(stream);
 
+#ifdef ERROR_INSERT
+  if (globalEmulatorData.theConfiguration->getShutdownHandlingFault() ==
+      Configuration::SHF_DELAY_WHILE_WRITING_ERRORLOG) {
+    Uint32 seconds =
+        globalEmulatorData.theConfiguration->getShutdownHandlingFaultExtra();
+    if (seconds == 0) seconds = 300;
+
+    fprintf(stderr,
+            "Stall for %us during error reporting before releasing lock\n",
+            seconds);
+    NdbSleep_SecSleep(seconds);
+    fprintf(stderr, "Stall finished\n");
+  }
+#endif
+
   ErrorReporter::prepare_to_crash(false, (nst == NST_ErrorInsert));
 
 #ifdef ERROR_INSERT
-  if (simulate_error_during_error_reporting == 1)
-  {
-    fprintf(stderr, "Stall during error reporting after releasing lock\n");
-    NdbSleep_MilliSleep(30000);
+  if (globalEmulatorData.theConfiguration->getShutdownHandlingFault() ==
+      Configuration::SHF_DELAY_AFTER_WRITING_ERRORLOG) {
+    Uint32 seconds =
+        globalEmulatorData.theConfiguration->getShutdownHandlingFaultExtra();
+    if (seconds == 0) seconds = 300;
+    fprintf(stderr,
+            "Stall for %us during error reporting after releasing lock\n",
+            seconds);
+    NdbSleep_SecSleep(seconds);
   }
 #endif
 
diff --git a/storage/ndb/src/kernel/ndbd.cpp b/storage/ndb/src/kernel/ndbd.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009, 2024, Oracle and/or its affiliates.
+/* Copyright (c) 2009, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -1152,8 +1152,6 @@ extern "C" my_bool opt_core;
 // instantiated and updated in NdbcntrMain.cpp
 extern Uint32 g_currentStartPhase;
 
-int simulate_error_during_shutdown= 0;
-
 void
 NdbShutdown(int error_code,
             NdbShutdownType type,
@@ -1225,6 +1223,19 @@ NdbShutdown(int error_code,
        * Very serious, don't attempt to free, just die!!
        */
       g_eventLogger->info("Watchdog shutdown completed - %s", exitAbort);
+#ifdef ERROR_INSERT
+      const Uint32 shf =
+          globalEmulatorData.theConfiguration->getShutdownHandlingFault();
+      if (shf != 0) {
+        if (shf == Configuration::SHF_DELAY_AFTER_WRITING_ERRORLOG ||
+            shf == Configuration::SHF_DELAY_WHILE_WRITING_ERRORLOG) {
+          g_eventLogger->info(
+              "ERROR_INSERT : Watchdog choosing restart rather than hard exit "
+              "for test pass");
+          childExit(error_code, NRT_NoStart_Restart, g_currentStartPhase);
+        }
+      }
+#endif
       if (opt_core)
       {
 	childAbort(error_code, -1,g_currentStartPhase);
@@ -1235,13 +1246,19 @@ NdbShutdown(int error_code,
       }
     }
 
+#ifdef ERROR_INSERT
 #ifndef NDB_WIN32
-    if (simulate_error_during_shutdown)
-    {
-      kill(getpid(), simulate_error_during_shutdown);
+    if (globalEmulatorData.theConfiguration->getShutdownHandlingFault() ==
+        Configuration::SHF_UNIX_SIGNAL) {
+      const Uint32 sigId =
+          globalEmulatorData.theConfiguration->getShutdownHandlingFaultExtra();
+      g_eventLogger->info("ERROR_INSERT : Raising unix signal %u to self",
+                          sigId);
+      kill(getpid(), sigId);
       while(true)
 	NdbSleep_MilliSleep(10);
     }
+#endif
 #endif
 
     globalEmulatorData.theWatchDog->doStop();
diff --git a/storage/ndb/src/kernel/vm/Configuration.cpp b/storage/ndb/src/kernel/vm/Configuration.cpp
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2003, 2021, Oracle and/or its affiliates.
+   Copyright (c) 2003, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -384,6 +384,9 @@ Configuration::setupConfiguration(){
     ndbout_c("Mixology level set to 0x%x", _mixologyLevel);
     globalTransporterRegistry.setMixologyLevel(_mixologyLevel);
   }
+
+  _shutdownHandlingFault = 0;
+  _shutdownHandlingFaultExtra = 0;
 #endif
   
   /**
@@ -665,6 +668,18 @@ void
 Configuration::setMixologyLevel(Uint32 l){
   _mixologyLevel = l;
 }
+
+Uint32 Configuration::getShutdownHandlingFault() const {
+  return _shutdownHandlingFault;  // SHF_* code, SHF_NONE (0) when unset
+}
+Uint32 Configuration::getShutdownHandlingFaultExtra() const {
+  return _shutdownHandlingFaultExtra;  // delay in seconds or signal number
+}
+// Records the fault code together with its fault-specific argument
+void Configuration::setShutdownHandlingFault(Uint32 v, Uint32 extra) {
+  _shutdownHandlingFault = v;
+  _shutdownHandlingFaultExtra = extra;
+}
 #endif
 
 const ndb_mgm_configuration_iterator * 
diff --git a/storage/ndb/src/kernel/vm/Configuration.hpp b/storage/ndb/src/kernel/vm/Configuration.hpp
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2003, 2021, Oracle and/or its affiliates.
+   Copyright (c) 2003, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -136,6 +136,23 @@ class Configuration {
 #ifdef ERROR_INSERT
   Uint32 getMixologyLevel() const;
   void setMixologyLevel(Uint32);
+
+  enum ShutdownHandlingFaults {  // codes for setShutdownHandlingFault()
+    SHF_NONE = 0,
+    /* Stall during crash handling, after / while writing the error log */
+    /* Extra specifies delay in seconds, 0 selects a 300s default */
+    SHF_DELAY_AFTER_WRITING_ERRORLOG = 1,
+    SHF_DELAY_WHILE_WRITING_ERRORLOG = 2,
+
+    /* Unix signal raised to self during shutdown handling */
+    /* Extra specifies signal number */
+    SHF_UNIX_SIGNAL = 10
+  };
+
+  Uint32 getShutdownHandlingFault() const;
+  Uint32 getShutdownHandlingFaultExtra() const;
+
+  void setShutdownHandlingFault(Uint32 v, Uint32 extra = 0);
 #endif 
  
   // Cluster configuration
@@ -172,6 +189,8 @@ class Configuration {
   Uint32 _timeBetweenWatchDogCheckInitial;
 #ifdef ERROR_INSERT
   Uint32 _mixologyLevel;
+  Uint32 _shutdownHandlingFault;
+  Uint32 _shutdownHandlingFaultExtra;
 #endif
 
   Vector<struct ThreadInfo> threadInfo;
diff --git a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2003, 2023, Oracle and/or its affiliates.
+   Copyright (c) 2003, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -4838,6 +4838,8 @@ ErrorReporter::prepare_to_crash(bool first_phase, bool error_insert_crash)
 {
   (void)first_phase;
   (void)error_insert_crash;
+
+  globalData.incrementWatchDogCounter(22);  // Handling node stop
 }
 #endif
 
diff --git a/storage/ndb/src/kernel/vm/WatchDog.cpp b/storage/ndb/src/kernel/vm/WatchDog.cpp
@@ -1,5 +1,5 @@
 /*
-   Copyright (c) 2003, 2021, Oracle and/or its affiliates.
+   Copyright (c) 2003, 2025, Oracle and/or its affiliates.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -217,6 +217,9 @@ const char *get_action(char *buf, Uint32 IPValue)
   case 21:
     action = "Initial value in mt_job_thread_main";
     break;
+  case 22:
+    action = "Handling node stop";
+    break;
   default:
     action = NULL;
     break;
@@ -440,6 +443,13 @@ WatchDog::run()
         }
         if ((elapsed[i] > 3 * theInterval) || killer)
         {
+          if (oldCounterValue[i] == 4 ||   // Print Job Buffers at crash
+              oldCounterValue[i] == 22) {  // Handling node stop
+            /* Immediate exit without attempting to trace
+             * to avoid I/O stalls leaving process hanging
+             */
+            NdbShutdown(NDBD_EXIT_WATCHDOG_TERMINATE, NST_Watchdog);
+          }
           shutdownSystem(last_stuck_action);
         }
       }
diff --git a/storage/ndb/src/kernel/vm/mt.cpp b/storage/ndb/src/kernel/vm/mt.cpp
@@ -8049,6 +8049,14 @@ static bool crash_started = false;
 void
 ErrorReporter::prepare_to_crash(bool first_phase, bool error_insert_crash)
 {
+  {
+    void *value= NdbThread_GetTlsKey(NDB_THREAD_TLS_THREAD);
+    thr_data *selfptr = reinterpret_cast<thr_data *>(value);
+    if (selfptr != NULL) {
+      selfptr->m_watchdog_counter = 22;
+    }
+  }
+
   if (first_phase)
   {
     NdbMutex_Lock(&g_thr_repository->stop_for_crash_mutex);
diff --git a/storage/ndb/test/ndbapi/testNodeRestart.cpp b/storage/ndb/test/ndbapi/testNodeRestart.cpp

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`/*`
`2`		`- Copyright (c) 2003, 2023, Oracle and/or its affiliates.`
	`2`	`+ Copyright (c) 2003, 2025, Oracle and/or its affiliates.`
`3`	`3`
`4`	`4`	`This program is free software; you can redistribute it and/or modify`
`5`	`5`	`it under the terms of the GNU General Public License, version 2.0,`
`@@ -4838,6 +4838,8 @@ ErrorReporter::prepare_to_crash(bool first_phase, bool error_insert_crash)`
`4838`	`4838`	`{`
`4839`	`4839`	`(void)first_phase;`
`4840`	`4840`	`(void)error_insert_crash;`
	`4841`	`+`
	`4842`	`+ globalData.incrementWatchDogCounter(22); // Handling node stop`
`4841`	`4843`	`}`
`4842`	`4844`	`#endif`
`4843`	`4845`