Skip to content

Commit 96f98e4

Browse files
committed
Fix: sbd-integration: sync pacemakerd with sbd
Make pacemakerd wait to be pinged by sbd before starting sub-daemons. Ping replies additionally carry the health state and the timestamp of the last successful check. On shutdown, bring down all the sub-daemons and wait to be polled for state by sbd before finally exiting pacemakerd. Add a new API so as not to make the XML structure an external interface.
1 parent 971b4e9 commit 96f98e4

File tree

9 files changed

+551
-20
lines changed

9 files changed

+551
-20
lines changed

daemons/pacemakerd/pacemakerd.c

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,14 @@ static bool global_keep_tracking = false;
4040
#define PCMK_PROCESS_CHECK_INTERVAL 5
4141

4242
static crm_trigger_t *shutdown_trigger = NULL;
43+
static crm_trigger_t *startup_trigger = NULL;
4344
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
4445

46+
static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
47+
static gboolean running_with_sbd = FALSE;
48+
static uint shutdown_complete_state_reported_to = 0;
49+
static gboolean shutdown_complete_state_reported_client_closed = FALSE;
50+
4551
typedef struct pcmk_child_s {
4652
pid_t pid;
4753
long flag;
@@ -374,21 +380,20 @@ escalate_shutdown(gpointer data)
374380
static gboolean
375381
pcmk_shutdown_worker(gpointer user_data)
376382
{
377-
static int phase = 0;
383+
static int phase = SIZEOF(pcmk_children);
378384
static time_t next_log = 0;
379-
static int max = SIZEOF(pcmk_children);
380385

381386
int lpc = 0;
382387

383-
if (phase == 0) {
388+
if (phase == SIZEOF(pcmk_children)) {
384389
crm_notice("Shutting down Pacemaker");
385-
phase = max;
390+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
386391
}
387392

388393
for (; phase > 0; phase--) {
389394
/* Don't stop anything with start_seq < 1 */
390395

391-
for (lpc = max - 1; lpc >= 0; lpc--) {
396+
for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
392397
pcmk_child_t *child = &(pcmk_children[lpc]);
393398

394399
if (phase != child->start_seq) {
@@ -436,6 +441,11 @@ pcmk_shutdown_worker(gpointer user_data)
436441
}
437442

438443
crm_notice("Shutdown complete");
444+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
445+
if (!fatal_error && running_with_sbd &&
446+
!shutdown_complete_state_reported_client_closed) {
447+
return TRUE;
448+
}
439449

440450
{
441451
const char *delay = pcmk__env_option("shutdown_delay");
@@ -489,6 +499,50 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
489499
return 0;
490500
}
491501

502+
static void
503+
pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
504+
{
505+
const char *value = NULL;
506+
xmlNode *ping = NULL;
507+
xmlNode *reply = NULL;
508+
time_t pinged = time(NULL);
509+
const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
510+
511+
/* Pinged for status */
512+
crm_trace("Pinged from %s.%s",
513+
crm_element_value(msg, F_CRM_ORIGIN),
514+
from?from:"unknown");
515+
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
516+
value = crm_element_value(msg, F_CRM_SYS_TO);
517+
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
518+
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
519+
crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
520+
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
521+
reply = create_reply(msg, ping);
522+
free_xml(ping);
523+
if (reply) {
524+
if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) <= 0) {
525+
crm_err("Failed sending ping-reply");
526+
}
527+
free_xml(reply);
528+
} else {
529+
crm_err("Failed building ping-reply");
530+
}
531+
/* just proceed state on sbd pinging us */
532+
if (from && strstr(from, "sbd")) {
533+
if (crm_str_eq(pacemakerd_state,
534+
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
535+
TRUE)) {
536+
shutdown_complete_state_reported_to = c->pid;
537+
} else if (crm_str_eq(pacemakerd_state,
538+
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
539+
TRUE)) {
540+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
541+
mainloop_set_trigger(startup_trigger);
542+
}
543+
}
544+
}
545+
492546
/* Exit code means? */
493547
static int32_t
494548
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
@@ -514,6 +568,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
514568
crm_trace("Ignoring IPC request to purge node "
515569
"because peer cache is not used");
516570

571+
} else if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
572+
pcmk_handle_ping_request(c, msg, id);
573+
517574
} else {
518575
crm_debug("Unrecognized IPC command '%s' sent to pacemakerd",
519576
crm_str(task));
@@ -533,6 +590,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c)
533590
return 0;
534591
}
535592
crm_trace("Connection %p", c);
593+
if (shutdown_complete_state_reported_to == client->pid) {
594+
shutdown_complete_state_reported_client_closed = TRUE;
595+
if (shutdown_trigger) {
596+
mainloop_set_trigger(shutdown_trigger);
597+
}
598+
}
536599
pcmk__free_client(client);
537600
return 0;
538601
}
@@ -924,8 +987,8 @@ find_and_track_existing_processes(void)
924987
return pcmk_rc_ok;
925988
}
926989

927-
static void
928-
init_children_processes(void)
990+
static gboolean
991+
init_children_processes(void *user_data)
929992
{
930993
int start_seq = 1, lpc = 0;
931994
static int max = SIZEOF(pcmk_children);
@@ -951,6 +1014,8 @@ init_children_processes(void)
9511014
* This may be useful for the daemons to know
9521015
*/
9531016
setenv("PCMK_respawned", "true", 1);
1017+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
1018+
return TRUE;
9541019
}
9551020

9561021
static void
@@ -1154,6 +1219,7 @@ main(int argc, char **argv)
11541219

11551220
if(pcmk_locate_sbd() > 0) {
11561221
setenv("PCMK_watchdog", "true", 1);
1222+
running_with_sbd = TRUE;
11571223
} else {
11581224
setenv("PCMK_watchdog", "false", 1);
11591225
}
@@ -1170,7 +1236,13 @@ main(int argc, char **argv)
11701236
mainloop_add_signal(SIGTERM, pcmk_shutdown);
11711237
mainloop_add_signal(SIGINT, pcmk_shutdown);
11721238

1173-
init_children_processes();
1239+
if (running_with_sbd) {
1240+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
1241+
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
1242+
} else {
1243+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
1244+
init_children_processes(NULL);
1245+
}
11741246

11751247
crm_notice("Pacemaker daemon successfully started and accepting connections");
11761248
g_main_loop_run(mainloop);

include/crm/common/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in
1212
headerdir=$(pkgincludedir)/crm/common
1313

1414
header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \
15-
nvpair.h acl.h ipc_controld.h
15+
nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h
1616
noinst_HEADERS = internal.h alerts_internal.h \
1717
iso8601_internal.h remote_internal.h xml_internal.h \
1818
ipc_internal.h output.h cmdline_internal.h curses_internal.h \
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright 2020 the Pacemaker project contributors
3+
*
4+
* The version control history for this file may have further details.
5+
*
6+
* This source code is licensed under the GNU Lesser General Public License
7+
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8+
*/
9+
10+
#ifndef PCMK__IPC_PACEMAKERD__H
11+
# define PCMK__IPC_PACEMAKERD__H
12+
13+
#ifdef __cplusplus
14+
extern "C" {
15+
#endif
16+
17+
/**
18+
* \file
19+
* \brief IPC commands for Pacemakerd
20+
*
21+
* \ingroup core
22+
*/
23+
24+
#include <stdbool.h> // bool
25+
#include <libxml/tree.h> // xmlNode
26+
#include <crm/common/ipc.h> // pcmk_ipc_api_t
27+
28+
/*!
 * pacemakerd state as carried in the ping member of
 * pcmk_pacemakerd_api_reply_t
 *
 * Enumerators are numbered explicitly so the values stay stable if entries
 * are ever added.
 */
enum pcmk_pacemakerd_state {
    pcmk_pacemakerd_state_invalid           = -1,
    pcmk_pacemakerd_state_init              = 0,
    pcmk_pacemakerd_state_starting_daemons  = 1,
    pcmk_pacemakerd_state_wait_for_ping     = 2,
    pcmk_pacemakerd_state_running           = 3,
    pcmk_pacemakerd_state_shutting_down     = 4,
    pcmk_pacemakerd_state_shutdown_complete = 5,

    // Alias of the highest-numbered state
    pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete,
};
38+
39+
/*!
 * Possible types of pacemakerd replies
 *
 * Selects which member of the data union in pcmk_pacemakerd_api_reply_t
 * applies.
 */
enum pcmk_pacemakerd_api_reply {
    pcmk_pacemakerd_reply_unknown = 0,
    pcmk_pacemakerd_reply_ping    = 1,
};
44+
45+
/*!
46+
* Pacemakerd reply passed to event callback
47+
*/
48+
typedef struct {
49+
enum pcmk_pacemakerd_api_reply reply_type;
50+
51+
union {
52+
// pcmk_pacemakerd_reply_ping
53+
struct {
54+
const char *sys_from;
55+
enum pcmk_pacemakerd_state state;
56+
time_t last_good;
57+
int status;
58+
} ping;
59+
} data;
60+
} pcmk_pacemakerd_api_reply_t;
61+
62+
// NOTE(review): presumably sends a ping request to pacemakerd on behalf of
// ipc_name and returns a standard Pacemaker return code -- confirm in
// lib/common/ipc_pacemakerd.c
int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name);

// NOTE(review): presumably the number of replies still outstanding on api --
// confirm in lib/common/ipc_pacemakerd.c
unsigned int pcmk_pacemakerd_api_replies_expected(pcmk_ipc_api_t *api);

// Convert between enum pcmk_pacemakerd_state and its textual form
// NOTE(review): the text is presumably the XML_PING_ATTR_PACEMAKERDSTATE_*
// strings from crm/msg_xml.h -- confirm in lib/common/ipc_pacemakerd.c
enum pcmk_pacemakerd_state pcmk_pacemakerd_api_pacemakerd_state_text2enum(
    const char *state);
const char *pcmk_pacemakerd_api_pacemakerd_state_enum2text(
    enum pcmk_pacemakerd_state state);
69+
70+
#ifdef __cplusplus
71+
}
72+
#endif
73+
74+
#endif // PCMK__IPC_PACEMAKERD__H

include/crm/msg_xml.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ extern "C" {
123123
# define XML_PING_ATTR_STATUS "result"
124124
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
125125
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
126+
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
127+
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
128+
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
129+
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
130+
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
131+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
132+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
126133

127134
# define XML_TAG_FRAGMENT "cib_fragment"
128135

lib/common/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES += io.c
5050
libcrmcommon_la_SOURCES += ipc_client.c
5151
libcrmcommon_la_SOURCES += ipc_common.c
5252
libcrmcommon_la_SOURCES += ipc_controld.c
53+
libcrmcommon_la_SOURCES += ipc_pacemakerd.c
5354
libcrmcommon_la_SOURCES += ipc_server.c
5455
libcrmcommon_la_SOURCES += iso8601.c
5556
libcrmcommon_la_SOURCES += logging.c

lib/common/crmcommon_private.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,4 +209,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header);
209209
G_GNUC_INTERNAL
210210
pcmk__ipc_methods_t *pcmk__controld_api_methods(void);
211211

212+
G_GNUC_INTERNAL
213+
pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void);
214+
212215
#endif // CRMCOMMON_PRIVATE__H

lib/common/ipc_client.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
8484
break;
8585

8686
case pcmk_ipc_pacemakerd:
87+
(*api)->cmds = pcmk__pacemakerd_api_methods();
8788
break;
8889

8990
case pcmk_ipc_schedulerd:
@@ -258,7 +259,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log)
258259
return for_log? "fencer" : NULL /* "stonith-ng" */;
259260

260261
case pcmk_ipc_pacemakerd:
261-
return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */;
262+
return for_log? "launcher" : CRM_SYSTEM_MCP;
262263

263264
case pcmk_ipc_schedulerd:
264265
return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */;

0 commit comments

Comments
 (0)