Skip to content

Commit 1f84fe0

Browse files
author
Ken Gaillot
authored
Merge pull request #2119 from wenningerk/sync_with_sbd_universal_daemon_ipc_api_master
Fix: sbd-integration: sync pacemakerd with sbd
2 parents 10069b4 + 567cb6e commit 1f84fe0

File tree

12 files changed

+608
-29
lines changed

12 files changed

+608
-29
lines changed

daemons/pacemakerd/pacemakerd.c

Lines changed: 104 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,25 @@ static bool global_keep_tracking = false;
4040
#define PCMK_PROCESS_CHECK_INTERVAL 5
4141

4242
static crm_trigger_t *shutdown_trigger = NULL;
43+
static crm_trigger_t *startup_trigger = NULL;
4344
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
4445

46+
/* state we report when asked via pacemakerd-api status-ping */
47+
static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
48+
static gboolean running_with_sbd = FALSE; /* local copy */
49+
/* When contacted via pacemakerd-api by a client with "sbd" in
50+
* its name, we assume it is the sbd daemon, which wants to know
51+
* whether pacemakerd shut down gracefully.
52+
* Thus, when everything has been shut down properly, pacemakerd
53+
* waits until it has reported the graceful completion of
54+
* shutdown to sbd. Only when the sbd client closes the
55+
* connection can we assume that the report has arrived
56+
* properly, so that pacemakerd can finally exit.
57+
* The following two variables are used to track that handshake.
58+
*/
59+
static unsigned int shutdown_complete_state_reported_to = 0;
60+
static gboolean shutdown_complete_state_reported_client_closed = FALSE;
61+
4562
typedef struct pcmk_child_s {
4663
pid_t pid;
4764
long flag;
@@ -374,21 +391,20 @@ escalate_shutdown(gpointer data)
374391
static gboolean
375392
pcmk_shutdown_worker(gpointer user_data)
376393
{
377-
static int phase = 0;
394+
static int phase = SIZEOF(pcmk_children);
378395
static time_t next_log = 0;
379-
static int max = SIZEOF(pcmk_children);
380396

381397
int lpc = 0;
382398

383-
if (phase == 0) {
399+
if (phase == SIZEOF(pcmk_children)) {
384400
crm_notice("Shutting down Pacemaker");
385-
phase = max;
401+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
386402
}
387403

388404
for (; phase > 0; phase--) {
389405
/* Don't stop anything with start_seq < 1 */
390406

391-
for (lpc = max - 1; lpc >= 0; lpc--) {
407+
for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
392408
pcmk_child_t *child = &(pcmk_children[lpc]);
393409

394410
if (phase != child->start_seq) {
@@ -436,6 +452,13 @@ pcmk_shutdown_worker(gpointer user_data)
436452
}
437453

438454
crm_notice("Shutdown complete");
455+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
456+
if (!fatal_error && running_with_sbd &&
457+
pcmk__get_sbd_sync_resource_startup() &&
458+
!shutdown_complete_state_reported_client_closed) {
459+
crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
460+
return TRUE;
461+
}
439462

440463
{
441464
const char *delay = pcmk__env_option("shutdown_delay");
@@ -489,6 +512,55 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
489512
return 0;
490513
}
491514

515+
static void
516+
pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
517+
{
518+
const char *value = NULL;
519+
xmlNode *ping = NULL;
520+
xmlNode *reply = NULL;
521+
time_t pinged = time(NULL);
522+
const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
523+
524+
/* Pinged for status */
525+
crm_trace("Pinged from %s.%s",
526+
crm_str(crm_element_value(msg, F_CRM_ORIGIN)),
527+
from?from:"unknown");
528+
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
529+
value = crm_element_value(msg, F_CRM_SYS_TO);
530+
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
531+
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
532+
crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
533+
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
534+
reply = create_reply(msg, ping);
535+
free_xml(ping);
536+
if (reply) {
537+
if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) !=
538+
pcmk_rc_ok) {
539+
crm_err("Failed sending ping-reply");
540+
}
541+
free_xml(reply);
542+
} else {
543+
crm_err("Failed building ping-reply");
544+
}
545+
/* just proceed state on sbd pinging us */
546+
if (from && strstr(from, "sbd")) {
547+
if (crm_str_eq(pacemakerd_state,
548+
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
549+
TRUE)) {
550+
if (pcmk__get_sbd_sync_resource_startup()) {
551+
crm_notice("Shutdown-complete-state passed to SBD.");
552+
}
553+
shutdown_complete_state_reported_to = c->pid;
554+
} else if (crm_str_eq(pacemakerd_state,
555+
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
556+
TRUE)) {
557+
crm_notice("Received startup-trigger from SBD.");
558+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
559+
mainloop_set_trigger(startup_trigger);
560+
}
561+
}
562+
}
563+
492564
/* Exit code means? */
493565
static int32_t
494566
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
@@ -514,6 +586,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
514586
crm_trace("Ignoring IPC request to purge node "
515587
"because peer cache is not used");
516588

589+
} else if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
590+
pcmk_handle_ping_request(c, msg, id);
591+
517592
} else {
518593
crm_debug("Unrecognized IPC command '%s' sent to pacemakerd",
519594
crm_str(task));
@@ -533,6 +608,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c)
533608
return 0;
534609
}
535610
crm_trace("Connection %p", c);
611+
if (shutdown_complete_state_reported_to == client->pid) {
612+
shutdown_complete_state_reported_client_closed = TRUE;
613+
if (shutdown_trigger) {
614+
mainloop_set_trigger(shutdown_trigger);
615+
}
616+
}
536617
pcmk__free_client(client);
537618
return 0;
538619
}
@@ -924,8 +1005,8 @@ find_and_track_existing_processes(void)
9241005
return pcmk_rc_ok;
9251006
}
9261007

927-
static void
928-
init_children_processes(void)
1008+
static gboolean
1009+
init_children_processes(void *user_data)
9291010
{
9301011
int start_seq = 1, lpc = 0;
9311012
static int max = SIZEOF(pcmk_children);
@@ -951,6 +1032,8 @@ init_children_processes(void)
9511032
* This may be useful for the daemons to know
9521033
*/
9531034
setenv("PCMK_respawned", "true", 1);
1035+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
1036+
return TRUE;
9541037
}
9551038

9561039
static void
@@ -1154,6 +1237,7 @@ main(int argc, char **argv)
11541237

11551238
if(pcmk_locate_sbd() > 0) {
11561239
setenv("PCMK_watchdog", "true", 1);
1240+
running_with_sbd = TRUE;
11571241
} else {
11581242
setenv("PCMK_watchdog", "false", 1);
11591243
}
@@ -1170,7 +1254,19 @@ main(int argc, char **argv)
11701254
mainloop_add_signal(SIGTERM, pcmk_shutdown);
11711255
mainloop_add_signal(SIGINT, pcmk_shutdown);
11721256

1173-
init_children_processes();
1257+
if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
1258+
crm_notice("Waiting for startup-trigger from SBD.");
1259+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
1260+
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
1261+
} else {
1262+
if (running_with_sbd) {
1263+
crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
1264+
"by your SBD version) improve reliability of "
1265+
"interworking between SBD & pacemaker.");
1266+
}
1267+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
1268+
init_children_processes(NULL);
1269+
}
11741270

11751271
crm_notice("Pacemaker daemon successfully started and accepting connections");
11761272
g_main_loop_run(mainloop);

include/crm/common/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in
1212
headerdir=$(pkgincludedir)/crm/common
1313

1414
header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \
15-
nvpair.h acl.h ipc_controld.h
15+
nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h
1616
noinst_HEADERS = internal.h alerts_internal.h \
1717
iso8601_internal.h remote_internal.h xml_internal.h \
1818
ipc_internal.h output.h cmdline_internal.h curses_internal.h \
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright 2020 the Pacemaker project contributors
3+
*
4+
* The version control history for this file may have further details.
5+
*
6+
* This source code is licensed under the GNU Lesser General Public License
7+
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8+
*/
9+
10+
#ifndef PCMK__IPC_PACEMAKERD__H
11+
# define PCMK__IPC_PACEMAKERD__H
12+
13+
#ifdef __cplusplus
14+
extern "C" {
15+
#endif
16+
17+
/**
18+
* \file
19+
* \brief IPC commands for Pacemakerd
20+
*
21+
* \ingroup core
22+
*/
23+
24+
#include <sys/types.h> // time_t
25+
#include <crm/common/ipc.h> // pcmk_ipc_api_t
26+
27+
//! Possible states of pacemakerd, as carried in the state field of a ping reply
enum pcmk_pacemakerd_state {
28+
// NOTE(review): presumably what the text-to-enum conversion yields for
// unrecognized text -- confirm against the implementation
pcmk_pacemakerd_state_invalid = -1,
29+
pcmk_pacemakerd_state_init = 0,
30+
pcmk_pacemakerd_state_starting_daemons,
31+
pcmk_pacemakerd_state_wait_for_ping,
32+
pcmk_pacemakerd_state_running,
33+
pcmk_pacemakerd_state_shutting_down,
34+
pcmk_pacemakerd_state_shutdown_complete,
35+
// Alias for the highest-valued valid state
pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete,
36+
};
37+
38+
//! Possible types of pacemakerd replies (stored in a reply's reply_type field)
39+
enum pcmk_pacemakerd_api_reply {
40+
pcmk_pacemakerd_reply_unknown,
41+
//! Reply to a ping; its details are in the ping member of the reply data
pcmk_pacemakerd_reply_ping,
42+
};
43+
44+
/*!
45+
* Pacemakerd reply passed to event callback
46+
*/
47+
typedef struct {
48+
//! Which kind of reply this is
enum pcmk_pacemakerd_api_reply reply_type;
49+
50+
//! Reply details; each member is labelled with the reply type it belongs to
union {
51+
// pcmk_pacemakerd_reply_ping
52+
struct {
53+
// NOTE(review): presumably the subsystem name from the ping reply -- confirm
const char *sys_from;
54+
//! State reported by pacemakerd
enum pcmk_pacemakerd_state state;
55+
// NOTE(review): presumably the timestamp carried in the ping reply -- confirm
time_t last_good;
56+
// NOTE(review): presumably derived from the reply's result attribute;
// the value convention is not visible here -- confirm
int status;
57+
} ping;
58+
} data;
59+
} pcmk_pacemakerd_api_reply_t;
60+
61+
// NOTE(review): presumably sends a ping request to pacemakerd over the given
// API connection; the return-value convention and the exact role of ipc_name
// are not visible in this header -- confirm against the implementation
int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name);
62+
// Map a textual pacemakerd state to its enum pcmk_pacemakerd_state value
// NOTE(review): the accepted text is presumably the set of
// XML_PING_ATTR_PACEMAKERDSTATE_* strings -- confirm
enum pcmk_pacemakerd_state
63+
pcmk_pacemakerd_api_daemon_state_text2enum(const char *state);
64+
// Map an enum pcmk_pacemakerd_state value to its textual form
// NOTE(review): ownership of the returned string and the result for
// out-of-range values are not visible here -- confirm
const char
65+
*pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state);
66+
67+
#ifdef __cplusplus
68+
}
69+
#endif
70+
71+
#endif // PCMK__IPC_PACEMAKERD__H

include/crm/common/options_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ bool pcmk__valid_utilization(const char *value);
111111

112112
// from watchdog.c
113113
long pcmk__get_sbd_timeout(void);
114+
bool pcmk__get_sbd_sync_resource_startup(void);
114115
long pcmk__auto_watchdog_timeout(void);
115116
bool pcmk__valid_sbd_timeout(const char *value);
116117

include/crm/crm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ extern "C" {
5151
* >=3.0.13: Fail counts include operation name and interval
5252
* >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
5353
*/
54-
# define CRM_FEATURE_SET "3.4.0"
54+
# define CRM_FEATURE_SET "3.4.1"
5555

5656
# define EOS '\0'
5757
# define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) )

include/crm/msg_xml.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ extern "C" {
123123
# define XML_PING_ATTR_STATUS "result"
124124
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
125125
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
126+
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
127+
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
128+
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
129+
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
130+
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
131+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
132+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
126133

127134
# define XML_TAG_FRAGMENT "cib_fragment"
128135

lib/common/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES += io.c
5050
libcrmcommon_la_SOURCES += ipc_client.c
5151
libcrmcommon_la_SOURCES += ipc_common.c
5252
libcrmcommon_la_SOURCES += ipc_controld.c
53+
libcrmcommon_la_SOURCES += ipc_pacemakerd.c
5354
libcrmcommon_la_SOURCES += ipc_server.c
5455
libcrmcommon_la_SOURCES += iso8601.c
5556
libcrmcommon_la_SOURCES += logging.c

lib/common/crmcommon_private.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ typedef struct pcmk__ipc_methods_s {
175175
struct pcmk_ipc_api_s {
176176
enum pcmk_ipc_server server; // Daemon this IPC API instance is for
177177
enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched
178+
size_t ipc_size_max; // maximum IPC buffer size
178179
crm_ipc_t *ipc; // IPC connection
179180
mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC
180181
bool free_on_disconnect; // Whether disconnect should free object
@@ -209,4 +210,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header);
209210
G_GNUC_INTERNAL
210211
pcmk__ipc_methods_t *pcmk__controld_api_methods(void);
211212

213+
G_GNUC_INTERNAL
214+
pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void);
215+
212216
#endif // CRMCOMMON_PRIVATE__H

0 commit comments

Comments
 (0)