Skip to content

Commit 4a37fbb

Browse files
committed
sync pacemakerd with sbd
1 parent 33e28dc commit 4a37fbb

File tree

3 files changed

+173
-43
lines changed

3 files changed

+173
-43
lines changed

daemons/pacemakerd/pacemakerd.c

Lines changed: 99 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,14 @@ static bool global_keep_tracking = false;
4848
static const char *local_name = NULL;
4949
static uint32_t local_nodeid = 0;
5050
static crm_trigger_t *shutdown_trigger = NULL;
51+
static crm_trigger_t *startup_trigger = NULL; /* created in main() only when sbd was located; runs init_children_processes() when a ping arrives in the wait-for-ping state */
5152
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
5253

54+
static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; /* current lifecycle state; sent back in every ping reply */
55+
static gboolean running_with_sbd = FALSE; /* set TRUE in main() when pcmk_locate_sbd() returns > 0 */
56+
static gboolean first_state_query_seen = FALSE; /* set TRUE by the ping handler; NOTE(review): no reader is visible in this change - confirm it is used */
57+
static gboolean shutdown_complete_state_reported = FALSE; /* set TRUE after a ping was answered in the shutdown-complete state; with sbd, pcmk_shutdown_worker() returns early until then */
58+
5359
typedef struct pcmk_child_s {
5460
int pid;
5561
long flag;
@@ -444,6 +450,7 @@ pcmk_shutdown_worker(gpointer user_data)
444450
if (phase == 0) {
445451
crm_notice("Shutting down Pacemaker");
446452
phase = max;
453+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
447454
}
448455

449456
for (; phase > 0; phase--) {
@@ -497,6 +504,10 @@ pcmk_shutdown_worker(gpointer user_data)
497504

498505
/* send_cluster_id(); */
499506
crm_notice("Shutdown complete");
507+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
508+
if (running_with_sbd && !shutdown_complete_state_reported) {
509+
return TRUE;
510+
}
500511

501512
{
502513
const char *delay = daemon_option("shutdown_delay");
@@ -563,35 +574,88 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
563574
crm_client_t *c = crm_client_get(qbc);
564575
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
565576

566-
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
567-
if (msg == NULL) {
568-
return 0;
577+
if (msg != NULL) {
578+
task = crm_element_value(msg, F_CRM_TASK);
569579
}
570580

571-
task = crm_element_value(msg, F_CRM_TASK);
572-
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
573-
/* Time to quit */
574-
crm_notice("Shutting down in response to ticket %s (%s)",
575-
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
576-
pcmk_shutdown(15);
581+
if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
582+
const char *value = NULL;
583+
xmlNode *ping = NULL;
584+
xmlNode *reply = NULL;
585+
time_t pinged = time(NULL);
586+
587+
/* Pinged for status */
588+
crm_trace("Pinged from %s.%s",
589+
crm_element_value(msg, F_CRM_ORIGIN),
590+
crm_element_value(msg, F_CRM_SYS_FROM));
591+
first_state_query_seen = TRUE;
592+
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
593+
value = crm_element_value(msg, F_CRM_SYS_TO);
594+
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
595+
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
596+
crm_xml_add_int(ping, XML_ATTR_TSTAMP, (int) pinged);
597+
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
598+
reply = create_reply(msg, ping);
599+
free_xml(ping);
600+
if (reply) {
601+
const char *local_name = get_local_node_name();
602+
603+
if ((crm_element_value(reply, F_CRM_HOST_FROM) == NULL) &&
604+
local_name) {
605+
crm_xml_add(reply, F_CRM_HOST_FROM, local_name);
606+
}
607+
if (crm_ipcs_send(c, id, reply, crm_ipc_server_event) <= 0) {
608+
crm_err("Failed sending ping-reply");
609+
}
610+
free_xml(reply);
611+
} else {
612+
crm_err("Failed building ping-reply");
613+
}
614+
if (crm_str_eq(pacemakerd_state,
615+
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, TRUE)) {
616+
sleep(5); /* get out message - less ugly alternative? */
617+
shutdown_complete_state_reported = TRUE;
618+
if (shutdown_trigger) {
619+
mainloop_set_trigger(shutdown_trigger);
620+
}
621+
} else if (crm_str_eq(pacemakerd_state,
622+
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
623+
TRUE)) {
624+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
625+
mainloop_set_trigger(startup_trigger);
626+
}
627+
} else {
628+
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
577629

578-
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
579-
/* Send to everyone */
580-
struct iovec *iov;
581-
int id = 0;
582-
const char *name = NULL;
630+
if (msg == NULL) {
631+
return 0;
632+
}
583633

584-
crm_element_value_int(msg, XML_ATTR_ID, &id);
585-
name = crm_element_value(msg, XML_ATTR_UNAME);
586-
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
634+
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
635+
/* Time to quit */
636+
crm_notice("Shutting down in response to ticket %s (%s)",
637+
crm_element_value(msg, F_CRM_REFERENCE),
638+
crm_element_value(msg, F_CRM_ORIGIN));
639+
pcmk_shutdown(15);
587640

588-
iov = calloc(1, sizeof(struct iovec));
589-
iov->iov_base = dump_xml_unformatted(msg);
590-
iov->iov_len = 1 + strlen(iov->iov_base);
591-
send_cpg_iov(iov);
641+
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
642+
/* Send to everyone */
643+
struct iovec *iov;
644+
int id = 0;
645+
const char *name = NULL;
592646

593-
} else {
594-
update_process_clients(c);
647+
crm_element_value_int(msg, XML_ATTR_ID, &id);
648+
name = crm_element_value(msg, XML_ATTR_UNAME);
649+
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
650+
651+
iov = calloc(1, sizeof(struct iovec));
652+
iov->iov_base = dump_xml_unformatted(msg);
653+
iov->iov_len = 1 + strlen(iov->iov_base);
654+
send_cpg_iov(iov);
655+
656+
} else {
657+
update_process_clients(c);
658+
}
595659
}
596660

597661
free_xml(msg);
@@ -1051,8 +1115,8 @@ find_and_track_existing_processes(void)
10511115
return (tracking > INT_MAX) ? INT_MAX : tracking;
10521116
}
10531117

1054-
static void
1055-
init_children_processes(void)
1118+
static gboolean
1119+
init_children_processes(gpointer user_data)
10561120
{
10571121
int start_seq = 1, lpc = 0;
10581122
static int max = SIZEOF(pcmk_children);
@@ -1078,6 +1142,8 @@ init_children_processes(void)
10781142
* This may be useful for the daemons to know
10791143
*/
10801144
setenv("PCMK_respawned", "true", 1);
1145+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
1146+
return TRUE;
10811147
}
10821148

10831149
static void
@@ -1356,6 +1422,7 @@ main(int argc, char **argv)
13561422

13571423
if(pcmk_locate_sbd() > 0) {
13581424
setenv("PCMK_watchdog", "true", 1);
1425+
running_with_sbd = TRUE;
13591426
} else {
13601427
setenv("PCMK_watchdog", "false", 1);
13611428
}
@@ -1394,7 +1461,13 @@ main(int argc, char **argv)
13941461
mainloop_add_signal(SIGTERM, pcmk_shutdown);
13951462
mainloop_add_signal(SIGINT, pcmk_shutdown);
13961463

1397-
init_children_processes();
1464+
if (running_with_sbd) {
1465+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
1466+
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
1467+
} else {
1468+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
1469+
init_children_processes(NULL);
1470+
}
13981471

13991472
crm_notice("Pacemaker daemon successfully started and accepting connections");
14001473
g_main_loop_run(mainloop);

include/crm/msg_xml.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,13 @@ extern "C" {
123123
# define XML_PING_ATTR_STATUS "result"
124124
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
125125
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
126+
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state" /* ping-reply attribute holding pacemakerd's state; its value is one of the strings below */
127+
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init" /* initial value, before main() has chosen a startup path */
128+
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons" /* set right before init_children_processes() runs (directly without sbd, or after a ping with sbd) */
129+
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping" /* sbd was located: starting the children is deferred until a ping is received */
130+
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running" /* set at the end of init_children_processes() */
131+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down" /* set when pcmk_shutdown_worker() starts its first pass */
132+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete" /* set after pcmk_shutdown_worker() logs "Shutdown complete" */
126133

127134
# define XML_TAG_FRAGMENT "cib_fragment"
128135

tools/crmadmin.c

Lines changed: 67 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <crm/crm.h>
2424
#include <crm/msg_xml.h>
2525
#include <crm/common/xml.h>
26+
#include <crm/common/iso8601.h>
2627

2728
#include <crm/common/mainloop.h>
2829

@@ -33,6 +34,7 @@ static int message_timeout_ms = 30 * 1000;
3334

3435
static GMainLoop *mainloop = NULL;
3536
static crm_ipc_t *crmd_channel = NULL;
37+
static crm_ipc_t *pacemakerd_channel = NULL;
3638
static char *admin_uuid = NULL;
3739

3840
gboolean do_init(void);
@@ -46,6 +48,7 @@ static gboolean BE_VERBOSE = FALSE;
4648
static int expected_responses = 1;
4749
static gboolean BASH_EXPORT = FALSE;
4850
static gboolean DO_HEALTH = FALSE;
51+
static gboolean DO_PACEMAKERD_HEALTH = FALSE;
4952
static gboolean DO_RESET = FALSE;
5053
static gboolean DO_RESOURCE = FALSE;
5154
static gboolean DO_ELECT_DC = FALSE;
@@ -70,6 +73,8 @@ static struct crm_option long_options[] = {
7073
/* daemon options */
7174
{"status", 1, 0, 'S', "Display the status of the specified node." },
7275
{"-spacer-", 1, 0, '-', "\n\tResult is the node's internal FSM state which can be useful for debugging\n"},
76+
{"pacemakerd",0, 0, 'P', "Display the status of local pacemakerd."},
77+
{"-spacer-", 1, 0, '-', "\n\tResult is the state of the sub-daemons watched by pacemakerd\n"},
7378
{"dc_lookup", 0, 0, 'D', "Display the uname of the node co-ordinating the cluster."},
7479
{"-spacer-", 1, 0, '-', "\n\tThis is an internal detail and is rarely useful to administrators except when deciding on which node to examine the logs.\n"},
7580
{"nodes", 0, 0, 'N', "\tDisplay the uname of all member nodes"},
@@ -142,6 +147,9 @@ main(int argc, char **argv)
142147
case 'q':
143148
BE_SILENT = TRUE;
144149
break;
150+
case 'P':
151+
DO_PACEMAKERD_HEALTH = TRUE;
152+
break;
145153
case 'S':
146154
DO_HEALTH = TRUE;
147155
crm_trace("Option %c => %s", flag, optarg);
@@ -215,19 +223,26 @@ do_work(void)
215223
xmlNode *msg_data = NULL;
216224
gboolean all_is_good = TRUE;
217225

218-
if (DO_HEALTH == TRUE) {
226+
if ((DO_HEALTH == TRUE) || (DO_PACEMAKERD_HEALTH == TRUE)) {
219227
crm_trace("Querying the system");
220228

221229
sys_to = CRM_SYSTEM_DC;
222230

223-
if (dest_node != NULL) {
231+
if ((DO_HEALTH == TRUE) && (dest_node != NULL)) {
224232
sys_to = CRM_SYSTEM_CRMD;
225233
crmd_operation = CRM_OP_PING;
226234

227235
if (BE_VERBOSE) {
228236
expected_responses = 1;
229237
}
230238

239+
} else if (DO_PACEMAKERD_HEALTH == TRUE) {
240+
sys_to = CRM_SYSTEM_MCP;
241+
crmd_operation = CRM_OP_PING;
242+
243+
if (BE_VERBOSE) {
244+
expected_responses = 1;
245+
}
231246
} else {
232247
crm_info("Cluster-wide health not available yet");
233248
all_is_good = FALSE;
@@ -286,7 +301,7 @@ do_work(void)
286301
}
287302

288303
/* send it */
289-
if (crmd_channel == NULL) {
304+
if ((DO_PACEMAKERD_HEALTH?pacemakerd_channel:crmd_channel) == NULL) {
290305
crm_err("The IPC connection is not valid, cannot send anything");
291306
return -1;
292307
}
@@ -303,7 +318,8 @@ do_work(void)
303318
xmlNode *cmd = create_request(crmd_operation, msg_data, dest_node, sys_to,
304319
crm_system_name, admin_uuid);
305320

306-
crm_ipc_send(crmd_channel, cmd, 0, 0, NULL);
321+
crm_ipc_send(DO_PACEMAKERD_HEALTH?pacemakerd_channel:crmd_channel,
322+
cmd, 0, 0, NULL);
307323
free_xml(cmd);
308324
}
309325

@@ -329,21 +345,39 @@ struct ipc_client_callbacks crm_callbacks = {
329345
gboolean
330346
do_init(void)
331347
{
332-
mainloop_io_t *source =
348+
mainloop_io_t *crmd_source =
333349
mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);
350+
mainloop_io_t *pacemakerd_source =
351+
mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);
334352

335353
admin_uuid = crm_getpid_s();
336354

337-
crmd_channel = mainloop_get_ipc_client(source);
355+
crmd_channel = mainloop_get_ipc_client(crmd_source);
356+
pacemakerd_channel = mainloop_get_ipc_client(pacemakerd_source);
338357

339-
if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) {
358+
if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST || DO_PACEMAKERD_HEALTH) {
340359
return TRUE;
341360

342-
} else if (crmd_channel != NULL) {
343-
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
361+
} else {
362+
int hellos = 0;
363+
364+
if (crmd_channel != NULL) {
365+
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
344366

345-
crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
346-
return TRUE;
367+
crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
368+
hellos++;
369+
}
370+
#if 0
371+
if (pacemakerd_channel != NULL) {
372+
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
373+
374+
crm_ipc_send(pacemakerd_channel, xml, 0, 0, NULL);
375+
hellos++;
376+
}
377+
#endif
378+
if (hellos == 1) {
379+
return TRUE;
380+
}
347381
}
348382
return FALSE;
349383
}
@@ -394,15 +428,31 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata)
394428

395429
} else if (validate_crm_message(xml, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) {
396430
crm_trace("Message was not a CRM response. Discarding.");
431+
printf("Validation of response failed\n");
397432

398-
} else if (DO_HEALTH) {
433+
} else if (DO_HEALTH || DO_PACEMAKERD_HEALTH) {
399434
xmlNode *data = get_message_xml(xml, F_CRM_DATA);
400-
const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE);
401-
402-
printf("Status of %s@%s: %s (%s)\n",
435+
const char *state = DO_PACEMAKERD_HEALTH?
436+
crm_element_value(data, XML_PING_ATTR_PACEMAKERDSTATE):
437+
crm_element_value(data, XML_PING_ATTR_CRMDSTATE);
438+
const char *host_from = crm_element_value(xml, F_CRM_HOST_FROM);
439+
time_t pinged = (time_t) 0;
440+
crm_time_t *crm_when = crm_time_new(NULL);
441+
char *pinged_buf = NULL;
442+
443+
crm_element_value_int(data, XML_ATTR_TSTAMP, (int *) &pinged);
444+
crm_time_set_timet(crm_when, &pinged);
445+
pinged_buf = crm_time_as_string(crm_when,
446+
crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone);
447+
printf("Status of %s@%s: %s (%s%s%s)\n",
403448
crm_element_value(data, XML_PING_ATTR_SYSFROM),
404-
crm_element_value(xml, F_CRM_HOST_FROM),
405-
state, crm_element_value(data, XML_PING_ATTR_STATUS));
449+
host_from?host_from:"local",
450+
state, crm_element_value(data, XML_PING_ATTR_STATUS),
451+
((int) pinged)?" @ ":"",
452+
((int) pinged)?pinged_buf:"");
453+
454+
free(pinged_buf);
455+
crm_time_free(crm_when);
406456

407457
if (BE_SILENT && state != NULL) {
408458
fprintf(stderr, "%s\n", state);

0 commit comments

Comments
 (0)