Skip to content

Commit c0426eb

Browse files
committed
sync pacemakerd with sbd
1 parent 33e28dc commit c0426eb

File tree

3 files changed

+155
-40
lines changed

3 files changed

+155
-40
lines changed

daemons/pacemakerd/pacemakerd.c

Lines changed: 97 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,14 @@ static bool global_keep_tracking = false;
4848
static const char *local_name = NULL;
4949
static uint32_t local_nodeid = 0;
5050
static crm_trigger_t *shutdown_trigger = NULL;
51+
static crm_trigger_t *startup_trigger = NULL;
5152
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
5253

54+
static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
55+
static gboolean running_with_sbd = FALSE;
56+
static gboolean first_state_query_seen = FALSE;
57+
static gboolean shutdown_complete_state_reported = FALSE;
58+
5359
typedef struct pcmk_child_s {
5460
int pid;
5561
long flag;
@@ -444,6 +450,7 @@ pcmk_shutdown_worker(gpointer user_data)
444450
if (phase == 0) {
445451
crm_notice("Shutting down Pacemaker");
446452
phase = max;
453+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
447454
}
448455

449456
for (; phase > 0; phase--) {
@@ -497,6 +504,10 @@ pcmk_shutdown_worker(gpointer user_data)
497504

498505
/* send_cluster_id(); */
499506
crm_notice("Shutdown complete");
507+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
508+
if (running_with_sbd && !shutdown_complete_state_reported) {
509+
return TRUE;
510+
}
500511

501512
{
502513
const char *delay = daemon_option("shutdown_delay");
@@ -563,35 +574,86 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
563574
crm_client_t *c = crm_client_get(qbc);
564575
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
565576

566-
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
567-
if (msg == NULL) {
568-
return 0;
577+
if (msg != NULL) {
578+
task = crm_element_value(msg, F_CRM_TASK);
569579
}
570580

571-
task = crm_element_value(msg, F_CRM_TASK);
572-
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
573-
/* Time to quit */
574-
crm_notice("Shutting down in response to ticket %s (%s)",
575-
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
576-
pcmk_shutdown(15);
581+
if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
582+
const char *value = NULL;
583+
xmlNode *ping = NULL;
584+
xmlNode *reply = NULL;
585+
586+
/* Pinged for status */
587+
crm_trace("Pinged from %s.%s",
588+
crm_element_value(msg, F_CRM_ORIGIN),
589+
crm_element_value(msg, F_CRM_SYS_FROM));
590+
first_state_query_seen = TRUE;
591+
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
592+
value = crm_element_value(msg, F_CRM_SYS_TO);
593+
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
594+
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
595+
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
596+
reply = create_reply(msg, ping);
597+
free_xml(ping);
598+
if (reply) {
599+
const char *local_name = get_local_node_name();
600+
601+
if ((crm_element_value(reply, F_CRM_HOST_FROM) == NULL) &&
602+
local_name) {
603+
crm_xml_add(reply, F_CRM_HOST_FROM, local_name);
604+
}
605+
if (crm_ipcs_send(c, id, reply, crm_ipc_server_event) <= 0) {
606+
crm_err("Failed sending ping-reply");
607+
}
608+
free_xml(reply);
609+
} else {
610+
crm_err("Failed building ping-reply");
611+
}
612+
if (crm_str_eq(pacemakerd_state,
613+
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, TRUE)) {
614+
sleep(5); /* get out message - less ugly alternative? */
615+
shutdown_complete_state_reported = TRUE;
616+
if (shutdown_trigger) {
617+
mainloop_set_trigger(shutdown_trigger);
618+
}
619+
} else if (crm_str_eq(pacemakerd_state,
620+
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
621+
TRUE)) {
622+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
623+
mainloop_set_trigger(startup_trigger);
624+
}
625+
} else {
626+
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
577627

578-
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
579-
/* Send to everyone */
580-
struct iovec *iov;
581-
int id = 0;
582-
const char *name = NULL;
628+
if (msg == NULL) {
629+
return 0;
630+
}
583631

584-
crm_element_value_int(msg, XML_ATTR_ID, &id);
585-
name = crm_element_value(msg, XML_ATTR_UNAME);
586-
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
632+
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
633+
/* Time to quit */
634+
crm_notice("Shutting down in response to ticket %s (%s)",
635+
crm_element_value(msg, F_CRM_REFERENCE),
636+
crm_element_value(msg, F_CRM_ORIGIN));
637+
pcmk_shutdown(15);
587638

588-
iov = calloc(1, sizeof(struct iovec));
589-
iov->iov_base = dump_xml_unformatted(msg);
590-
iov->iov_len = 1 + strlen(iov->iov_base);
591-
send_cpg_iov(iov);
639+
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
640+
/* Send to everyone */
641+
struct iovec *iov;
642+
int id = 0;
643+
const char *name = NULL;
592644

593-
} else {
594-
update_process_clients(c);
645+
crm_element_value_int(msg, XML_ATTR_ID, &id);
646+
name = crm_element_value(msg, XML_ATTR_UNAME);
647+
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
648+
649+
iov = calloc(1, sizeof(struct iovec));
650+
iov->iov_base = dump_xml_unformatted(msg);
651+
iov->iov_len = 1 + strlen(iov->iov_base);
652+
send_cpg_iov(iov);
653+
654+
} else {
655+
update_process_clients(c);
656+
}
595657
}
596658

597659
free_xml(msg);
@@ -1051,8 +1113,8 @@ find_and_track_existing_processes(void)
10511113
return (tracking > INT_MAX) ? INT_MAX : tracking;
10521114
}
10531115

1054-
static void
1055-
init_children_processes(void)
1116+
static gboolean
1117+
init_children_processes(gpointer user_data)
10561118
{
10571119
int start_seq = 1, lpc = 0;
10581120
static int max = SIZEOF(pcmk_children);
@@ -1078,6 +1140,8 @@ init_children_processes(void)
10781140
* This may be useful for the daemons to know
10791141
*/
10801142
setenv("PCMK_respawned", "true", 1);
1143+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
1144+
return TRUE;
10811145
}
10821146

10831147
static void
@@ -1356,6 +1420,7 @@ main(int argc, char **argv)
13561420

13571421
if(pcmk_locate_sbd() > 0) {
13581422
setenv("PCMK_watchdog", "true", 1);
1423+
running_with_sbd = TRUE;
13591424
} else {
13601425
setenv("PCMK_watchdog", "false", 1);
13611426
}
@@ -1394,7 +1459,13 @@ main(int argc, char **argv)
13941459
mainloop_add_signal(SIGTERM, pcmk_shutdown);
13951460
mainloop_add_signal(SIGINT, pcmk_shutdown);
13961461

1397-
init_children_processes();
1462+
if (running_with_sbd) {
1463+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
1464+
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
1465+
} else {
1466+
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
1467+
init_children_processes(NULL);
1468+
}
13981469

13991470
crm_notice("Pacemaker daemon successfully started and accepting connections");
14001471
g_main_loop_run(mainloop);

include/crm/msg_xml.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ extern "C" {
123123
# define XML_PING_ATTR_STATUS "result"
124124
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
125125
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
126+
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
127+
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
128+
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
129+
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
130+
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
131+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
132+
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
126133

127134
# define XML_TAG_FRAGMENT "cib_fragment"
128135

tools/crmadmin.c

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static int message_timeout_ms = 30 * 1000;
3333

3434
static GMainLoop *mainloop = NULL;
3535
static crm_ipc_t *crmd_channel = NULL;
36+
static crm_ipc_t *pacemakerd_channel = NULL;
3637
static char *admin_uuid = NULL;
3738

3839
gboolean do_init(void);
@@ -46,6 +47,7 @@ static gboolean BE_VERBOSE = FALSE;
4647
static int expected_responses = 1;
4748
static gboolean BASH_EXPORT = FALSE;
4849
static gboolean DO_HEALTH = FALSE;
50+
static gboolean DO_PACEMAKERD_HEALTH = FALSE;
4951
static gboolean DO_RESET = FALSE;
5052
static gboolean DO_RESOURCE = FALSE;
5153
static gboolean DO_ELECT_DC = FALSE;
@@ -70,6 +72,8 @@ static struct crm_option long_options[] = {
7072
/* daemon options */
7173
{"status", 1, 0, 'S', "Display the status of the specified node." },
7274
{"-spacer-", 1, 0, '-', "\n\tResult is the node's internal FSM state which can be useful for debugging\n"},
75+
{"pacemakerd",0, 0, 'P', "Display the status of local pacemakerd."},
76+
{"-spacer-", 1, 0, '-', "\n\tResult is the state of the sub-daemons watched by pacemakerd\n"},
7377
{"dc_lookup", 0, 0, 'D', "Display the uname of the node co-ordinating the cluster."},
7478
{"-spacer-", 1, 0, '-', "\n\tThis is an internal detail and is rarely useful to administrators except when deciding on which node to examine the logs.\n"},
7579
{"nodes", 0, 0, 'N', "\tDisplay the uname of all member nodes"},
@@ -142,6 +146,9 @@ main(int argc, char **argv)
142146
case 'q':
143147
BE_SILENT = TRUE;
144148
break;
149+
case 'P':
150+
DO_PACEMAKERD_HEALTH = TRUE;
151+
break;
145152
case 'S':
146153
DO_HEALTH = TRUE;
147154
crm_trace("Option %c => %s", flag, optarg);
@@ -215,19 +222,26 @@ do_work(void)
215222
xmlNode *msg_data = NULL;
216223
gboolean all_is_good = TRUE;
217224

218-
if (DO_HEALTH == TRUE) {
225+
if ((DO_HEALTH == TRUE) || (DO_PACEMAKERD_HEALTH == TRUE)) {
219226
crm_trace("Querying the system");
220227

221228
sys_to = CRM_SYSTEM_DC;
222229

223-
if (dest_node != NULL) {
230+
if ((DO_HEALTH == TRUE) && (dest_node != NULL)) {
224231
sys_to = CRM_SYSTEM_CRMD;
225232
crmd_operation = CRM_OP_PING;
226233

227234
if (BE_VERBOSE) {
228235
expected_responses = 1;
229236
}
230237

238+
} else if (DO_PACEMAKERD_HEALTH == TRUE) {
239+
sys_to = CRM_SYSTEM_MCP;
240+
crmd_operation = CRM_OP_PING;
241+
242+
if (BE_VERBOSE) {
243+
expected_responses = 1;
244+
}
231245
} else {
232246
crm_info("Cluster-wide health not available yet");
233247
all_is_good = FALSE;
@@ -286,7 +300,7 @@ do_work(void)
286300
}
287301

288302
/* send it */
289-
if (crmd_channel == NULL) {
303+
if ((DO_PACEMAKERD_HEALTH?pacemakerd_channel:crmd_channel) == NULL) {
290304
crm_err("The IPC connection is not valid, cannot send anything");
291305
return -1;
292306
}
@@ -303,7 +317,8 @@ do_work(void)
303317
xmlNode *cmd = create_request(crmd_operation, msg_data, dest_node, sys_to,
304318
crm_system_name, admin_uuid);
305319

306-
crm_ipc_send(crmd_channel, cmd, 0, 0, NULL);
320+
crm_ipc_send(DO_PACEMAKERD_HEALTH?pacemakerd_channel:crmd_channel,
321+
cmd, 0, 0, NULL);
307322
free_xml(cmd);
308323
}
309324

@@ -329,21 +344,39 @@ struct ipc_client_callbacks crm_callbacks = {
329344
gboolean
330345
do_init(void)
331346
{
332-
mainloop_io_t *source =
347+
mainloop_io_t *crmd_source =
333348
mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);
349+
mainloop_io_t *pacemakerd_source =
350+
mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);
334351

335352
admin_uuid = crm_getpid_s();
336353

337-
crmd_channel = mainloop_get_ipc_client(source);
354+
crmd_channel = mainloop_get_ipc_client(crmd_source);
355+
pacemakerd_channel = mainloop_get_ipc_client(pacemakerd_source);
338356

339-
if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) {
357+
if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST || DO_PACEMAKERD_HEALTH) {
340358
return TRUE;
341359

342-
} else if (crmd_channel != NULL) {
343-
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
360+
} else {
361+
int hellos = 0;
344362

345-
crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
346-
return TRUE;
363+
if (crmd_channel != NULL) {
364+
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
365+
366+
crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
367+
hellos++;
368+
}
369+
#if 0
370+
if (pacemakerd_channel != NULL) {
371+
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
372+
373+
crm_ipc_send(pacemakerd_channel, xml, 0, 0, NULL);
374+
hellos++;
375+
}
376+
#endif
377+
if (hellos == 1) {
378+
return TRUE;
379+
}
347380
}
348381
return FALSE;
349382
}
@@ -394,14 +427,18 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata)
394427

395428
} else if (validate_crm_message(xml, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) {
396429
crm_trace("Message was not a CRM response. Discarding.");
430+
printf("Validation of response failed\n");
397431

398-
} else if (DO_HEALTH) {
432+
} else if (DO_HEALTH || DO_PACEMAKERD_HEALTH) {
399433
xmlNode *data = get_message_xml(xml, F_CRM_DATA);
400-
const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE);
434+
const char *state = DO_PACEMAKERD_HEALTH?
435+
crm_element_value(data, XML_PING_ATTR_PACEMAKERDSTATE):
436+
crm_element_value(data, XML_PING_ATTR_CRMDSTATE);
437+
const char *host_from = crm_element_value(xml, F_CRM_HOST_FROM);
401438

402439
printf("Status of %s@%s: %s (%s)\n",
403440
crm_element_value(data, XML_PING_ATTR_SYSFROM),
404-
crm_element_value(xml, F_CRM_HOST_FROM),
441+
host_from?host_from:"local",
405442
state, crm_element_value(data, XML_PING_ATTR_STATUS));
406443

407444
if (BE_SILENT && state != NULL) {

0 commit comments

Comments
 (0)