1    	/*
2    	 * Copyright 2004-2023 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <sys/param.h>
13   	#include <sys/types.h>
14   	#include <sys/stat.h>
15   	
16   	#include <crm/crm.h>
17   	#include <crm/msg_xml.h>
18   	#include <crm/pengine/rules.h>
19   	#include <crm/cluster/internal.h>
20   	#include <crm/cluster/election_internal.h>
21   	#include <crm/common/ipc_internal.h>
22   	
23   	#include <pacemaker-controld.h>
24   	
// IPC server for controller clients; created in do_started() and closed in
// do_stop() or crmd_exit()
static qb_ipcs_service_t *ipcs = NULL;

// Mainloop trigger that runs crm_read_options(); created in do_startup() and
// destroyed in crmd_exit()
static crm_trigger_t *config_read_trigger = NULL;

#if SUPPORT_COROSYNC
// Defined elsewhere; used by do_ha_control() to connect to a Corosync cluster
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif

// Forward declarations for functions defined later in this file
void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
35   	
36   	/*	 A_HA_CONNECT	*/
37   	void
38   	do_ha_control(long long action,
39   	              enum crmd_fsa_cause cause,
40   	              enum crmd_fsa_state cur_state,
41   	              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
42   	{
43   	    gboolean registered = FALSE;
44   	    static crm_cluster_t *cluster = NULL;
45   	
46   	    if (cluster == NULL) {
47   	        cluster = pcmk_cluster_new();
48   	    }
49   	
50   	    if (action & A_HA_DISCONNECT) {
51   	        crm_cluster_disconnect(cluster);
52   	        crm_info("Disconnected from the cluster");
53   	
54   	        controld_set_fsa_input_flags(R_HA_DISCONNECTED);
55   	    }
56   	
57   	    if (action & A_HA_CONNECT) {
58   	        crm_set_status_callback(&peer_update_callback);
59   	        crm_set_autoreap(FALSE);
60   	
61   	#if SUPPORT_COROSYNC
62   	        if (is_corosync_cluster()) {
63   	            registered = crm_connect_corosync(cluster);
64   	        }
65   	#endif // SUPPORT_COROSYNC
66   	
67   	        if (registered) {
68   	            controld_election_init(cluster->uname);
69   	            controld_globals.our_nodename = cluster->uname;
70   	            controld_globals.our_uuid = cluster->uuid;
71   	            if(cluster->uuid == NULL) {
72   	                crm_err("Could not obtain local uuid");
73   	                registered = FALSE;
74   	            }
75   	        }
76   	
77   	        if (!registered) {
78   	            controld_set_fsa_input_flags(R_HA_DISCONNECTED);
79   	            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
80   	            return;
81   	        }
82   	
83   	        populate_cib_nodes(node_update_none, __func__);
84   	        controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
85   	        crm_info("Connected to the cluster");
86   	    }
87   	
88   	    if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
89   	        crm_err("Unexpected action %s in %s", fsa_action2string(action),
90   	                __func__);
91   	    }
92   	}
93   	
94   	/*	 A_SHUTDOWN	*/
95   	void
96   	do_shutdown(long long action,
97   	            enum crmd_fsa_cause cause,
98   	            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
99   	{
100  	    /* just in case */
101  	    controld_set_fsa_input_flags(R_SHUTDOWN);
102  	    controld_disconnect_fencer(FALSE);
103  	}
104  	
105  	/*	 A_SHUTDOWN_REQ	*/
106  	void
107  	do_shutdown_req(long long action,
108  	                enum crmd_fsa_cause cause,
109  	                enum crmd_fsa_state cur_state,
110  	                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
111  	{
112  	    xmlNode *msg = NULL;
113  	
114  	    controld_set_fsa_input_flags(R_SHUTDOWN);
115  	    //controld_set_fsa_input_flags(R_STAYDOWN);
116  	    crm_info("Sending shutdown request to all peers (DC is %s)",
117  	             pcmk__s(controld_globals.dc_name, "not set"));
118  	    msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
119  	
120  	    if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
121  	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
122  	    }
123  	    free_xml(msg);
124  	}
125  	
126  	void
127  	crmd_fast_exit(crm_exit_t exit_code)
128  	{
129  	    if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
130  	        crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
131  	                 exit_code, CRM_EX_FATAL);
132  	        exit_code = CRM_EX_FATAL;
133  	
134  	    } else if ((exit_code == CRM_EX_OK)
135  	               && pcmk_is_set(controld_globals.fsa_input_register,
136  	                              R_IN_RECOVERY)) {
137  	        crm_err("Could not recover from internal error");
138  	        exit_code = CRM_EX_ERROR;
139  	    }
140  	
141  	    if (controld_globals.logger_out != NULL) {
142  	        controld_globals.logger_out->finish(controld_globals.logger_out,
143  	                                            exit_code, true, NULL);
144  	        pcmk__output_free(controld_globals.logger_out);
145  	        controld_globals.logger_out = NULL;
146  	    }
147  	
148  	    crm_exit(exit_code);
149  	}
150  	
151  	crm_exit_t
152  	crmd_exit(crm_exit_t exit_code)
153  	{
154  	    GMainLoop *mloop = controld_globals.mainloop;
155  	
156  	    static bool in_progress = FALSE;
157  	
158  	    if (in_progress && (exit_code == CRM_EX_OK)) {
159  	        crm_debug("Exit is already in progress");
160  	        return exit_code;
161  	
162  	    } else if(in_progress) {
163  	        crm_notice("Error during shutdown process, exiting now with status %d (%s)",
164  	                   exit_code, crm_exit_str(exit_code));
165  	        crm_write_blackbox(SIGTRAP, NULL);
166  	        crmd_fast_exit(exit_code);
167  	    }
168  	
169  	    in_progress = TRUE;
170  	    crm_trace("Preparing to exit with status %d (%s)",
171  	              exit_code, crm_exit_str(exit_code));
172  	
173  	    /* Suppress secondary errors resulting from us disconnecting everything */
174  	    controld_set_fsa_input_flags(R_HA_DISCONNECTED);
175  	
176  	/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
177  	
178  	    if(ipcs) {
179  	        crm_trace("Closing IPC server");
180  	        mainloop_del_ipc_server(ipcs);
181  	        ipcs = NULL;
182  	    }
183  	
184  	    controld_close_attrd_ipc();
185  	    controld_shutdown_schedulerd_ipc();
186  	    controld_disconnect_fencer(TRUE);
187  	
188  	    if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
189  	        crm_debug("No mainloop detected");
190  	        exit_code = CRM_EX_ERROR;
191  	    }
192  	
193  	    /* On an error, just get out.
194  	     *
195  	     * Otherwise, make the effort to have mainloop exit gracefully so
196  	     * that it (mostly) cleans up after itself and valgrind has less
197  	     * to report on - allowing real errors stand out
198  	     */
199  	    if (exit_code != CRM_EX_OK) {
200  	        crm_notice("Forcing immediate exit with status %d (%s)",
201  	                   exit_code, crm_exit_str(exit_code));
202  	        crm_write_blackbox(SIGTRAP, NULL);
203  	        crmd_fast_exit(exit_code);
204  	    }
205  	
206  	/* Clean up as much memory as possible for valgrind */
207  	
(49) Event example_checked: Example 3: "iter->next" has its value checked in "iter != NULL".
Also see events: [null_field][alias_transfer][dereference][example_checked][example_checked][example_checked][example_checked]
208  	    for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
209  	         iter = iter->next) {
210  	        fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
211  	
212  	        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
213  	                 fsa_input2string(fsa_data->fsa_input),
214  	                 fsa_state2string(controld_globals.fsa_state),
215  	                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
216  	        delete_fsa_input(fsa_data);
217  	    }
218  	
219  	    controld_clear_fsa_input_flags(R_MEMBERSHIP);
220  	
221  	    g_list_free(controld_globals.fsa_message_queue);
222  	    controld_globals.fsa_message_queue = NULL;
223  	
224  	    controld_free_node_pending_timers();
225  	    controld_election_fini();
226  	
227  	    /* Tear down the CIB manager connection, but don't free it yet -- it could
228  	     * be used when we drain the mainloop later.
229  	     */
230  	
231  	    controld_disconnect_cib_manager();
232  	
233  	    verify_stopped(controld_globals.fsa_state, LOG_WARNING);
234  	    controld_clear_fsa_input_flags(R_LRM_CONNECTED);
235  	    lrm_state_destroy_all();
236  	
237  	    mainloop_destroy_trigger(config_read_trigger);
238  	    config_read_trigger = NULL;
239  	
240  	    controld_destroy_fsa_trigger();
241  	    controld_destroy_transition_trigger();
242  	
243  	    pcmk__client_cleanup();
244  	    crm_peer_destroy();
245  	
246  	    controld_free_fsa_timers();
247  	    te_cleanup_stonith_history_sync(NULL, TRUE);
248  	    controld_free_sched_timer();
249  	
250  	    free(controld_globals.our_nodename);
251  	    controld_globals.our_nodename = NULL;
252  	
253  	    free(controld_globals.our_uuid);
254  	    controld_globals.our_uuid = NULL;
255  	
256  	    free(controld_globals.dc_name);
257  	    controld_globals.dc_name = NULL;
258  	
259  	    free(controld_globals.dc_version);
260  	    controld_globals.dc_version = NULL;
261  	
262  	    free(controld_globals.cluster_name);
263  	    controld_globals.cluster_name = NULL;
264  	
265  	    free(controld_globals.te_uuid);
266  	    controld_globals.te_uuid = NULL;
267  	
268  	    free_max_generation();
269  	    controld_destroy_failed_sync_table();
270  	    controld_destroy_outside_events_table();
271  	
272  	    mainloop_destroy_signal(SIGPIPE);
273  	    mainloop_destroy_signal(SIGUSR1);
274  	    mainloop_destroy_signal(SIGTERM);
275  	    mainloop_destroy_signal(SIGTRAP);
276  	    /* leave SIGCHLD engaged as we might still want to drain some service-actions */
277  	
278  	    if (mloop) {
279  	        GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
280  	
281  	        /* Don't re-enter this block */
282  	        controld_globals.mainloop = NULL;
283  	
284  	        /* no signals on final draining anymore */
285  	        mainloop_destroy_signal(SIGCHLD);
286  	
287  	        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
288  	
289  	        {
290  	            int lpc = 0;
291  	
292  	            while((g_main_context_pending(ctx) && lpc < 10)) {
293  	                lpc++;
294  	                crm_trace("Iteration %d", lpc);
295  	                g_main_context_dispatch(ctx);
296  	            }
297  	        }
298  	
299  	        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
300  	        g_main_loop_quit(mloop);
301  	
302  	        /* Won't do anything yet, since we're inside it now */
303  	        g_main_loop_unref(mloop);
304  	    } else {
305  	        mainloop_destroy_signal(SIGCHLD);
306  	    }
307  	
308  	    cib_delete(controld_globals.cib_conn);
309  	    controld_globals.cib_conn = NULL;
310  	
311  	    throttle_fini();
312  	
313  	    /* Graceful */
314  	    crm_trace("Done preparing for exit with status %d (%s)",
315  	              exit_code, crm_exit_str(exit_code));
316  	    return exit_code;
317  	}
318  	
319  	/*	 A_EXIT_0, A_EXIT_1	*/
320  	void
321  	do_exit(long long action,
322  	        enum crmd_fsa_cause cause,
323  	        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
324  	{
325  	    crm_exit_t exit_code = CRM_EX_OK;
326  	
327  	    if (pcmk_is_set(action, A_EXIT_1)) {
328  	        exit_code = CRM_EX_ERROR;
329  	        crm_err("Exiting now due to errors");
330  	    }
331  	    verify_stopped(cur_state, LOG_ERR);
332  	    crmd_exit(exit_code);
333  	}
334  	
/*!
 * \internal
 * \brief Signal handler that deliberately does nothing
 *
 * \param[in] nsig  Signal number (ignored)
 */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
336  	
337  	/*	 A_STARTUP	*/
338  	void
339  	do_startup(long long action,
340  	           enum crmd_fsa_cause cause,
341  	           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
342  	{
343  	    crm_debug("Registering Signal Handlers");
344  	    mainloop_add_signal(SIGTERM, crm_shutdown);
345  	    mainloop_add_signal(SIGPIPE, sigpipe_ignore);
346  	
347  	    config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
348  	                                               crm_read_options, NULL);
349  	
350  	    controld_init_fsa_trigger();
351  	    controld_init_transition_trigger();
352  	
353  	    crm_debug("Creating CIB manager and executor objects");
354  	    controld_globals.cib_conn = cib_new();
355  	
356  	    lrm_state_init_local();
357  	    if (controld_init_fsa_timers() == FALSE) {
358  	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
359  	    }
360  	}
361  	
362  	// \return libqb error code (0 on success, -errno on error)
363  	static int32_t
364  	accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
365  	{
366  	    crm_trace("Accepting new IPC client connection");
367  	    if (pcmk__new_client(c, uid, gid) == NULL) {
368  	        return -EIO;
369  	    }
370  	    return 0;
371  	}
372  	
373  	// \return libqb error code (0 on success, -errno on error)
374  	static int32_t
375  	dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
376  	{
377  	    uint32_t id = 0;
378  	    uint32_t flags = 0;
379  	    pcmk__client_t *client = pcmk__find_client(c);
380  	
381  	    xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
382  	
383  	    if (msg == NULL) {
384  	        pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
385  	        return 0;
386  	    }
387  	    pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
388  	
389  	    CRM_ASSERT(client->user != NULL);
390  	    pcmk__update_acl_user(msg, F_CRM_USER, client->user);
391  	
392  	    crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
393  	    if (controld_authorize_ipc_message(msg, client, NULL)) {
394  	        crm_trace("Processing IPC message from client %s",
395  	                  pcmk__client_name(client));
396  	        route_message(C_IPC_MESSAGE, msg);
397  	    }
398  	
399  	    controld_trigger_fsa();
400  	    free_xml(msg);
401  	    return 0;
402  	}
403  	
404  	static int32_t
405  	ipc_client_disconnected(qb_ipcs_connection_t *c)
406  	{
407  	    pcmk__client_t *client = pcmk__find_client(c);
408  	
409  	    if (client) {
410  	        crm_trace("Disconnecting %sregistered client %s (%p/%p)",
411  	                  (client->userdata? "" : "un"), pcmk__client_name(client),
412  	                  c, client);
413  	        free(client->userdata);
414  	        pcmk__free_client(client);
415  	        controld_trigger_fsa();
416  	    }
417  	    return 0;
418  	}
419  	
420  	static void
421  	ipc_connection_destroyed(qb_ipcs_connection_t *c)
422  	{
423  	    crm_trace("Connection %p", c);
424  	    ipc_client_disconnected(c);
425  	}
426  	
427  	/*	 A_STOP	*/
428  	void
429  	do_stop(long long action,
430  	        enum crmd_fsa_cause cause,
431  	        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
432  	{
433  	    crm_trace("Closing IPC server");
434  	    mainloop_del_ipc_server(ipcs); ipcs = NULL;
435  	    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
436  	}
437  	
438  	/*	 A_STARTED	*/
439  	void
440  	do_started(long long action,
441  	           enum crmd_fsa_cause cause,
442  	           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
443  	{
444  	    static struct qb_ipcs_service_handlers crmd_callbacks = {
445  	        .connection_accept = accept_controller_client,
446  	        .connection_created = NULL,
447  	        .msg_process = dispatch_controller_ipc,
448  	        .connection_closed = ipc_client_disconnected,
449  	        .connection_destroyed = ipc_connection_destroyed
450  	    };
451  	
452  	    if (cur_state != S_STARTING) {
453  	        crm_err("Start cancelled... %s", fsa_state2string(cur_state));
454  	        return;
455  	
456  	    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
457  	                            R_MEMBERSHIP)) {
458  	        crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
459  	
460  	        crmd_fsa_stall(TRUE);
461  	        return;
462  	
463  	    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
464  	                            R_LRM_CONNECTED)) {
465  	        crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
466  	
467  	        crmd_fsa_stall(TRUE);
468  	        return;
469  	
470  	    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
471  	                            R_CIB_CONNECTED)) {
472  	        crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
473  	
474  	        crmd_fsa_stall(TRUE);
475  	        return;
476  	
477  	    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
478  	                            R_READ_CONFIG)) {
479  	        crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
480  	
481  	        crmd_fsa_stall(TRUE);
482  	        return;
483  	
484  	    } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
485  	
486  	        crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
487  	        crmd_fsa_stall(TRUE);
488  	        return;
489  	    }
490  	
491  	    crm_debug("Init server comms");
492  	    ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
493  	    if (ipcs == NULL) {
494  	        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
495  	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
496  	    } else {
497  	        crm_notice("Pacemaker controller successfully started and accepting connections");
498  	    }
499  	    controld_set_fsa_input_flags(R_ST_REQUIRED);
500  	    controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
501  	
502  	    controld_clear_fsa_input_flags(R_STARTING);
503  	    register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
504  	}
505  	
506  	/*	 A_RECOVER	*/
507  	void
508  	do_recover(long long action,
509  	           enum crmd_fsa_cause cause,
510  	           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
511  	{
512  	    controld_set_fsa_input_flags(R_IN_RECOVERY);
513  	    crm_warn("Fast-tracking shutdown in response to errors");
514  	
515  	    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
516  	}
517  	
518  	static pcmk__cluster_option_t controller_options[] = {
519  	    /* name, old name, type, allowed values,
520  	     * default value, validator,
521  	     * short description,
522  	     * long description
523  	     */
524  	    {
525  	        "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
526  	        N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
527  	        N_("Includes a hash which identifies the exact changeset the code was "
528  	            "built from. Used for diagnostic purposes.")
529  	    },
530  	    {
531  	        "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
532  	        N_("The messaging stack on which Pacemaker is currently running"),
533  	        N_("Used for informational and diagnostic purposes.")
534  	    },
535  	    {
536  	        "cluster-name", NULL, "string", NULL, NULL, NULL,
537  	        N_("An arbitrary name for the cluster"),
538  	        N_("This optional value is mostly for users' convenience as desired "
539  	            "in administration, but may also be used in Pacemaker "
540  	            "configuration rules via the #cluster-name node attribute, and "
541  	            "by higher-level tools and resource agents.")
542  	    },
543  	    {
544  	        XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
545  	        NULL, "20s", pcmk__valid_interval_spec,
546  	        N_("How long to wait for a response from other nodes during start-up"),
547  	        N_("The optimal value will depend on the speed and load of your network "
548  	            "and the type of switches used.")
549  	    },
550  	    {
551  	        XML_CONFIG_ATTR_RECHECK, NULL, "time",
552  	        N_("Zero disables polling, while positive values are an interval in seconds"
553  	            "(unless other units are specified, for example \"5min\")"),
554  	        "15min", pcmk__valid_interval_spec,
555  	        N_("Polling interval to recheck cluster state and evaluate rules "
556  	            "with date specifications"),
557  	        N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
558  	            "recheck cluster state for failure timeouts and most time-based "
559  	            "rules. However, it will also recheck the cluster after this "
560  	            "amount of inactivity, to evaluate rules with date specifications "
561  	            "and serve as a fail-safe for certain types of scheduler bugs.")
562  	    },
563  	    {
564  	        "load-threshold", NULL, "percentage", NULL,
565  	        "80%", pcmk__valid_percentage,
566  	        N_("Maximum amount of system load that should be used by cluster nodes"),
567  	        N_("The cluster will slow down its recovery process when the amount of "
568  	            "system resources used (currently CPU) approaches this limit"),
569  	    },
570  	    {
571  	        "node-action-limit", NULL, "integer", NULL,
572  	        "0", pcmk__valid_number,
573  	        N_("Maximum number of jobs that can be scheduled per node "
574  	            "(defaults to 2x cores)")
575  	    },
576  	    { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
577  	        N_("How a cluster node should react if notified of its own fencing"),
578  	        N_("A cluster node may receive notification of its own fencing if fencing "
579  	        "is misconfigured, or if fabric fencing is in use that doesn't cut "
580  	        "cluster communication. Allowed values are \"stop\" to attempt to "
581  	        "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
582  	        "to immediately reboot the local node, falling back to stop on failure.")
583  	    },
584  	    {
585  	        XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
586  	        "2min", pcmk__valid_interval_spec,
587  	        "*** Advanced Use Only ***",
588  	        N_("Declare an election failed if it is not decided within this much "
589  	            "time. If you need to adjust this value, it probably indicates "
590  	            "the presence of a bug.")
591  	    },
592  	    {
593  	        XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
594  	        "20min", pcmk__valid_interval_spec,
595  	        "*** Advanced Use Only ***",
596  	        N_("Exit immediately if shutdown does not complete within this much "
597  	            "time. If you need to adjust this value, it probably indicates "
598  	            "the presence of a bug.")
599  	    },
600  	    {
601  	        "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
602  	        "3min", pcmk__valid_interval_spec,
603  	        "*** Advanced Use Only ***",
604  	        N_("If you need to adjust this value, it probably indicates "
605  	            "the presence of a bug.")
606  	    },
607  	    {
608  	        "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
609  	        "30min", pcmk__valid_interval_spec,
610  	        "*** Advanced Use Only ***",
611  	        N_("If you need to adjust this value, it probably indicates "
612  	            "the presence of a bug.")
613  	    },
614  	    {
615  	        "transition-delay", "crmd-transition-delay", "time", NULL,
616  	        "0s", pcmk__valid_interval_spec,
617  	        N_("*** Advanced Use Only *** Enabling this option will slow down "
618  	            "cluster recovery under all conditions"),
619  	        N_("Delay cluster recovery for this much time to allow for additional "
620  	            "events to occur. Useful if your configuration is sensitive to "
621  	            "the order in which ping updates arrive.")
622  	    },
623  	    {
624  	        "stonith-watchdog-timeout", NULL, "time", NULL,
625  	        "0", controld_verify_stonith_watchdog_timeout,
626  	        N_("How long before nodes can be assumed to be safely down when "
627  	           "watchdog-based self-fencing via SBD is in use"),
628  	        N_("If this is set to a positive value, lost nodes are assumed to "
629  	           "self-fence using watchdog-based SBD within this much time. This "
630  	           "does not require a fencing resource to be explicitly configured, "
631  	           "though a fence_watchdog resource can be configured, to limit use "
632  	           "to specific nodes. If this is set to 0 (the default), the cluster "
633  	           "will never assume watchdog-based self-fencing. If this is set to a "
634  	           "negative value, the cluster will use twice the local value of the "
635  	           "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
636  	           "or otherwise treat this as 0. WARNING: When used, this timeout "
637  	           "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
638  	           "watchdog-based SBD, and Pacemaker will refuse to start on any of "
639  	           "those nodes where this is not true for the local value or SBD is "
640  	           "not active. When this is set to a negative value, "
641  	           "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
642  	           "that use SBD, otherwise data corruption or loss could occur.")
643  	    },
644  	    {
645  	        "stonith-max-attempts", NULL, "integer", NULL,
646  	        "10", pcmk__valid_positive_number,
647  	        N_("How many times fencing can fail before it will no longer be "
648  	            "immediately re-attempted on a target")
649  	    },
650  	
651  	    // Already documented in libpe_status (other values must be kept identical)
652  	    {
653  	        "no-quorum-policy", NULL, "select",
654  	        "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
655  	        N_("What to do when the cluster does not have quorum"), NULL
656  	    },
657  	    {
658  	        XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
659  	        "false", pcmk__valid_boolean,
660  	        N_("Whether to lock resources to a cleanly shut down node"),
661  	        N_("When true, resources active on a node when it is cleanly shut down "
662  	            "are kept \"locked\" to that node (not allowed to run elsewhere) "
663  	            "until they start again on that node after it rejoins (or for at "
664  	            "most shutdown-lock-limit, if set). Stonith resources and "
665  	            "Pacemaker Remote connections are never locked. Clone and bundle "
666  	            "instances and the promoted role of promotable clones are "
667  	            "currently never locked, though support could be added in a future "
668  	            "release.")
669  	    },
670  	    {
671  	        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
672  	        "0", pcmk__valid_interval_spec,
673  	        N_("Do not lock resources to a cleanly shut down node longer than "
674  	           "this"),
675  	        N_("If shutdown-lock is true and this is set to a nonzero time "
676  	            "duration, shutdown locks will expire after this much time has "
677  	            "passed since the shutdown was initiated, even if the node has not "
678  	            "rejoined.")
679  	    },
680  	    {
681  	        XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
682  	        "0", pcmk__valid_interval_spec,
683  	        N_("How long to wait for a node that has joined the cluster to join "
684  	           "the controller process group"),
685  	        N_("Fence nodes that do not join the controller process group within "
686  	           "this much time after joining the cluster, to allow the cluster "
687  	           "to continue managing resources. A value of 0 means never fence " 
688  	           "pending nodes. Setting the value to 2h means fence nodes after "
689  	           "2 hours.")
690  	    },
691  	};
692  	
693  	void
694  	crmd_metadata(void)
695  	{
696  	    const char *desc_short = "Pacemaker controller options";
697  	    const char *desc_long = "Cluster options used by Pacemaker's controller";
698  	
699  	    gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
700  	                                            desc_long, controller_options,
701  	                                            PCMK__NELEM(controller_options));
702  	    printf("%s", s);
703  	    g_free(s);
704  	}
705  	
706  	static void
707  	config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
708  	{
709  	    const char *value = NULL;
710  	    GHashTable *config_hash = NULL;
711  	    crm_time_t *now = crm_time_new(NULL);
712  	    xmlNode *crmconfig = NULL;
713  	    xmlNode *alerts = NULL;
714  	
715  	    if (rc != pcmk_ok) {
716  	        fsa_data_t *msg_data = NULL;
717  	
718  	        crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
719  	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
720  	
721  	        if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
722  	            crm_err("The cluster is mis-configured - shutting down and staying down");
723  	            controld_set_fsa_input_flags(R_STAYDOWN);
724  	        }
725  	        goto bail;
726  	    }
727  	
728  	    crmconfig = output;
729  	    if ((crmconfig != NULL)
730  	        && !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) {
731  	        crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
732  	    }
733  	    if (!crmconfig) {
734  	        fsa_data_t *msg_data = NULL;
735  	
736  	        crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
737  	        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
738  	        goto bail;
739  	    }
740  	
741  	    crm_debug("Call %d : Parsing CIB options", call_id);
742  	    config_hash = pcmk__strkey_table(free, free);
743  	    pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
744  	                      config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
745  	
746  	    // Validate all options, and use defaults if not already present in hash
747  	    pcmk__validate_cluster_options(config_hash, controller_options,
748  	                                   PCMK__NELEM(controller_options));
749  	
750  	    value = g_hash_table_lookup(config_hash, "no-quorum-policy");
751  	    if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
752  	        controld_set_global_flags(controld_no_quorum_suicide);
753  	    }
754  	
755  	    value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
756  	    if (crm_is_true(value)) {
757  	        controld_set_global_flags(controld_shutdown_lock_enabled);
758  	    } else {
759  	        controld_clear_global_flags(controld_shutdown_lock_enabled);
760  	    }
761  	
762  	    value = g_hash_table_lookup(config_hash,
763  	                                XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
764  	    controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
765  	                                           / 1000;
766  	
767  	    value = g_hash_table_lookup(config_hash,
768  	                                XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
769  	    controld_globals.node_pending_timeout = crm_parse_interval_spec(value) / 1000;
770  	
771  	    value = g_hash_table_lookup(config_hash, "cluster-name");
772  	    pcmk__str_update(&(controld_globals.cluster_name), value);
773  	
774  	    // Let subcomponents initialize their own static variables
775  	    controld_configure_election(config_hash);
776  	    controld_configure_fencing(config_hash);
777  	    controld_configure_fsa_timers(config_hash);
778  	    controld_configure_throttle(config_hash);
779  	
780  	    alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
781  	    crmd_unpack_alerts(alerts);
782  	
783  	    controld_set_fsa_input_flags(R_READ_CONFIG);
784  	    controld_trigger_fsa();
785  	
786  	    g_hash_table_destroy(config_hash);
787  	  bail:
788  	    crm_time_free(now);
789  	}
790  	
791  	/*!
792  	 * \internal
793  	 * \brief Trigger read and processing of the configuration
794  	 *
795  	 * \param[in] fn    Calling function name
796  	 * \param[in] line  Line number where call occurred
797  	 */
798  	void
799  	controld_trigger_config_as(const char *fn, int line)
800  	{
801  	    if (config_read_trigger != NULL) {
802  	        crm_trace("%s:%d - Triggered config processing", fn, line);
803  	        mainloop_set_trigger(config_read_trigger);
804  	    }
805  	}
806  	
807  	gboolean
808  	crm_read_options(gpointer user_data)
809  	{
810  	    cib_t *cib_conn = controld_globals.cib_conn;
811  	    int call_id = cib_conn->cmds->query(cib_conn,
812  	                                        "//" XML_CIB_TAG_CRMCONFIG
813  	                                        " | //" XML_CIB_TAG_ALERTS,
814  	                                        NULL, cib_xpath|cib_scope_local);
815  	
816  	    fsa_register_cib_callback(call_id, NULL, config_query_callback);
817  	    crm_trace("Querying the CIB... call %d", call_id);
818  	    return TRUE;
819  	}
820  	
821  	/*	 A_READCONFIG	*/
822  	void
823  	do_read_config(long long action,
824  	               enum crmd_fsa_cause cause,
825  	               enum crmd_fsa_state cur_state,
826  	               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
827  	{
828  	    throttle_init();
829  	    controld_trigger_config();
830  	}
831  	
832  	void
833  	crm_shutdown(int nsig)
834  	{
835  	    const char *value = NULL;
836  	    guint default_period_ms = 0;
837  	
838  	    if ((controld_globals.mainloop == NULL)
839  	        || !g_main_loop_is_running(controld_globals.mainloop)) {
840  	        crmd_exit(CRM_EX_OK);
841  	        return;
842  	    }
843  	
844  	    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
845  	        crm_err("Escalating shutdown");
846  	        register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
847  	        return;
848  	    }
849  	
850  	    controld_set_fsa_input_flags(R_SHUTDOWN);
851  	    register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
852  	
853  	    /* If shutdown timer doesn't have a period set, use the default
854  	     *
855  	     * @TODO: Evaluate whether this is still necessary. As long as
856  	     * config_query_callback() has been run at least once, it doesn't look like
857  	     * anything could have changed the timer period since then.
858  	     */
859  	    value = pcmk__cluster_option(NULL, controller_options,
860  	                                 PCMK__NELEM(controller_options),
861  	                                 XML_CONFIG_ATTR_FORCE_QUIT);
862  	    default_period_ms = crm_parse_interval_spec(value);
863  	    controld_shutdown_start_countdown(default_period_ms);
864  	}
865