1    	/*
2    	 * Copyright 2004-2023 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <unistd.h>  /* pid_t, sleep, ssize_t */
13   	
14   	#include <crm/cib.h>
15   	#include <crm/cluster.h>
16   	#include <crm/common/xml.h>
17   	#include <crm/crm.h>
18   	#include <crm/msg_xml.h>
19   	#include <crm/common/xml_internal.h>
20   	#include <crm/common/ipc.h>
21   	#include <crm/common/ipc_schedulerd.h>
22   	
23   	#include <pacemaker-controld.h>
24   	
25   	static void handle_disconnect(void);  // Forward declaration: used by controld_shutdown_schedulerd_ipc() ahead of its definition
26   	
27   	static pcmk_ipc_api_t *schedulerd_api = NULL;  // Scheduler IPC API object: created on first connection attempt, freed and reset to NULL at shutdown
28   	
29   	/*!
30   	 * \internal
31   	 * \brief Close any scheduler connection and free associated memory
32   	 */
33   	void
34   	controld_shutdown_schedulerd_ipc(void)
35   	{
36   	    controld_clear_fsa_input_flags(R_PE_REQUIRED);
37   	    pcmk_disconnect_ipc(schedulerd_api);
38   	    handle_disconnect();
39   	
40   	    pcmk_free_ipc_api(schedulerd_api);
41   	    schedulerd_api = NULL;
42   	}
43   	
44   	/*!
45   	 * \internal
46   	 * \brief Save CIB query result to file, raising FSA error
47   	 *
48   	 * \param[in] msg        Ignored
49   	 * \param[in] call_id    Call ID of CIB query
50   	 * \param[in] rc         Return code of CIB query
51   	 * \param[in] output     Result of CIB query
52   	 * \param[in] user_data  Unique identifier for filename
53   	 *
54   	 * \note This is intended to be called after a scheduler connection fails.
55   	 */
56   	static void
57   	save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
58   	                  void *user_data)
59   	{
60   	    const char *id = user_data;
61   	
62   	    register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
63   	    CRM_CHECK(id != NULL, return);
64   	
65   	    if (rc == pcmk_ok) {
66   	        char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
67   	
68   	        if (write_xml_file(output, filename, TRUE) < 0) {
69   	            crm_err("Could not save Cluster Information Base to %s after scheduler crash",
70   	                    filename);
71   	        } else {
72   	            crm_notice("Saved Cluster Information Base to %s after scheduler crash",
73   	                       filename);
74   	        }
75   	        free(filename);
76   	    }
77   	}
78   	
79   	/*!
80   	 * \internal
81   	 * \brief Respond to scheduler connection failure
82   	 */
83   	static void
84   	handle_disconnect(void)
85   	{
86   	    // If we aren't connected to the scheduler, we can't expect a reply
87   	    controld_expect_sched_reply(NULL);
88   	
89   	    if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
90   	        int rc = pcmk_ok;
91   	        char *uuid_str = crm_generate_uuid();
92   	
93   	        crm_crit("Lost connection to the scheduler "
94   	                 CRM_XS " CIB will be saved to " PE_STATE_DIR "/pe-core-%s.bz2",
95   	                 uuid_str);
96   	
97   	        /*
98   	         * The scheduler died...
99   	         *
100  	         * Save the current CIB so that we have a chance of
101  	         * figuring out what killed it.
102  	         *
103  	         * Delay raising the I_ERROR until the query below completes or
104  	         * 5s is up, whichever comes first.
105  	         *
106  	         */
107  	        rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
108  	                                                    NULL, NULL,
109  	                                                    cib_scope_local);
110  	        fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
111  	    }
112  	
113  	    controld_clear_fsa_input_flags(R_PE_CONNECTED);
114  	    controld_trigger_fsa();
115  	    return;
116  	}
117  	
118  	static void
119  	handle_reply(pcmk_schedulerd_api_reply_t *reply)
120  	{
121  	    const char *msg_ref = NULL;
122  	
123  	    if (!AM_I_DC) {
124  	        return;
125  	    }
126  	
127  	    msg_ref = reply->data.graph.reference;
128  	
129  	    if (msg_ref == NULL) {
130  	        crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
131  	
132  	    } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
133  	                            pcmk__str_none)) {
134  	        ha_msg_input_t fsa_input;
135  	        xmlNode *crm_data_node;
136  	
137  	        controld_stop_sched_timer();
138  	
139  	        /* do_te_invoke (which will eventually process the fsa_input we are constructing
140  	         * here) requires that fsa_input.xml be non-NULL.  That will only happen if
141  	         * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
142  	         * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
143  	         * gave us the values we need, we just need to put them into XML.
144  	         *
145  	         * The name of the top level element here is irrelevant.  Nothing checks it.
146  	         */
147  	        fsa_input.msg = create_xml_node(NULL, "dummy-reply");
148  	        crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref);
149  	        crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input);
150  	
151  	        crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA);
152  	        add_node_copy(crm_data_node, reply->data.graph.tgraph);
153  	        register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
154  	
155  	        free_xml(fsa_input.msg);
156  	
157  	    } else {
158  	        crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
159  	    }
160  	}
161  	
162  	static void
163  	scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
164  	                         crm_exit_t status, void *event_data, void *user_data)
165  	{
166  	    pcmk_schedulerd_api_reply_t *reply = event_data;
167  	
168  	    switch (event_type) {
169  	        case pcmk_ipc_event_disconnect:
170  	            handle_disconnect();
171  	            break;
172  	
173  	        case pcmk_ipc_event_reply:
174  	            handle_reply(reply);
175  	            break;
176  	
177  	        default:
178  	            break;
179  	    }
180  	}
181  	
182  	static bool
183  	new_schedulerd_ipc_connection(void)
184  	{
185  	    int rc;
186  	
187  	    controld_set_fsa_input_flags(R_PE_REQUIRED);
188  	
189  	    if (schedulerd_api == NULL) {
190  	        rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
191  	
192  	        if (rc != pcmk_rc_ok) {
193  	            crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
194  	            return false;
195  	        }
196  	    }
197  	
198  	    pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
199  	
200  	    rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
201  	    if (rc != pcmk_rc_ok) {
202  	        crm_err("Error connecting to %s: %s",
203  	                pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
204  	        return false;
205  	    }
206  	
207  	    controld_set_fsa_input_flags(R_PE_CONNECTED);
208  	    return true;
209  	}
210  	
211  	static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
212  	                                  xmlNode *output, void *user_data);  // Forward declaration: registered by do_pe_invoke() as its CIB query callback
213  	
214  	/*	 A_PE_START, A_PE_STOP, O_PE_RESTART	*/
215  	void
216  	do_pe_control(long long action,
217  	              enum crmd_fsa_cause cause,
218  	              enum crmd_fsa_state cur_state,
219  	              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
220  	{
221  	    if (pcmk_is_set(action, A_PE_STOP)) {
222  	        controld_clear_fsa_input_flags(R_PE_REQUIRED);
223  	        pcmk_disconnect_ipc(schedulerd_api);
224  	        handle_disconnect();
225  	    }
226  	    if (pcmk_is_set(action, A_PE_START)
227  	        && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
228  	
229  	        if (cur_state == S_STOPPING) {
230  	            crm_info("Ignoring request to connect to scheduler while shutting down");
231  	
232  	        } else if (!new_schedulerd_ipc_connection()) {
233  	            crm_warn("Could not connect to scheduler");
234  	            register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
235  	        }
236  	    }
237  	}
238  	
239  	static int fsa_pe_query = 0;  // Call ID of the latest CIB query issued by do_pe_invoke(); results of other queries are skipped
240  	static mainloop_timer_t *controld_sched_timer = NULL;  // Scheduler reply timer; created on first use by controld_expect_sched_reply()
241  	
242  	// @TODO Make this a configurable cluster option if there's demand for it
243  	#define SCHED_TIMEOUT_MS (120000)  // Period given to mainloop_timer_add() for the reply timer (milliseconds, going by the name)
244  	
245  	/*!
246  	 * \internal
247  	 * \brief Handle a timeout waiting for scheduler reply
248  	 *
249  	 * \param[in] user_data  Ignored
250  	 *
251  	 * \return FALSE (indicating that timer should not be restarted)
252  	 */
253  	static gboolean
254  	controld_sched_timeout(gpointer user_data)
255  	{
256  	    crm_err("Timeout waiting for reply from scheduler.");
257  	    if (AM_I_DC) {
258  	        /* If this node is the DC but can't communicate with the scheduler, just
259  	         * exit (and likely get fenced) so this node doesn't interfere with any
260  	         * further DC elections.
261  	         *
262  	         * @TODO We could try something less drastic first, like disconnecting
263  	         * and reconnecting to the scheduler, but something is likely going
264  	         * seriously wrong, so perhaps it's better to just fail as quickly as
265  	         * possible.
266  	         */
267  	        crmd_exit(CRM_EX_FATAL);
268  	    }
269  	    return FALSE;
270  	}
271  	
272  	void
273  	controld_stop_sched_timer(void)
274  	{
275  	    if ((controld_sched_timer != NULL)
276  	        && (controld_globals.fsa_pe_ref != NULL)) {
277  	        crm_trace("Stopping timer for scheduler reply %s",
278  	                  controld_globals.fsa_pe_ref);
279  	    }
280  	    mainloop_timer_stop(controld_sched_timer);
281  	}
282  	
283  	/*!
284  	 * \internal
285  	 * \brief Set the scheduler request currently being waited on
286  	 *
287  	 * \param[in] ref  Request to expect reply to (or NULL for none)
288  	 *
289  	 * \note This function takes ownership of \p ref.
290  	 */
291  	void
292  	controld_expect_sched_reply(char *ref)
293  	{
294  	    if (ref) {
295  	        if (controld_sched_timer == NULL) {
296  	            controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
297  	                                                      SCHED_TIMEOUT_MS, FALSE,
298  	                                                      controld_sched_timeout,
299  	                                                      NULL);
300  	        }
301  	        mainloop_timer_start(controld_sched_timer);
302  	    } else {
303  	        controld_stop_sched_timer();
304  	    }
305  	    free(controld_globals.fsa_pe_ref);
306  	    controld_globals.fsa_pe_ref = ref;
307  	}
308  	
309  	/*!
310  	 * \internal
311  	 * \brief Free the scheduler reply timer
312  	 */
313  	void
314  	controld_free_sched_timer(void)
315  	{
316  	    if (controld_sched_timer != NULL) {
317  	        mainloop_timer_del(controld_sched_timer);
318  	        controld_sched_timer = NULL;
319  	    }
320  	}
321  	
322  	/*	 A_PE_INVOKE	*/
323  	void
324  	do_pe_invoke(long long action,
325  	             enum crmd_fsa_cause cause,
326  	             enum crmd_fsa_state cur_state,
327  	             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
328  	{
329  	    cib_t *cib_conn = controld_globals.cib_conn;
330  	
331  	    if (AM_I_DC == FALSE) {
332  	        crm_err("Not invoking scheduler because not DC: %s",
333  	                fsa_action2string(action));
334  	        return;
335  	    }
336  	
337  	    if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
338  	        if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
339  	            crm_err("Cannot shut down gracefully without the scheduler");
340  	            register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
341  	
342  	        } else {
343  	            crm_info("Waiting for the scheduler to connect");
344  	            crmd_fsa_stall(FALSE);
345  	            controld_set_fsa_action_flags(A_PE_START);
346  	            controld_trigger_fsa();
347  	        }
348  	        return;
349  	    }
350  	
351  	    if (cur_state != S_POLICY_ENGINE) {
352  	        crm_notice("Not invoking scheduler because in state %s",
353  	                   fsa_state2string(cur_state));
354  	        return;
355  	    }
356  	    if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
357  	        crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
358  	
359  	        /* start the join from scratch */
360  	        register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
361  	        return;
362  	    }
363  	
364  	    fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local);
365  	
366  	    crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
367  	              fsa_state2string(controld_globals.fsa_state));
368  	
369  	    controld_expect_sched_reply(NULL);
370  	    fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
371  	}
372  	
373  	static void
374  	force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
375  	{
376  	    int max = 0;
377  	    int lpc = 0;
378  	    const char *xpath_base = NULL;
379  	    char *xpath_string = NULL;
380  	    xmlXPathObjectPtr xpathObj = NULL;
381  	
382  	    xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG);
383  	    if (xpath_base == NULL) {
384  	        crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)");
385  	        return;
386  	    }
387  	
388  	    xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
389  	                                     xpath_base, XML_CIB_TAG_PROPSET,
390  	                                     attr_name);
391  	    xpathObj = xpath_search(xml, xpath_string);
392  	    max = numXpathResults(xpathObj);
393  	    free(xpath_string);
394  	
395  	    for (lpc = 0; lpc < max; lpc++) {
396  	        xmlNode *match = getXpathResult(xpathObj, lpc);
397  	        crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
398  	        crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
399  	    }
400  	
401  	    if(max == 0) {
402  	        xmlNode *configuration = NULL;
403  	        xmlNode *crm_config = NULL;
404  	        xmlNode *cluster_property_set = NULL;
405  	
406  	        crm_trace("Creating %s-%s for %s=%s",
407  	                  CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
408  	
409  	        configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
410  	                                       NULL);
411  	        if (configuration == NULL) {
412  	            configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
413  	        }
414  	
415  	        crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
416  	                                    NULL);
417  	        if (crm_config == NULL) {
418  	            crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
419  	        }
420  	
421  	        cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
422  	                                              NULL, NULL);
423  	        if (cluster_property_set == NULL) {
424  	            cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
425  	            crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
426  	        }
427  	
428  	        xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
429  	
430  	        crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
431  	        crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
432  	        crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
433  	    }
434  	    freeXpathObject(xpathObj);
435  	}
436  	
437  	static void
438  	do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
439  	{
440  	    char *ref = NULL;
441  	    pid_t watchdog = pcmk__locate_sbd();
442  	
(1) Event path: Condition "rc != 0", taking false branch.
443  	    if (rc != pcmk_ok) {
444  	        crm_err("Could not retrieve the Cluster Information Base: %s "
445  	                CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
446  	        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
447  	        return;
448  	
(2) Event path: Condition "call_id != fsa_pe_query", taking false branch.
449  	    } else if (call_id != fsa_pe_query) {
450  	        crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
451  	        return;
452  	
(3) Event path: Condition "!pcmk_all_flags_set(controld_globals.fsa_input_register, 1ULL)", taking false branch.
(4) Event path: Condition "!pcmk_all_flags_set(controld_globals.fsa_input_register, 512ULL)", taking false branch.
453  	    } else if (!AM_I_DC
454  	               || !pcmk_is_set(controld_globals.fsa_input_register,
455  	                               R_PE_CONNECTED)) {
456  	        crm_debug("No need to invoke the scheduler anymore");
457  	        return;
458  	
(5) Event path: Condition "controld_globals.fsa_state != S_POLICY_ENGINE", taking false branch.
459  	    } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
460  	        crm_debug("Discarding scheduler request in state: %s",
461  	                  fsa_state2string(controld_globals.fsa_state));
462  	        return;
463  	
464  	    /* this callback counts as 1 */
(6) Event path: Condition "num_cib_op_callbacks() > 1", taking false branch.
465  	    } else if (num_cib_op_callbacks() > 1) {
466  	        crm_debug("Re-asking for the CIB: %d other peer updates still pending",
467  	                  (num_cib_op_callbacks() - 1));
468  	        sleep(1);
469  	        controld_set_fsa_action_flags(A_PE_INVOKE);
470  	        controld_trigger_fsa();
471  	        return;
472  	    }
473  	
(7) Event path: Condition "!(output != NULL)", taking false branch.
474  	    CRM_LOG_ASSERT(output != NULL);
475  	
476  	    /* Refresh the remote node cache and the known node cache when the
477  	     * scheduler is invoked */
478  	    pcmk__refresh_node_caches_from_cib(output);
479  	
480  	    crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid);
481  	    pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM,
482  	                           pcmk_is_set(controld_globals.flags,
483  	                                       controld_has_quorum));
484  	
(8) Event path: Condition "watchdog", taking true branch.
485  	    force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
486  	
(9) Event path: Condition "pcmk_all_flags_set(controld_globals.flags, controld_ever_had_quorum)", taking true branch.
(10) Event path: Condition "!crm_have_quorum", taking true branch.
487  	    if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
488  	        && !crm_have_quorum) {
489  	        crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
490  	    }
491  	
(11) Event alloc_arg: "pcmk_schedulerd_api_graph" allocates memory that is stored into "ref". [details]
Also see events: [leaked_storage]
492  	    rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
493  	
(12) Event path: Condition "rc < 0", taking true branch.
494  	    if (rc < 0) {
495  	        crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
496  	                pcmk_strerror(rc), rc);
497  	        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
(13) Event path: Falling through to end of if statement.
498  	    } else {
499  	        CRM_ASSERT(ref != NULL);
500  	        controld_expect_sched_reply(ref);
501  	        crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
502  	                  "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref,
503  	                  crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags,
504  	                                                       controld_has_quorum)));
505  	    }
CID (unavailable; MK=5cca2d62578940f78239f332fc289b51) (#1 of 1): Resource leak (RESOURCE_LEAK):
(14) Event leaked_storage: Variable "ref" going out of scope leaks the storage it points to.
Also see events: [alloc_arg]
506  	}
507