1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <unistd.h> /* pid_t, sleep, ssize_t */
14
15 #include <crm/cib.h>
16 #include <crm/cluster.h>
17 #include <crm/common/xml.h>
18 #include <crm/crm.h>
19 #include <crm/common/ipc.h>
20 #include <crm/common/ipc_schedulerd.h>
21
22 #include <libxml/xpath.h> // xmlXPathObject, etc.
23
24 #include <pacemaker-controld.h>
25
// Scheduler IPC API object: allocated by new_schedulerd_ipc_connection() when
// still NULL, and freed/reset by controld_shutdown_schedulerd_ipc()
static pcmk_ipc_api_t *schedulerd_api = NULL;

// Timer created by do_pe_invoke_callback() to delay re-requesting the CIB, and
// deleted by sleep_timer() when it fires; do_pe_invoke() does nothing while it
// is non-NULL
static mainloop_timer_t *controld_cib_retry_timer = NULL;
29
30 /*!
31 * \internal
32 * \brief Save CIB query result to file, raising FSA error
33 *
34 * \param[in] msg Ignored
35 * \param[in] call_id Call ID of CIB query
36 * \param[in] rc Return code of CIB query
37 * \param[in] output Result of CIB query
38 * \param[in] user_data Unique identifier for filename
39 *
40 * \note This is intended to be called after a scheduler connection fails.
41 */
42 static void
43 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
44 void *user_data)
45 {
46 const char *id = user_data;
47
48 register_fsa_error(I_ERROR, NULL);
49 CRM_CHECK(id != NULL, return);
50
51 if (rc == pcmk_ok) {
52 char *filename = pcmk__assert_asprintf(PCMK_SCHEDULER_INPUT_DIR
53 "/pe-core-%s.bz2",
54 id);
55
56 if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
57 pcmk__err("Could not save CIB to %s after scheduler crash",
58 filename);
59 } else {
60 pcmk__notice("Saved CIB to %s after scheduler crash", filename);
61 }
62 free(filename);
63 }
64 }
65
66 /*!
67 * \internal
68 * \brief Respond to scheduler connection failure
69 */
70 static void
71 handle_disconnect(void)
72 {
73 // If we aren't connected to the scheduler, we can't expect a reply
74 controld_expect_sched_reply(NULL);
75
76 if (pcmk__is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
77 int rc = pcmk_ok;
78 char *uuid_str = pcmk__generate_uuid();
79
80 pcmk__crit("Lost connection to the scheduler "
81 QB_XS " CIB will be saved to "
82 PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2",
83 uuid_str);
84
85 /* Save the current CIB so that we have a chance of figuring out what
86 * killed the scheduler.
87 *
88 * Delay registering an I_ERROR until the query completes or times out.
89 */
90 rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
91 NULL, NULL, cib_none);
92 fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
93 }
94
95 controld_clear_fsa_input_flags(R_PE_CONNECTED);
96 controld_trigger_fsa();
97 }
98
99 static void
100 handle_reply(pcmk_schedulerd_api_reply_t *reply)
101 {
102 const char *msg_ref = NULL;
103
104 if (!AM_I_DC) {
105 return;
106 }
107
108 pcmk__assert(reply != NULL);
109 msg_ref = reply->data.graph.reference;
110
111 if (msg_ref == NULL) {
112 pcmk__err(CRM_OP_PECALC " - Ignoring calculation with no reference");
113
114 } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
115 pcmk__str_none)) {
116 ha_msg_input_t fsa_input = { NULL, NULL };
117 xmlNode *crm_data_node = NULL;
118
119 controld_stop_sched_timer();
120
121 /* do_te_invoke() (which will eventually process the fsa_input we are
122 * constructing here) requires that fsa_input.xml be non-NULL. That will
123 * happen only if copy_ha_msg_input() (which is called by
124 * register_fsa_input_adv()) sees the fsa_input.msg that it is
125 * expecting. The scheduler's IPC dispatch function gave us the values
126 * we need, so we just need to put them into XML.
127 *
128 * The name of the top-level element here is irrelevant. Nothing checks
129 * it.
130 */
131 fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
132 pcmk__xe_set(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
133 pcmk__xe_set(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
134 reply->data.graph.input);
135
136 crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
137 pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
138 controld_fsa_append(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
139
140 pcmk__xml_free(fsa_input.msg);
141
142 } else {
143 pcmk__info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
144 }
145 }
146
147 static void
148 scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
149 crm_exit_t status, void *event_data, void *user_data)
150 {
151 switch (event_type) {
152 case pcmk_ipc_event_disconnect:
153 handle_disconnect();
154 break;
155
156 case pcmk_ipc_event_reply:
157 handle_reply((pcmk_schedulerd_api_reply_t *) event_data);
158 break;
159
160 default:
161 break;
162 }
163 }
164
165 static bool
166 new_schedulerd_ipc_connection(void)
167 {
168 int rc = pcmk_rc_ok;
169
170 controld_set_fsa_input_flags(R_PE_REQUIRED);
171
172 if (schedulerd_api == NULL) {
173 rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
174
175 if (rc != pcmk_rc_ok) {
176 pcmk__err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
177 return false;
178 }
179 }
180
181 pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
182
183 rc = pcmk__connect_ipc_retry_conrefused(schedulerd_api,
184 pcmk_ipc_dispatch_main, 3);
185 if (rc != pcmk_rc_ok) {
186 pcmk__err("Error connecting to %s: %s",
187 pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
188 return false;
189 }
190
191 controld_set_fsa_input_flags(R_PE_CONNECTED);
192 return true;
193 }
194
195 /*!
196 * \internal
197 * \brief Close any scheduler connection and free associated memory
198 */
199 void
200 controld_shutdown_schedulerd_ipc(void)
201 {
202 controld_clear_fsa_input_flags(R_PE_REQUIRED);
203 pcmk_disconnect_ipc(schedulerd_api);
204 handle_disconnect();
|
CID (unavailable; MK=2ca00693a7e9ed5ecc37a1318d83e11d) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(1) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(2) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
205 g_clear_pointer(&schedulerd_api, pcmk_free_ipc_api);
206 }
207
// Forward declaration: do_pe_invoke() registers this CIB query callback before
// its definition appears later in the file
static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
                                  xmlNode *output, void *user_data);
210
211 // A_PE_START, A_PE_STOP, O_PE_RESTART
212 void
213 do_pe_control(long long action, enum crmd_fsa_cause cause,
214 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
215 fsa_data_t *msg_data)
216 {
217 if (pcmk__is_set(action, A_PE_STOP)) {
218 controld_clear_fsa_input_flags(R_PE_REQUIRED);
219 pcmk_disconnect_ipc(schedulerd_api);
220 handle_disconnect();
221 }
222
223 if (pcmk__is_set(action, A_PE_START)
224 && !pcmk__is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
225
226 if (cur_state == S_STOPPING) {
227 pcmk__info("Ignoring request to connect to scheduler while "
228 "shutting down");
229
230 } else if (!new_schedulerd_ipc_connection()) {
231 pcmk__warn("Could not connect to scheduler");
232 register_fsa_error(I_FAIL, msg_data);
233 }
234 }
235 }
236
// Call ID of the most recent CIB query issued by do_pe_invoke(); callbacks for
// any other call ID are treated as superseded
static int fsa_pe_query = 0;
// Timer for an outstanding scheduler reply; created on first use by
// controld_expect_sched_reply() and deleted by controld_free_sched_timer()
static mainloop_timer_t *controld_sched_timer = NULL;

// Interval given to the scheduler reply timer (milliseconds, per the name)
// @TODO Make this a configurable cluster option if there's demand for it
#define SCHED_TIMEOUT_MS (120000)
242
243 /*!
244 * \internal
245 * \brief Handle a timeout waiting for scheduler reply
246 *
247 * \param[in] user_data Ignored
248 *
249 * \return FALSE (indicating that timer should not be restarted)
250 */
251 static gboolean
252 controld_sched_timeout(gpointer user_data)
253 {
254 if (AM_I_DC) {
255 /* If this node is the DC but can't communicate with the scheduler, just
256 * exit (and likely get fenced) so this node doesn't interfere with any
257 * further DC elections.
258 *
259 * @TODO We could try something less drastic first, like disconnecting
260 * and reconnecting to the scheduler, but something is likely going
261 * seriously wrong, so perhaps it's better to just fail as quickly as
262 * possible.
263 */
264 crmd_exit(CRM_EX_FATAL);
265 }
266 return FALSE;
267 }
268
269 void
270 controld_stop_sched_timer(void)
271 {
272 if ((controld_sched_timer != NULL)
273 && (controld_globals.fsa_pe_ref != NULL)) {
274 pcmk__trace("Stopping timer for scheduler reply %s",
275 controld_globals.fsa_pe_ref);
276 }
277 mainloop_timer_stop(controld_sched_timer);
278 }
279
280 /*!
281 * \internal
282 * \brief Set the scheduler request currently being waited on
283 *
284 * \param[in] ref Request to expect reply to (or NULL for none)
285 *
286 * \note This function takes ownership of \p ref.
287 */
288 void
289 controld_expect_sched_reply(char *ref)
290 {
291 if (ref) {
292 if (controld_sched_timer == NULL) {
293 controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
294 SCHED_TIMEOUT_MS, FALSE,
295 controld_sched_timeout,
296 NULL);
297 }
298 mainloop_timer_start(controld_sched_timer);
299 } else {
300 controld_stop_sched_timer();
301 }
302 free(controld_globals.fsa_pe_ref);
303 controld_globals.fsa_pe_ref = ref;
304 }
305
306 /*!
307 * \internal
308 * \brief Free the scheduler reply timer
309 */
310 void
311 controld_free_sched_timer(void)
312 {
313 g_clear_pointer(&controld_sched_timer, mainloop_timer_del);
314 }
315
316 // A_PE_INVOKE
317 void
318 do_pe_invoke(long long action, enum crmd_fsa_cause cause,
319 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
320 fsa_data_t *msg_data)
321 {
322 cib_t *cib_conn = controld_globals.cib_conn;
323
324 if (!AM_I_DC) {
325 pcmk__err("Not invoking scheduler because not DC: %s",
326 fsa_action2string(action));
327 return;
328 }
329
330 if (!pcmk__is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
331 if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
332 pcmk__err("Cannot shut down gracefully without the scheduler");
333 controld_fsa_prepend(C_FSA_INTERNAL, I_TERMINATE, NULL);
334
335 } else {
336 pcmk__info("Waiting for the scheduler to connect");
337 controld_fsa_stall(msg_data, action);
338 controld_set_fsa_action_flags(A_PE_START);
339 controld_trigger_fsa();
340 }
341 return;
342 }
343
344 if (cur_state != S_POLICY_ENGINE) {
345 pcmk__notice("Not invoking scheduler because in state %s",
346 fsa_state2string(cur_state));
347 return;
348 }
349
350 if (!pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
351 pcmk__err("Attempted to invoke scheduler without consistent CIB");
352
353 // Start the join from scratch
354 controld_fsa_prepend(C_FSA_INTERNAL, I_ELECTION, NULL);
355 return;
356 }
357
358 if (controld_cib_retry_timer != NULL) {
359 pcmk__debug("Not invoking scheduler until CIB retry timer expires");
360 return;
361 }
362
363 fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);
364
365 pcmk__debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
366 fsa_state2string(controld_globals.fsa_state));
367
368 controld_expect_sched_reply(NULL);
369 fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
370 }
371
372 static void
373 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
374 {
375 int max = 0;
376 int lpc = 0;
377 const char *xpath_base = NULL;
378 char *xpath_string = NULL;
379 xmlXPathObject *xpathObj = NULL;
380
381 xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
382 if (xpath_base == NULL) {
383 pcmk__err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
384 return;
385 }
386
387 xpath_string = pcmk__assert_asprintf("%s//%s//nvpair[@name='%s']",
388 xpath_base,
389 PCMK_XE_CLUSTER_PROPERTY_SET,
390 attr_name);
391 xpathObj = pcmk__xpath_search(xml->doc, xpath_string);
392 max = pcmk__xpath_num_results(xpathObj);
393 free(xpath_string);
394
395 for (lpc = 0; lpc < max; lpc++) {
396 xmlNode *match = pcmk__xpath_result(xpathObj, lpc);
397
398 if (match == NULL) {
399 continue;
400 }
401 pcmk__trace("Forcing %s/%s = %s", pcmk__xe_id(match), attr_name,
402 attr_value);
403 pcmk__xe_set(match, PCMK_XA_VALUE, attr_value);
404 }
405
406 if(max == 0) {
407 xmlNode *configuration = NULL;
408 xmlNode *crm_config = NULL;
409 xmlNode *cluster_property_set = NULL;
410
411 pcmk__trace("Creating " PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS "-%s for "
412 "%s=%s",
413 attr_name, attr_name, attr_value);
414
415 configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
416 NULL);
417 if (configuration == NULL) {
418 configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
419 }
420
421 crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
422 NULL, NULL);
423 if (crm_config == NULL) {
424 crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
425 }
426
427 cluster_property_set =
428 pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
429 NULL);
430 if (cluster_property_set == NULL) {
431 cluster_property_set =
432 pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
433 pcmk__xe_set(cluster_property_set, PCMK_XA_ID,
434 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
435 }
436
437 xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
438
439 pcmk__xe_set_id(xml, "%s-%s",
440 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
441 pcmk__xe_set(xml, PCMK_XA_NAME, attr_name);
442 pcmk__xe_set(xml, PCMK_XA_VALUE, attr_value);
443 }
444 xmlXPathFreeObject(xpathObj);
445 }
446
447 static gboolean
448 sleep_timer(gpointer data)
449 {
450 controld_set_fsa_action_flags(A_PE_INVOKE);
451 controld_trigger_fsa();
452 g_clear_pointer(&controld_cib_retry_timer, mainloop_timer_del);
453 return G_SOURCE_REMOVE;
454 }
455
456 static void
457 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
458 {
459 char *ref = NULL;
460 pid_t watchdog = pcmk__locate_sbd();
461
462 if (rc != pcmk_ok) {
463 pcmk__err("Could not retrieve the CIB: %s " QB_XS " rc=%d call=%d",
464 pcmk_strerror(rc), rc, call_id);
465 register_fsa_error(I_ERROR, NULL);
466 return;
467
468 } else if (call_id != fsa_pe_query) {
469 pcmk__trace("Skipping superseded CIB query: %d (current=%d)", call_id,
470 fsa_pe_query);
471 return;
472
473 } else if (!AM_I_DC
474 || !pcmk__is_set(controld_globals.fsa_input_register,
475 R_PE_CONNECTED)) {
476 pcmk__debug("No need to invoke the scheduler anymore");
477 return;
478
479 } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
480 pcmk__debug("Discarding scheduler request in state: %s",
481 fsa_state2string(controld_globals.fsa_state));
482 return;
483
484 /* this callback counts as 1 */
485 } else if (num_cib_op_callbacks() > 1) {
486 pcmk__debug("Re-asking for the CIB: %d other peer updates still "
487 "pending", (num_cib_op_callbacks() - 1));
488
489 controld_cib_retry_timer = mainloop_timer_add("cib_retry", 1000, false,
490 sleep_timer, NULL);
491 mainloop_timer_start(controld_cib_retry_timer);
492 return;
493 }
494
495 CRM_LOG_ASSERT(output != NULL);
496
497 /* Refresh the remote node cache and the known node cache when the
498 * scheduler is invoked */
499 pcmk__refresh_node_caches_from_cib(output);
500
501 pcmk__xe_set(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
502 pcmk__xe_set_bool(output, PCMK_XA_HAVE_QUORUM,
503 pcmk__is_set(controld_globals.flags,
504 controld_has_quorum));
505
506 force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
507
508 if (pcmk__is_set(controld_globals.flags, controld_ever_had_quorum)
509 && !pcmk__cluster_has_quorum()) {
510
511 pcmk__xe_set_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
512 }
513
514 rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref);
515 if (rc != pcmk_rc_ok) {
516 free(ref);
517 pcmk__err("Could not contact the scheduler: %s " QB_XS " rc=%d",
518 pcmk_rc_str(rc), rc);
519 register_fsa_error(I_ERROR, NULL);
520
521 } else {
522 pcmk__assert(ref != NULL);
523 controld_expect_sched_reply(ref);
524 pcmk__debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
525 "quorate=%s",
526 fsa_pe_query, controld_globals.fsa_pe_ref,
527 controld_globals.peer_seq,
528 pcmk__flag_text(controld_globals.flags,
529 controld_has_quorum));
530 }
531 }
532