1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13
14 #include <crm/crm.h>
15 #include <crm/common/xml.h>
16
17 #include <pacemaker-controld.h>
18
//! Triggers transition graph processing
static crm_trigger_t *transition_trigger = NULL;

/* Table of node pending timers (struct abort_timer_s *), keyed by a copy of
 * the node's XML ID. It is created on first use by init_node_pending_timer()
 * and stays NULL until then.
 */
static GHashTable *node_pending_timers = NULL;
23
24 gboolean
25 stop_te_timer(pcmk__graph_action_t *action)
26 {
27 if (action == NULL) {
28 return FALSE;
29 }
30 if (action->timer != 0) {
31 pcmk__trace("Stopping action timer");
32 g_source_remove(action->timer);
33 action->timer = 0;
34 } else {
35 pcmk__trace("Action timer was already stopped");
36 return FALSE;
37 }
38 return TRUE;
39 }
40
41 static gboolean
42 te_graph_trigger(gpointer user_data)
43 {
44 if (controld_globals.transition_graph == NULL) {
45 pcmk__debug("Nothing to do");
46 return TRUE;
47 }
48
49 pcmk__trace("Invoking graph %d in state %s",
50 controld_globals.transition_graph->id,
51 fsa_state2string(controld_globals.fsa_state));
52
53 switch (controld_globals.fsa_state) {
54 case S_STARTING:
55 case S_PENDING:
56 case S_NOT_DC:
57 case S_STOPPING:
58 case S_TERMINATE:
59 return TRUE;
60 default:
61 break;
62 }
63
64 if (!controld_globals.transition_graph->complete) {
65 enum pcmk__graph_status graph_rc;
66 int orig_limit = controld_globals.transition_graph->batch_limit;
67 int throttled_limit = throttle_get_total_job_limit(orig_limit);
68
69 controld_globals.transition_graph->batch_limit = throttled_limit;
70 graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
71 controld_globals.transition_graph->batch_limit = orig_limit;
72
73 if (graph_rc == pcmk__graph_active) {
74 pcmk__trace("Transition not yet complete");
75 return TRUE;
76
77 } else if (graph_rc == pcmk__graph_pending) {
78 pcmk__trace("Transition not yet complete - no actions fired");
79 return TRUE;
80 }
81
82 if (graph_rc != pcmk__graph_complete) {
83 pcmk__warn("Transition failed: %s",
84 pcmk__graph_status2text(graph_rc));
85 pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
86 }
87 }
88
89 pcmk__debug("Transition %d is now complete",
90 controld_globals.transition_graph->id);
91 controld_globals.transition_graph->complete = true;
92 notify_crmd(controld_globals.transition_graph);
93
94 return TRUE;
95 }
96
/*!
 * \internal
 * \brief Initialize transition trigger
 *
 * Registers te_graph_trigger() as a low-priority main loop trigger and stores
 * the result in the file-scope \c transition_trigger, which
 * controld_trigger_graph_as() sets and controld_destroy_transition_trigger()
 * frees.
 */
void
controld_init_transition_trigger(void)
{
    transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
                                              NULL);
}
107
108 /*!
109 * \internal
110 * \brief Destroy transition trigger
111 */
112 void
113 controld_destroy_transition_trigger(void)
114 {
115 g_clear_pointer(&transition_trigger, mainloop_destroy_trigger);
116 }
117
/*!
 * \internal
 * \brief Set the transition trigger on behalf of a caller
 *
 * \param[in] fn    Name of the requesting function (used only in the trace log)
 * \param[in] line  Source line of the request (used only in the trace log)
 */
void
controld_trigger_graph_as(const char *fn, int line)
{
    pcmk__trace("%s:%d - Triggered graph processing", fn, line);
    mainloop_set_trigger(transition_trigger);
}
124
/* State for a delayed transition abort. The file-scope instance below backs
 * abort_after_delay(); init_node_pending_timer() allocates further instances,
 * one per pending node.
 */
static struct abort_timer_s {
    // Set when a transition abort happens; a popped timer aborts only if unset
    bool aborted;
    // Timer ID returned by pcmk__create_timer(), or 0 if no timer is running
    guint id;
    // Priority passed to abort_transition() when the timer pops
    int priority;
    // Action passed to abort_transition() when the timer pops
    enum pcmk__graph_next action;
    // Reason passed to abort_transition(); stored by pointer, never copied
    const char *text;
} abort_timer = { 0, };
132
133 static gboolean
134 abort_timer_popped(gpointer data)
135 {
136 struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
137
138 if (AM_I_DC && (abort_timer->aborted == FALSE)) {
139 abort_transition(abort_timer->priority, abort_timer->action,
140 abort_timer->text, NULL);
141 }
142 abort_timer->id = 0;
143 return FALSE; // do not immediately reschedule timer
144 }
145
146 /*!
147 * \internal
148 * \brief Abort transition after delay, if not already aborted in that time
149 *
150 * \param[in] abort_text Must be literal string
151 */
152 void
153 abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
154 const char *abort_text, guint delay_ms)
155 {
156 if (abort_timer.id) {
157 // Timer already in progress, stop and reschedule
158 g_source_remove(abort_timer.id);
159 }
160 abort_timer.aborted = FALSE;
161 abort_timer.priority = abort_priority;
162 abort_timer.action = abort_action;
163 abort_timer.text = abort_text;
164 abort_timer.id = pcmk__create_timer(delay_ms, abort_timer_popped, &abort_timer);
165 }
166
167 static void
168 free_node_pending_timer(gpointer data)
169 {
170 struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
171
172 if (node_pending_timer->id != 0) {
173 g_source_remove(node_pending_timer->id);
174 node_pending_timer->id = 0;
175 }
176
177 free(node_pending_timer);
178 }
179
180 static gboolean
181 node_pending_timer_popped(gpointer key)
182 {
183 struct abort_timer_s *node_pending_timer = NULL;
184
185 if (node_pending_timers == NULL) {
186 return FALSE;
187 }
188
189 node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
190 if (node_pending_timer == NULL) {
191 return FALSE;
192 }
193
194 pcmk__warn("Node with " PCMK_XA_ID " '%s' pending timed out (%us) on "
195 "joining the process group",
196 (const char *) key, controld_globals.node_pending_timeout);
197
198 if (controld_globals.node_pending_timeout > 0) {
199 abort_timer_popped(node_pending_timer);
200 }
201
202 g_hash_table_remove(node_pending_timers, key);
203
204 return FALSE; // do not reschedule timer
205 }
206
207 static void
208 init_node_pending_timer(const pcmk__node_status_t *node, guint timeout)
209 {
210 struct abort_timer_s *node_pending_timer = NULL;
211 char *key = NULL;
212
213 if (node->xml_id == NULL) {
214 return;
215 }
216
217 if (node_pending_timers == NULL) {
218 node_pending_timers = pcmk__strikey_table(free,
219 free_node_pending_timer);
220
221 // The timer is somehow already existing
222 } else if (g_hash_table_lookup(node_pending_timers, node->xml_id) != NULL) {
223 return;
224 }
225
226 pcmk__notice("Waiting for pending %s with " PCMK_XA_ID " '%s' to join the "
227 "process group (timeout=%us)",
228 pcmk__s(node->name, "node"), node->xml_id,
229 controld_globals.node_pending_timeout);
230
231 key = pcmk__str_copy(node->xml_id);
232 node_pending_timer = pcmk__assert_alloc(1, sizeof(struct abort_timer_s));
233
234 node_pending_timer->aborted = FALSE;
235 node_pending_timer->priority = PCMK_SCORE_INFINITY;
236 node_pending_timer->action = pcmk__graph_restart;
237 node_pending_timer->text = "Node pending timed out";
238
239 g_hash_table_replace(node_pending_timers, key, node_pending_timer);
240
241 node_pending_timer->id = pcmk__create_timer(timeout * 1000,
242 node_pending_timer_popped,
243 key);
244 pcmk__assert(node_pending_timer->id != 0);
245 }
246
247 static void
248 remove_node_pending_timer(const char *node_uuid)
249 {
250 if (node_pending_timers == NULL) {
251 return;
252 }
253
254 g_hash_table_remove(node_pending_timers, node_uuid);
255 }
256
257 void
258 controld_node_pending_timer(const pcmk__node_status_t *node)
259 {
260 long long remaining_timeout = 0;
261
262 /* If the node is not an active cluster node, is leaving the cluster, or is
263 * already part of CPG, or PCMK_OPT_NODE_PENDING_TIMEOUT is disabled, free
264 * any node pending timer for it.
265 */
266 if (pcmk__is_set(node->flags, pcmk__node_status_remote)
267 || (node->when_member <= 1) || (node->when_online > 0)
268 || (controld_globals.node_pending_timeout == 0)) {
269
270 remove_node_pending_timer(node->xml_id);
271 return;
272 }
273
274 // Node is a cluster member but offline in CPG
275
276 remaining_timeout = node->when_member - time(NULL)
277 + controld_globals.node_pending_timeout;
278
279 /* It already passed node pending timeout somehow.
280 * Free any node pending timer of it.
281 */
282 if (remaining_timeout <= 0) {
283 remove_node_pending_timer(node->xml_id);
284 return;
285 }
286
287 init_node_pending_timer(node, remaining_timeout);
288 }
289
290 void
291 controld_free_node_pending_timers(void)
292 {
|
CID (unavailable; MK=8e169c327f375aa6e9115a6b4d295f02) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(1) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(2) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
293 g_clear_pointer(&node_pending_timers, g_hash_table_destroy);
294 }
295
296 static const char *
297 abort2text(enum pcmk__graph_next abort_action)
298 {
299 switch (abort_action) {
300 case pcmk__graph_done: return "done";
301 case pcmk__graph_wait: return "stop";
302 case pcmk__graph_restart: return "restart";
303 case pcmk__graph_shutdown: return "shutdown";
304 }
305 return "unknown";
306 }
307
308 static bool
309 update_abort_priority(pcmk__graph_t *graph, int priority,
310 enum pcmk__graph_next action, const char *abort_reason)
311 {
312 bool change = FALSE;
313
314 if (graph == NULL) {
315 return change;
316 }
317
318 if (graph->abort_priority < priority) {
319 pcmk__debug("Abort priority upgraded from %d to %d",
320 graph->abort_priority, priority);
321 graph->abort_priority = priority;
322 if (graph->abort_reason != NULL) {
323 pcmk__debug("'%s' abort superseded by %s", graph->abort_reason,
324 abort_reason);
325 }
326 graph->abort_reason = abort_reason;
327 change = TRUE;
328 }
329
330 if (graph->completion_action < action) {
331 pcmk__debug("Abort action %s superseded by %s: %s",
332 abort2text(graph->completion_action), abort2text(action),
333 abort_reason);
334 graph->completion_action = action;
335 change = TRUE;
336 }
337
338 return change;
339 }
340
341 void
342 abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
343 const char *abort_text, const xmlNode *reason,
344 const char *fn, int line)
345 {
346 int add[] = { 0, 0, 0 };
347 int del[] = { 0, 0, 0 };
348 int level = LOG_INFO;
349 const xmlNode *diff = NULL;
350 const xmlNode *change = NULL;
351 const bool complete = controld_globals.transition_graph->complete;
352
353 CRM_CHECK(controld_globals.transition_graph != NULL, return);
354
355 switch (controld_globals.fsa_state) {
356 case S_STARTING:
357 case S_PENDING:
358 case S_NOT_DC:
359 case S_STOPPING:
360 case S_TERMINATE:
361 pcmk__info("Abort %s suppressed: state=%s (%scomplete)",
362 abort_text, fsa_state2string(controld_globals.fsa_state),
363 (complete? "" : "in"));
364 return;
365 default:
366 break;
367 }
368
369 abort_timer.aborted = TRUE;
370 controld_expect_sched_reply(NULL);
371
372 if (!controld_globals.transition_graph->complete
373 && update_abort_priority(controld_globals.transition_graph,
374 abort_priority, abort_action,
375 abort_text)) {
376 level = LOG_NOTICE;
377 }
378
379 if (reason != NULL) {
380 const xmlNode *search = NULL;
381
382 for(search = reason; search; search = search->parent) {
383 if (pcmk__xe_is(search, PCMK_XE_DIFF)) {
384 diff = search;
385 break;
386 }
387 }
388
389 if(diff) {
390 pcmk__xml_patchset_versions(diff, del, add);
391 for(search = reason; search; search = search->parent) {
392 if (pcmk__xe_is(search, PCMK_XE_CHANGE)) {
393 change = search;
394 break;
395 }
396 }
397 }
398 }
399
400 if (reason == NULL) {
401 do_crm_log(level,
402 "Transition %d aborted: %s " QB_XS " source=%s:%d "
403 "complete=%s", controld_globals.transition_graph->id,
404 abort_text, fn, line,
405 pcmk__btoa(controld_globals.transition_graph->complete));
406
407 } else if(change == NULL) {
408 GString *local_path = pcmk__element_xpath(reason);
409 pcmk__assert(local_path != NULL);
410
411 do_crm_log(level, "Transition %d aborted by %s.%s: %s "
412 QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
413 controld_globals.transition_graph->id, reason->name,
414 pcmk__xe_id(reason), abort_text, add[0], add[1], add[2], fn,
415 line, (const char *) local_path->str,
416 pcmk__btoa(controld_globals.transition_graph->complete));
417 g_string_free(local_path, TRUE);
418
419 } else {
420 const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION);
421 const char *path = pcmk__xe_get(change, PCMK_XA_PATH);
422
423 if(change == reason) {
424 if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
425 reason = reason->children;
426
427 } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
428 reason = pcmk__xe_first_child(reason, PCMK_XE_CHANGE_RESULT,
429 NULL, NULL);
430 if(reason) {
431 reason = reason->children;
432 }
433 }
434 CRM_CHECK(reason != NULL, goto done);
435 }
436
437 if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
438 const char *shortpath = strrchr(path, '/');
439
440 do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
441 QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
442 controld_globals.transition_graph->id,
443 (shortpath? (shortpath + 1) : path), abort_text,
444 add[0], add[1], add[2], fn, line, path,
445 pcmk__btoa(controld_globals.transition_graph->complete));
446
447 } else if (pcmk__xe_is(reason, PCMK_XE_NVPAIR)) {
448 do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
449 QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
450 controld_globals.transition_graph->id,
451 pcmk__xe_get(reason, PCMK_XA_ID), op,
452 pcmk__xe_get(reason, PCMK_XA_NAME),
453 pcmk__xe_get(reason, PCMK_XA_VALUE),
454 abort_text, add[0], add[1], add[2], fn, line, path,
455 pcmk__btoa(controld_globals.transition_graph->complete));
456
457 } else if (pcmk__xe_is(reason, PCMK__XE_LRM_RSC_OP)) {
458 const char *magic = pcmk__xe_get(reason, PCMK__XA_TRANSITION_MAGIC);
459
460 do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
461 QB_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
462 controld_globals.transition_graph->id,
463 pcmk__xe_get(reason, PCMK__XA_OPERATION_KEY), op,
464 pcmk__xe_get(reason, PCMK__META_ON_NODE), abort_text,
465 magic, add[0], add[1], add[2], fn, line,
466 pcmk__btoa(controld_globals.transition_graph->complete));
467
468 } else if (pcmk__str_any_of((const char *) reason->name,
469 PCMK__XE_NODE_STATE, PCMK_XE_NODE, NULL)) {
470 const char *uname = pcmk__node_name_from_uuid(pcmk__xe_id(reason));
471
472 do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
473 QB_XS " cib=%d.%d.%d source=%s:%d complete=%s",
474 controld_globals.transition_graph->id,
475 reason->name, op, pcmk__s(uname, pcmk__xe_id(reason)),
476 abort_text, add[0], add[1], add[2], fn, line,
477 pcmk__btoa(controld_globals.transition_graph->complete));
478
479 } else {
480 const char *id = pcmk__xe_id(reason);
481
482 do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
483 QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
484 controld_globals.transition_graph->id,
485 reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
486 abort_text, add[0], add[1], add[2], fn, line, path,
487 pcmk__btoa(controld_globals.transition_graph->complete));
488 }
489 }
490
491 done:
492 if (controld_globals.transition_graph->complete) {
493 if (controld_get_period_transition_timer() > 0) {
494 controld_stop_transition_timer();
495 controld_start_transition_timer();
496 } else {
497 controld_fsa_append(C_FSA_INTERNAL, I_PE_CALC, NULL);
498 }
499 return;
500 }
501
502 trigger_graph();
503 }
504