1    	/*
2    	 * Copyright 2004-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <stdbool.h>
13   	
14   	#include <crm/crm.h>
15   	#include <crm/common/xml.h>
16   	
17   	#include <pacemaker-controld.h>
18   	
19   	//! Triggers transition graph processing
20   	static crm_trigger_t *transition_trigger = NULL;
21   	
22   	static GHashTable *node_pending_timers = NULL;
23   	
24   	gboolean
25   	stop_te_timer(pcmk__graph_action_t *action)
26   	{
27   	    if (action == NULL) {
28   	        return FALSE;
29   	    }
30   	    if (action->timer != 0) {
31   	        pcmk__trace("Stopping action timer");
32   	        g_source_remove(action->timer);
33   	        action->timer = 0;
34   	    } else {
35   	        pcmk__trace("Action timer was already stopped");
36   	        return FALSE;
37   	    }
38   	    return TRUE;
39   	}
40   	
41   	static gboolean
42   	te_graph_trigger(gpointer user_data)
43   	{
44   	    if (controld_globals.transition_graph == NULL) {
45   	        pcmk__debug("Nothing to do");
46   	        return TRUE;
47   	    }
48   	
49   	    pcmk__trace("Invoking graph %d in state %s",
50   	                controld_globals.transition_graph->id,
51   	                fsa_state2string(controld_globals.fsa_state));
52   	
53   	    switch (controld_globals.fsa_state) {
54   	        case S_STARTING:
55   	        case S_PENDING:
56   	        case S_NOT_DC:
57   	        case S_STOPPING:
58   	        case S_TERMINATE:
59   	            return TRUE;
60   	        default:
61   	            break;
62   	    }
63   	
64   	    if (!controld_globals.transition_graph->complete) {
65   	        enum pcmk__graph_status graph_rc;
66   	        int orig_limit = controld_globals.transition_graph->batch_limit;
67   	        int throttled_limit = throttle_get_total_job_limit(orig_limit);
68   	
69   	        controld_globals.transition_graph->batch_limit = throttled_limit;
70   	        graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
71   	        controld_globals.transition_graph->batch_limit = orig_limit;
72   	
73   	        if (graph_rc == pcmk__graph_active) {
74   	            pcmk__trace("Transition not yet complete");
75   	            return TRUE;
76   	
77   	        } else if (graph_rc == pcmk__graph_pending) {
78   	            pcmk__trace("Transition not yet complete - no actions fired");
79   	            return TRUE;
80   	        }
81   	
82   	        if (graph_rc != pcmk__graph_complete) {
83   	            pcmk__warn("Transition failed: %s",
84   	                       pcmk__graph_status2text(graph_rc));
85   	            pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
86   	        }
87   	    }
88   	
89   	    pcmk__debug("Transition %d is now complete",
90   	                controld_globals.transition_graph->id);
91   	    controld_globals.transition_graph->complete = true;
92   	    notify_crmd(controld_globals.transition_graph);
93   	
94   	    return TRUE;
95   	}
96   	
97   	/*!
98   	 * \internal
99   	 * \brief Initialize transition trigger
100  	 */
101  	void
102  	controld_init_transition_trigger(void)
103  	{
104  	    transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
105  	                                              NULL);
106  	}
107  	
108  	/*!
109  	 * \internal
110  	 * \brief Destroy transition trigger
111  	 */
112  	void
113  	controld_destroy_transition_trigger(void)
114  	{
115  	    g_clear_pointer(&transition_trigger, mainloop_destroy_trigger);
116  	}
117  	
118  	void
119  	controld_trigger_graph_as(const char *fn, int line)
120  	{
121  	    pcmk__trace("%s:%d - Triggered graph processing", fn, line);
122  	    mainloop_set_trigger(transition_trigger);
123  	}
124  	
125  	static struct abort_timer_s {
126  	    bool aborted;
127  	    guint id;
128  	    int priority;
129  	    enum pcmk__graph_next action;
130  	    const char *text;
131  	} abort_timer = { 0, };
132  	
133  	static gboolean
134  	abort_timer_popped(gpointer data)
135  	{
136  	    struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
137  	
138  	    if (AM_I_DC && (abort_timer->aborted == FALSE)) {
139  	        abort_transition(abort_timer->priority, abort_timer->action,
140  	                         abort_timer->text, NULL);
141  	    }
142  	    abort_timer->id = 0;
143  	    return FALSE; // do not immediately reschedule timer
144  	}
145  	
146  	/*!
147  	 * \internal
148  	 * \brief Abort transition after delay, if not already aborted in that time
149  	 *
150  	 * \param[in] abort_text  Must be literal string
151  	 */
152  	void
153  	abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
154  	                  const char *abort_text, guint delay_ms)
155  	{
156  	    if (abort_timer.id) {
157  	        // Timer already in progress, stop and reschedule
158  	        g_source_remove(abort_timer.id);
159  	    }
160  	    abort_timer.aborted = FALSE;
161  	    abort_timer.priority = abort_priority;
162  	    abort_timer.action = abort_action;
163  	    abort_timer.text = abort_text;
164  	    abort_timer.id = pcmk__create_timer(delay_ms, abort_timer_popped, &abort_timer);
165  	}
166  	
167  	static void
168  	free_node_pending_timer(gpointer data)
169  	{
170  	    struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
171  	
172  	    if (node_pending_timer->id != 0) {
173  	        g_source_remove(node_pending_timer->id);
174  	        node_pending_timer->id = 0;
175  	    }
176  	
177  	    free(node_pending_timer);
178  	}
179  	
180  	static gboolean
181  	node_pending_timer_popped(gpointer key)
182  	{
183  	    struct abort_timer_s *node_pending_timer = NULL;
184  	
185  	    if (node_pending_timers == NULL) {
186  	        return FALSE;
187  	    }
188  	
189  	    node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
190  	    if (node_pending_timer == NULL) {
191  	        return FALSE;
192  	    }
193  	
194  	    pcmk__warn("Node with " PCMK_XA_ID " '%s' pending timed out (%us) on "
195  	               "joining the process group",
196  	               (const char *) key, controld_globals.node_pending_timeout);
197  	
198  	    if (controld_globals.node_pending_timeout > 0) {
199  	        abort_timer_popped(node_pending_timer);
200  	    }
201  	
202  	    g_hash_table_remove(node_pending_timers, key);
203  	
204  	    return FALSE; // do not reschedule timer
205  	}
206  	
207  	static void
208  	init_node_pending_timer(const pcmk__node_status_t *node, guint timeout)
209  	{
210  	    struct abort_timer_s *node_pending_timer = NULL;
211  	    char *key = NULL;
212  	
213  	    if (node->xml_id == NULL) {
214  	        return;
215  	    }
216  	
217  	    if (node_pending_timers == NULL) {
218  	        node_pending_timers = pcmk__strikey_table(free,
219  	                                                  free_node_pending_timer);
220  	
221  	    // The timer is somehow already existing
222  	    } else if (g_hash_table_lookup(node_pending_timers, node->xml_id) != NULL) {
223  	        return;
224  	    }
225  	
226  	    pcmk__notice("Waiting for pending %s with " PCMK_XA_ID " '%s' to join the "
227  	                 "process group (timeout=%us)",
228  	                 pcmk__s(node->name, "node"), node->xml_id,
229  	                 controld_globals.node_pending_timeout);
230  	
231  	    key = pcmk__str_copy(node->xml_id);
232  	    node_pending_timer = pcmk__assert_alloc(1, sizeof(struct abort_timer_s));
233  	
234  	    node_pending_timer->aborted = FALSE;
235  	    node_pending_timer->priority = PCMK_SCORE_INFINITY;
236  	    node_pending_timer->action = pcmk__graph_restart;
237  	    node_pending_timer->text = "Node pending timed out";
238  	
239  	    g_hash_table_replace(node_pending_timers, key, node_pending_timer);
240  	
241  	    node_pending_timer->id = pcmk__create_timer(timeout * 1000,
242  	                                                node_pending_timer_popped,
243  	                                                key);
244  	    pcmk__assert(node_pending_timer->id != 0);
245  	}
246  	
247  	static void
248  	remove_node_pending_timer(const char *node_uuid)
249  	{
250  	    if (node_pending_timers == NULL) {
251  	        return;
252  	    }
253  	
254  	    g_hash_table_remove(node_pending_timers, node_uuid);
255  	}
256  	
257  	void
258  	controld_node_pending_timer(const pcmk__node_status_t *node)
259  	{
260  	    long long remaining_timeout = 0;
261  	
262  	    /* If the node is not an active cluster node, is leaving the cluster, or is
263  	     * already part of CPG, or PCMK_OPT_NODE_PENDING_TIMEOUT is disabled, free
264  	     * any node pending timer for it.
265  	     */
266  	    if (pcmk__is_set(node->flags, pcmk__node_status_remote)
267  	        || (node->when_member <= 1) || (node->when_online > 0)
268  	        || (controld_globals.node_pending_timeout == 0)) {
269  	
270  	        remove_node_pending_timer(node->xml_id);
271  	        return;
272  	    }
273  	
274  	    // Node is a cluster member but offline in CPG
275  	
276  	    remaining_timeout = node->when_member - time(NULL)
277  	                        + controld_globals.node_pending_timeout;
278  	
279  	    /* It already passed node pending timeout somehow.
280  	     * Free any node pending timer of it.
281  	     */
282  	    if (remaining_timeout <= 0) {
283  	        remove_node_pending_timer(node->xml_id);
284  	        return;
285  	    }
286  	
287  	    init_node_pending_timer(node, remaining_timeout);
288  	}
289  	
290  	void
291  	controld_free_node_pending_timers(void)
292  	{
CID (unavailable; MK=8e169c327f375aa6e9115a6b4d295f02) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS):
(1) Event assign_union_field: The union field "in" of "_pp" is written.
(2) Event inconsistent_union_field_access: In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in".
293  	    g_clear_pointer(&node_pending_timers, g_hash_table_destroy);
294  	}
295  	
296  	static const char *
297  	abort2text(enum pcmk__graph_next abort_action)
298  	{
299  	    switch (abort_action) {
300  	        case pcmk__graph_done:      return "done";
301  	        case pcmk__graph_wait:      return "stop";
302  	        case pcmk__graph_restart:   return "restart";
303  	        case pcmk__graph_shutdown:  return "shutdown";
304  	    }
305  	    return "unknown";
306  	}
307  	
308  	static bool
309  	update_abort_priority(pcmk__graph_t *graph, int priority,
310  	                      enum pcmk__graph_next action, const char *abort_reason)
311  	{
312  	    bool change = FALSE;
313  	
314  	    if (graph == NULL) {
315  	        return change;
316  	    }
317  	
318  	    if (graph->abort_priority < priority) {
319  	        pcmk__debug("Abort priority upgraded from %d to %d",
320  	                    graph->abort_priority, priority);
321  	        graph->abort_priority = priority;
322  	        if (graph->abort_reason != NULL) {
323  	            pcmk__debug("'%s' abort superseded by %s", graph->abort_reason,
324  	                        abort_reason);
325  	        }
326  	        graph->abort_reason = abort_reason;
327  	        change = TRUE;
328  	    }
329  	
330  	    if (graph->completion_action < action) {
331  	        pcmk__debug("Abort action %s superseded by %s: %s",
332  	                    abort2text(graph->completion_action), abort2text(action),
333  	                    abort_reason);
334  	        graph->completion_action = action;
335  	        change = TRUE;
336  	    }
337  	
338  	    return change;
339  	}
340  	
341  	void
342  	abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
343  	                       const char *abort_text, const xmlNode *reason,
344  	                       const char *fn, int line)
345  	{
346  	    int add[] = { 0, 0, 0 };
347  	    int del[] = { 0, 0, 0 };
348  	    int level = LOG_INFO;
349  	    const xmlNode *diff = NULL;
350  	    const xmlNode *change = NULL;
351  	    const bool complete = controld_globals.transition_graph->complete;
352  	
353  	    CRM_CHECK(controld_globals.transition_graph != NULL, return);
354  	
355  	    switch (controld_globals.fsa_state) {
356  	        case S_STARTING:
357  	        case S_PENDING:
358  	        case S_NOT_DC:
359  	        case S_STOPPING:
360  	        case S_TERMINATE:
361  	            pcmk__info("Abort %s suppressed: state=%s (%scomplete)",
362  	                       abort_text, fsa_state2string(controld_globals.fsa_state),
363  	                       (complete? "" : "in"));
364  	            return;
365  	        default:
366  	            break;
367  	    }
368  	
369  	    abort_timer.aborted = TRUE;
370  	    controld_expect_sched_reply(NULL);
371  	
372  	    if (!controld_globals.transition_graph->complete
373  	        && update_abort_priority(controld_globals.transition_graph,
374  	                                 abort_priority, abort_action,
375  	                                 abort_text)) {
376  	        level = LOG_NOTICE;
377  	    }
378  	
379  	    if (reason != NULL) {
380  	        const xmlNode *search = NULL;
381  	
382  	        for(search = reason; search; search = search->parent) {
383  	            if (pcmk__xe_is(search, PCMK_XE_DIFF)) {
384  	                diff = search;
385  	                break;
386  	            }
387  	        }
388  	
389  	        if(diff) {
390  	            pcmk__xml_patchset_versions(diff, del, add);
391  	            for(search = reason; search; search = search->parent) {
392  	                if (pcmk__xe_is(search, PCMK_XE_CHANGE)) {
393  	                    change = search;
394  	                    break;
395  	                }
396  	            }
397  	        }
398  	    }
399  	
400  	    if (reason == NULL) {
401  	        do_crm_log(level,
402  	                   "Transition %d aborted: %s " QB_XS " source=%s:%d "
403  	                   "complete=%s", controld_globals.transition_graph->id,
404  	                   abort_text, fn, line,
405  	                   pcmk__btoa(controld_globals.transition_graph->complete));
406  	
407  	    } else if(change == NULL) {
408  	        GString *local_path = pcmk__element_xpath(reason);
409  	        pcmk__assert(local_path != NULL);
410  	
411  	        do_crm_log(level, "Transition %d aborted by %s.%s: %s "
412  	                   QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
413  	                   controld_globals.transition_graph->id, reason->name,
414  	                   pcmk__xe_id(reason), abort_text, add[0], add[1], add[2], fn,
415  	                   line, (const char *) local_path->str,
416  	                   pcmk__btoa(controld_globals.transition_graph->complete));
417  	        g_string_free(local_path, TRUE);
418  	
419  	    } else {
420  	        const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION);
421  	        const char *path = pcmk__xe_get(change, PCMK_XA_PATH);
422  	
423  	        if(change == reason) {
424  	            if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
425  	                reason = reason->children;
426  	
427  	            } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
428  	                reason = pcmk__xe_first_child(reason, PCMK_XE_CHANGE_RESULT,
429  	                                              NULL, NULL);
430  	                if(reason) {
431  	                    reason = reason->children;
432  	                }
433  	            }
434  	            CRM_CHECK(reason != NULL, goto done);
435  	        }
436  	
437  	        if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
438  	            const char *shortpath = strrchr(path, '/');
439  	
440  	            do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
441  	                       QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
442  	                       controld_globals.transition_graph->id,
443  	                       (shortpath? (shortpath + 1) : path), abort_text,
444  	                       add[0], add[1], add[2], fn, line, path,
445  	                       pcmk__btoa(controld_globals.transition_graph->complete));
446  	
447  	        } else if (pcmk__xe_is(reason, PCMK_XE_NVPAIR)) {
448  	            do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
449  	                       QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
450  	                       controld_globals.transition_graph->id,
451  	                       pcmk__xe_get(reason, PCMK_XA_ID), op,
452  	                       pcmk__xe_get(reason, PCMK_XA_NAME),
453  	                       pcmk__xe_get(reason, PCMK_XA_VALUE),
454  	                       abort_text, add[0], add[1], add[2], fn, line, path,
455  	                       pcmk__btoa(controld_globals.transition_graph->complete));
456  	
457  	        } else if (pcmk__xe_is(reason, PCMK__XE_LRM_RSC_OP)) {
458  	            const char *magic = pcmk__xe_get(reason, PCMK__XA_TRANSITION_MAGIC);
459  	
460  	            do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
461  	                       QB_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
462  	                       controld_globals.transition_graph->id,
463  	                       pcmk__xe_get(reason, PCMK__XA_OPERATION_KEY), op,
464  	                       pcmk__xe_get(reason, PCMK__META_ON_NODE), abort_text,
465  	                       magic, add[0], add[1], add[2], fn, line,
466  	                       pcmk__btoa(controld_globals.transition_graph->complete));
467  	
468  	        } else if (pcmk__str_any_of((const char *) reason->name,
469  	                   PCMK__XE_NODE_STATE, PCMK_XE_NODE, NULL)) {
470  	            const char *uname = pcmk__node_name_from_uuid(pcmk__xe_id(reason));
471  	
472  	            do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
473  	                       QB_XS " cib=%d.%d.%d source=%s:%d complete=%s",
474  	                       controld_globals.transition_graph->id,
475  	                       reason->name, op, pcmk__s(uname, pcmk__xe_id(reason)),
476  	                       abort_text, add[0], add[1], add[2], fn, line,
477  	                       pcmk__btoa(controld_globals.transition_graph->complete));
478  	
479  	        } else {
480  	            const char *id = pcmk__xe_id(reason);
481  	
482  	            do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
483  	                       QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
484  	                       controld_globals.transition_graph->id,
485  	                       reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
486  	                       abort_text, add[0], add[1], add[2], fn, line, path,
487  	                       pcmk__btoa(controld_globals.transition_graph->complete));
488  	        }
489  	    }
490  	
491  	done:
492  	    if (controld_globals.transition_graph->complete) {
493  	        if (controld_get_period_transition_timer() > 0) {
494  	            controld_stop_transition_timer();
495  	            controld_start_transition_timer();
496  	        } else {
497  	            controld_fsa_append(C_FSA_INTERNAL, I_PE_CALC, NULL);
498  	        }
499  	        return;
500  	    }
501  	
502  	    trigger_graph();
503  	}
504