1    	/*
2    	 * Copyright 2004-2023 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	#include <crm/crm.h>
12   	#include <crm/msg_xml.h>
13   	#include <crm/common/xml.h>
14   	
15   	#include <pacemaker-controld.h>
16   	
17   	//! Triggers transition graph processing
18   	static crm_trigger_t *transition_trigger = NULL;
19   	
20   	static GHashTable *node_pending_timers = NULL;
21   	
22   	gboolean
23   	stop_te_timer(pcmk__graph_action_t *action)
24   	{
25   	    if (action == NULL) {
26   	        return FALSE;
27   	    }
28   	    if (action->timer != 0) {
29   	        crm_trace("Stopping action timer");
30   	        g_source_remove(action->timer);
31   	        action->timer = 0;
32   	    } else {
33   	        crm_trace("Action timer was already stopped");
34   	        return FALSE;
35   	    }
36   	    return TRUE;
37   	}
38   	
39   	static gboolean
40   	te_graph_trigger(gpointer user_data)
41   	{
42   	    if (controld_globals.transition_graph == NULL) {
43   	        crm_debug("Nothing to do");
44   	        return TRUE;
45   	    }
46   	
47   	    crm_trace("Invoking graph %d in state %s",
48   	              controld_globals.transition_graph->id,
49   	              fsa_state2string(controld_globals.fsa_state));
50   	
51   	    switch (controld_globals.fsa_state) {
52   	        case S_STARTING:
53   	        case S_PENDING:
54   	        case S_NOT_DC:
55   	        case S_HALT:
56   	        case S_ILLEGAL:
57   	        case S_STOPPING:
58   	        case S_TERMINATE:
59   	            return TRUE;
60   	        default:
61   	            break;
62   	    }
63   	
64   	    if (!controld_globals.transition_graph->complete) {
65   	        enum pcmk__graph_status graph_rc;
66   	        int orig_limit = controld_globals.transition_graph->batch_limit;
67   	        int throttled_limit = throttle_get_total_job_limit(orig_limit);
68   	
69   	        controld_globals.transition_graph->batch_limit = throttled_limit;
70   	        graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
71   	        controld_globals.transition_graph->batch_limit = orig_limit;
72   	
73   	        if (graph_rc == pcmk__graph_active) {
74   	            crm_trace("Transition not yet complete");
75   	            return TRUE;
76   	
77   	        } else if (graph_rc == pcmk__graph_pending) {
78   	            crm_trace("Transition not yet complete - no actions fired");
79   	            return TRUE;
80   	        }
81   	
82   	        if (graph_rc != pcmk__graph_complete) {
83   	            crm_warn("Transition failed: %s",
84   	                     pcmk__graph_status2text(graph_rc));
85   	            pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
86   	        }
87   	    }
88   	
89   	    crm_debug("Transition %d is now complete",
90   	              controld_globals.transition_graph->id);
91   	    controld_globals.transition_graph->complete = true;
92   	    notify_crmd(controld_globals.transition_graph);
93   	
94   	    return TRUE;
95   	}
96   	
97   	/*!
98   	 * \internal
99   	 * \brief Initialize transition trigger
100  	 */
101  	void
102  	controld_init_transition_trigger(void)
103  	{
104  	    transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
105  	                                              NULL);
106  	}
107  	
108  	/*!
109  	 * \internal
110  	 * \brief Destroy transition trigger
111  	 */
112  	void
113  	controld_destroy_transition_trigger(void)
114  	{
115  	    mainloop_destroy_trigger(transition_trigger);
116  	    transition_trigger = NULL;
117  	}
118  	
119  	void
120  	controld_trigger_graph_as(const char *fn, int line)
121  	{
122  	    crm_trace("%s:%d - Triggered graph processing", fn, line);
123  	    mainloop_set_trigger(transition_trigger);
124  	}
125  	
/* State for a delayed transition abort.
 *
 * The single file-level instance (abort_timer) backs abort_after_delay(); the
 * same structure is also allocated per node as the value type of the
 * node_pending_timers table.
 */
static struct abort_timer_s {
    bool aborted;                   // Set by abort_transition_graph(); a popped timer skips its abort if set
    guint id;                       // GLib timeout source ID, or 0 if no timer is scheduled
    int priority;                   // Abort priority passed to abort_transition() when the timer pops
    enum pcmk__graph_next action;   // Abort action passed to abort_transition() when the timer pops
    const char *text;               // Abort reason; the pointer is stored, not copied
} abort_timer = { 0, };
133  	
134  	static gboolean
135  	abort_timer_popped(gpointer data)
136  	{
137  	    struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
138  	
139  	    if (AM_I_DC && (abort_timer->aborted == FALSE)) {
140  	        abort_transition(abort_timer->priority, abort_timer->action,
141  	                         abort_timer->text, NULL);
142  	    }
143  	    abort_timer->id = 0;
144  	    return FALSE; // do not immediately reschedule timer
145  	}
146  	
147  	/*!
148  	 * \internal
149  	 * \brief Abort transition after delay, if not already aborted in that time
150  	 *
151  	 * \param[in] abort_text  Must be literal string
152  	 */
153  	void
154  	abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
155  	                  const char *abort_text, guint delay_ms)
156  	{
157  	    if (abort_timer.id) {
158  	        // Timer already in progress, stop and reschedule
159  	        g_source_remove(abort_timer.id);
160  	    }
161  	    abort_timer.aborted = FALSE;
162  	    abort_timer.priority = abort_priority;
163  	    abort_timer.action = abort_action;
164  	    abort_timer.text = abort_text;
165  	    abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, &abort_timer);
166  	}
167  	
168  	static void
169  	free_node_pending_timer(gpointer data)
170  	{
171  	    struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
172  	
173  	    if (node_pending_timer->id != 0) {
174  	        g_source_remove(node_pending_timer->id);
175  	        node_pending_timer->id = 0;
176  	    }
177  	
178  	    free(node_pending_timer);
179  	}
180  	
181  	static gboolean
182  	node_pending_timer_popped(gpointer key)
183  	{
184  	    struct abort_timer_s *node_pending_timer = NULL;
185  	
186  	    if (node_pending_timers == NULL) {
187  	        return FALSE;
188  	    }
189  	
190  	    node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
191  	    if (node_pending_timer == NULL) {
192  	        return FALSE;
193  	    }
194  	
195  	    crm_warn("Node with id '%s' pending timed out (%us) on joining the process "
196  	             "group",
197  	             (const char *) key, controld_globals.node_pending_timeout);
198  	
199  	    if (controld_globals.node_pending_timeout > 0) {
200  	        abort_timer_popped(node_pending_timer);
201  	    }
202  	
203  	    g_hash_table_remove(node_pending_timers, key);
204  	
205  	    return FALSE; // do not reschedule timer
206  	}
207  	
208  	static void
209  	init_node_pending_timer(const crm_node_t *node, guint timeout)
210  	{
211  	    struct abort_timer_s *node_pending_timer = NULL;
212  	    char *key = NULL;
213  	
214  	    if (node->uuid == NULL) {
215  	        return;
216  	    }
217  	
218  	    if (node_pending_timers == NULL) {
219  	        node_pending_timers = pcmk__strikey_table(free,
220  	                                                  free_node_pending_timer);
221  	
222  	    // The timer is somehow already existing
223  	    } else if (g_hash_table_lookup(node_pending_timers, node->uuid) != NULL) {
224  	        return;
225  	    }
226  	
227  	    crm_notice("Waiting for pending %s with id '%s' to join the process "
228  	               "group (timeout=%us)",
229  	               node->uname ? node->uname : "node", node->uuid,
230  	               controld_globals.node_pending_timeout);
231  	
232  	    node_pending_timer = calloc(1, sizeof(struct abort_timer_s));
233  	    CRM_ASSERT(node_pending_timer != NULL);
234  	
235  	    node_pending_timer->aborted = FALSE;
236  	    node_pending_timer->priority = INFINITY;
237  	    node_pending_timer->action = pcmk__graph_restart;
238  	    node_pending_timer->text = "Node pending timed out";
239  	
240  	    key = strdup(node->uuid);
241  	    CRM_ASSERT(key != NULL);
242  	
243  	    g_hash_table_replace(node_pending_timers, key, node_pending_timer);
244  	
245  	    node_pending_timer->id = g_timeout_add_seconds(timeout,
246  	                                                   node_pending_timer_popped,
247  	                                                   key);
248  	    CRM_ASSERT(node_pending_timer->id != 0);
249  	}
250  	
251  	static void
252  	remove_node_pending_timer(const char *node_uuid)
253  	{
254  	    if (node_pending_timers == NULL) {
255  	        return;
256  	    }
257  	
258  	    g_hash_table_remove(node_pending_timers, node_uuid);
259  	}
260  	
261  	void
262  	controld_node_pending_timer(const crm_node_t *node)
263  	{
264  	    long long remaining_timeout = 0;
265  	
266  	    /* If the node is not an active cluster node, is leaving the cluster, or is
267  	     * already part of CPG, or node-pending-timeout is disabled, free any
268  	     * node pending timer for it.
269  	     */
270  	    if (pcmk_is_set(node->flags, crm_remote_node)
271  	        || (node->when_member <= 1) || (node->when_online > 0)
272  	        || (controld_globals.node_pending_timeout == 0)) {
273  	        remove_node_pending_timer(node->uuid);
274  	        return;
275  	    }
276  	
277  	    // Node is a cluster member but offline in CPG
278  	
279  	    remaining_timeout = node->when_member - time(NULL)
280  	                        + controld_globals.node_pending_timeout;
281  	
282  	    /* It already passed node pending timeout somehow.
283  	     * Free any node pending timer of it.
284  	     */
285  	    if (remaining_timeout <= 0) {
286  	        remove_node_pending_timer(node->uuid);
287  	        return;
288  	    }
289  	
290  	    init_node_pending_timer(node, remaining_timeout);
291  	}
292  	
293  	void
294  	controld_free_node_pending_timers(void)
295  	{
296  	    if (node_pending_timers == NULL) {
297  	        return;
298  	    }
299  	
300  	    g_hash_table_destroy(node_pending_timers);
301  	    node_pending_timers = NULL;
302  	}
303  	
304  	static const char *
305  	abort2text(enum pcmk__graph_next abort_action)
306  	{
307  	    switch (abort_action) {
308  	        case pcmk__graph_done:      return "done";
309  	        case pcmk__graph_wait:      return "stop";
310  	        case pcmk__graph_restart:   return "restart";
311  	        case pcmk__graph_shutdown:  return "shutdown";
312  	    }
313  	    return "unknown";
314  	}
315  	
316  	static bool
317  	update_abort_priority(pcmk__graph_t *graph, int priority,
318  	                      enum pcmk__graph_next action, const char *abort_reason)
319  	{
320  	    bool change = FALSE;
321  	
322  	    if (graph == NULL) {
323  	        return change;
324  	    }
325  	
326  	    if (graph->abort_priority < priority) {
327  	        crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
328  	        graph->abort_priority = priority;
329  	        if (graph->abort_reason != NULL) {
330  	            crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
331  	        }
332  	        graph->abort_reason = abort_reason;
333  	        change = TRUE;
334  	    }
335  	
336  	    if (graph->completion_action < action) {
337  	        crm_debug("Abort action %s superseded by %s: %s",
338  	                  abort2text(graph->completion_action), abort2text(action), abort_reason);
339  	        graph->completion_action = action;
340  	        change = TRUE;
341  	    }
342  	
343  	    return change;
344  	}
345  	
346  	void
347  	abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
348  	                       const char *abort_text, const xmlNode *reason,
349  	                       const char *fn, int line)
350  	{
351  	    int add[] = { 0, 0, 0 };
352  	    int del[] = { 0, 0, 0 };
353  	    int level = LOG_INFO;
354  	    const xmlNode *diff = NULL;
355  	    const xmlNode *change = NULL;
356  	
357  	    CRM_CHECK(controld_globals.transition_graph != NULL, return);
358  	
359  	    switch (controld_globals.fsa_state) {
360  	        case S_STARTING:
361  	        case S_PENDING:
362  	        case S_NOT_DC:
363  	        case S_HALT:
364  	        case S_ILLEGAL:
365  	        case S_STOPPING:
366  	        case S_TERMINATE:
367  	            crm_info("Abort %s suppressed: state=%s (%scomplete)",
368  	                     abort_text, fsa_state2string(controld_globals.fsa_state),
369  	                     (controld_globals.transition_graph->complete? "" : "in"));
370  	            return;
371  	        default:
372  	            break;
373  	    }
374  	
375  	    abort_timer.aborted = TRUE;
376  	    controld_expect_sched_reply(NULL);
377  	
378  	    if (!controld_globals.transition_graph->complete
379  	        && update_abort_priority(controld_globals.transition_graph,
380  	                                 abort_priority, abort_action,
381  	                                 abort_text)) {
382  	        level = LOG_NOTICE;
383  	    }
384  	
385  	    if (reason != NULL) {
386  	        const xmlNode *search = NULL;
387  	
388  	        for(search = reason; search; search = search->parent) {
389  	            if (pcmk__xe_is(search, XML_TAG_DIFF)) {
390  	                diff = search;
391  	                break;
392  	            }
393  	        }
394  	
395  	        if(diff) {
396  	            xml_patch_versions(diff, add, del);
397  	            for(search = reason; search; search = search->parent) {
398  	                if (pcmk__xe_is(search, XML_DIFF_CHANGE)) {
399  	                    change = search;
400  	                    break;
401  	                }
402  	            }
403  	        }
404  	    }
405  	
406  	    if (reason == NULL) {
407  	        do_crm_log(level,
408  	                   "Transition %d aborted: %s " CRM_XS " source=%s:%d "
409  	                   "complete=%s", controld_globals.transition_graph->id,
410  	                   abort_text, fn, line,
411  	                   pcmk__btoa(controld_globals.transition_graph->complete));
412  	
413  	    } else if(change == NULL) {
414  	        GString *local_path = pcmk__element_xpath(reason);
415  	        CRM_ASSERT(local_path != NULL);
416  	
417  	        do_crm_log(level, "Transition %d aborted by %s.%s: %s "
418  	                   CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
419  	                   controld_globals.transition_graph->id, reason->name,
420  	                   ID(reason), abort_text, add[0], add[1], add[2], fn, line,
421  	                   (const char *) local_path->str,
422  	                   pcmk__btoa(controld_globals.transition_graph->complete));
423  	        g_string_free(local_path, TRUE);
424  	
425  	    } else {
426  	        const char *op = crm_element_value(change, XML_DIFF_OP);
427  	        const char *path = crm_element_value(change, XML_DIFF_PATH);
428  	
429  	        if(change == reason) {
430  	            if(strcmp(op, "create") == 0) {
431  	                reason = reason->children;
432  	
433  	            } else if(strcmp(op, "modify") == 0) {
434  	                reason = first_named_child(reason, XML_DIFF_RESULT);
435  	                if(reason) {
436  	                    reason = reason->children;
437  	                }
438  	            }
439  	            CRM_CHECK(reason != NULL, goto done);
440  	        }
441  	
442  	        if(strcmp(op, "delete") == 0) {
(20) Event example_assign: Example 2: Assigning: "shortpath" = return value from "strrchr(path, 47)".
Also see events: [returned_null][dereference][example_assign][example_checked][example_checked][example_assign][example_checked][example_assign][example_checked][example_assign][example_checked]
443  	            const char *shortpath = strrchr(path, '/');
444  	
(21) Event example_checked: Example 2 (cont.): "shortpath" has its value checked in "shortpath".
Also see events: [returned_null][dereference][example_assign][example_checked][example_assign][example_assign][example_checked][example_assign][example_checked][example_assign][example_checked]
445  	            do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
446  	                       CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
447  	                       controld_globals.transition_graph->id,
448  	                       (shortpath? (shortpath + 1) : path), abort_text,
449  	                       add[0], add[1], add[2], fn, line, path,
450  	                       pcmk__btoa(controld_globals.transition_graph->complete));
451  	
452  	        } else if (pcmk__xe_is(reason, XML_CIB_TAG_NVPAIR)) {
453  	            do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
454  	                       CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
455  	                       controld_globals.transition_graph->id,
456  	                       crm_element_value(reason, XML_ATTR_ID), op,
457  	                       crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
458  	                       crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
459  	                       abort_text, add[0], add[1], add[2], fn, line, path,
460  	                       pcmk__btoa(controld_globals.transition_graph->complete));
461  	
462  	        } else if (pcmk__xe_is(reason, XML_LRM_TAG_RSC_OP)) {
463  	            const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
464  	
465  	            do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
466  	                       CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
467  	                       controld_globals.transition_graph->id,
468  	                       crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
469  	                       crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
470  	                       magic, add[0], add[1], add[2], fn, line,
471  	                       pcmk__btoa(controld_globals.transition_graph->complete));
472  	
473  	        } else if (pcmk__str_any_of((const char *) reason->name,
474  	                   XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) {
475  	            const char *uname = crm_peer_uname(ID(reason));
476  	
477  	            do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
478  	                       CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
479  	                       controld_globals.transition_graph->id,
480  	                       reason->name, op, pcmk__s(uname, ID(reason)),
481  	                       abort_text, add[0], add[1], add[2], fn, line,
482  	                       pcmk__btoa(controld_globals.transition_graph->complete));
483  	
484  	        } else {
485  	            const char *id = ID(reason);
486  	
487  	            do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
488  	                       CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
489  	                       controld_globals.transition_graph->id,
490  	                       reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
491  	                       abort_text, add[0], add[1], add[2], fn, line, path,
492  	                       pcmk__btoa(controld_globals.transition_graph->complete));
493  	        }
494  	    }
495  	
496  	done:
497  	    if (controld_globals.transition_graph->complete) {
498  	        if (controld_get_period_transition_timer() > 0) {
499  	            controld_stop_transition_timer();
500  	            controld_start_transition_timer();
501  	        } else {
502  	            register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
503  	        }
504  	        return;
505  	    }
506  	
507  	    trigger_graph();
508  	}
509