/*
 * Copyright 2004-2026 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#include <glib.h>
#include <libxml/tree.h>                // xmlNode
#include <libxml/xpath.h>               // xmlXPathObject, etc.

#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/xml.h>

#include <crm/common/util.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>

// A (parsed) resource action history entry
struct action_history {
    pcmk_resource_t *rsc;     // Resource that history is for
    pcmk_node_t *node;        // Node that history is for
    xmlNode *xml;             // History entry XML

    // Parsed from entry XML
    const char *id;           // XML ID of history entry
    const char *key;          // Operation key of action
    const char *task;         // Action name
    const char *exit_reason;  // Exit reason given for result
    guint interval_ms;        // Action interval
    int call_id;              // Call ID of action
    int expected_exit_status; // Expected exit status of action
    int exit_status;          // Actual exit status of action
    int execution_status;     // Execution status of action
};
/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
 * using pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
 * flag is stringified more readably in log messages.
 */
#define set_config_flag(scheduler, option, flag) do {                         \
        GHashTable *config_hash = (scheduler)->priv->options;                 \
        const char *scf_value = pcmk__cluster_option(config_hash, (option));  \
                                                                              \
        if (scf_value != NULL) {                                              \
            if (pcmk__is_true(scf_value)) {                                   \
                (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,   \
                                    LOG_TRACE, "Scheduler",                   \
                                    crm_system_name, (scheduler)->flags,      \
                                    (flag), #flag);                           \
            } else {                                                          \
                (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
                                    LOG_TRACE, "Scheduler",                   \
                                    crm_system_name, (scheduler)->flags,      \
                                    (flag), #flag);                           \
            }                                                                 \
        }                                                                     \
    } while(0)
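
/* Example use, as in unpack_config() below:
 * set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 *                 pcmk__sched_in_maintenance);
 */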

static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
                          xmlNode *xml_op, xmlNode **last_failure,
                          enum pcmk__on_fail *failed);
static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
                                           pcmk_node_t *this_node);
static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
                           bool overwrite, pcmk_scheduler_t *scheduler);
static void determine_online_status(const xmlNode *node_state,
                                    pcmk_node_t *this_node,
                                    pcmk_scheduler_t *scheduler);

static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
                            pcmk_scheduler_t *scheduler);

/*!
 * \internal
 * \brief Check whether a node is a dangling guest node
 *
 * \param[in] node  Node to check
 *
 * \return true if \p node had a Pacemaker Remote connection resource with a
 *         launcher that was removed from the CIB, otherwise false.
 */
static bool
is_dangling_guest_node(pcmk_node_t *node)
{
    return pcmk__is_pacemaker_remote_node(node)
           && (node->priv->remote != NULL)
           && (node->priv->remote->priv->launcher == NULL)
           && pcmk__is_set(node->priv->remote->flags,
                           pcmk__rsc_removed_launched);
}

/*!
 * \brief Schedule a fence action for a node
 *
 * \param[in,out] scheduler       Scheduler data
 * \param[in,out] node            Node to fence
 * \param[in]     reason          Text description of why fencing is needed
 * \param[in]     priority_delay  Whether to consider
 *                                \c PCMK_OPT_PRIORITY_FENCING_DELAY
 */
void
pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
              const char *reason, bool priority_delay)
{
    CRM_CHECK(node, return);

    if (pcmk__is_guest_or_bundle_node(node)) {
        // Fence a guest or bundle node by marking its launcher as failed
        pcmk_resource_t *rsc = node->priv->remote->priv->launcher;

        if (!pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
            if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
                pcmk__notice("Not fencing guest node %s (otherwise would "
                             "because %s): its guest resource %s is unmanaged",
                             pcmk__node_name(node), reason, rsc->id);
            } else {
                pcmk__sched_warn(scheduler,
                                 "Guest node %s will be fenced "
                                 "(by recovering its guest resource %s): %s",
                                 pcmk__node_name(node), rsc->id, reason);

                /* We don't mark the node as unclean because that would prevent
                 * the node from running resources. We want to allow it to run
                 * resources in this transition if the recovery succeeds.
                 */
                pcmk__set_node_flags(node, pcmk__node_remote_reset);
                pcmk__set_rsc_flags(rsc,
                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            }
        }

    } else if (is_dangling_guest_node(node)) {
        pcmk__info("Cleaning up dangling connection for guest node %s: fencing "
                   "was already done because %s, and guest resource no longer "
                   "exists",
                   pcmk__node_name(node), reason);
        pcmk__set_rsc_flags(node->priv->remote,
                            pcmk__rsc_failed|pcmk__rsc_stop_if_failed);

    } else if (pcmk__is_remote_node(node)) {
        pcmk_resource_t *rsc = node->priv->remote;

        if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
            pcmk__notice("Not fencing remote node %s (otherwise would because "
                         "%s): connection is unmanaged",
                         pcmk__node_name(node), reason);
        } else if (!pcmk__is_set(node->priv->flags, pcmk__node_remote_reset)) {
            pcmk__set_node_flags(node, pcmk__node_remote_reset);
            pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
                             pcmk__node_name(node),
                             pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
                             reason);
        }
        node->details->unclean = TRUE;
        // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
        pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);

    } else if (node->details->unclean) {
        const char *fenced_s = "also is unclean";

        if (pe_can_fence(scheduler, node)) {
            fenced_s = "would also be fenced";
        }
        pcmk__trace("Cluster node %s %s because %s",
                    pcmk__node_name(node), fenced_s, reason);

    } else {
        pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
                         pcmk__node_name(node),
                         pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
                         reason);
        node->details->unclean = TRUE;
        pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
    }
}

// @TODO xpaths can't handle templates, rules, or id-refs

// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR           \
    "[(@" PCMK_XA_NAME "='" PCMK_FENCING_PROVIDES "'"   \
    "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "   \
    "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"

// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
    "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
    "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR             \
    "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
    "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR

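/*!
 * \internal
 * \brief Set a scheduler flag if an XPath search of the input CIB has results
 *
 * \param[in]     flag       Scheduler flag to set
 * \param[in]     xpath      XPath query to run against the scheduler input
 * \param[in,out] scheduler  Scheduler data
 */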
static void
set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
{
    xmlXPathObject *result = NULL;

    if (!pcmk__is_set(scheduler->flags, flag)) {
        result = pcmk__xpath_search(scheduler->input->doc, xpath);
        if (pcmk__xpath_num_results(result) > 0) {
            pcmk__set_scheduler_flags(scheduler, flag);
        }
        xmlXPathFreeObject(result);
    }
}

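/*!
 * \internal
 * \brief Unpack cluster options from the CIB configuration section
 *
 * \param[in,out] config     Cluster configuration section of the CIB
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE
 */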
gboolean
unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
{
    const char *value = NULL;
    GHashTable *config_hash = pcmk__strkey_table(free, free);

    const pcmk_rule_input_t rule_input = {
        .now = scheduler->priv->now,
    };

    scheduler->priv->options = config_hash;

    pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET,
                               &rule_input, config_hash,
                               PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, scheduler);

    pcmk__validate_cluster_options(config_hash);

    set_config_flag(scheduler, PCMK__OPT_ENABLE_STARTUP_PROBES,
                    pcmk__sched_probe_resources);
    if (!pcmk__is_set(scheduler->flags, pcmk__sched_probe_resources)) {
        pcmk__warn_once(pcmk__wo_enable_startup_probes,
                        "Support for the " PCMK__OPT_ENABLE_STARTUP_PROBES " "
                        "cluster property is deprecated and will be removed "
                        "(and behave as true) in a future release. Use a "
                        "location constraint with "
                        PCMK_XA_RESOURCE_DISCOVERY "=" PCMK_VALUE_NEVER " "
                        "instead to disable probes where desired.");
    }

    value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
    if (pcmk__is_true(value)) {
        pcmk__info("Watchdog-based self-fencing will be performed via SBD if "
                   "fencing is required and " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT
                   " is nonzero");
        pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
    }

    /* Set certain flags via xpath here, so they can be used before the relevant
     * configuration sections are unpacked.
     */
    set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
                 scheduler);

    value = pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_TIMEOUT);
    pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));

    pcmk__debug("Default fencing action timeout: %s",
                pcmk__readable_interval(scheduler->priv->fence_timeout_ms));

    set_config_flag(scheduler, PCMK_OPT_FENCING_ENABLED,
                    pcmk__sched_fencing_enabled);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
        pcmk__debug("Fencing of failed nodes is enabled");
    } else {
        pcmk__debug("Fencing of failed nodes is disabled");
    }

    scheduler->priv->fence_action =
        pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_ACTION);
    pcmk__trace("Fencing will %s nodes", scheduler->priv->fence_action);

    set_config_flag(scheduler, PCMK__OPT_CONCURRENT_FENCING,
                    pcmk__sched_concurrent_fencing);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
        pcmk__debug("Concurrent fencing is enabled");

    } else {
        pcmk__warn_once(pcmk__wo_concurrent_fencing,
                        "Support for the " PCMK__OPT_CONCURRENT_FENCING " "
                        "cluster property is deprecated and will be removed "
                        "(and behave as true) in a future release.");
    }

    value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
    if (value) {
        guint *delay_ms = &(scheduler->priv->priority_fencing_ms);

        pcmk_parse_interval_spec(value, delay_ms);
        pcmk__trace("Priority fencing delay is %s",
                    pcmk__readable_interval(*delay_ms));
    }

    set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
                    pcmk__sched_stop_all);
    pcmk__debug("Stop all active resources: %s",
                pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));

    set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
                    pcmk__sched_symmetric_cluster);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
        pcmk__debug("Cluster is symmetric - resources can run anywhere by "
                    "default");
    }

    value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);

    if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
        scheduler->no_quorum_policy = pcmk_no_quorum_ignore;

    } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
        scheduler->no_quorum_policy = pcmk_no_quorum_freeze;

    } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
        scheduler->no_quorum_policy = pcmk_no_quorum_demote;

    } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
                                    PCMK_VALUE_FENCE_LEGACY, NULL)) {
        if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
            int do_panic = 0;

            pcmk__xe_get_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
                             &do_panic);
            if (do_panic
                || pcmk__is_set(scheduler->flags, pcmk__sched_quorate)) {
                scheduler->no_quorum_policy = pcmk_no_quorum_fence;
            } else {
                pcmk__notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to "
                             "'" PCMK_VALUE_STOP "': cluster has never had "
                             "quorum");
                scheduler->no_quorum_policy = pcmk_no_quorum_stop;
            }
        } else {
            pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
                             " to 'stop' because fencing is disabled");
            scheduler->no_quorum_policy = pcmk_no_quorum_stop;
        }

    } else {
        scheduler->no_quorum_policy = pcmk_no_quorum_stop;
    }

    switch (scheduler->no_quorum_policy) {
        case pcmk_no_quorum_freeze:
            pcmk__debug("On loss of quorum: Freeze resources that require "
                        "quorum");
            break;
        case pcmk_no_quorum_stop:
            pcmk__debug("On loss of quorum: Stop resources that require "
                        "quorum");
            break;
        case pcmk_no_quorum_demote:
            pcmk__debug("On loss of quorum: Demote promotable resources and "
                        "stop other resources");
            break;
        case pcmk_no_quorum_fence:
            pcmk__notice("On loss of quorum: Fence all remaining nodes");
            break;
        case pcmk_no_quorum_ignore:
            pcmk__notice("On loss of quorum: Ignore");
            break;
    }

    set_config_flag(scheduler, PCMK__OPT_STOP_REMOVED_RESOURCES,
                    pcmk__sched_stop_removed_resources);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
        pcmk__trace("Removed resources are stopped");
    } else {
        pcmk__warn_once(pcmk__wo_stop_removed_resources,
                        "Support for the " PCMK__OPT_STOP_REMOVED_RESOURCES " "
                        "cluster property is deprecated and will be removed "
                        "(and behave as true) in a future release.");
    }

    set_config_flag(scheduler, PCMK__OPT_CANCEL_REMOVED_ACTIONS,
                    pcmk__sched_cancel_removed_actions);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
        pcmk__trace("Removed resource actions are stopped");
    } else {
        pcmk__warn_once(pcmk__wo_cancel_removed_actions,
                        "Support for the " PCMK__OPT_CANCEL_REMOVED_ACTIONS " "
                        "cluster property is deprecated and will be removed "
                        "(and behave as true) in a future release.");
    }

    set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
                    pcmk__sched_in_maintenance);
    pcmk__trace("Maintenance mode: %s",
                pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));

    set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
                    pcmk__sched_start_failure_fatal);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
        pcmk__trace("Start failures are always fatal");
    } else {
        pcmk__trace("Start failures are handled by failcount");
    }

    if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
        set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
                        pcmk__sched_startup_fencing);
    }
    if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
        pcmk__trace("Unseen nodes will be fenced");
    } else {
        pcmk__warn_once(pcmk__wo_blind,
                        "Blind faith: not fencing unseen nodes");
    }

    pe__unpack_node_health_scores(scheduler);

    scheduler->priv->placement_strategy =
        pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
    pcmk__trace("Placement strategy: %s", scheduler->priv->placement_strategy);

    set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
                    pcmk__sched_shutdown_lock);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
        value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
        pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
        pcmk__trace("Resources will be locked to nodes that were cleanly "
                    "shut down (locks expire after %s)",
                    pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
    } else {
        pcmk__trace("Resources will not be locked to nodes that were cleanly "
                    "shut down");
    }

    value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
    pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
    if (scheduler->priv->node_pending_ms == 0U) {
        pcmk__trace("Do not fence pending nodes");
    } else {
        pcmk__trace("Fence pending nodes after %s",
                    pcmk__readable_interval(scheduler->priv->node_pending_ms));
    }

    set_config_flag(scheduler, PCMK_OPT_FENCE_REMOTE_WITHOUT_QUORUM,
                    pcmk__sched_fence_remote_no_quorum);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_fence_remote_no_quorum)) {
        pcmk__trace("Pacemaker Remote nodes may be fenced without quorum");

    } else {
        pcmk__trace("Pacemaker Remote nodes require quorum to be fenced");
    }

    return TRUE;
}

/*!
 * \internal
 * \brief Create a new node object in scheduler data
 *
 * \param[in]     id         ID of new node
 * \param[in]     uname      Name of new node
 * \param[in]     type       Type of new node
 * \param[in]     score      Score of new node
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Newly created node object
 * \note The returned object is part of the scheduler data and should not be
 *       freed separately.
 */
pcmk_node_t *
pe_create_node(const char *id, const char *uname, const char *type,
               int score, pcmk_scheduler_t *scheduler)
{
    enum pcmk__node_variant variant = pcmk__node_variant_cluster;
    pcmk_node_t *new_node = NULL;

    if (pcmk_find_node(scheduler, uname) != NULL) {
        pcmk__config_warn("More than one node entry has name '%s'", uname);
    }

    if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
                     pcmk__str_null_matches|pcmk__str_casei)) {
        variant = pcmk__node_variant_cluster;

    } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
        variant = pcmk__node_variant_remote;

    } else {
        pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
                         pcmk__s(uname, "without name"), type);
        return NULL;
    }

    new_node = calloc(1, sizeof(pcmk_node_t));
    if (new_node == NULL) {
        pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
                        uname);
        return NULL;
    }

    new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment));
    new_node->details = calloc(1, sizeof(struct pcmk__node_details));
    new_node->priv = calloc(1, sizeof(pcmk__node_private_t));
    if ((new_node->assign == NULL) || (new_node->details == NULL)
        || (new_node->priv == NULL)) {
        free(new_node->assign);
        free(new_node->details);
        free(new_node->priv);
        free(new_node);
        pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
                        uname);
        return NULL;
    }

    pcmk__trace("Creating node for entry %s/%s", uname, id);
    new_node->assign->score = score;
    new_node->priv->id = id;
    new_node->priv->name = uname;
    new_node->priv->flags = pcmk__node_probes_allowed;
    new_node->details->online = FALSE;
    new_node->details->shutdown = FALSE;
    new_node->details->running_rsc = NULL;
    new_node->priv->scheduler = scheduler;
    new_node->priv->variant = variant;
    new_node->priv->attrs = pcmk__strkey_table(free, free);
    new_node->priv->utilization = pcmk__strkey_table(free, free);
    new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);

    if (pcmk__is_pacemaker_remote_node(new_node)) {
        pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
        pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
    } else {
        pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
    }

    scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
                                            pe__cmp_node_name);
    return new_node;
}

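/*!
 * \internal
 * \brief Check a primitive's meta-attributes for a guest node definition
 *
 * If the given resource XML has a \c PCMK_META_REMOTE_NODE meta-attribute and
 * no connection resource exists yet for that node, add an implicit
 * ocf:pacemaker:remote primitive for the guest node's connection.
 *
 * \param[in,out] xml_obj  Resource XML to check
 * \param[in,out] parent   Where to add any created connection resource XML
 * \param[in,out] data     Scheduler data
 *
 * \return Guest node name if a connection resource was created, otherwise NULL
 */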
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent,
                       pcmk_scheduler_t *data)
{
    xmlNode *attr_set = NULL;
    xmlNode *attr = NULL;

    const char *container_id = pcmk__xe_id(xml_obj);
    const char *remote_name = NULL;
    const char *remote_server = NULL;
    const char *remote_port = NULL;
    const char *connect_timeout = "60s";
    const char *remote_allow_migrate = NULL;
    const char *is_managed = NULL;

    // @TODO This doesn't handle rules or id-ref
    for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
                                         NULL, NULL);
         attr_set != NULL;
         attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {

        for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
             attr != NULL; attr = pcmk__xe_next(attr, NULL)) {

            const char *value = pcmk__xe_get(attr, PCMK_XA_VALUE);
            const char *name = pcmk__xe_get(attr, PCMK_XA_NAME);

            if (name == NULL) { // Sanity
                continue;
            }

            if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
                remote_name = value;

            } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
                remote_server = value;

            } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
                remote_port = value;

            } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
                connect_timeout = value;

            } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
                remote_allow_migrate = value;

            } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
                is_managed = value;
            }
        }
    }

    if (remote_name == NULL) {
        return NULL;
    }

    if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
        return NULL;
    }

    pe_create_remote_xml(parent, remote_name, container_id,
                         remote_allow_migrate, is_managed,
                         connect_timeout, remote_server, remote_port);
    return remote_name;
}

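/*!
 * \internal
 * \brief Apply the startup-fencing setting to a newly created node
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in,out] new_node   Node to mark as provisionally clean or unclean
 */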
static void
handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
{
    if ((new_node->priv->variant == pcmk__node_variant_remote)
        && (new_node->priv->remote == NULL)) {
        /* Ignore fencing for remote nodes that don't have a connection resource
         * associated with them. This happens when remote node entries get left
         * in the nodes section after the connection resource is removed.
         */
        return;
    }

    if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
        // All nodes are unclean until we've seen their status entry
        new_node->details->unclean = TRUE;

    } else {
        // Blind faith ...
        new_node->details->unclean = FALSE;
    }
}

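/*!
 * \internal
 * \brief Unpack node configuration entries into scheduler data
 *
 * \param[in,out] xml_nodes  Nodes section of the CIB configuration
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE if successful, otherwise FALSE
 */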
gboolean
unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;
    pcmk_node_t *new_node = NULL;
    const char *id = NULL;
    const char *uname = NULL;
    const char *type = NULL;

    for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
         xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {

        int score = 0;
        int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);

        new_node = NULL;

        id = pcmk__xe_get(xml_obj, PCMK_XA_ID);
        uname = pcmk__xe_get(xml_obj, PCMK_XA_UNAME);
        type = pcmk__xe_get(xml_obj, PCMK_XA_TYPE);
        pcmk__trace("Processing node %s/%s", uname, id);

        if (id == NULL) {
            pcmk__config_err("Ignoring <" PCMK_XE_NODE
                             "> entry in configuration without id");
            continue;
        }
        if (rc != pcmk_rc_ok) {
            // Not possible with schema validation enabled
            pcmk__config_warn("Using 0 as score for node %s "
                              "because '%s' is not a valid score: %s",
                              pcmk__s(uname, "without name"),
                              pcmk__xe_get(xml_obj, PCMK_XA_SCORE),
                              pcmk_rc_str(rc));
        }
        new_node = pe_create_node(id, uname, type, score, scheduler);

        if (new_node == NULL) {
            return FALSE;
        }

        handle_startup_fencing(scheduler, new_node);

        add_node_attrs(xml_obj, new_node, FALSE, scheduler);

        pcmk__trace("Done with node %s", pcmk__xe_get(xml_obj, PCMK_XA_UNAME));
    }

    return TRUE;
}

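/*!
 * \internal
 * \brief Link a resource to its launcher per \c PCMK__META_CONTAINER (if any)
 *
 * \param[in,out] rsc        Resource to check (recursively, for collectives)
 * \param[in,out] scheduler  Scheduler data
 */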
static void
unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
    const char *launcher_id = NULL;

    if (rsc->priv->children != NULL) {
        g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
                       scheduler);
        return;
    }

    launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
    if ((launcher_id != NULL)
        && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
        pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
                                                     launcher_id);

        if (launcher != NULL) {
            rsc->priv->launcher = launcher;
            launcher->priv->launched =
                g_list_append(launcher->priv->launched, rsc);
            pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
                            rsc->id, launcher_id);
        } else {
            pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
                             rsc->id, launcher_id);
        }
    }
}

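/*!
 * \internal
 * \brief Create node objects for Pacemaker Remote and guest nodes
 *
 * \param[in,out] xml_resources  Resources section of the CIB configuration
 * \param[in,out] scheduler      Scheduler data
 *
 * \return TRUE
 */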
gboolean
unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;

    /* Create remote nodes and guest nodes from the resource configuration
     * before unpacking resources.
     */
    for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
         xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {

        const char *new_node_id = NULL;

        /* Check for remote nodes, which are defined by ocf:pacemaker:remote
         * primitives.
         */
        if (xml_contains_remote_node(xml_obj)) {
            new_node_id = pcmk__xe_id(xml_obj);
            /* The pcmk_find_node() check ensures we don't iterate over an
             * expanded node that has already been added to the node list
             */
            if (new_node_id
                && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
                pcmk__trace("Found remote node %s defined by resource %s",
                            new_node_id, pcmk__xe_id(xml_obj));
                pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
                               0, scheduler);
            }
            continue;
        }

        /* Check for guest nodes, which are defined by special meta-attributes
         * of a primitive of any type (for example, VirtualDomain or Xen).
         */
        if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
            /* This will add an ocf:pacemaker:remote primitive to the
             * configuration for the guest node's connection, to be unpacked
             * later.
             */
            new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
                                                 scheduler);
            if (new_node_id
                && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
                pcmk__trace("Found guest node %s in resource %s",
                            new_node_id, pcmk__xe_id(xml_obj));
                pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
                               0, scheduler);
            }
            continue;
        }

        /* Check for guest nodes inside a group. Clones are currently not
         * supported as guest nodes.
         */
        if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
            xmlNode *xml_obj2 = NULL;

            for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
                 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {

                new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
                                                     scheduler);

                if (new_node_id
                    && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
                    pcmk__trace("Found guest node %s in resource %s inside "
                                "group %s",
                                new_node_id, pcmk__xe_id(xml_obj2),
                                pcmk__xe_id(xml_obj));
                    pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
                                   0, scheduler);
                }
            }
        }
    }
    return TRUE;
}

/*!
 * \internal
 * \brief Link a remote connection resource to its Pacemaker Remote node
 *
 * Call this after all the nodes and resources have been unpacked, but before
 * the status section is read. A remote node's online status is reflected by
 * the state of its connection resource, so link the node to that resource for
 * easy access during the scheduler calculations.
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in,out] new_rsc    Resource to link (if a remote connection resource)
 */
static void
link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
{
    pcmk_node_t *remote_node = NULL;

    if (!pcmk__is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
        return;
    }

    if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
        /* Remote nodes and their connection resources are not linked in
         * quick location calculations
         */
        return;
    }

    remote_node = pcmk_find_node(scheduler, new_rsc->id);
    CRM_CHECK(remote_node != NULL, return);

    pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
                    new_rsc->id, pcmk__node_name(remote_node));
    remote_node->priv->remote = new_rsc;

    if (new_rsc->priv->launcher == NULL) {
        /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
         * the same as is done for cluster nodes.
         */
        handle_startup_fencing(scheduler, remote_node);

    } else {
        /* pe_create_node() marks the new node as "remote" or "cluster"; now
         * that we know the node is a guest node, update it correctly.
         */
        pcmk__insert_dup(remote_node->priv->attrs,
                         CRM_ATTR_KIND, "container");
    }
}


/*!
 * \internal
 * \brief Parse configuration XML for resource information
 *
 * \param[in]     xml_resources  Top of resource configuration XML
 * \param[in,out] scheduler      Scheduler data
 *
 * \return TRUE
 *
 * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
 *       be used when pe__unpack_resource() calls resource_location()
 */
gboolean
unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_obj = NULL;
    GList *gIter = NULL;

    scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);

    for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
         xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {

        pcmk_resource_t *new_rsc = NULL;
        const char *id = pcmk__xe_id(xml_obj);

        if (pcmk__str_empty(id)) {
            pcmk__config_err("Ignoring <%s> resource without ID",
                             xml_obj->name);
            continue;
        }

        if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
            if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
                                             NULL, NULL) == FALSE) {
                /* Record the template's ID so that we know it exists */
                pcmk__insert_dup(scheduler->priv->templates, id, NULL);
            }
            continue;
        }

        pcmk__trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);

        if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
                                scheduler) != pcmk_rc_ok) {

            pcmk__config_err("Ignoring <%s> resource '%s' because "
                             "configuration is invalid", xml_obj->name, id);
            continue;
        }

        scheduler->priv->resources = g_list_append(scheduler->priv->resources,
                                                   new_rsc);
        pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
    }

    for (gIter = scheduler->priv->resources;
         gIter != NULL; gIter = gIter->next) {

        pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;

        unpack_launcher(rsc, scheduler);
        link_rsc2remotenode(scheduler, rsc);
    }

    scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
                                             pe__cmp_rsc_priority);
    if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
        /* Ignore */

    } else if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
               && !pcmk__is_set(scheduler->flags, pcmk__sched_have_fencing)) {

        /* pcs's CI tests look for this specific error message. Confer with the
         * pcs team before changing it. If the dependency still exists, bump the
         * CRM_FEATURE_SET and inform the pcs maintainers.
         *
         * Also, ResyncCIB.errors_to_ignore() looks for this specific error
         * message as well.
         */
        pcmk__config_err("Resource start-up disabled since no fencing "
                         "resources have been defined. Either configure some "
                         "or disable fencing with the "
                         PCMK_OPT_FENCING_ENABLED " option. NOTE: Clusters "
                         "with shared data need fencing to ensure data "
                         "integrity.");
    }

    return TRUE;
}

/*!
 * \internal
 * \brief Validate the levels in a fencing topology
 *
 * \param[in] xml  \c PCMK_XE_FENCING_TOPOLOGY element
 */
void
pcmk__validate_fencing_topology(const xmlNode *xml)
{
    if (xml == NULL) {
        return;
    }

    CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);

    for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
                                                     NULL, NULL);
         level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {

        const char *id = pcmk__xe_id(level);
        int index = 0;

        if (pcmk__str_empty(id)) {
            pcmk__config_err("Ignoring fencing level without ID");
            continue;
        }

        if (pcmk__xe_get_int(level, PCMK_XA_INDEX, &index) != pcmk_rc_ok) {
            pcmk__config_err("Ignoring fencing level %s with invalid index",
                             id);
            continue;
        }

        if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
            pcmk__config_err("Ignoring fencing level %s with out-of-range "
                             "index %d",
                             id, index);
        }
    }
}

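/*!
 * \internal
 * \brief Unpack tag configuration entries into scheduler data
 *
 * \param[in,out] xml_tags   Tags section of the CIB configuration
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE
 */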
gboolean
unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
{
    xmlNode *xml_tag = NULL;

    scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);

    for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
         xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {

        xmlNode *xml_obj_ref = NULL;
        const char *tag_id = pcmk__xe_id(xml_tag);

        if (tag_id == NULL) {
            pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
                             (const char *) xml_tag->name);
            continue;
        }

        for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
                                                NULL, NULL);
             xml_obj_ref != NULL;
             xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {

            const char *obj_ref = pcmk__xe_id(xml_obj_ref);

            if (obj_ref == NULL) {
                pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
                                 xml_obj_ref->name, tag_id);
                continue;
            }

            pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
        }
    }

    return TRUE;
}

/*!
 * \internal
 * \brief Unpack a ticket state entry
 *
 * \param[in]     xml_ticket  XML ticket state to unpack
 * \param[in,out] userdata    Scheduler data
 *
 * \return pcmk_rc_ok (to always continue unpacking further entries)
 */
static int
unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
{
    pcmk_scheduler_t *scheduler = userdata;

    const char *ticket_id = NULL;
    const char *granted = NULL;
    const char *last_granted = NULL;
    const char *standby = NULL;
    xmlAttrPtr xIter = NULL;

    pcmk__ticket_t *ticket = NULL;

    ticket_id = pcmk__xe_id(xml_ticket);
    if (pcmk__str_empty(ticket_id)) {
        return pcmk_rc_ok;
    }

    pcmk__trace("Processing ticket state for %s", ticket_id);

    ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
                                 ticket_id);
    if (ticket == NULL) {
        ticket = ticket_new(ticket_id, scheduler);
        if (ticket == NULL) {
            return pcmk_rc_ok;
        }
    }

    for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
        const char *prop_name = (const char *) xIter->name;
        const char *prop_value = pcmk__xml_attr_value(xIter);

        if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
            continue;
        }
        pcmk__insert_dup(ticket->state, prop_name, prop_value);
    }

    granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
    if (pcmk__is_true(granted)) {
        pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
        pcmk__info("We have ticket '%s'", ticket->id);
    } else {
        pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
        pcmk__info("We do not have ticket '%s'", ticket->id);
    }

    last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
    if (last_granted) {
        long long last_granted_ll = 0LL;
        int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);

        if (rc != pcmk_rc_ok) {
            pcmk__warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
                       " value '%s' in state for ticket %s: %s",
                       last_granted_ll, last_granted, ticket->id,
                       pcmk_rc_str(rc));
        }
        ticket->last_granted = (time_t) last_granted_ll;
    }

    standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
    if (pcmk__is_true(standby)) {
        pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
        if (pcmk__is_set(ticket->flags, pcmk__ticket_granted)) {
            pcmk__info("Granted ticket '%s' is in standby-mode", ticket->id);
        }
    } else {
        pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
    }

    pcmk__trace("Done with ticket state for %s", ticket_id);

    return pcmk_rc_ok;
}

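/*!
 * \internal
 * \brief Unpack a Pacemaker Remote node's attributes from its node state
 *
 * This handles maintenance mode, standby, shutdown requests, and the
 * resource discovery setting for a Pacemaker Remote node.
 *
 * \param[in,out] this_node  Pacemaker Remote node whose state is being checked
 * \param[in]     state      CIB node state XML for \p this_node
 * \param[in,out] scheduler  Scheduler data
 */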
static void
unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
                           pcmk_scheduler_t *scheduler)
{
    const char *discovery = NULL;
    const xmlNode *attrs = NULL;
    pcmk_resource_t *rsc = NULL;
    int maint = 0;

    if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
        return;
    }

    if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
        return;
    }
    pcmk__trace("Processing Pacemaker Remote node %s",
                pcmk__node_name(this_node));

    pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_IN_MAINTENANCE),
                       &maint, 0);
    if (maint) {
        pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
    } else {
        pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
    }

    rsc = this_node->priv->remote;
    if (!pcmk__is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
        this_node->details->unclean = FALSE;
        pcmk__set_node_flags(this_node, pcmk__node_seen);
    }
    attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
                                 NULL);
    add_node_attrs(attrs, this_node, TRUE, scheduler);

    if (pe__shutdown_requested(this_node)) {
        pcmk__info("%s is shutting down", pcmk__node_name(this_node));
        this_node->details->shutdown = TRUE;
    }

    if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
                                      pcmk__rsc_node_current))) {
        pcmk__info("%s is in standby mode", pcmk__node_name(this_node));
        pcmk__set_node_flags(this_node, pcmk__node_standby);
    }

    if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE,
                                      NULL, pcmk__rsc_node_current))
        || ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed))) {
        pcmk__info("%s is in maintenance mode", pcmk__node_name(this_node));
        this_node->details->maintenance = TRUE;
    }

    discovery = pcmk__node_attr(this_node,
                                PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
                                NULL, pcmk__rsc_node_current);
    if ((discovery != NULL) && !pcmk__is_true(discovery)) {
        pcmk__warn_once(pcmk__wo_rdisc_enabled,
                        "Support for the "
                        PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
                        " node attribute is deprecated and will be removed"
                        " (and behave as 'true') in a future release.");

        if (pcmk__is_remote_node(this_node)
            && !pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
            pcmk__config_warn("Ignoring "
                              PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
                              " attribute on Pacemaker Remote node %s"
                              " because fencing is disabled",
                              pcmk__node_name(this_node));
        } else {
            /* This is either a remote node with fencing enabled, or a guest
             * node. We don't care whether fencing is enabled when fencing guest
             * nodes, because they are "fenced" by recovering their containing
             * resource.
             */
            pcmk__info("%s has resource discovery disabled",
                       pcmk__node_name(this_node));
            pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
        }
    }
}

/*!
 * \internal
 * \brief Unpack a cluster node's transient attributes
 *
 * \param[in]     state      CIB node state XML
 * \param[in,out] node       Cluster node whose attributes are being unpacked
 * \param[in,out] scheduler  Scheduler data
 */
static void
unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
                            pcmk_scheduler_t *scheduler)
{
    const char *discovery = NULL;
    const xmlNode *attrs = pcmk__xe_first_child(state,
                                                PCMK__XE_TRANSIENT_ATTRIBUTES,
                                                NULL, NULL);

    add_node_attrs(attrs, node, TRUE, scheduler);

    if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
                                      pcmk__rsc_node_current))) {
        pcmk__info("%s is in standby mode", pcmk__node_name(node));
        pcmk__set_node_flags(node, pcmk__node_standby);
    }

    if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
                                      pcmk__rsc_node_current))) {
        pcmk__info("%s is in maintenance mode", pcmk__node_name(node));
        node->details->maintenance = TRUE;
    }

    discovery = pcmk__node_attr(node,
                                PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
                                NULL, pcmk__rsc_node_current);
    if ((discovery != NULL) && !pcmk__is_true(discovery)) {
        pcmk__config_warn("Ignoring "
                          PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
                          " attribute for %s because disabling resource"
                          " discovery is not allowed for cluster nodes",
                          pcmk__node_name(node));
    }
}

/*!
 * \internal
 * \brief Unpack a node state entry (first pass)
 *
 * Unpack one node state entry from status. This unpacks information from the
 * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
 * the resource history inside it. Multiple passes through the status are needed
 * to fully unpack everything.
 *
 * \param[in]     state      CIB node state XML
 * \param[in,out] scheduler  Scheduler data
 */
static void
unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
{
    const char *id = NULL;
    const char *uname = NULL;
    pcmk_node_t *this_node = NULL;

    id = pcmk__xe_get(state, PCMK_XA_ID);
    if (id == NULL) {
        pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
                         PCMK_XA_ID);
        pcmk__log_xml_info(state, "missing-id");
        return;
    }

    uname = pcmk__xe_get(state, PCMK_XA_UNAME);
    if (uname == NULL) {
        /* If a joining peer makes the cluster acquire the quorum from Corosync
         * but has not joined the controller CPG membership yet, it's possible
         * that the created PCMK__XE_NODE_STATE entry doesn't have a
         * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
         * join CPG.
         */
        pcmk__trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
                    "without " PCMK_XA_UNAME,
                    id);
    }

    this_node = pe_find_node_any(scheduler->nodes, id, uname);
    if (this_node == NULL) {
        pcmk__notice("Ignoring recorded state for removed node with name %s "
                     "and " PCMK_XA_ID " %s",
                     pcmk__s(uname, "unknown"), id);
        return;
    }

    if (pcmk__is_pacemaker_remote_node(this_node)) {
        int remote_fenced = 0;

        /* We can't determine the online status of Pacemaker Remote nodes until
         * after all resource history has been unpacked. In this first pass, we
         * do need to mark whether the node has been fenced, as this plays a
         * role during unpacking cluster node resource state.
         */
        pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_FENCED),
                           &remote_fenced, 0);
        if (remote_fenced) {
            pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
        } else {
            pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
        }
        return;
    }

    unpack_transient_attributes(state, this_node, scheduler);

    /* Provisionally mark this cluster node as clean. We have at least seen it
     * in the current cluster's lifetime.
     */
    this_node->details->unclean = FALSE;
    pcmk__set_node_flags(this_node, pcmk__node_seen);

    pcmk__trace("Determining online status of cluster node %s (id %s)",
                pcmk__node_name(this_node), id);
    determine_online_status(state, this_node, scheduler);

    if (!pcmk__is_set(scheduler->flags, pcmk__sched_quorate)
        && this_node->details->online
        && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
        /* Everything else should flow from this automatically
         * (at least until the scheduler becomes able to migrate off
         * healthy resources)
         */
        pe_fence_node(scheduler, this_node, "cluster does not have quorum",
                      FALSE);
    }
}

/*!
 * \internal
 * \brief Unpack nodes' resource history as much as possible
 *
 * Unpack as many nodes' resource history as possible in one pass through the
 * status. We need to process Pacemaker Remote nodes' connections/containers
 * before unpacking their history; the connection/container history will be
 * in another node's history, so it might take multiple passes to unpack
 * everything.
 *
 * \param[in]     status     CIB XML status section
 * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
 *         or EAGAIN if more unpacking remains to be done)
 */
static int
unpack_node_history(const xmlNode *status, bool fence,
                    pcmk_scheduler_t *scheduler)
{
    int rc = pcmk_rc_ok;

    // Loop through all PCMK__XE_NODE_STATE entries in CIB status
    for (const xmlNode *state = pcmk__xe_first_child(status,
                                                     PCMK__XE_NODE_STATE, NULL,
                                                     NULL);
         state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {

        const char *id = pcmk__xe_id(state);
        const char *uname = pcmk__xe_get(state, PCMK_XA_UNAME);
        pcmk_node_t *this_node = NULL;

        if ((id == NULL) || (uname == NULL)) {
            // Warning already logged in first pass through status section
            pcmk__trace("Not unpacking resource history from malformed "
                        PCMK__XE_NODE_STATE " without id and/or uname");
            continue;
        }

        this_node = pe_find_node_any(scheduler->nodes, id, uname);
        if (this_node == NULL) {
            // Warning already logged in first pass through status section
            pcmk__trace("Not unpacking resource history for node %s because "
                        "no longer in configuration",
1350 	                        id);
1351 	            continue;
1352 	        }
1353 	
1354 	        if (pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1355 	            pcmk__trace("Not unpacking resource history for node %s because "
1356 	                        "already unpacked",
1357 	                        id);
1358 	            continue;
1359 	        }
1360 	
1361 	        if (fence) {
1362 	            // We're processing all remaining nodes
1363 	
1364 	        } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1365 	            /* We can unpack a guest node's history only after we've unpacked
1366 	             * other resource history to the point that we know that the node's
1367 	             * connection and containing resource are both up.
1368 	             */
1369 	            const pcmk_resource_t *remote = this_node->priv->remote;
1370 	            const pcmk_resource_t *launcher = remote->priv->launcher;
1371 	
1372 	            if ((remote->priv->orig_role != pcmk_role_started)
1373 	                || (launcher->priv->orig_role != pcmk_role_started)) {
1374 	                pcmk__trace("Not unpacking resource history for guest node %s "
1375 	                            "because launcher and connection are not known to "
1376 	                            "be up",
1377 	                            id);
1378 	                continue;
1379 	            }
1380 	
1381 	        } else if (pcmk__is_remote_node(this_node)) {
1382 	            /* We can unpack a remote node's history only after we've unpacked
1383 	             * other resource history to the point that we know that the node's
1384 	             * connection is up, with the exception of when shutdown locks are
1385 	             * in use.
1386 	             */
1387 	            pcmk_resource_t *rsc = this_node->priv->remote;
1388 	
1389 	            if ((rsc == NULL)
1390 	                || (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1391 	                    && (rsc->priv->orig_role != pcmk_role_started))) {
1392 	                pcmk__trace("Not unpacking resource history for remote node %s "
1393 	                            "because connection is not known to be up",
1394 	                            id);
1395 	                continue;
1396 	            }
1397 	
1398 	        /* If fencing and shutdown locks are disabled and we're not processing
1399 	         * unseen nodes, then we don't want to unpack offline nodes until online
1400 	         * nodes have been unpacked. This allows us to number active clone
1401 	         * instances first.
1402 	         */
1403 	        } else if (!pcmk__any_flags_set(scheduler->flags,
1404 	                                        pcmk__sched_fencing_enabled
1405 	                                        |pcmk__sched_shutdown_lock)
1406 	                   && !this_node->details->online) {
1407 	            pcmk__trace("Not unpacking resource history for offline "
1408 	                        "cluster node %s",
1409 	                        id);
1410 	            continue;
1411 	        }
1412 	
1413 	        if (pcmk__is_pacemaker_remote_node(this_node)) {
1414 	            determine_remote_online_status(scheduler, this_node);
1415 	            unpack_handle_remote_attrs(this_node, state, scheduler);
1416 	        }
1417 	
1418 	        pcmk__trace("Unpacking resource history for %snode %s",
1419 	                    (fence? "unseen " : ""), id);
1420 	
1421 	        pcmk__set_node_flags(this_node, pcmk__node_unpacked);
1422 	        unpack_node_lrm(this_node, state, scheduler);
1423 	
1424 	        rc = EAGAIN; // Other node histories might depend on this one
1425 	    }
1426 	    return rc;
1427 	}
1428 	
1429 	/* Unpack the CIB status section: remove nodes that are down or stopping, and
1430 	 * create positive rsc_to_node constraints between resources and the nodes
1431 	 * they are running on */
1432 	gboolean
1433 	unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1434 	{
1435 	    xmlNode *state = NULL;
1436 	
1437 	    pcmk__trace("Beginning unpack");
1438 	
1439 	    if (scheduler->priv->ticket_constraints == NULL) {
1440 	        scheduler->priv->ticket_constraints =
1441 	            pcmk__strkey_table(free, destroy_ticket);
1442 	    }
1443 	
1444 	    for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1445 	         state = pcmk__xe_next(state, NULL)) {
1446 	
1447 	        if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1448 	            pcmk__xe_foreach_child(state, PCMK__XE_TICKET_STATE,
1449 	                                   unpack_ticket_state, scheduler);
1450 	
1451 	        } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1452 	            unpack_node_state(state, scheduler);
1453 	        }
1454 	    }
1455 	
1456 	    while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1457 	        pcmk__trace("Another pass through node resource histories is needed");
1458 	    }
1459 	
1460 	    // Now catch any nodes we didn't see
1461 	    unpack_node_history(status,
1462 	                        pcmk__is_set(scheduler->flags,
1463 	                                     pcmk__sched_fencing_enabled),
1464 	                        scheduler);
1465 	
1466 	    /* Now that we know where resources are, we can schedule stops of containers
1467 	     * with failed bundle connections
1468 	     */
1469 	    if (scheduler->priv->stop_needed != NULL) {
1470 	        for (GList *item = scheduler->priv->stop_needed;
1471 	             item != NULL; item = item->next) {
1472 	
1473 	            pcmk_resource_t *container = item->data;
1474 	            pcmk_node_t *node = pcmk__current_node(container);
1475 	
1476 	            if (node) {
1477 	                stop_action(container, node, FALSE);
1478 	            }
1479 	        }
1480 	
1481 	        g_clear_pointer(&scheduler->priv->stop_needed, g_list_free);
1482 	    }
1483 	
1484 	    /* Now that we know status of all Pacemaker Remote connections and nodes,
1485 	     * we can stop connections for node shutdowns, and check the online status
1486 	     * of remote/guest nodes that didn't have any node history to unpack.
1487 	     */
1488 	    for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1489 	        pcmk_node_t *this_node = gIter->data;
1490 	
1491 	        if (!pcmk__is_pacemaker_remote_node(this_node)) {
1492 	            continue;
1493 	        }
1494 	        if (this_node->details->shutdown
1495 	            && (this_node->priv->remote != NULL)) {
1496 	            pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
1497 	                              "remote shutdown");
1498 	        }
1499 	        if (!pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1500 	            determine_remote_online_status(scheduler, this_node);
1501 	        }
1502 	    }
1503 	
1504 	    return TRUE;
1505 	}
1506 	
1507 	/*!
1508 	 * \internal
1509 	 * \brief Unpack node's time when it became a member at the cluster layer
1510 	 *
1511 	 * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
1512 	 * \param[in,out] scheduler   Scheduler data
1513 	 *
1514 	 * \return Epoch time when node became a cluster member
1515 	 *         (or scheduler effective time for legacy entries) if a member,
1516 	 *         0 if not a member, or -1 if no valid information available
1517 	 */
1518 	static long long
1519 	unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1520 	{
1521 	    const char *member_time = pcmk__xe_get(node_state, PCMK__XA_IN_CCM);
1522 	    bool is_member = false;
1523 	
1524 	    if (member_time == NULL) {
1525 	        return -1LL;
1526 	    }
1527 	
1528 	    if (pcmk__parse_bool(member_time, &is_member) != pcmk_rc_ok) {
1529 	        long long when_member = 0LL;
1530 	
1531 	        if ((pcmk__scan_ll(member_time, &when_member,
1532 	                           0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1533 	            pcmk__warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in "
1534 	                       PCMK__XE_NODE_STATE " entry",
1535 	                       member_time);
1536 	            return -1LL;
1537 	        }
1538 	        return when_member;
1539 	    }
1540 	
1541 	    /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1542 	     * recorded as a boolean for a DC < 2.1.7, or the node is pending shutdown
1543 	     * and has left the CPG, in which case it was set to 1 to avoid fencing for
1544 	     * PCMK_OPT_NODE_PENDING_TIMEOUT.
1545 	     *
1546 	     * We return the effective time for in_ccm=1 because what's important to
1547 	     * avoid fencing is that effective time minus this value is less than the
1548 	     * pending node timeout.
1549 	     */
1550 	    return is_member? (long long) pcmk__scheduler_epoch_time(scheduler) : 0LL;
1551 	}
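
/* A sketch of the mapping implemented above (values hypothetical):
 *
 *   in_ccm="1702419904"       -> 1702419904 (member since that time)
 *   in_ccm="true" (legacy)    -> scheduler effective time
 *   in_ccm="false" or "0"     -> 0  (not a member)
 *   attribute missing         -> -1 (no valid information)
 */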
1552 	
1553 	/*!
1554 	 * \internal
1555 	 * \brief Unpack node's time when it became online in process group
1556 	 *
1557 	 * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry
1558 	 *
1559 	 * \return Epoch time when node became online in process group (or 0 if not
1560 	 *         online, or 1 for legacy online entries)
1561 	 */
1562 	static long long
1563 	unpack_node_online(const xmlNode *node_state)
1564 	{
1565 	    const char *peer_time = pcmk__xe_get(node_state, PCMK_XA_CRMD);
1566 	
1567 	    // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1568 	    if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1569 	                     pcmk__str_casei|pcmk__str_null_matches)) {
1570 	        return 0LL;
1571 	
1572 	    } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1573 	        return 1LL;
1574 	
1575 	    } else {
1576 	        long long when_online = 0LL;
1577 	
1578 	        if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1579 	            || (when_online < 0)) {
1580 	            pcmk__warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1581 	                       PCMK__XE_NODE_STATE " entry, assuming offline",
1582 	                       peer_time);
1583 	            return 0LL;
1584 	        }
1585 	        return when_online;
1586 	    }
1587 	}
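
/* A sketch of the mapping implemented above (values hypothetical):
 *
 *   crmd="1702419909"         -> 1702419909 (online in CPG since that time)
 *   crmd="online" (legacy)    -> 1
 *   crmd="offline" or missing -> 0
 *   unrecognized value        -> 0 (warned, assumed offline)
 */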
1588 	
1589 	/*!
1590 	 * \internal
1591 	 * \brief Unpack node attribute for user-requested fencing
1592 	 *
1593 	 * \param[in] node        Node to check
1594 	 * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry in CIB status
1595 	 *
1596 	 * \return \c true if fencing has been requested for \p node, otherwise \c false
1597 	 */
1598 	static bool
1599 	unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1600 	{
1601 	    bool value_b = false;
1602 	    long long value_ll = 0LL;
1603 	    int rc = pcmk_rc_ok;
1604 	    const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1605 	                                          NULL, pcmk__rsc_node_current);
1606 	
1607 	    // Value may be boolean or an epoch time
1608 	    if ((value_s != NULL)
1609 	        && (pcmk__parse_bool(value_s, &value_b) == pcmk_rc_ok)) {
1610 	        return value_b;
1611 	    }
1612 	
1613 	    rc = pcmk__scan_ll(value_s, &value_ll, 0LL);
1614 	    if (rc == pcmk_rc_ok) {
1615 	        return (value_ll > 0);
1616 	    }
1617 	    pcmk__warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1618 	               " node attribute for %s: %s",
1619 	               value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1620 	    return false;
1621 	}
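
/* How the attribute value is interpreted above (values hypothetical):
 *
 *   terminate="true"       -> true  (fencing requested)
 *   terminate="1702419904" -> true  (any epoch time > 0)
 *   terminate="0"          -> false
 *   attribute unset        -> false
 */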
1622 	
1623 	static gboolean
1624 	determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1625 	                                   const xmlNode *node_state,
1626 	                                   pcmk_node_t *this_node)
1627 	{
1628 	    gboolean online = FALSE;
1629 	    const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
1630 	    const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1631 	    long long when_member = unpack_node_member(node_state, scheduler);
1632 	    long long when_online = unpack_node_online(node_state);
1633 	
1634 	    if (when_member <= 0) {
1635 	        pcmk__trace("Node %s is %sdown", pcmk__node_name(this_node),
1636 	                    ((when_member < 0)? "presumed " : ""));
1637 	
1638 	    } else if (when_online > 0) {
1639 	        if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1640 	            online = TRUE;
1641 	        } else {
1642 	            pcmk__debug("Node %s is not ready to run resources: %s",
1643 	                        pcmk__node_name(this_node), join);
1644 	        }
1645 	
1646 	    } else if (!pcmk__is_set(this_node->priv->flags,
1647 	                             pcmk__node_expected_up)) {
1648 	        pcmk__trace("Node %s controller is down: "
1649 	                    "member@%lld online@%lld join=%s expected=%s",
1650 	                    pcmk__node_name(this_node), when_member, when_online,
1651 	                    pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1652 	
1653 	    } else {
1654 	        /* mark it unclean */
1655 	        pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1656 	        pcmk__info("Node %s member@%lld online@%lld join=%s expected=%s",
1657 	                   pcmk__node_name(this_node), when_member, when_online,
1658 	                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1659 	    }
1660 	    return online;
1661 	}
1662 	
1663 	/*!
1664 	 * \internal
1665 	 * \brief Check whether a node has taken too long to join controller group
1666 	 *
1667 	 * \param[in,out] scheduler    Scheduler data
1668 	 * \param[in]     node         Node to check
1669 	 * \param[in]     when_member  Epoch time when node became a cluster member
1670 	 * \param[in]     when_online  Epoch time when node joined controller group
1671 	 *
1672 	 * \return true if node has been pending (on the way up) longer than
1673 	 *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1674 	 * \note This will also update the cluster's recheck time if appropriate.
1675 	 */
1676 	static inline bool
1677 	pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1678 	                 long long when_member, long long when_online)
1679 	{
1680 	    if ((scheduler->priv->node_pending_ms > 0U)
1681 	        && (when_member > 0) && (when_online <= 0)) {
1682 	        // There is a timeout on pending nodes, and node is pending
1683 	
1684 	        time_t timeout = when_member
1685 	                         + pcmk__timeout_ms2s(scheduler->priv->node_pending_ms);
1686 	
1687 	        if (pcmk__scheduler_epoch_time(node->priv->scheduler) >= timeout) {
1688 	            return true; // Node has timed out
1689 	        }
1690 	
1691 	        // Node is pending, but still has time
1692 	        pcmk__update_recheck_time(timeout, scheduler, "pending node timeout");
1693 	    }
1694 	    return false;
1695 	}
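
/* Worked example (hypothetical values): with PCMK_OPT_NODE_PENDING_TIMEOUT
 * set to 2 minutes (node_pending_ms == 120000) and when_member == 1000, a
 * node that has not joined the controller group (when_online <= 0) times out
 * once the effective time reaches 1120; until then, the cluster recheck time
 * is pulled forward to 1120 so the timeout is re-evaluated at that point.
 */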
1696 	
1697 	static bool
1698 	determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1699 	                                const xmlNode *node_state,
1700 	                                pcmk_node_t *this_node)
1701 	{
1702 	    bool termination_requested = unpack_node_terminate(this_node, node_state);
1703 	    const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
1704 	    const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1705 	    long long when_member = unpack_node_member(node_state, scheduler);
1706 	    long long when_online = unpack_node_online(node_state);
1707 	
1708 	/*
1709 	  - PCMK__XA_JOIN          ::= member|down|pending|banned
1710 	  - PCMK_XA_EXPECTED       ::= member|down
1711 	
1712 	  @COMPAT with entries recorded for DCs < 2.1.7
1713 	  - PCMK__XA_IN_CCM        ::= true|false
1714 	  - PCMK_XA_CRMD           ::= online|offline
1715 	
1716 	  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1717 	  - PCMK__XA_IN_CCM        ::= <timestamp>|0
1718 	  Time when the node became a cluster member. A value of 0 means the node is
1719 	  not a cluster member.
1720 	
1721 	  - PCMK_XA_CRMD           ::= <timestamp>|0
1722 	  Time when the peer became online in the CPG. A value of 0 means the peer is
1723 	  offline in the CPG.
1724 	*/
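
/* For example (hypothetical values): a healthy member would carry
 * in_ccm="1702419904" crmd="1702419909" join="member" expected="member",
 * while a cleanly stopped node would carry in_ccm="0" crmd="0" join="down"
 * expected="down".
 */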
1725 	
1726 	    pcmk__trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1727 	                pcmk__node_name(this_node), when_member, when_online,
1728 	                pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1729 	                (termination_requested? " (termination requested)" : ""));
1730 	
1731 	    if (this_node->details->shutdown) {
1732 	        pcmk__debug("%s is shutting down", pcmk__node_name(this_node));
1733 	
1734 	        /* Slightly different criteria since we can't shut down a dead peer */
1735 	        return (when_online > 0);
1736 	    }
1737 	
1738 	    if (when_member < 0) {
1739 	        pe_fence_node(scheduler, this_node,
1740 	                      "peer has not been seen by the cluster", FALSE);
1741 	        return false;
1742 	    }
1743 	
1744 	    if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1745 	        pe_fence_node(scheduler, this_node,
1746 	                      "peer failed Pacemaker membership criteria", FALSE);
1747 	
1748 	    } else if (termination_requested) {
1749 	        if ((when_member <= 0) && (when_online <= 0)
1750 	            && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1751 	            pcmk__info("%s was fenced as requested",
1752 	                       pcmk__node_name(this_node));
1753 	            return false;
1754 	        }
1755 	        pe_fence_node(scheduler, this_node, "fencing was requested", false);
1756 	
1757 	    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1758 	                            pcmk__str_null_matches)) {
1759 	
1760 	        if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1761 	            pe_fence_node(scheduler, this_node,
1762 	                          "peer pending timed out on joining the process group",
1763 	                          FALSE);
1764 	
1765 	        } else if ((when_member > 0) || (when_online > 0)) {
1766 	            pcmk__info("%s is not ready to run resources",
1767 	                       pcmk__node_name(this_node));
1768 	            pcmk__set_node_flags(this_node, pcmk__node_standby);
1769 	            this_node->details->pending = TRUE;
1770 	
1771 	        } else {
1772 	            pcmk__trace("%s is down or still coming up",
1773 	                        pcmk__node_name(this_node));
1774 	        }
1775 	
1776 	    } else if (when_member <= 0) {
1777 	        // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1778 	        pe_fence_node(scheduler, this_node,
1779 	                      "peer is no longer part of the cluster", TRUE);
1780 	
1781 	    } else if (when_online <= 0) {
1782 	        pe_fence_node(scheduler, this_node,
1783 	                      "peer process is no longer available", FALSE);
1784 	
1785 	        /* Everything is running at this point; now check join state */
1786 	
1787 	    } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1788 	        pcmk__info("%s is active", pcmk__node_name(this_node));
1789 	
1790 	    } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1791 	                                CRMD_JOINSTATE_DOWN, NULL)) {
1792 	        pcmk__info("%s is not ready to run resources",
1793 	                   pcmk__node_name(this_node));
1794 	        pcmk__set_node_flags(this_node, pcmk__node_standby);
1795 	        this_node->details->pending = TRUE;
1796 	
1797 	    } else {
1798 	        pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1799 	                      FALSE);
1800 	    }
1801 	
1802 	    return (when_member > 0);
1803 	}
1804 	
1805 	static void
1806 	determine_remote_online_status(pcmk_scheduler_t *scheduler,
1807 	                               pcmk_node_t *this_node)
1808 	{
1809 	    pcmk_resource_t *rsc = this_node->priv->remote;
1810 	    pcmk_resource_t *launcher = NULL;
1811 	    pcmk_node_t *host = NULL;
1812 	    const char *node_type = "Remote";
1813 	
1814 	    if (rsc == NULL) {
1815 	        /* This is a leftover node state entry for a former Pacemaker Remote
1816 	         * node whose connection resource was removed. Consider it offline.
1817 	         */
1818 	        pcmk__trace("Pacemaker Remote node %s is considered OFFLINE because "
1819 	                    "its connection resource has been removed from the CIB",
1820 	                    this_node->priv->id);
1821 	        this_node->details->online = FALSE;
1822 	        return;
1823 	    }
1824 	
1825 	    launcher = rsc->priv->launcher;
1826 	    if (launcher != NULL) {
1827 	        node_type = "Guest";
1828 	        if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1829 	            host = rsc->priv->active_nodes->data;
1830 	        }
1831 	    }
1832 	
1833 	    /* If the resource is currently started, mark it online. */
1834 	    if (rsc->priv->orig_role == pcmk_role_started) {
1835 	        this_node->details->online = TRUE;
1836 	    }
1837 	
1838 	    /* consider this node shutting down if transitioning start->stop */
1839 	    if ((rsc->priv->orig_role == pcmk_role_started)
1840 	        && (rsc->priv->next_role == pcmk_role_stopped)) {
1841 	
1842 	        pcmk__trace("%s node %s shutting down because connection resource is "
1843 	                    "stopping",
1844 	                    node_type, this_node->priv->id);
1845 	        this_node->details->shutdown = TRUE;
1846 	    }
1847 	
1848 	    /* Now check all the failure conditions. */
1849 	    if ((launcher != NULL) && pcmk__is_set(launcher->flags, pcmk__rsc_failed)) {
1850 	        pcmk__trace("Guest node %s UNCLEAN because guest resource failed",
1851 	                    this_node->priv->id);
1852 	        this_node->details->online = FALSE;
1853 	        pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1854 	
1855 	    } else if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
1856 	        pcmk__trace("%s node %s OFFLINE because connection resource failed",
1857 	                    node_type, this_node->priv->id);
1858 	        this_node->details->online = FALSE;
1859 	
1860 	    } else if ((rsc->priv->orig_role == pcmk_role_stopped)
1861 	               || ((launcher != NULL)
1862 	                   && (launcher->priv->orig_role == pcmk_role_stopped))) {
1863 	
1864 	        pcmk__trace("%s node %s OFFLINE because its resource is stopped",
1865 	                    node_type, this_node->priv->id);
1866 	        this_node->details->online = FALSE;
1867 	        pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);
1868 	
1869 	    } else if (host && (host->details->online == FALSE)
1870 	               && host->details->unclean) {
1871 	        pcmk__trace("Guest node %s UNCLEAN because host is unclean",
1872 	                    this_node->priv->id);
1873 	        this_node->details->online = FALSE;
1874 	        pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1875 	
1876 	    } else {
1877 	        pcmk__trace("%s node %s is %s",
1878 	                    node_type, this_node->priv->id,
1879 	                    (this_node->details->online? "ONLINE" : "OFFLINE"));
1880 	    }
1881 	}
1882 	
1883 	static void
1884 	determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1885 	                        pcmk_scheduler_t *scheduler)
1886 	{
1887 	    gboolean online = FALSE;
1888 	    const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1889 	
1890 	    CRM_CHECK(this_node != NULL, return);
1891 	
1892 	    this_node->details->shutdown = FALSE;
1893 	
1894 	    if (pe__shutdown_requested(this_node)) {
1895 	        this_node->details->shutdown = TRUE;
1896 	
1897 	    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1898 	        pcmk__set_node_flags(this_node, pcmk__node_expected_up);
1899 	    }
1900 	
1901 	    if (!pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1902 	        online = determine_online_status_no_fencing(scheduler, node_state,
1903 	                                                    this_node);
1904 	
1905 	    } else {
1906 	        online = determine_online_status_fencing(scheduler, node_state,
1907 	                                                 this_node);
1908 	    }
1909 	
1910 	    if (online) {
1911 	        this_node->details->online = TRUE;
1912 	
1913 	    } else {
1914 	        /* remove node from contention */
1915 	        this_node->assign->score = -PCMK_SCORE_INFINITY;
1916 	    }
1917 	
1918 	    if (online && this_node->details->shutdown) {
1919 	        /* don't run resources here */
1920 	        this_node->assign->score = -PCMK_SCORE_INFINITY;
1921 	    }
1922 	
1923 	    if (this_node->details->unclean) {
1924 	        pcmk__sched_warn(scheduler, "%s is unclean",
1925 	                         pcmk__node_name(this_node));
1926 	
1927 	    } else if (!this_node->details->online) {
1928 	        pcmk__trace("%s is offline", pcmk__node_name(this_node));
1929 	
1930 	    } else if (this_node->details->shutdown) {
1931 	        pcmk__info("%s is shutting down", pcmk__node_name(this_node));
1932 	
1933 	    } else if (this_node->details->pending) {
1934 	        pcmk__info("%s is pending", pcmk__node_name(this_node));
1935 	
1936 	    } else if (pcmk__is_set(this_node->priv->flags, pcmk__node_standby)) {
1937 	        pcmk__info("%s is in standby", pcmk__node_name(this_node));
1938 	
1939 	    } else if (this_node->details->maintenance) {
1940 	        pcmk__info("%s is in maintenance", pcmk__node_name(this_node));
1941 	
1942 	    } else {
1943 	        pcmk__info("%s is online", pcmk__node_name(this_node));
1944 	    }
1945 	}
1946 	
1947 	/*!
1948 	 * \internal
1949 	 * \brief Find the end of a resource's name, excluding any clone suffix
1950 	 *
1951 	 * \param[in] id  Resource ID to check
1952 	 *
1953 	 * \return Pointer to last character of resource's base name
1954 	 */
1955 	const char *
1956 	pe_base_name_end(const char *id)
1957 	{
1958 	    if (!pcmk__str_empty(id)) {
1959 	        const char *end = id + strlen(id) - 1;
1960 	
1961 	        for (const char *s = end; s > id; --s) {
1962 	            switch (*s) {
1963 	                case '0':
1964 	                case '1':
1965 	                case '2':
1966 	                case '3':
1967 	                case '4':
1968 	                case '5':
1969 	                case '6':
1970 	                case '7':
1971 	                case '8':
1972 	                case '9':
1973 	                    break;
1974 	                case ':':
1975 	                    return (s == end)? s : (s - 1);
1976 	                default:
1977 	                    return end;
1978 	            }
1979 	        }
1980 	        return end;
1981 	    }
1982 	    return NULL;
1983 	}
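
/* Illustrative results (assuming well-formed resource IDs):
 *
 *   pe_base_name_end("myclone:10") -> pointer to the final 'e' of "myclone"
 *   pe_base_name_end("rsc:0")      -> pointer to the 'c'
 *   pe_base_name_end("rsc1")       -> pointer to the '1' (no clone suffix)
 */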
1984 	
1985 	/*!
1986 	 * \internal
1987 	 * \brief Get a resource name excluding any clone suffix
1988 	 *
1989 	 * \param[in] last_rsc_id  Resource ID to check
1990 	 *
1991 	 * \return Pointer to newly allocated string with resource's base name
1992 	 * \note It is the caller's responsibility to free() the result.
1993 	 *       This asserts on error, so callers can assume result is not NULL.
1994 	 */
1995 	char *
1996 	clone_strip(const char *last_rsc_id)
1997 	{
1998 	    const char *end = pe_base_name_end(last_rsc_id);
1999 	    char *basename = NULL;
2000 	
2001 	    pcmk__assert(end != NULL);
2002 	    basename = strndup(last_rsc_id, end - last_rsc_id + 1);
2003 	    pcmk__assert(basename != NULL);
2004 	    return basename;
2005 	}
2006 	
2007 	/*!
2008 	 * \internal
2009 	 * \brief Get the name of the first instance of a cloned resource
2010 	 *
2011 	 * \param[in] last_rsc_id  Resource ID to check
2012 	 *
2013 	 * \return Pointer to newly allocated string with resource's base name plus :0
2014 	 * \note It is the caller's responsibility to free() the result.
2015 	 *       This asserts on error, so callers can assume result is not NULL.
2016 	 */
2017 	char *
2018 	clone_zero(const char *last_rsc_id)
2019 	{
2020 	    const char *end = pe_base_name_end(last_rsc_id);
2021 	    size_t base_name_len = 0;
2022 	    char *zero = NULL;
2023 	
2024 	    pcmk__assert(end != NULL);
2025 	    base_name_len = end - last_rsc_id + 1;
 	    zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
2026 	    memcpy(zero, last_rsc_id, base_name_len);
2027 	    zero[base_name_len] = ':';
2028 	    zero[base_name_len + 1] = '0';
2029 	    return zero;
2030 	}
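
/* Example usage of the two helpers above (IDs are illustrative):
 *
 *   char *base = clone_strip("myclone:10");  // -> "myclone"
 *   char *zero = clone_zero("myclone:10");   // -> "myclone:0"
 *
 *   // ... use base and zero ...
 *   free(base);
 *   free(zero);
 */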
2031 	
2032 	static pcmk_resource_t *
2033 	create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
2034 	                     pcmk_scheduler_t *scheduler)
2035 	{
2036 	    pcmk_resource_t *rsc = NULL;
2037 	    xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
2038 	
2039 	    pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
2040 	    pcmk__xe_set(xml_rsc, PCMK_XA_ID, rsc_id);
2041 	    pcmk__log_xml_debug(xml_rsc, "Removed resource");
2042 	
2043 	    if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2044 	        return NULL;
2045 	    }
2046 	
2047 	    if (xml_contains_remote_node(xml_rsc)) {
2048 	        pcmk_node_t *node;
2049 	
2050 	        pcmk__debug("Detected removed remote node %s", rsc_id);
2051 	        node = pcmk_find_node(scheduler, rsc_id);
2052 	        if (node == NULL) {
2053 	            node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2054 	                                  scheduler);
2055 	        }
2056 	        link_rsc2remotenode(scheduler, rsc);
2057 	
2058 	        if (node) {
2059 	            pcmk__trace("Setting node %s as shutting down due to removed "
2060 	                        "connection resource", rsc_id);
2061 	            node->details->shutdown = TRUE;
2062 	        }
2063 	    }
2064 	
2065 	    if (pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER)) {
2066 	        // This removed resource needs to be mapped to a launcher
2067 	        pcmk__trace("Launched resource %s was removed from the configuration",
2068 	                    rsc_id);
2069 	        pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
2070 	    }
2071 	    pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
2072 	    scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
2073 	    return rsc;
2074 	}
2075 	
2076 	/*!
2077 	 * \internal
2078 	 * \brief Create "removed" instance for anonymous clone resource history
2079 	 *
2080 	 * \param[in,out] parent     Clone resource that instance will be added to
2081 	 * \param[in]     rsc_id     Instance's resource ID
2082 	 * \param[in]     node       Where instance is active (for logging only)
2083 	 * \param[in,out] scheduler  Scheduler data
2084 	 *
2085 	 * \return Newly created "removed" instance of \p parent
2086 	 */
2087 	static pcmk_resource_t *
2088 	create_anonymous_removed_instance(pcmk_resource_t *parent, const char *rsc_id,
2089 	                                  const pcmk_node_t *node,
2090 	                                  pcmk_scheduler_t *scheduler)
2091 	{
2092 	    pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2093 	    pcmk_resource_t *instance = NULL;
2094 	
2095 	    // find_rsc() because we might be a cloned group
2096 	    instance = top->priv->fns->find_rsc(top, rsc_id, NULL,
2097 	                                        pcmk_rsc_match_clone_only);
2098 	
2099 	    pcmk__rsc_debug(parent, "Created \"removed\" instance %s for %s: %s on %s",
2100 	                    top->id, parent->id, rsc_id, pcmk__node_name(node));
2101 	    return instance;
2102 	}
2103 	
2104 	/*!
2105 	 * \internal
2106 	 * \brief Check a node for an instance of an anonymous clone
2107 	 *
2108 	 * Return a child instance of the specified anonymous clone, in order of
2109 	 * preference: (1) the instance running on the specified node, if any;
2110 	 * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
2111 	 * instances); (3) a newly created "removed" instance (that is,
2112 	 * \c PCMK_META_CLONE_MAX instances are already active).
2113 	 *
2114 	 * \param[in,out] scheduler  Scheduler data
2115 	 * \param[in]     node       Node on which to check for instance
2116 	 * \param[in,out] parent     Clone to check
2117 	 * \param[in]     rsc_id     Name of cloned resource in history (no instance)
2118 	 */
2119 	static pcmk_resource_t *
2120 	find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2121 	                     pcmk_resource_t *parent, const char *rsc_id)
2122 	{
2123 	    GList *rIter = NULL;
2124 	    pcmk_resource_t *rsc = NULL;
2125 	    pcmk_resource_t *inactive_instance = NULL;
2126 	    gboolean skip_inactive = FALSE;
2127 	
2128 	    pcmk__assert(pcmk__is_anonymous_clone(parent));
2129 	
2130 	    // Check for active (or partially active, for cloned groups) instance
2131 	    pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2132 	                    rsc_id, pcmk__node_name(node), parent->id);
2133 	
2134 	    for (rIter = parent->priv->children;
2135 	         (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
2136 	
2137 	        GList *locations = NULL;
2138 	        pcmk_resource_t *child = rIter->data;
2139 	
2140 	        /* Check whether this instance is already known to be active or pending
2141 	         * anywhere, at this stage of unpacking. Because this function is called
2142 	         * for a resource before the resource's individual operation history
2143 	         * entries are unpacked, locations will generally not contain the
2144 	         * desired node.
2145 	         *
2146 	         * However, there are three exceptions:
2147 	         * (1) when child is a cloned group and we have already unpacked the
2148 	         *     history of another member of the group on the same node;
2149 	         * (2) when we've already unpacked the history of another numbered
2150 	         *     instance on the same node (which can happen if
2151 	         *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2152 	         * (3) when we re-run calculations on the same scheduler data as part of
2153 	         *     a simulation.
2154 	         */
2155 	        child->priv->fns->location(child, &locations, pcmk__rsc_node_current
2156 	                                                      |pcmk__rsc_node_pending);
2157 	        if (locations) {
2158 	            /* We should never associate the same numbered anonymous clone
2159 	             * instance with multiple nodes, and clone instances can't migrate,
2160 	             * so there must be only one location, regardless of history.
2161 	             */
2162 	            CRM_LOG_ASSERT(locations->next == NULL);
2163 	
2164 	            if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2165 	                /* This child instance is active on the requested node, so check
2166 	                 * for a corresponding configured resource. We use find_rsc()
2167 	                 * instead of child because child may be a cloned group, and we
2168 	                 * need the particular member corresponding to rsc_id.
2169 	                 *
2170 	                 * If the history entry represents a removed instance, rsc will
2171 	                 * be NULL.
2172 	                 */
2173 	                rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
2174 	                                                  pcmk_rsc_match_clone_only);
2175 	                if (rsc) {
2176 	                    /* If there are multiple instance history entries for an
2177 	                     * anonymous clone in a single node's history (which can
2178 	                     * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2179 	                     * to false), we want to consider the instances beyond the
2180 	                     * first as removed, even if there are inactive instance
2181 	                     * numbers available.
2182 	                     */
2183 	                    if (rsc->priv->active_nodes != NULL) {
2184 	                        pcmk__notice("Active (now-)anonymous clone %s has "
2185 	                                     "multiple \"removed\" instance histories "
2186 	                                     "on %s",
2187 	                                     parent->id, pcmk__node_name(node));
2188 	                        skip_inactive = TRUE;
2189 	                        rsc = NULL;
2190 	                    } else {
2191 	                        pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2192 	                    }
2193 	                }
2194 	            }
2195 	            g_list_free(locations);
2196 	
2197 	        } else {
2198 	            pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2199 	            if (!skip_inactive && !inactive_instance
2200 	                && !pcmk__is_set(child->flags, pcmk__rsc_blocked)) {
2201 	                // Remember one inactive instance in case we don't find active
2202 	                inactive_instance =
2203 	                    parent->priv->fns->find_rsc(child, rsc_id, NULL,
2204 	                                                pcmk_rsc_match_clone_only);
2205 	
2206 	                /* ... but don't use it if it was already associated with a
2207 	                 * pending action on another node
2208 	                 */
2209 	                if (inactive_instance != NULL) {
2210 	                    const pcmk_node_t *pending_node = NULL;
2211 	
2212 	                    pending_node = inactive_instance->priv->pending_node;
2213 	                    if ((pending_node != NULL)
2214 	                        && !pcmk__same_node(pending_node, node)) {
2215 	                        inactive_instance = NULL;
2216 	                    }
2217 	                }
2218 	            }
2219 	        }
2220 	    }
2221 	
2222 	    if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2223 	        pcmk__rsc_trace(parent, "Resource %s, empty slot",
2224 	                        inactive_instance->id);
2225 	        rsc = inactive_instance;
2226 	    }
2227 	
2228 	    /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2229 	     * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2230 	     * don't want to consume a valid instance number for unclean nodes. Such
2231 	     * instances may appear to be active according to the history, but should be
2232 	     * considered inactive, so we can start an instance elsewhere. Treat such
2233 	     * instances as removed.
2234 	     *
2235 	     * An exception is instances running on guest nodes -- since guest node
2236 	     * "fencing" is actually just a resource stop, requires shouldn't apply.
2237 	     *
2238 	     * @TODO Ideally, we'd use an inactive instance number if it is not needed
2239 	     * for any clean instances. However, we don't know that at this point.
2240 	     */
2241 	    if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_needs_fencing)
2242 	        && (!node->details->online || node->details->unclean)
2243 	        && !pcmk__is_guest_or_bundle_node(node)
2244 	        && !pe__is_universal_clone(parent, scheduler)) {
2245 	
2246 	        rsc = NULL;
2247 	    }
2248 	
2249 	    if (rsc == NULL) {
2250 	        rsc = create_anonymous_removed_instance(parent, rsc_id, node,
2251 	                                                scheduler);
2252 	        pcmk__rsc_trace(parent, "Resource %s, removed", rsc->id);
2253 	    }
2254 	    return rsc;
2255 	}
2256 	
2257 	static pcmk_resource_t *
2258 	unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2259 	                     const char *rsc_id)
2260 	{
2261 	    pcmk_resource_t *rsc = NULL;
2262 	    pcmk_resource_t *parent = NULL;
2263 	
2264 	    pcmk__trace("looking for %s", rsc_id);
2265 	    rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2266 	
2267 	    if (rsc == NULL) {
2268 	        /* If we didn't find the resource by its name in the operation history,
2269 	         * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2270 	         * we create a single :0 "removed" instance to match against here.
2271 	         */
2272 	        char *clone0_id = clone_zero(rsc_id);
2273 	        pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
2274 	                                                   clone0_id);
2275 	
2276 	        if ((clone0 != NULL)
2277 	            && !pcmk__is_set(clone0->flags, pcmk__rsc_unique)) {
2278 	
2279 	            rsc = clone0;
2280 	            parent = uber_parent(clone0);
2281 	            pcmk__trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2282 	        } else {
2283 	            pcmk__trace("%s is not known as %s either (removed)", rsc_id,
2284 	                        clone0_id);
2285 	        }
2286 	        free(clone0_id);
2287 	
2288 	    } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
2289 	        pcmk__trace("Resource history for %s is considered removed "
2290 	                    "because it is no longer primitive", rsc_id);
2291 	        return NULL;
2292 	
2293 	    } else {
2294 	        parent = uber_parent(rsc);
2295 	    }
2296 	
2297 	    if (pcmk__is_anonymous_clone(parent)) {
2298 	
2299 	        if (pcmk__is_bundled(parent)) {
2300 	            rsc = pe__find_bundle_replica(parent->priv->parent, node);
2301 	        } else {
2302 	            char *base = clone_strip(rsc_id);
2303 	
2304 	            rsc = find_anonymous_clone(scheduler, node, parent, base);
2305 	            free(base);
2306 	            pcmk__assert(rsc != NULL);
2307 	        }
2308 	    }
2309 	
2310 	    if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2311 	        && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
2312 	
2313 	        const bool removed = pcmk__is_set(rsc->flags, pcmk__rsc_removed);
2314 	
2315 	        pcmk__str_update(&(rsc->priv->history_id), rsc_id);
2316 	        pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2317 	                        rsc_id, pcmk__node_name(node), rsc->id,
2318 	                        (removed? " (removed)" : ""));
2319 	    }
2320 	    return rsc;
2321 	}
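
/* For example (hypothetical IDs): a history entry recorded for "myclone" may
 * be matched to the anonymous clone instance object "myclone:0"; in that
 * case, the instance's history_id is set to "myclone" above so that the rest
 * of unpacking can keep correlating the recorded name with the instance.
 */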
2322 	
2323 	static pcmk_resource_t *
2324 	process_removed_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2325 	                         pcmk_scheduler_t *scheduler)
2326 	{
2327 	    pcmk_resource_t *rsc = NULL;
2328 	    const char *rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
2329 	
2330 	    pcmk__debug("Detected removed resource %s on %s", rsc_id,
2331 	                pcmk__node_name(node));
2332 	    rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2333 	    if (rsc == NULL) {
2334 	        return NULL;
2335 	    }
2336 	
2337 	    if (!pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
2338 	        pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2339 	
2340 	    } else {
2341 	        CRM_CHECK(rsc != NULL, return NULL);
2342 	        pcmk__rsc_trace(rsc, "Added \"removed\" resource %s", rsc->id);
2343 	        resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2344 	                          "__removed_do_not_run__", scheduler);
2345 	    }
2346 	    return rsc;
2347 	}
2348 	
2349 	static void
2350 	process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2351 	                  enum pcmk__on_fail on_fail)
2352 	{
2353 	    pcmk_node_t *tmpnode = NULL;
2354 	    char *reason = NULL;
2355 	    enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
2356 	    pcmk_scheduler_t *scheduler = NULL;
2357 	    bool known_active = false;
2358 	
2359 	    pcmk__assert(rsc != NULL);
2360 	    scheduler = rsc->priv->scheduler;
2361 	    known_active = (rsc->priv->orig_role > pcmk_role_stopped);
2362 	    pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2363 	                    rsc->id, pcmk_role_text(rsc->priv->orig_role),
2364 	                    pcmk__node_name(node), pcmk__on_fail_text(on_fail));
2365 	
2366 	    /* process current state */
2367 	    if (rsc->priv->orig_role != pcmk_role_unknown) {
2368 	        pcmk_resource_t *iter = rsc;
2369 	
2370 	        while (iter) {
2371 	            if (g_hash_table_lookup(iter->priv->probed_nodes,
2372 	                                    node->priv->id) == NULL) {
2373 	                pcmk_node_t *n = pe__copy_node(node);
2374 	
2375 	                pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
2376 	                                rsc->id,
2377 	                                pcmk__s(rsc->priv->history_id, "the same"),
2378 	                                pcmk__node_name(n));
2379 	                g_hash_table_insert(iter->priv->probed_nodes,
2380 	                                    (gpointer) n->priv->id, n);
2381 	            }
2382 	            if (pcmk__is_set(iter->flags, pcmk__rsc_unique)) {
2383 	                break;
2384 	            }
2385 	            iter = iter->priv->parent;
2386 	        }
2387 	    }
2388 	
2389 	    /* If a managed resource is believed to be running, but node is down ... */
2390 	    if (known_active && !node->details->online && !node->details->maintenance
2391 	        && pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
2392 	
2393 	        gboolean should_fence = FALSE;
2394 	
2395 	        /* If this is a guest node, fence it (regardless of whether fencing is
2396 	         * enabled, because guest node fencing is done by recovery of the
2397 	         * container resource rather than by the fencer). Mark the resource
2398 	         * we're processing as failed. When the guest comes back up, its
2399 	         * operation history in the CIB will be cleared, freeing the affected
2400 	         * resource to run again once we are sure we know its state.
2401 	         */
2402 	        if (pcmk__is_guest_or_bundle_node(node)) {
2403 	            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2404 	            should_fence = TRUE;
2405 	
2406 	        } else if (pcmk__is_set(scheduler->flags,
2407 	                                pcmk__sched_fencing_enabled)) {
2408 	            if (pcmk__is_remote_node(node)
2409 	                && (node->priv->remote != NULL)
2410 	                && !pcmk__is_set(node->priv->remote->flags,
2411 	                                 pcmk__rsc_failed)) {
2412 	
2413 	                /* Setting unseen means that fencing of the remote node will
2414 	                 * occur only if the connection resource is not going to start
2415 	                 * somewhere. This allows connection resources on a failed
2416 	                 * cluster node to move to another node without requiring the
2417 	                 * remote nodes to be fenced as well.
2418 	                 */
2419 	                pcmk__clear_node_flags(node, pcmk__node_seen);
2420 	                reason = pcmk__assert_asprintf("%s is active there (fencing "
2421 	                                               "will be revoked if remote "
2422 	                                               "connection can be "
2423 	                                               "re-established elsewhere)",
2424 	                                               rsc->id);
2425 	            }
2426 	            should_fence = TRUE;
2427 	        }
2428 	
2429 	        if (should_fence) {
2430 	            if (reason == NULL) {
2431 	                reason = pcmk__assert_asprintf("%s is thought to be active "
2432 	                                               "there",
2433 	                                               rsc->id);
2434 	            }
2435 	            pe_fence_node(scheduler, node, reason, FALSE);
2436 	        }
2437 	        free(reason);
2438 	    }
2439 	
2440 	    /* In order to calculate priority_fencing_delay correctly, save the
 	     * failure information and pass it to native_add_running(). */
2441 	    save_on_fail = on_fail;
2442 	
2443 	    if (node->details->unclean) {
2444 	        /* No extra processing needed
2445 	         * Also allows resources to be started again after a node is shot
2446 	         */
2447 	        on_fail = pcmk__on_fail_ignore;
2448 	    }
2449 	
2450 	    switch (on_fail) {
2451 	        case pcmk__on_fail_ignore:
2452 	            /* nothing to do */
2453 	            break;
2454 	
2455 	        case pcmk__on_fail_demote:
2456 	            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2457 	            demote_action(rsc, node, FALSE);
2458 	            break;
2459 	
2460 	        case pcmk__on_fail_fence_node:
2461 	            /* treat it as if it is still running
2462 	             * but also mark the node as unclean
2463 	             */
2464 	            reason = pcmk__assert_asprintf("%s failed there", rsc->id);
2465 	            pe_fence_node(scheduler, node, reason, FALSE);
2466 	            free(reason);
2467 	            break;
2468 	
2469 	        case pcmk__on_fail_standby_node:
2470 	            pcmk__set_node_flags(node,
2471 	                                 pcmk__node_standby|pcmk__node_fail_standby);
2472 	            break;
2473 	
2474 	        case pcmk__on_fail_block:
2475 	            /* Clearing pcmk__rsc_managed prevents any further
2476 	             * actions from being sent for the resource
2477 	             */
2478 	            pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2479 	            pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
2480 	            break;
2481 	
2482 	        case pcmk__on_fail_ban:
2483 	            /* make sure it comes up somewhere else
2484 	             * or not at all
2485 	             */
2486 	            resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2487 	                              "__action_migration_auto__", scheduler);
2488 	            break;
2489 	
2490 	        case pcmk__on_fail_stop:
2491 	            pe__set_next_role(rsc, pcmk_role_stopped,
2492 	                              PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2493 	            break;
2494 	
2495 	        case pcmk__on_fail_restart:
2496 	            if (known_active) {
2497 	                pcmk__set_rsc_flags(rsc,
2498 	                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2499 	                stop_action(rsc, node, FALSE);
2500 	            }
2501 	            break;
2502 	
2503 	        case pcmk__on_fail_restart_container:
2504 	            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2505 	            if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
2506 	                /* A bundle's remote connection can run on a different node than
2507 	                 * the bundle's container. We don't necessarily know where the
2508 	                 * container is running yet, so remember it and add a stop
2509 	                 * action for it later.
2510 	                 */
2511 	                scheduler->priv->stop_needed =
2512 	                    g_list_prepend(scheduler->priv->stop_needed,
2513 	                                   rsc->priv->launcher);
2514 	            } else if (rsc->priv->launcher != NULL) {
2515 	                stop_action(rsc->priv->launcher, node, FALSE);
2516 	            } else if (known_active) {
2517 	                stop_action(rsc, node, FALSE);
2518 	            }
2519 	            break;
2520 	
2521 	        case pcmk__on_fail_reset_remote:
2522 	            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2523 	            if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2524 	                tmpnode = NULL;
2525 	                if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
2526 	                    tmpnode = pcmk_find_node(scheduler, rsc->id);
2527 	                }
2528 	                if (pcmk__is_remote_node(tmpnode)
2529 	                    && !pcmk__is_set(tmpnode->priv->flags,
2530 	                                     pcmk__node_remote_fenced)) {
2531 	                    /* The remote connection resource failed in a way that
2532 	                     * should result in fencing the remote node.
2533 	                     */
2534 	                    pe_fence_node(scheduler, tmpnode,
2535 	                                  "remote connection is unrecoverable", FALSE);
2536 	                }
2537 	            }
2538 	
2539 	            /* Require the stop action regardless of whether fencing is occurring. */
2540 	            if (known_active) {
2541 	                stop_action(rsc, node, FALSE);
2542 	            }
2543 	
2544 	            /* If a reconnect delay is in use, keep the connection in the
2545 	             * "STOPPED" role until the failure is cleared by the delay timeout. */
2546 	            if (rsc->priv->remote_reconnect_ms > 0U) {
2547 	                pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2548 	            }
2549 	            break;
2550 	    }
2551 	
2552 	    /* Ensure a remote connection failure forces an unclean Pacemaker Remote
2553 	     * node to be fenced. By marking the node as seen, the failure will result
2554 	     * in a fencing operation regardless of whether we attempt to reconnect
2555 	     * in this transition.
2556 	     */
2557 	    if (pcmk__all_flags_set(rsc->flags,
2558 	                            pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
2559 	        tmpnode = pcmk_find_node(scheduler, rsc->id);
2560 	        if (tmpnode && tmpnode->details->unclean) {
2561 	            pcmk__set_node_flags(tmpnode, pcmk__node_seen);
2562 	        }
2563 	    }
2564 	
2565 	    if (known_active) {
2566 	        if (pcmk__is_set(rsc->flags, pcmk__rsc_removed)) {
2567 	            if (pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
2568 	                pcmk__notice("Removed resource %s is active on %s and will be "
2569 	                             "stopped when possible",
2570 	                             rsc->id, pcmk__node_name(node));
2571 	
2572 	            } else {
2573 	                pcmk__notice("Removed resource %s must be stopped manually on "
2574 	                             "%s because " PCMK__OPT_STOP_REMOVED_RESOURCES
2575 	                             " is set to false",
2576 	                             rsc->id, pcmk__node_name(node));
2577 	            }
2578 	        }
2579 	
2580 	        native_add_running(rsc, node, scheduler,
2581 	                           (save_on_fail != pcmk__on_fail_ignore));
2582 	        switch (on_fail) {
2583 	            case pcmk__on_fail_ignore:
2584 	                break;
2585 	            case pcmk__on_fail_demote:
2586 	            case pcmk__on_fail_block:
2587 	                pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2588 	                break;
2589 	            default:
2590 	                pcmk__set_rsc_flags(rsc,
2591 	                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2592 	                break;
2593 	        }
2594 	
2595 	    } else if ((rsc->priv->history_id != NULL)
2596 	               && (strchr(rsc->priv->history_id, ':') != NULL)) {
2597 	        /* @COMPAT This is for older (<1.1.8) status sections that included
2598 	         * instance numbers; otherwise, stopped instances are considered removed.
2599 	         *
2600 	         * @TODO We should be able to drop this, but some old regression tests
2601 	         * will need to be updated. Double-check that this is not still needed
2602 	         * for unique clones (which may have been later converted to anonymous).
2603 	         */
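        /* As an illustration (hypothetical name): an old status section might
         * record an anonymous clone instance's history under an ID such as
         * "myclone:2", where newer sections would simply use "myclone".
         */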
2604 	        pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
2605 	                        rsc->priv->history_id, rsc->id);
2606 	        g_clear_pointer(&rsc->priv->history_id, free);
2607 	
2608 	    } else {
2609 	        GList *possible_matches = pe__resource_actions(rsc, node,
2610 	                                                       PCMK_ACTION_STOP, FALSE);
2611 	        GList *gIter = possible_matches;
2612 	
2613 	        for (; gIter != NULL; gIter = gIter->next) {
2614 	            pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2615 	
2616 	            pcmk__set_action_flags(stop, pcmk__action_optional);
2617 	        }
2618 	
2619 	        g_list_free(possible_matches);
2620 	    }
2621 	
2622 	    /* A successful stop after migrate_to on the migration source doesn't mean
2623 	     * the partially migrated resource is stopped on the migration target.
2624 	     */
2625 	    if ((rsc->priv->orig_role == pcmk_role_stopped)
2626 	        && (rsc->priv->active_nodes != NULL)
2627 	        && (rsc->priv->partial_migration_target != NULL)
2628 	        && pcmk__same_node(rsc->priv->partial_migration_source, node)) {
2629 	
2630 	        rsc->priv->orig_role = pcmk_role_started;
2631 	    }
2632 	}
2633 	
2634 	/* create active recurring operations as optional */
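/* A minimal sketch of the kind of history entry handled here (attribute
 * values are hypothetical): a recurring monitor recorded as
 *
 *   <lrm_rsc_op id="myrsc_monitor_10000" operation="monitor"
 *               interval="10000" op-status="0" .../>
 *
 * is re-created below via custom_action() as an optional action, so the
 * scheduler knows the monitor is already active and need not be rescheduled.
 */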
2635 	static void
2636 	process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2637 	                  int start_index, int stop_index,
2638 	                  GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2639 	{
2640 	    int counter = -1;
2641 	    const char *task = NULL;
2642 	    const char *status = NULL;
2643 	    GList *gIter = sorted_op_list;
2644 	
2645 	    pcmk__assert(rsc != NULL);
2646 	    pcmk__rsc_trace(rsc, "%s: start index %d, stop index %d",
2647 	                    rsc->id, start_index, stop_index);
2648 	
2649 	    for (; gIter != NULL; gIter = gIter->next) {
2650 	        xmlNode *rsc_op = (xmlNode *) gIter->data;
2651 	
2652 	        guint interval_ms = 0;
2653 	        char *key = NULL;
2654 	        const char *id = pcmk__xe_id(rsc_op);
2655 	
2656 	        counter++;
2657 	
2658 	        if (node->details->online == FALSE) {
2659 	            pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2660 	                            rsc->id, pcmk__node_name(node));
2661 	            break;
2662 	
2663 	            /* Need to check if there's a monitor for role="Stopped" */
2664 	        } else if (start_index < stop_index && counter <= stop_index) {
2665 	            pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2666 	                            id, pcmk__node_name(node));
2667 	            continue;
2668 	
2669 	        } else if (counter < start_index) {
2670 	            pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2671 	                            id, pcmk__node_name(node), counter);
2672 	            continue;
2673 	        }
2674 	
2675 	        pcmk__xe_get_guint(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2676 	        if (interval_ms == 0) {
2677 	            pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2678 	                            id, pcmk__node_name(node));
2679 	            continue;
2680 	        }
2681 	
2682 	        status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
2683 	        if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2684 	            pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2685 	                            id, pcmk__node_name(node));
2686 	            continue;
2687 	        }
2688 	        task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
2689 	        /* create the action */
2690 	        key = pcmk__op_key(rsc->id, task, interval_ms);
2691 	        pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2692 	        custom_action(rsc, key, task, node, TRUE, scheduler);
2693 	    }
2694 	}
2695 	
2696 	void
2697 	calculate_active_ops(const GList *sorted_op_list, int *start_index,
2698 	                     int *stop_index)
2699 	{
2700 	    int counter = -1;
2701 	    int implied_monitor_start = -1;
2702 	    int implied_clone_start = -1;
2703 	    const char *task = NULL;
2704 	    const char *status = NULL;
2705 	
2706 	    *stop_index = -1;
2707 	    *start_index = -1;
2708 	
2709 	    for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2710 	        const xmlNode *rsc_op = (const xmlNode *) iter->data;
2711 	
2712 	        counter++;
2713 	
2714 	        task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
2715 	        status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
2716 	
2717 	        if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2718 	            && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2719 	            *stop_index = counter;
2720 	
2721 	        } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2722 	                                        PCMK_ACTION_MIGRATE_FROM, NULL)) {
2723 	            *start_index = counter;
2724 	
2725 	        } else if ((implied_monitor_start <= *stop_index)
2726 	                   && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2727 	                                   pcmk__str_casei)) {
2728 	            const char *rc = pcmk__xe_get(rsc_op, PCMK__XA_RC_CODE);
2729 	
2730 	            if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2731 	                implied_monitor_start = counter;
2732 	            }
2733 	        } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2734 	                                        PCMK_ACTION_DEMOTE, NULL)) {
2735 	            implied_clone_start = counter;
2736 	        }
2737 	    }
2738 	
2739 	    if (*start_index == -1) {
2740 	        if (implied_clone_start != -1) {
2741 	            *start_index = implied_clone_start;
2742 	        } else if (implied_monitor_start != -1) {
2743 	            *start_index = implied_monitor_start;
2744 	        }
2745 	    }
2746 	}
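
/* A worked example with a hypothetical call-ID-sorted history:
 *
 *   [0] start    (op-status 0)
 *   [1] monitor  (rc-code 0)
 *   [2] stop     (op-status 0)
 *   [3] start    (op-status 0)
 *
 * The loop above leaves *stop_index = 2 and *start_index = 3, so only
 * operations at or after the most recent start are treated as active.
 */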
2747 	
2748 	// If resource history entry has shutdown lock, remember lock node and time
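/* The lock is recorded as a seconds-since-epoch attribute on the resource's
 * history entry, along the lines of (hypothetical value):
 *
 *   <lrm_resource id="myrsc" ... shutdown-lock="1700000000">
 */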
2749 	static void
2750 	unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2751 	                     const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2752 	{
2753 	    time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
2754 	    time_t sched_time = 0;
2755 	    guint shutdown_lock_ms = scheduler->priv->shutdown_lock_ms;
2756 	
2757 	    pcmk__xe_get_time(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time);
2758 	    if (lock_time == 0) {
2759 	        return;
2760 	    }
2761 	
2762 	    sched_time = pcmk__scheduler_epoch_time(scheduler);
2763 	    if ((shutdown_lock_ms > 0U)
2764 	        && (sched_time > (lock_time + pcmk__timeout_ms2s(shutdown_lock_ms)))) {
2765 	
2766 	        pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2767 	                       rsc->id, pcmk__node_name(node));
2768 	        pe__clear_resource_history(rsc, node);
2769 	
2770 	    } else {
2771 	        rsc->priv->lock_node = node;
2772 	        rsc->priv->lock_time = lock_time;
2773 	    }
2774 	}
2775 	
2776 	/*!
2777 	 * \internal
2778 	 * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2779 	 *
2780 	 * \param[in,out] node       Node whose status is being unpacked
2781 	 * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2782 	 * \param[in,out] scheduler  Scheduler data
2783 	 *
2784 	 * \return Resource corresponding to the entry, or NULL if no operation history
2785 	 */
2786 	static pcmk_resource_t *
2787 	unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2788 	                    pcmk_scheduler_t *scheduler)
2789 	{
2790 	    GList *gIter = NULL;
2791 	    int stop_index = -1;
2792 	    int start_index = -1;
2793 	    enum rsc_role_e req_role = pcmk_role_unknown;
2794 	
2795 	    const char *rsc_id = pcmk__xe_id(lrm_resource);
2796 	
2797 	    pcmk_resource_t *rsc = NULL;
2798 	    GList *op_list = NULL;
2799 	    GList *sorted_op_list = NULL;
2800 	
2801 	    xmlNode *rsc_op = NULL;
2802 	    xmlNode *last_failure = NULL;
2803 	
2804 	    enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
2805 	    enum rsc_role_e saved_role = pcmk_role_unknown;
2806 	
2807 	    if (rsc_id == NULL) {
2808 	        pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2809 	                         " entry: No " PCMK_XA_ID);
2810 	        pcmk__log_xml_info(lrm_resource, "missing-id");
2811 	        return NULL;
2812 	    }
2813 	    pcmk__trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id,
2814 	                pcmk__node_name(node));
2815 	
2816 	    /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2817 	     * them
2818 	     */
2819 	    for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2820 	                                       NULL);
2821 	         rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
2822 	
2823 	        op_list = g_list_prepend(op_list, rsc_op);
2824 	    }
2825 	
2826 	    if (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2827 	        if (op_list == NULL) {
2828 	            // If there are no operations, there is nothing to do
2829 	            return NULL;
2830 	        }
2831 	    }
2832 	
2833 	    /* find the resource */
2834 	    rsc = unpack_find_resource(scheduler, node, rsc_id);
2835 	    if (rsc == NULL) {
2836 	        if (op_list == NULL) {
2837 	            // If there are no operations, there is nothing to do
2838 	            return NULL;
2839 	        } else {
2840 	            rsc = process_removed_resource(lrm_resource, node, scheduler);
2841 	        }
2842 	    }
2843 	    pcmk__assert(rsc != NULL);
2844 	
2845 	    // Check whether the resource is "shutdown-locked" to this node
2846 	    if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2847 	        unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2848 	    }
2849 	
2850 	    /* process operations */
2851 	    saved_role = rsc->priv->orig_role;
2852 	    rsc->priv->orig_role = pcmk_role_unknown;
2853 	    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2854 	
2855 	    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2856 	        xmlNode *rsc_op = (xmlNode *) gIter->data;
2857 	
2858 	        unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2859 	    }
2860 	
2861 	    /* create active recurring operations as optional */
2862 	    calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2863 	    process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2864 	                      scheduler);
2865 	
2866 	    /* no need to free the contents */
2867 	    g_list_free(sorted_op_list);
2868 	
2869 	    process_rsc_state(rsc, node, on_fail);
2870 	
2871 	    if (get_target_role(rsc, &req_role)) {
2872 	        if ((rsc->priv->next_role == pcmk_role_unknown)
2873 	            || (req_role < rsc->priv->next_role)) {
2874 	
2875 	            pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2876 	
2877 	        } else if (req_role > rsc->priv->next_role) {
2878 	            pcmk__rsc_info(rsc,
2879 	                           "%s: Not overwriting calculated next role %s"
2880 	                           " with requested next role %s",
2881 	                           rsc->id, pcmk_role_text(rsc->priv->next_role),
2882 	                           pcmk_role_text(req_role));
2883 	        }
2884 	    }
2885 	
2886 	    if (saved_role > rsc->priv->orig_role) {
2887 	        rsc->priv->orig_role = saved_role;
2888 	    }
2889 	
2890 	    return rsc;
2891 	}
2892 	
2893 	static void
2894 	handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
2895 	                                  pcmk_scheduler_t *scheduler)
2896 	{
2897 	    for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
2898 	                                                         PCMK__XE_LRM_RESOURCE,
2899 	                                                         NULL, NULL);
2900 	         rsc_entry != NULL;
2901 	         rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2902 	
2903 	        pcmk_resource_t *rsc;
2904 	        pcmk_resource_t *launcher = NULL;
2905 	        const char *rsc_id;
2906 	        const char *launcher_id = NULL;
2907 	
2908 	        launcher_id = pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER);
2909 	        rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
2910 	        if ((launcher_id == NULL) || (rsc_id == NULL)) {
2911 	            continue;
2912 	        }
2913 	
2914 	        launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
2915 	        if (launcher == NULL) {
2916 	            continue;
2917 	        }
2918 	
2919 	        rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2920 	        if ((rsc == NULL) || (rsc->priv->launcher != NULL)
2921 	            || !pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2922 	            continue;
2923 	        }
2924 	
2925 	        pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
2926 	                        rsc->id, launcher_id);
2927 	        rsc->priv->launcher = launcher;
2928 	        launcher->priv->launched = g_list_append(launcher->priv->launched,
2929 	                                                 rsc);
2930 	    }
2931 	}
2932 	
2933 	/*!
2934 	 * \internal
2935 	 * \brief Unpack one node's lrm status section
2936 	 *
2937 	 * \param[in,out] node       Node whose status is being unpacked
2938 	 * \param[in]     xml        CIB node state XML
2939 	 * \param[in,out] scheduler  Scheduler data
2940 	 */
2941 	static void
2942 	unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2943 	                pcmk_scheduler_t *scheduler)
2944 	{
2945 	    bool found_removed_launched_resource = false;
2946 	
2947 	    // Drill down to PCMK__XE_LRM_RESOURCES section
2948 	    xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2949 	    if (xml == NULL) {
2950 	        return;
2951 	    }
2952 	    xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2953 	    if (xml == NULL) {
2954 	        return;
2955 	    }
2956 	
2957 	    // Unpack each PCMK__XE_LRM_RESOURCE entry
2958 	    for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2959 	                                                         PCMK__XE_LRM_RESOURCE,
2960 	                                                         NULL, NULL);
2961 	         rsc_entry != NULL;
2962 	         rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2963 	
2964 	        pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2965 	
2966 	        if ((rsc != NULL)
2967 	            && pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2968 	            found_removed_launched_resource = true;
2969 	        }
2970 	    }
2971 	
2972 	    /* Now that all resource state has been unpacked for this node, map any
2973 	     * removed launched resources to their launchers.
2974 	     */
2975 	    if (found_removed_launched_resource) {
2976 	        handle_removed_launched_resources(xml, scheduler);
2977 	    }
2978 	}
2979 	
2980 	static void
2981 	set_active(pcmk_resource_t *rsc)
2982 	{
2983 	    const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2984 	
2985 	    if ((top != NULL) && pcmk__is_set(top->flags, pcmk__rsc_promotable)) {
2986 	        rsc->priv->orig_role = pcmk_role_unpromoted;
2987 	    } else {
2988 	        rsc->priv->orig_role = pcmk_role_started;
2989 	    }
2990 	}
2991 	
2992 	static void
2993 	set_node_score(gpointer key, gpointer value, gpointer user_data)
2994 	{
2995 	    pcmk_node_t *node = value;
2996 	    int *score = user_data;
2997 	
2998 	    node->assign->score = *score;
2999 	}
3000 	
3001 	#define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
3002 	                         "/" PCMK__XE_NODE_STATE
3003 	#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM             \
3004 	                               "/" PCMK__XE_LRM_RESOURCES   \
3005 	                               "/" PCMK__XE_LRM_RESOURCE
3006 	#define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
3007 	
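/* For reference, find_lrm_op() combines the macros above into a query along
 * the lines of (node, resource, and operation names are hypothetical):
 *
 *   /cib/status/node_state[@uname='node1']
 *       /lrm/lrm_resources/lrm_resource[@id='myrsc']
 *       /lrm_rsc_op[@operation='monitor']
 */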
3008 	static xmlNode *
3009 	find_lrm_op(const char *resource, const char *op, const char *node,
3010 	            const char *source, int target_rc, pcmk_scheduler_t *scheduler)
3011 	{
3012 	    GString *xpath = NULL;
3013 	    xmlNode *xml = NULL;
3014 	
3015 	    CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
3016 	              return NULL);
3017 	
3018 	    xpath = g_string_sized_new(256);
3019 	    pcmk__g_strcat(xpath,
3020 	                   XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
3021 	                   SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
3022 	                   SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
3023 	                   NULL);
3024 	
3025 	    /* Need to check against transition_magic too? */
3026 	    if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
3027 	        pcmk__g_strcat(xpath,
3028 	                       " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
3029 	                       NULL);
3030 	
3031 	    } else if ((source != NULL)
3032 	               && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
3033 	        pcmk__g_strcat(xpath,
3034 	                       " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
3035 	                       NULL);
3036 	    } else {
3037 	        g_string_append_c(xpath, ']');
3038 	    }
3039 	
3040 	    xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3041 	    g_string_free(xpath, TRUE);
3042 	
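    /* If a particular result was requested, treat anything else (including a
     * result whose execution status is not "done") as if no matching
     * operation had been found.
     */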
3043 	    if (xml && target_rc >= 0) {
3044 	        int rc = PCMK_OCF_UNKNOWN_ERROR;
3045 	        int status = PCMK_EXEC_ERROR;
3046 	
3047 	        pcmk__xe_get_int(xml, PCMK__XA_RC_CODE, &rc);
3048 	        pcmk__xe_get_int(xml, PCMK__XA_OP_STATUS, &status);
3049 	        if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
3050 	            return NULL;
3051 	        }
3052 	    }
3053 	    return xml;
3054 	}
3055 	
3056 	static xmlNode *
3057 	find_lrm_resource(const char *rsc_id, const char *node_name,
3058 	                  pcmk_scheduler_t *scheduler)
3059 	{
3060 	    GString *xpath = NULL;
3061 	    xmlNode *xml = NULL;
3062 	
3063 	    CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
3064 	
3065 	    xpath = g_string_sized_new(256);
3066 	    pcmk__g_strcat(xpath,
3067 	                   XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
3068 	                   SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
3069 	                   NULL);
3070 	
3071 	    xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3072 	
3073 	    g_string_free(xpath, TRUE);
3074 	    return xml;
3075 	}
3076 	
3077 	/*!
3078 	 * \internal
3079 	 * \brief Check whether a resource has no completed action history on a node
3080 	 *
3081 	 * \param[in,out] rsc        Resource to check
3082 	 * \param[in]     node_name  Node to check
3083 	 *
3084 	 * \return true if \p rsc is unknown on \p node_name, otherwise false
3085 	 */
3086 	static bool
3087 	unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3088 	{
3089 	    bool result = false;
3090 	    xmlXPathObject *search;
3091 	    char *xpath = NULL;
3092 	
3093 	    xpath = pcmk__assert_asprintf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3094 	                                  SUB_XPATH_LRM_RESOURCE
3095 	                                  "[@" PCMK_XA_ID "='%s']"
3096 	                                  SUB_XPATH_LRM_RSC_OP
3097 	                                  "[@" PCMK__XA_RC_CODE "!='%d']",
3098 	                                  node_name, rsc->id, PCMK_OCF_UNKNOWN);
3099 	
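    /* Any recorded result other than PCMK_OCF_UNKNOWN (used for pending
     * operations) counts as completed history, so the resource is unknown on
     * the node only if this search finds nothing.
     */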
3100 	    search = pcmk__xpath_search(rsc->priv->scheduler->input->doc, xpath);
3101 	    result = (pcmk__xpath_num_results(search) == 0);
3102 	    xmlXPathFreeObject(search);
3103 	    free(xpath);
3104 	    return result;
3105 	}
3106 	
3107 	/*!
3108 	 * \internal
3109 	 * \brief Check whether a probe/monitor indicating the resource was not running
3110 	 *        on a node happened after some event
3111 	 *
3112 	 * \param[in]     rsc_id     Resource being checked
3113 	 * \param[in]     node_name  Node being checked
3114 	 * \param[in]     xml_op     Event that monitor is being compared to
3115 	 * \param[in,out] scheduler  Scheduler data
3116 	 *
3117 	 * \return true if such a monitor happened after the event, false otherwise
3118 	 */
3119 	static bool
3120 	monitor_not_running_after(const char *rsc_id, const char *node_name,
3121 	                          const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3122 	{
3123 	    /* Any probe/monitor operation on the node indicating it was not running
3124 	     * there
3125 	     */
3126 	    xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3127 	                                   NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3128 	
3129 	    return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
3130 	}
3131 	
3132 	/*!
3133 	 * \internal
3134 	 * \brief Check whether any non-monitor operation on a node happened after some
3135 	 *        event
3136 	 *
3137 	 * \param[in]     rsc_id     Resource being checked
3138 	 * \param[in]     node_name  Node being checked
3139 	 * \param[in]     xml_op     Event that non-monitor is being compared to
3140 	 * \param[in,out] scheduler  Scheduler data
3141 	 *
3142 	 * \return true if such an operation happened after the event, false otherwise
3143 	 */
3144 	static bool
3145 	non_monitor_after(const char *rsc_id, const char *node_name,
3146 	                  const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3147 	{
3148 	    xmlNode *lrm_resource = NULL;
3149 	
3150 	    lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3151 	    if (lrm_resource == NULL) {
3152 	        return false;
3153 	    }
3154 	
3155 	    for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3156 	                                            NULL, NULL);
3157 	         op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {
3158 	
3159 	        const char *task = NULL;
3160 	
3161 	        if (op == xml_op) {
3162 	            continue;
3163 	        }
3164 	
3165 	        task = pcmk__xe_get(op, PCMK_XA_OPERATION);
3166 	
3167 	        if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3168 	                             PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3169 	                             NULL)
3170 	            && pe__is_newer_op(op, xml_op) > 0) {
3171 	            return true;
3172 	        }
3173 	    }
3174 	
3175 	    return false;
3176 	}
3177 	
3178 	/*!
3179 	 * \internal
3180 	 * \brief Check whether the resource has newer state on a node after a migration
3181 	 *        attempt
3182 	 *
3183 	 * \param[in]     rsc_id        Resource being checked
3184 	 * \param[in]     node_name     Node being checked
3185 	 * \param[in]     migrate_to    Any migrate_to event that is being compared to
3186 	 * \param[in]     migrate_from  Any migrate_from event that is being compared to
3187 	 * \param[in,out] scheduler     Scheduler data
3188 	 *
3189 	 * \return true if the resource has newer state on the node, false otherwise
3190 	 */
3191 	static bool
3192 	newer_state_after_migrate(const char *rsc_id, const char *node_name,
3193 	                          const xmlNode *migrate_to,
3194 	                          const xmlNode *migrate_from,
3195 	                          pcmk_scheduler_t *scheduler)
3196 	{
3197 	    const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
3198 	    const char *source = pcmk__xe_get(xml_op, PCMK__META_MIGRATE_SOURCE);
3199 	
3200 	    /* It's preferable to compare to the migration event on the same node,
3201 	     * if one exists, since call IDs are more reliable.
3202 	     */
3203 	    if ((xml_op != migrate_to) && (migrate_to != NULL)
3204 	        && pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3205 	
3206 	        xml_op = migrate_to;
3207 	    }
3208 	
3209 	    /* If there's any newer non-monitor operation on the node, or any newer
3210 	     * probe/monitor operation on the node indicating it was not running there,
3211 	     * the migration events potentially no longer matter for the node.
3212 	     */
3213 	    return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
3214 	           || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
3215 	}
3216 	
3217 	/*!
3218 	 * \internal
3219 	 * \brief Parse migration source and target node names from history entry
3220 	 *
3221 	 * \param[in]  entry        Resource history entry for a migration action
3222 	 * \param[in]  source_node  If not NULL, source must match this node
3223 	 * \param[in]  target_node  If not NULL, target must match this node
3224 	 * \param[out] source_name  Where to store migration source node name
3225 	 * \param[out] target_name  Where to store migration target node name
3226 	 *
3227 	 * \return Standard Pacemaker return code
3228 	 */
3229 	static int
3230 	get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3231 	                         const pcmk_node_t *target_node,
3232 	                         const char **source_name, const char **target_name)
3233 	{
3234 	    *source_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_SOURCE);
3235 	    *target_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_TARGET);
3236 	    if ((*source_name == NULL) || (*target_name == NULL)) {
3237 	        pcmk__config_err("Ignoring resource history entry %s without "
3238 	                         PCMK__META_MIGRATE_SOURCE " and "
3239 	                         PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3240 	        return pcmk_rc_unpack_error;
3241 	    }
3242 	
3243 	    if ((source_node != NULL)
3244 	        && !pcmk__str_eq(*source_name, source_node->priv->name,
3245 	                         pcmk__str_casei|pcmk__str_null_matches)) {
3246 	        pcmk__config_err("Ignoring resource history entry %s because "
3247 	                         PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3248 	                         pcmk__xe_id(entry), *source_name,
3249 	                         pcmk__node_name(source_node));
3250 	        return pcmk_rc_unpack_error;
3251 	    }
3252 	
3253 	    if ((target_node != NULL)
3254 	        && !pcmk__str_eq(*target_name, target_node->priv->name,
3255 	                         pcmk__str_casei|pcmk__str_null_matches)) {
3256 	        pcmk__config_err("Ignoring resource history entry %s because "
3257 	                         PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3258 	                         pcmk__xe_id(entry), *target_name,
3259 	                         pcmk__node_name(target_node));
3260 	        return pcmk_rc_unpack_error;
3261 	    }
3262 	
3263 	    return pcmk_rc_ok;
3264 	}
3265 	
3266 	/*!
3267 	 * \internal
3268 	 * \brief Add a migration source to a resource's list of dangling migrations
3269 	 *
3270 	 * If the migrate_to and migrate_from actions in a live migration both
3271 	 * succeeded, but there is no stop on the source, the migration is considered
3272 	 * "dangling." Add the source to the resource's dangling migration list, which
3273 	 * will be used to schedule a stop on the source without affecting the target.
3274 	 *
3275 	 * \param[in,out] rsc   Resource involved in migration
3276 	 * \param[in]     node  Migration source
3277 	 */
3278 	static void
3279 	add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3280 	{
3281 	    pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3282 	                    rsc->id, pcmk__node_name(node));
3283 	    rsc->priv->orig_role = pcmk_role_stopped;
3284 	    rsc->priv->dangling_migration_sources =
3285 	        g_list_prepend(rsc->priv->dangling_migration_sources,
3286 	                       (gpointer) node);
3287 	}
3288 	
3289 	/*!
3290 	 * \internal
3291 	 * \brief Update resource role etc. after a successful migrate_to action
3292 	 *
3293 	 * \param[in,out] history  Parsed action result history
3294 	 */
3295 	static void
3296 	unpack_migrate_to_success(struct action_history *history)
3297 	{
3298 	    /* A complete migration sequence is:
3299 	     * 1. migrate_to on source node (which succeeded if we get to this function)
3300 	     * 2. migrate_from on target node
3301 	     * 3. stop on source node
3302 	     *
3303 	     * If no migrate_from has happened, the migration is considered to be
3304 	     * "partial". If the migrate_from succeeded but no stop has happened, the
3305 	     * migration is considered to be "dangling".
3306 	     *
3307 	     * If a successful migrate_to and stop have happened on the source node, we
3308 	     * still need to check for a partial migration, due to scenarios (easier to
3309 	     * produce with batch-limit=1) like:
3310 	     *
3311 	     * - A resource is migrating from node1 to node2, and a migrate_to is
3312 	     *   initiated for it on node1.
3313 	     *
3314 	     * - node2 goes into standby mode while the migrate_to is pending, which
3315 	     *   aborts the transition.
3316 	     *
3317 	     * - Upon completion of the migrate_to, a new transition schedules a stop
3318 	     *   on both nodes and a start on node1.
3319 	     *
3320 	     * - If the new transition is aborted for any reason while the resource is
3321 	     *   stopping on node1, the transition after that stop completes will see
3322 	     *   the migrate_to and stop on the source, but it's still a partial
3323 	     *   migration, and the resource must be stopped on node2 because it is
3324 	     *   potentially active there due to the migrate_to.
3325 	     *
3326 	     *   We also need to take into account that either node's history may be
3327 	     *   cleared at any point in the migration process.
3328 	     */
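    /* In short (an informal summary), the checks below decide among:
     *   - ignoring this migrate_to because newer state supersedes it,
     *   - a dangling migration (schedule a stop on the source),
     *   - a failed migration (mark the resource failed and non-migratable), or
     *   - a partial migration (remember source and target so it may continue).
     */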
3329 	    int from_rc = PCMK_OCF_OK;
3330 	    int from_status = PCMK_EXEC_PENDING;
3331 	    pcmk_node_t *target_node = NULL;
3332 	    xmlNode *migrate_from = NULL;
3333 	    const char *source = NULL;
3334 	    const char *target = NULL;
3335 	    bool source_newer_op = false;
3336 	    bool target_newer_state = false;
3337 	    bool active_on_target = false;
3338 	    pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3339 	
3340 	    // Get source and target node names from XML
3341 	    if (get_migration_node_names(history->xml, history->node, NULL, &source,
3342 	                                 &target) != pcmk_rc_ok) {
3343 	        return;
3344 	    }
3345 	
3346 	    // Check for newer state on the source
3347 	    source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3348 	                                        scheduler);
3349 	
3350 	    // Check for a migrate_from action from this source on the target
3351 	    migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3352 	                               target, source, -1, scheduler);
3353 	    if (migrate_from != NULL) {
3354 	        if (source_newer_op) {
3355 	            /* There's a newer non-monitor operation on the source and a
3356 	             * migrate_from on the target, so this migrate_to is irrelevant to
3357 	             * the resource's state.
3358 	             */
3359 	            return;
3360 	        }
3361 	        pcmk__xe_get_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3362 	        pcmk__xe_get_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3363 	    }
3364 	
3365 	    /* If the resource has newer state on both the source and target after the
3366 	     * migration events, this migrate_to is irrelevant to the resource's state.
3367 	     */
3368 	    target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3369 	                                                   history->xml, migrate_from,
3370 	                                                   scheduler);
3371 	    if (source_newer_op && target_newer_state) {
3372 	        return;
3373 	    }
3374 	
3375 	    /* Check for dangling migration (migrate_from succeeded but stop not done).
3376 	     * We know there's no stop because we already returned if the target has a
3377 	     * migrate_from and the source has any newer non-monitor operation.
3378 	     */
3379 	    if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3380 	        add_dangling_migration(history->rsc, history->node);
3381 	        return;
3382 	    }
3383 	
3384 	    /* Without newer state, this migrate_to implies the resource is active.
3385 	     * (Clones are not allowed to migrate, so role can't be promoted.)
3386 	     */
3387 	    history->rsc->priv->orig_role = pcmk_role_started;
3388 	
3389 	    target_node = pcmk_find_node(scheduler, target);
3390 	    active_on_target = !target_newer_state && (target_node != NULL)
3391 	                       && target_node->details->online;
3392 	
3393 	    if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3394 	        if (active_on_target) {
3395 	            native_add_running(history->rsc, target_node, scheduler, TRUE);
3396 	        } else {
3397 	            // Mark resource as failed, require recovery, and prevent migration
3398 	            pcmk__set_rsc_flags(history->rsc,
3399 	                                pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3400 	            pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3401 	        }
3402 	        return;
3403 	    }
3404 	
3405 	    // The migrate_from is pending, complete but erased, or to be scheduled
3406 	
3407 	    /* If there is no history at all for the resource on an online target, then
3408 	     * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3409 	     * have the probe result, it will be reflected in target_newer_state.
3410 	     */
3411 	    if ((target_node != NULL) && target_node->details->online
3412 	        && unknown_on_node(history->rsc, target)) {
3413 	        return;
3414 	    }
3415 	
3416 	    if (active_on_target) {
3417 	        pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3418 	
3419 	        native_add_running(history->rsc, target_node, scheduler, FALSE);
3420 	        if ((source_node != NULL) && source_node->details->online) {
3421 	            /* This is a partial migration: the migrate_to completed
3422 	             * successfully on the source, but the migrate_from has not
3423 	             * completed. Remember the source and target; if the newly
3424 	             * chosen target remains the same when we schedule actions
3425 	             * later, we may continue with the migration.
3426 	             */
3427 	            history->rsc->priv->partial_migration_target = target_node;
3428 	            history->rsc->priv->partial_migration_source = source_node;
3429 	        }
3430 	
3431 	    } else if (!source_newer_op) {
3432 	        // Mark resource as failed, require recovery, and prevent migration
3433 	        pcmk__set_rsc_flags(history->rsc,
3434 	                            pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3435 	        pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3436 	    }
3437 	}
3438 	
3439 	/*!
3440 	 * \internal
3441 	 * \brief Update resource role etc. after a failed migrate_to action
3442 	 *
3443 	 * \param[in,out] history  Parsed action result history
3444 	 */
3445 	static void
3446 	unpack_migrate_to_failure(struct action_history *history)
3447 	{
3448 	    xmlNode *target_migrate_from = NULL;
3449 	    const char *source = NULL;
3450 	    const char *target = NULL;
3451 	    pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3452 	
3453 	    // Get source and target node names from XML
3454 	    if (get_migration_node_names(history->xml, history->node, NULL, &source,
3455 	                                 &target) != pcmk_rc_ok) {
3456 	        return;
3457 	    }
3458 	
3459 	    /* If a migration failed, we have to assume the resource is active. Clones
3460 	     * are not allowed to migrate, so role can't be promoted.
3461 	     */
3462 	    history->rsc->priv->orig_role = pcmk_role_started;
3463 	
3464 	    // Check for migrate_from on the target
3465 	    target_migrate_from = find_lrm_op(history->rsc->id,
3466 	                                      PCMK_ACTION_MIGRATE_FROM, target, source,
3467 	                                      PCMK_OCF_OK, scheduler);
3468 	
3469 	    if (/* If the resource state is unknown on the target, it will likely be
3470 	         * probed there.
3471 	         * Don't just consider it running there. We will get back here anyway in
3472 	         * case the probe detects it's running there.
3473 	         */
3474 	        !unknown_on_node(history->rsc, target)
3475 	        /* If the resource has newer state on the target after the migration
3476 	         * events, this migrate_to no longer matters for the target.
3477 	         */
3478 	        && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3479 	                                      target_migrate_from, scheduler)) {
3480 	        /* The resource has no newer state on the target, so assume it's
3481 	         * still active there (if it is up).
3482 	         */
3484 	        pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3485 	
3486 	        if (target_node && target_node->details->online) {
3487 	            native_add_running(history->rsc, target_node, scheduler, FALSE);
3488 	        }
3489 	
3490 	    } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3491 	                                  scheduler)) {
3492 	        /* We know the resource has newer state on the target, but this
3493 	         * migrate_to still matters for the source as long as there's no newer
3494 	         * non-monitor operation there.
3495 	         */
3496 	
3497 	        // Mark node as having dangling migration so we can force a stop later
3498 	        history->rsc->priv->dangling_migration_sources =
3499 	            g_list_prepend(history->rsc->priv->dangling_migration_sources,
3500 	                           (gpointer) history->node);
3501 	    }
3502 	}
3503 	
3504 	/*!
3505 	 * \internal
3506 	 * \brief Update resource role etc. after a failed migrate_from action
3507 	 *
3508 	 * \param[in,out] history  Parsed action result history
3509 	 */
3510 	static void
3511 	unpack_migrate_from_failure(struct action_history *history)
3512 	{
3513 	    xmlNode *source_migrate_to = NULL;
3514 	    const char *source = NULL;
3515 	    const char *target = NULL;
3516 	    pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3517 	
3518 	    // Get source and target node names from XML
3519 	    if (get_migration_node_names(history->xml, NULL, history->node, &source,
3520 	                                 &target) != pcmk_rc_ok) {
3521 	        return;
3522 	    }
3523 	
3524 	    /* If a migration failed, we have to assume the resource is active. Clones
3525 	     * are not allowed to migrate, so role can't be promoted.
3526 	     */
3527 	    history->rsc->priv->orig_role = pcmk_role_started;
3528 	
3529 	    // Check for a migrate_to on the source
3530 	    source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3531 	                                    source, target, PCMK_OCF_OK, scheduler);
3532 	
3533 	    if (/* If the resource state is unknown on the source, it will likely be
3534 	         * probed there.
3535 	         * Don't just consider it running there. We will get back here anyway in
3536 	         * case the probe detects it's running there.
3537 	         */
3538 	        !unknown_on_node(history->rsc, source)
3539 	        /* If the resource has newer state on the source after the migration
3540 	         * events, this migrate_from no longer matters for the source.
3541 	         */
3542 	        && !newer_state_after_migrate(history->rsc->id, source,
3543 	                                      source_migrate_to, history->xml,
3544 	                                      scheduler)) {
3545 	        /* The resource has no newer state on the source, so assume it's still
3546 	         * active there (if it is up).
3547 	         */
3548 	        pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3549 	
3550 	        if (source_node && source_node->details->online) {
3551 	            native_add_running(history->rsc, source_node, scheduler, TRUE);
3552 	        }
3553 	    }
3554 	}
3555 	
3556 	/*!
3557 	 * \internal
3558 	 * \brief Add an action to cluster's list of failed actions
3559 	 *
3560 	 * \param[in,out] history  Parsed action result history
3561 	 */
3562 	static void
3563 	record_failed_op(struct action_history *history)
3564 	{
3565 	    const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3566 	
3567 	    if (!(history->node->details->online)) {
3568 	        return;
3569 	    }
3570 	
3571 	    for (const xmlNode *xIter = scheduler->priv->failed->children;
3572 	         xIter != NULL; xIter = xIter->next) {
3573 	
3574 	        const char *key = pcmk__xe_history_key(xIter);
3575 	        const char *uname = pcmk__xe_get(xIter, PCMK_XA_UNAME);
3576 	
3577 	        if (pcmk__str_eq(history->key, key, pcmk__str_none)
3578 	            && pcmk__str_eq(uname, history->node->priv->name,
3579 	                            pcmk__str_casei)) {
3580 	            pcmk__trace("Skipping duplicate entry %s on %s", history->key,
3581 	                        pcmk__node_name(history->node));
3582 	            return;
3583 	        }
3584 	    }
3585 	
3586 	    pcmk__trace("Adding entry for %s on %s to failed action list",
3587 	                history->key, pcmk__node_name(history->node));
3588 	    pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3589 	    pcmk__xe_set(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3590 	    pcmk__xml_copy(scheduler->priv->failed, history->xml);
3591 	}
3592 	
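/*!
 * \internal
 * \brief Render an operation's last-change time as a log-friendly string
 *
 * For example (hypothetical timestamp), a \c PCMK_XA_LAST_RC_CHANGE value
 * rendered as "Wed Jan  1 00:00:00 2025" becomes "Jan  1 00:00:00 2025".
 * The caller is responsible for freeing the result.
 */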
3593 	static char *
3594 	last_change_str(const xmlNode *xml_op)
3595 	{
3596 	    time_t when;
3597 	    char *result = NULL;
3598 	
3599 	    if (pcmk__xe_get_time(xml_op, PCMK_XA_LAST_RC_CHANGE,
3600 	                          &when) == pcmk_rc_ok) {
3601 	        char *when_s = pcmk__epoch2str(&when, 0);
3602 	        const char *p = strchr(when_s, ' ');
3603 	
3604 	        // Skip day of week to make message shorter
3605 	        if ((p != NULL) && (*(++p) != '\0')) {
3606 	            result = pcmk__str_copy(p);
3607 	        }
3608 	        free(when_s);
3609 	    }
3610 	
3611 	    if (result == NULL) {
3612 	        result = pcmk__str_copy("unknown_time");
3613 	    }
3614 	
3615 	    return result;
3616 	}
3617 	
3618 	/*!
3619 	 * \internal
3620 	 * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3621 	 *
3622 	 * \param[in,out] rsc  Resource to ban
3623 	 */
3624 	static void
3625 	ban_from_all_nodes(pcmk_resource_t *rsc)
3626 	{
3627 	    int score = -PCMK_SCORE_INFINITY;
3628 	    const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3629 	
3630 	    if (rsc->priv->parent != NULL) {
3631 	        pcmk_resource_t *parent = uber_parent(rsc);
3632 	
3633 	        if (pcmk__is_anonymous_clone(parent)) {
3634 	            /* For anonymous clones, if an operation with
3635 	             * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3636 	             * entire clone must stop.
3637 	             */
3638 	            rsc = parent;
3639 	        }
3640 	    }
3641 	
3642 	    // Ban the resource from all nodes
3643 	    pcmk__notice("%s will not be started under current conditions", rsc->id);
3644 	    g_clear_pointer(&rsc->priv->allowed_nodes, g_hash_table_destroy);
3645 	    rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
3646 	    g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3647 	}
3648 	
3649 	/*!
3650 	 * \internal
3651 	 * \brief Get configured failure handling and role after failure for an action
3652 	 *
3653 	 * \param[in,out] history    Unpacked action history entry
3654 	 * \param[out]    on_fail    Where to set configured failure handling
3655 	 * \param[out]    fail_role  Where to set role after failure
3656 	 */
3657 	static void
3658 	unpack_failure_handling(struct action_history *history,
3659 	                        enum pcmk__on_fail *on_fail,
3660 	                        enum rsc_role_e *fail_role)
3661 	{
3662 	    xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3663 	                                               history->interval_ms, true);
3664 	
3665 	    GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3666 	                                                history->task,
3667 	                                                history->interval_ms, config);
3668 	
3669 	    const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3670 	
3671 	    *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3672 	                                   history->interval_ms, on_fail_str);
3673 	    *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3674 	                                          meta);
3675 	    g_hash_table_destroy(meta);
3676 	}
3677 	
3678 	/*!
3679 	 * \internal
3680 	 * \brief Update resource role, failure handling, etc., after a failed action
3681 	 *
3682 	 * \param[in,out] history         Parsed action result history
3683 	 * \param[in]     config_on_fail  Action failure handling from configuration
3684 	 * \param[in]     fail_role       Resource's role after failure of this action
3685 	 * \param[out]    last_failure    This will be set to the history XML
3686 	 * \param[in,out] on_fail         Actual handling of action result
3687 	 */
3688 	static void
3689 	unpack_rsc_op_failure(struct action_history *history,
3690 	                      enum pcmk__on_fail config_on_fail,
3691 	                      enum rsc_role_e fail_role, xmlNode **last_failure,
3692 	                      enum pcmk__on_fail *on_fail)
3693 	{
3694 	    bool is_probe = false;
3695 	    char *last_change_s = NULL;
3696 	    pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3697 	
3698 	    *last_failure = history->xml;
3699 	
3700 	    is_probe = pcmk_xe_is_probe(history->xml);
3701 	    last_change_s = last_change_str(history->xml);
3702 	
3703 	    if (!pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
3704 	        && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3705 	        pcmk__trace("Unexpected result (%s%s%s) was recorded for "
3706 	                    "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3707 	                    crm_exit_str(history->exit_status),
3708 	                    (pcmk__str_empty(history->exit_reason)? "" : ": "),
3709 	                    pcmk__s(history->exit_reason, ""),
3710 	                    (is_probe? "probe" : history->task), history->rsc->id,
3711 	                    pcmk__node_name(history->node), last_change_s,
3712 	                    history->exit_status, history->id);
3713 	    } else {
3714 	        pcmk__sched_warn(scheduler,
3715 	                         "Unexpected result (%s%s%s) was recorded for %s of "
3716 	                         "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3717 	                         crm_exit_str(history->exit_status),
3718 	                         (pcmk__str_empty(history->exit_reason)? "" : ": "),
3719 	                         pcmk__s(history->exit_reason, ""),
3720 	                         (is_probe? "probe" : history->task), history->rsc->id,
3721 	                         pcmk__node_name(history->node), last_change_s,
3722 	                         history->exit_status, history->id);
3723 	
3724 	        if (is_probe && (history->exit_status != PCMK_OCF_OK)
3725 	            && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3726 	            && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3727 	
3728 	            /* A failed (not just unexpected) probe result could mean the user
3729 	             * didn't know resources will be probed even where they can't run.
3730 	             */
3731 	            pcmk__notice("If it is not possible for %s to run on %s, see the "
3732 	                         PCMK_XA_RESOURCE_DISCOVERY " option for location "
3733 	                         "constraints",
3734 	                         history->rsc->id, pcmk__node_name(history->node));
3735 	        }
3736 	
3737 	        record_failed_op(history);
3738 	    }
3739 	
3740 	    free(last_change_s);
3741 	
3742 	    if (*on_fail < config_on_fail) {
3743 	        pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3744 	                        pcmk__on_fail_text(*on_fail),
3745 	                        pcmk__on_fail_text(config_on_fail), history->key);
3746 	        *on_fail = config_on_fail;
3747 	    }
3748 	
3749 	    if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3750 	        resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3751 	                          "__stop_fail__", scheduler);
3752 	
3753 	    } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3754 	        unpack_migrate_to_failure(history);
3755 	
3756 	    } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3757 	        unpack_migrate_from_failure(history);
3758 	
3759 	    } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3760 	        history->rsc->priv->orig_role = pcmk_role_promoted;
3761 	
3762 	    } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3763 	        if (config_on_fail == pcmk__on_fail_block) {
3764 	            history->rsc->priv->orig_role = pcmk_role_promoted;
3765 	            pe__set_next_role(history->rsc, pcmk_role_stopped,
3766 	                              "demote with " PCMK_META_ON_FAIL "=block");
3767 	
3768 	        } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3769 	            history->rsc->priv->orig_role = pcmk_role_stopped;
3770 	
3771 	        } else {
3772 	            /* Staying in the promoted role would put the scheduler and
3773 	             * controller into a loop. Setting the role to unpromoted is not
3774 	             * dangerous because the resource will be stopped as part of
3775 	             * recovery, and any promotion will be ordered after that stop.
3776 	             */
3777 	            history->rsc->priv->orig_role = pcmk_role_unpromoted;
3778 	        }
3779 	    }
3780 	
3781 	    if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3782 	        /* leave stopped */
3783 	        pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3784 	        history->rsc->priv->orig_role = pcmk_role_stopped;
3785 	
3786 	    } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3787 	        pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3788 	        set_active(history->rsc);
3789 	    }
3790 	
3791 	    pcmk__rsc_trace(history->rsc,
3792 	                    "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3793 	                    history->rsc->id,
3794 	                    pcmk_role_text(history->rsc->priv->orig_role),
3795 	                    pcmk__btoa(history->node->details->unclean),
3796 	                    pcmk__on_fail_text(config_on_fail),
3797 	                    pcmk_role_text(fail_role));
3798 	
3799 	    if ((fail_role != pcmk_role_started)
3800 	        && (history->rsc->priv->next_role < fail_role)) {
3801 	        pe__set_next_role(history->rsc, fail_role, "failure");
3802 	    }
3803 	
3804 	    if (fail_role == pcmk_role_stopped) {
3805 	        ban_from_all_nodes(history->rsc);
3806 	    }
3807 	}
3808 	
3809 	/*!
3810 	 * \internal
3811 	 * \brief Block a resource with a failed action if it cannot be recovered
3812 	 *
3813 	 * If resource action is a failed stop and fencing is not possible, mark the
3814 	 * resource as unmanaged and blocked, since recovery cannot be done.
3815 	 *
3816 	 * \param[in,out] history  Parsed action history entry
3817 	 */
3818 	static void
3819 	block_if_unrecoverable(struct action_history *history)
3820 	{
3821 	    char *last_change_s = NULL;
3822 	
3823 	    if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3824 	        return; // All actions besides stop are always recoverable
3825 	    }
3826 	    if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3827 	        return; // Failed stops are recoverable via fencing
3828 	    }
3829 	
3830 	    last_change_s = last_change_str(history->xml);
3831 	    pcmk__sched_err(history->node->priv->scheduler,
3832 	                    "No further recovery can be attempted for %s "
3833 	                    "because %s on %s failed (%s%s%s) at %s "
3834 	                    QB_XS " rc=%d id=%s",
3835 	                    history->rsc->id, history->task,
3836 	                    pcmk__node_name(history->node),
3837 	                    crm_exit_str(history->exit_status),
3838 	                    (pcmk__str_empty(history->exit_reason)? "" : ": "),
3839 	                    pcmk__s(history->exit_reason, ""),
3840 	                    last_change_s, history->exit_status, history->id);
3841 	
3842 	    free(last_change_s);
3843 	
3844 	    pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
3845 	    pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3846 	}
3847 	
3848 	/*!
3849 	 * \internal
3850 	 * \brief Update action history's execution status and why
3851 	 *
3852 	 * \param[in,out] history  Parsed action history entry
3853 	 * \param[out]    why      Where to store reason for update
3854 	 * \param[in]     value    New value
3855 	 * \param[in]     reason   Description of why value was changed
3856 	 */
3857 	static inline void
3858 	remap_because(struct action_history *history, const char **why, int value,
3859 	              const char *reason)
3860 	{
3861 	    if (history->execution_status != value) {
3862 	        history->execution_status = value;
3863 	        *why = reason;
3864 	    }
3865 	}
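
/* As a usage sketch: remap_operation() below calls
 * remap_because(history, &why, PCMK_EXEC_DONE, "expected result") when the
 * actual exit status matches the expected one, so the execution status (and
 * the reason logged in the final trace message) is updated only when the
 * value actually changes.
 */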
3866 	
3867 	/*!
3868 	 * \internal
3869 	 * \brief Remap informational monitor results and operation status
3870 	 *
 * For monitor results, certain OCF codes provide extended information to the
 * user about services that are not failed but not entirely healthy either.
 * Pacemaker must treat these as the "normal" result.
 *
 * For operation status, the action result can be used to determine an
 * appropriate status for responding to the action. The status provided by the
 * executor is not directly usable, since the executor does not know what
 * result was expected.
3878 	 *
3879 	 * \param[in,out] history  Parsed action history entry
3880 	 * \param[in,out] on_fail  What should be done about the result
3881 	 * \param[in]     expired  Whether result is expired
3882 	 *
3883 	 * \note If the result is remapped and the node is not shutting down or failed,
3884 	 *       the operation will be recorded in the scheduler data's list of failed
3885 	 *       operations to highlight it for the user.
3886 	 *
3887 	 * \note This may update the resource's current and next role.
3888 	 */
3889 	static void
3890 	remap_operation(struct action_history *history,
3891 	                enum pcmk__on_fail *on_fail, bool expired)
3892 	{
3893 	    /* @TODO It would probably also be a good idea to map an exit status of
3894 	     * CRM_EX_PROMOTED or CRM_EX_DEGRADED_PROMOTED to CRM_EX_OK for promote
3895 	     * actions
3896 	     */
3897 	
3898 	    bool is_probe = false;
3899 	    int orig_exit_status = history->exit_status;
3900 	    int orig_exec_status = history->execution_status;
3901 	    const char *why = NULL;
3902 	    const char *task = history->task;
3903 	
3904 	    // Remap degraded results to their successful counterparts
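    /* (pcmk__effective_rc() maps PCMK_OCF_DEGRADED to PCMK_OCF_OK and
     * PCMK_OCF_DEGRADED_PROMOTED to PCMK_OCF_RUNNING_PROMOTED, leaving all
     * other codes unchanged.)
     */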
3905 	    history->exit_status = pcmk__effective_rc(history->exit_status);
3906 	    if (history->exit_status != orig_exit_status) {
3907 	        why = "degraded result";
3908 	        if (!expired && (!history->node->details->shutdown
3909 	                         || history->node->details->online)) {
3910 	            record_failed_op(history);
3911 	        }
3912 	    }
3913 	
3914 	    if (!pcmk__is_bundled(history->rsc)
3915 	        && pcmk_xe_mask_probe_failure(history->xml)
3916 	        && ((history->execution_status != PCMK_EXEC_DONE)
3917 	            || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3918 	        history->execution_status = PCMK_EXEC_DONE;
3919 	        history->exit_status = PCMK_OCF_NOT_RUNNING;
3920 	        why = "equivalent probe result";
3921 	    }
3922 	
3923 	    /* If the executor reported an execution status of anything but done or
3924 	     * error, consider that final. But for done or error, we know better whether
3925 	     * it should be treated as a failure or not, because we know the expected
3926 	     * result.
3927 	     */
3928 	    switch (history->execution_status) {
3929 	        case PCMK_EXEC_DONE:
3930 	        case PCMK_EXEC_ERROR:
3931 	            break;
3932 	
3933 	        // These should be treated as node-fatal
3934 	        case PCMK_EXEC_NO_FENCE_DEVICE:
3935 	        case PCMK_EXEC_NO_SECRETS:
3936 	            remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3937 	                          "node-fatal error");
3938 	            goto remap_done;
3939 	
3940 	        default:
3941 	            goto remap_done;
3942 	    }
3943 	
3944 	    is_probe = pcmk_xe_is_probe(history->xml);
3945 	    if (is_probe) {
3946 	        task = "probe";
3947 	    }
3948 	
3949 	    if (history->expected_exit_status < 0) {
3950 	        /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3951 	         * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3952 	         * expected exit status in the transition key, which (along with the
3953 	         * similar case of a corrupted transition key in the CIB) will be
3954 	         * reported to this function as -1. Pacemaker 2.0+ does not support
3955 	         * rolling upgrades from those versions or processing of saved CIB files
3956 	         * from those versions, so we do not need to care much about this case.
3957 	         */
3958 	        remap_because(history, &why, PCMK_EXEC_ERROR,
3959 	                      "obsolete history format");
3960 	        pcmk__config_warn("Expected result not found for %s on %s "
3961 	                          "(corrupt or obsolete CIB?)",
3962 	                          history->key, pcmk__node_name(history->node));
3963 	
3964 	    } else if (history->exit_status == history->expected_exit_status) {
3965 	        remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3966 	
3967 	    } else {
3968 	        remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3969 	        pcmk__rsc_debug(history->rsc,
3970 	                        "%s on %s: expected %d (%s), got %d (%s%s%s)",
3971 	                        history->key, pcmk__node_name(history->node),
3972 	                        history->expected_exit_status,
3973 	                        crm_exit_str(history->expected_exit_status),
3974 	                        history->exit_status,
3975 	                        crm_exit_str(history->exit_status),
3976 	                        (pcmk__str_empty(history->exit_reason)? "" : ": "),
3977 	                        pcmk__s(history->exit_reason, ""));
3978 	    }
3979 	
3980 	    switch (history->exit_status) {
3981 	        case PCMK_OCF_OK:
3982 	            if (is_probe
3983 	                && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3984 	                char *last_change_s = last_change_str(history->xml);
3985 	
3986 	                remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3987 	                pcmk__rsc_info(history->rsc,
3988 	                               "Probe found %s active on %s at %s",
3989 	                               history->rsc->id, pcmk__node_name(history->node),
3990 	                               last_change_s);
3991 	                free(last_change_s);
3992 	            }
3993 	            break;
3994 	
3995 	        case PCMK_OCF_NOT_RUNNING:
3996 	            if (is_probe
3997 	                || (history->expected_exit_status == history->exit_status)
3998 	                || !pcmk__is_set(history->rsc->flags, pcmk__rsc_managed)) {
3999 	
4000 	                /* For probes, recurring monitors for the Stopped role, and
4001 	                 * unmanaged resources, "not running" is not considered a
4002 	                 * failure.
4003 	                 */
4004 	                remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4005 	                history->rsc->priv->orig_role = pcmk_role_stopped;
4006 	                *on_fail = pcmk__on_fail_ignore;
4007 	                pe__set_next_role(history->rsc, pcmk_role_unknown,
4008 	                                  "not running");
4009 	            }
4010 	            break;
4011 	
4012 	        case PCMK_OCF_RUNNING_PROMOTED:
4013 	            if (is_probe
4014 	                && (history->exit_status != history->expected_exit_status)) {
4015 	                char *last_change_s = last_change_str(history->xml);
4016 	
4017 	                remap_because(history, &why, PCMK_EXEC_DONE, "probe");
                pcmk__rsc_info(history->rsc,
                               "Probe found %s active and promoted on %s at %s",
                               history->rsc->id,
                               pcmk__node_name(history->node), last_change_s);
4022 	                free(last_change_s);
4023 	            }
4024 	            if (!expired
4025 	                || (history->exit_status == history->expected_exit_status)) {
4026 	                history->rsc->priv->orig_role = pcmk_role_promoted;
4027 	            }
4028 	            break;
4029 	
4030 	        case PCMK_OCF_FAILED_PROMOTED:
4031 	            if (!expired) {
4032 	                history->rsc->priv->orig_role = pcmk_role_promoted;
4033 	            }
4034 	            remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4035 	            break;
4036 	
4037 	        case PCMK_OCF_NOT_CONFIGURED:
4038 	            remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4039 	            break;
4040 	
4041 	        case PCMK_OCF_UNIMPLEMENT_FEATURE:
4042 	            {
4043 	                guint interval_ms = 0;
4044 	                pcmk__xe_get_guint(history->xml, PCMK_META_INTERVAL,
4045 	                                   &interval_ms);
4046 	
4047 	                if (interval_ms == 0) {
4048 	                    if (!expired) {
4049 	                        block_if_unrecoverable(history);
4050 	                    }
4051 	                    remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4052 	                                  "exit status");
4053 	                } else {
4054 	                    remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4055 	                                  "exit status");
4056 	                }
4057 	            }
4058 	            break;
4059 	
4060 	        case PCMK_OCF_NOT_INSTALLED:
4061 	        case PCMK_OCF_INVALID_PARAM:
4062 	        case PCMK_OCF_INSUFFICIENT_PRIV:
4063 	            if (!expired) {
4064 	                block_if_unrecoverable(history);
4065 	            }
4066 	            remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4067 	            break;
4068 	
4069 	        default:
4070 	            if (history->execution_status == PCMK_EXEC_DONE) {
4071 	                char *last_change_s = last_change_str(history->xml);
4072 	
4073 	                pcmk__info("Treating unknown exit status %d from %s of %s on "
4074 	                           "%s at %s as failure",
4075 	                           history->exit_status, task, history->rsc->id,
4076 	                           pcmk__node_name(history->node), last_change_s);
4077 	                remap_because(history, &why, PCMK_EXEC_ERROR,
4078 	                              "unknown exit status");
4079 	                free(last_change_s);
4080 	            }
4081 	            break;
4082 	    }
4083 	
4084 	remap_done:
4085 	    if (why != NULL) {
4086 	        pcmk__rsc_trace(history->rsc,
4087 	                        "Remapped %s result from [%s: %s] to [%s: %s] "
4088 	                        "because of %s",
4089 	                        history->key, pcmk_exec_status_str(orig_exec_status),
4090 	                        crm_exit_str(orig_exit_status),
4091 	                        pcmk_exec_status_str(history->execution_status),
4092 	                        crm_exit_str(history->exit_status), why);
4093 	    }
4094 	}
4095 	
// Return TRUE for a start or monitor last failure whose parameters have changed
4097 	static bool
4098 	should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4099 	                              pcmk_resource_t *rsc, pcmk_node_t *node)
4100 	{
4101 	    if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4102 	        if (pe__bundle_needs_remote_name(rsc)) {
4103 	            /* We haven't allocated resources yet, so we can't reliably
4104 	             * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4105 	             * When that's needed, defer the check until later.
4106 	             */
4107 	            pcmk__add_param_check(xml_op, rsc, node, pcmk__check_last_failure);
4108 	
4109 	        } else {
4110 	            pcmk__op_digest_t *digest_data = NULL;
4111 	
4112 	            digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4113 	                                                rsc->priv->scheduler);
4114 	            switch (digest_data->rc) {
4115 	                case pcmk__digest_unknown:
4116 	                    pcmk__trace("Resource %s history entry %s on %s"
4117 	                                " has no digest to compare",
4118 	                                rsc->id, pcmk__xe_history_key(xml_op),
4119 	                                node->priv->id);
4120 	                    break;
4121 	                case pcmk__digest_match:
4122 	                    break;
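                // Any other digest comparison result means parameters changed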
4123 	                default:
4124 	                    return TRUE;
4125 	            }
4126 	        }
4127 	    }
4128 	    return FALSE;
4129 	}
4130 	
4131 	// Order action after fencing of remote node, given connection rsc
4132 	static void
4133 	order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4134 	                           pcmk_scheduler_t *scheduler)
4135 	{
4136 	    pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4137 	
4138 	    if (remote_node) {
4139 	        pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4140 	                                           FALSE, scheduler);
4141 	
4142 	        order_actions(fence, action, pcmk__ar_first_implies_then);
4143 	    }
4144 	}
4145 	
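/*!
 * \internal
 * \brief Check whether a failure timeout should be ignored for a history entry
 *
 * \param[in] rsc              Resource that history entry is for
 * \param[in] task             Name of action that history entry is for
 * \param[in] interval_ms      Action interval (in milliseconds)
 * \param[in] is_last_failure  Whether history entry is a last_failure entry
 *
 * \return true if the failure timeout should be ignored, otherwise false
 */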
4146 	static bool
4147 	should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4148 	                              guint interval_ms, bool is_last_failure)
4149 	{
4150 	    /* Clearing failures of recurring monitors has special concerns. The
4151 	     * executor reports only changes in the monitor result, so if the
4152 	     * monitor is still active and still getting the same failure result,
4153 	     * that will go undetected after the failure is cleared.
4154 	     *
4155 	     * Also, the operation history will have the time when the recurring
4156 	     * monitor result changed to the given code, not the time when the
4157 	     * result last happened.
4158 	     *
4159 	     * @TODO We probably should clear such failures only when the failure
4160 	     * timeout has passed since the last occurrence of the failed result.
4161 	     * However we don't record that information. We could maybe approximate
4162 	     * that by clearing only if there is a more recent successful monitor or
4163 	     * stop result, but we don't even have that information at this point
4164 	     * since we are still unpacking the resource's operation history.
4165 	     *
4166 	     * This is especially important for remote connection resources with a
4167 	     * reconnect interval, so in that case, we skip clearing failures
4168 	     * if the remote node hasn't been fenced.
4169 	     */
4170 	    if ((rsc->priv->remote_reconnect_ms > 0U)
4171 	        && pcmk__is_set(rsc->priv->scheduler->flags,
4172 	                        pcmk__sched_fencing_enabled)
4173 	        && (interval_ms != 0)
4174 	        && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4175 	
4176 	        pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4177 	                                                  rsc->id);
4178 	
4179 	        if (remote_node && !pcmk__is_set(remote_node->priv->flags,
4180 	                                         pcmk__node_remote_fenced)) {
4181 	            if (is_last_failure) {
4182 	                pcmk__info("Waiting to clear monitor failure for remote node %s"
4183 	                           " until fencing has occurred",
4184 	                           rsc->id);
4185 	            }
4186 	            return TRUE;
4187 	        }
4188 	    }
4189 	    return FALSE;
4190 	}
4191 	
4192 	/*!
4193 	 * \internal
4194 	 * \brief Check operation age and schedule failure clearing when appropriate
4195 	 *
4196 	 * This function has two distinct purposes. The first is to check whether an
4197 	 * operation history entry is expired (i.e. the resource has a failure timeout,
4198 	 * the entry is older than the timeout, and the resource either has no fail
4199 	 * count or its fail count is entirely older than the timeout). The second is to
 * schedule fail count clearing when appropriate, that is, when the operation
 * is expired and either the resource has an expired fail count, the operation
 * is a last_failure for a remote connection resource with a reconnect
 * interval, or the operation is a last_failure for a start or monitor whose
 * resource parameters have changed since the operation.
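 *
 * For example (values hypothetical): with PCMK_META_FAILURE_TIMEOUT set to
 * 60s, a failed monitor whose PCMK_XA_LAST_RC_CHANGE timestamp is t=100 is
 * considered expired at any time t >= 160, unless an unexpired fail count or
 * should_ignore_failure_timeout() prevents that.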
4205 	 *
4206 	 * \param[in,out] history  Parsed action result history
4207 	 *
4208 	 * \return true if operation history entry is expired, otherwise false
4209 	 */
4210 	static bool
4211 	check_operation_expiry(struct action_history *history)
4212 	{
4213 	    bool expired = false;
4214 	    bool is_last_failure = (history->id != NULL)
4215 	                           && g_str_has_suffix(history->id, "_last_failure_0");
4216 	    time_t last_run = 0;
4217 	    int unexpired_fail_count = 0;
4218 	    const char *clear_reason = NULL;
4219 	    const guint expiration_sec =
4220 	        pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4221 	    pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4222 	
4223 	    if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4224 	        pcmk__rsc_trace(history->rsc,
4225 	                        "Resource history entry %s on %s is not expired: "
4226 	                        "Not Installed does not expire",
4227 	                        history->id, pcmk__node_name(history->node));
4228 	        return false; // "Not installed" must always be cleared manually
4229 	    }
4230 	
4231 	    if ((expiration_sec > 0)
4232 	        && (pcmk__xe_get_time(history->xml, PCMK_XA_LAST_RC_CHANGE,
4233 	                              &last_run) == pcmk_rc_ok)) {
4234 	
4235 	        /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4236 	         * timestamp
4237 	         */
4238 	
4239 	        time_t now = pcmk__scheduler_epoch_time(scheduler);
4240 	        time_t last_failure = 0;
4241 	
4242 	        // Is this particular operation history older than the failure timeout?
4243 	        if ((now >= (last_run + expiration_sec))
4244 	            && !should_ignore_failure_timeout(history->rsc, history->task,
4245 	                                              history->interval_ms,
4246 	                                              is_last_failure)) {
4247 	            expired = true;
4248 	        }
4249 	
4250 	        // Does the resource as a whole have an unexpired fail count?
4251 	        unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4252 	                                                &last_failure,
4253 	                                                pcmk__fc_effective,
4254 	                                                history->xml);
4255 	
4256 	        // Update scheduler recheck time according to *last* failure
4257 	        pcmk__trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4258 	                    "expiration=%s last-failure@%lld",
4259 	                    history->id, (long long) last_run, (expired? "" : "not "),
4260 	                    (long long) now, unexpired_fail_count,
4261 	                    pcmk__readable_interval(expiration_sec * 1000),
4262 	                    (long long) last_failure);
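        // Compute the time at which the last failure will have expired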
4263 	        last_failure += expiration_sec + 1;
4264 	        if (unexpired_fail_count && (now < last_failure)) {
4265 	            pcmk__update_recheck_time(last_failure, scheduler,
4266 	                                      "fail count expiration");
4267 	        }
4268 	    }
4269 	
4270 	    if (expired) {
4271 	        if (pe_get_failcount(history->node, history->rsc, NULL,
4272 	                             pcmk__fc_default, history->xml)) {
4273 	            // There is a fail count ignoring timeout
4274 	
4275 	            if (unexpired_fail_count == 0) {
4276 	                // There is no fail count considering timeout
4277 	                clear_reason = "it expired";
4278 	
4279 	            } else {
4280 	                /* This operation is old, but there is an unexpired fail count.
4281 	                 * In a properly functioning cluster, this should only be
4282 	                 * possible if this operation is not a failure (otherwise the
4283 	                 * fail count should be expired too), so this is really just a
4284 	                 * failsafe.
4285 	                 */
4286 	                pcmk__rsc_trace(history->rsc,
4287 	                                "Resource history entry %s on %s is not "
4288 	                                "expired: Unexpired fail count",
4289 	                                history->id, pcmk__node_name(history->node));
4290 	                expired = false;
4291 	            }
4292 	
4293 	        } else if (is_last_failure
4294 	                   && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4295 	            /* Clear any expired last failure when reconnect interval is set,
4296 	             * even if there is no fail count.
4297 	             */
4298 	            clear_reason = "reconnect interval is set";
4299 	        }
4300 	    }
4301 	
4302 	    if (!expired && is_last_failure
4303 	        && should_clear_for_param_change(history->xml, history->task,
4304 	                                         history->rsc, history->node)) {
4305 	        clear_reason = "resource parameters have changed";
4306 	    }
4307 	
4308 	    if (clear_reason != NULL) {
4309 	        pcmk_action_t *clear_op = NULL;
4310 	
4311 	        // Schedule clearing of the fail count
4312 	        clear_op = pe__clear_failcount(history->rsc, history->node,
4313 	                                       clear_reason, scheduler);
4314 	
4315 	        if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4316 	            && (history->rsc->priv->remote_reconnect_ms > 0)) {
4317 	            /* If we're clearing a remote connection due to a reconnect
4318 	             * interval, we want to wait until any scheduled fencing
4319 	             * completes.
4320 	             *
4321 	             * We could limit this to remote_node->details->unclean, but at
4322 	             * this point, that's always true (it won't be reliable until
4323 	             * after unpack_node_history() is done).
4324 	             */
4325 	            pcmk__info("Clearing %s failure will wait until any scheduled "
4326 	                       "fencing of %s completes",
4327 	                       history->task, history->rsc->id);
4328 	            order_after_remote_fencing(clear_op, history->rsc, scheduler);
4329 	        }
4330 	    }
4331 	
4332 	    if (expired && (history->interval_ms == 0)
4333 	        && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4334 	        switch (history->exit_status) {
4335 	            case PCMK_OCF_OK:
4336 	            case PCMK_OCF_NOT_RUNNING:
4337 	            case PCMK_OCF_RUNNING_PROMOTED:
4338 	            case PCMK_OCF_DEGRADED:
4339 	            case PCMK_OCF_DEGRADED_PROMOTED:
4340 	                // Don't expire probes that return these values
4341 	                pcmk__rsc_trace(history->rsc,
4342 	                                "Resource history entry %s on %s is not "
4343 	                                "expired: Probe result",
                                history->id, pcmk__node_name(history->node));
4345 	                expired = false;
4346 	                break;
4347 	        }
4348 	    }
4349 	
4350 	    return expired;
4351 	}
4352 	
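/*!
 * \internal
 * \brief Parse the expected exit status from a history entry's transition key
 *
 * The transition key has the form
 * "<action-number>:<target-rc>:<transition-id>:<uuid>", so the expected exit
 * status is its second field, extracted here via decode_transition_key().
 *
 * \param[in] xml_op  History entry XML
 *
 * \return Expected exit status if a transition key is present, otherwise -1
 */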
4353 	int
4354 	pe__target_rc_from_xml(const xmlNode *xml_op)
4355 	{
4356 	    int target_rc = 0;
4357 	    const char *key = pcmk__xe_get(xml_op, PCMK__XA_TRANSITION_KEY);
4358 	
4359 	    if (key == NULL) {
4360 	        return -1;
4361 	    }
4362 	    decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4363 	    return target_rc;
4364 	}
4365 	
4366 	/*!
4367 	 * \internal
4368 	 * \brief Update a resource's state for an action result
4369 	 *
4370 	 * \param[in,out] history       Parsed action history entry
4371 	 * \param[in]     exit_status   Exit status to base new state on
4372 	 * \param[in]     last_failure  Resource's last_failure entry, if known
4373 	 * \param[in,out] on_fail       Resource's current failure handling
4374 	 */
4375 	static void
4376 	update_resource_state(struct action_history *history, int exit_status,
4377 	                      const xmlNode *last_failure,
4378 	                      enum pcmk__on_fail *on_fail)
4379 	{
4380 	    bool clear_past_failure = false;
4381 	
4382 	    if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4383 	        || (!pcmk__is_bundled(history->rsc)
4384 	            && pcmk_xe_mask_probe_failure(history->xml))) {
4385 	        history->rsc->priv->orig_role = pcmk_role_stopped;
4386 	
4387 	    } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4388 	        clear_past_failure = true;
4389 	
4390 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4391 	                            pcmk__str_none)) {
4392 	        if ((last_failure != NULL)
4393 	            && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4394 	                            pcmk__str_none)) {
4395 	            clear_past_failure = true;
4396 	        }
4397 	        if (history->rsc->priv->orig_role < pcmk_role_started) {
4398 	            set_active(history->rsc);
4399 	        }
4400 	
4401 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4402 	        history->rsc->priv->orig_role = pcmk_role_started;
4403 	        clear_past_failure = true;
4404 	
4405 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4406 	        history->rsc->priv->orig_role = pcmk_role_stopped;
4407 	        clear_past_failure = true;
4408 	
4409 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4410 	                            pcmk__str_none)) {
4411 	        history->rsc->priv->orig_role = pcmk_role_promoted;
4412 	        clear_past_failure = true;
4413 	
4414 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4415 	                            pcmk__str_none)) {
4416 	        if (*on_fail == pcmk__on_fail_demote) {
4417 	            /* Demote clears an error only if
4418 	             * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4419 	             */
4420 	            clear_past_failure = true;
4421 	        }
4422 	        history->rsc->priv->orig_role = pcmk_role_unpromoted;
4423 	
4424 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4425 	                            pcmk__str_none)) {
4426 	        history->rsc->priv->orig_role = pcmk_role_started;
4427 	        clear_past_failure = true;
4428 	
4429 	    } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4430 	                            pcmk__str_none)) {
4431 	        unpack_migrate_to_success(history);
4432 	
4433 	    } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4434 	        pcmk__rsc_trace(history->rsc, "%s active on %s",
4435 	                        history->rsc->id, pcmk__node_name(history->node));
4436 	        set_active(history->rsc);
4437 	    }
4438 	
4439 	    if (!clear_past_failure) {
4440 	        return;
4441 	    }
4442 	
4443 	    switch (*on_fail) {
4444 	        case pcmk__on_fail_stop:
4445 	        case pcmk__on_fail_ban:
4446 	        case pcmk__on_fail_standby_node:
4447 	        case pcmk__on_fail_fence_node:
4448 	            pcmk__rsc_trace(history->rsc,
4449 	                            "%s (%s) is not cleared by a completed %s",
4450 	                            history->rsc->id, pcmk__on_fail_text(*on_fail),
4451 	                            history->task);
4452 	            break;
4453 	
4454 	        case pcmk__on_fail_block:
4455 	        case pcmk__on_fail_ignore:
4456 	        case pcmk__on_fail_demote:
4457 	        case pcmk__on_fail_restart:
4458 	        case pcmk__on_fail_restart_container:
4459 	            *on_fail = pcmk__on_fail_ignore;
4460 	            pe__set_next_role(history->rsc, pcmk_role_unknown,
4461 	                              "clear past failures");
4462 	            break;
4463 	
4464 	        case pcmk__on_fail_reset_remote:
4465 	            if (history->rsc->priv->remote_reconnect_ms == 0U) {
4466 	                /* With no reconnect interval, the connection is allowed to
4467 	                 * start again after the remote node is fenced and
4468 	                 * completely stopped. (With a reconnect interval, we wait
4469 	                 * for the failure to be cleared entirely before attempting
4470 	                 * to reconnect.)
4471 	                 */
4472 	                *on_fail = pcmk__on_fail_ignore;
4473 	                pe__set_next_role(history->rsc, pcmk_role_unknown,
4474 	                                  "clear past failures and reset remote");
4475 	            }
4476 	            break;
4477 	    }
4478 	}
4479 	
4480 	/*!
4481 	 * \internal
4482 	 * \brief Check whether a given history entry matters for resource state
4483 	 *
4484 	 * \param[in] history  Parsed action history entry
4485 	 *
4486 	 * \return true if action can affect resource state, otherwise false
4487 	 */
4488 	static inline bool
4489 	can_affect_state(struct action_history *history)
4490 	{
    return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
                            PCMK_ACTION_START, PCMK_ACTION_STOP,
                            PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
                            PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
                            "asyncmon", NULL);
4496 	}
4497 	
4498 	/*!
4499 	 * \internal
4500 	 * \brief Unpack execution/exit status and exit reason from a history entry
4501 	 *
4502 	 * \param[in,out] history  Action history entry to unpack
4503 	 *
4504 	 * \return Standard Pacemaker return code
4505 	 */
4506 	static int
4507 	unpack_action_result(struct action_history *history)
4508 	{
4509 	    if ((pcmk__xe_get_int(history->xml, PCMK__XA_OP_STATUS,
4510 	                          &(history->execution_status)) != pcmk_rc_ok)
4511 	        || (history->execution_status < PCMK_EXEC_PENDING)
4512 	        || (history->execution_status > PCMK_EXEC_MAX)
4513 	        || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4514 	        pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4515 	                         "with invalid " PCMK__XA_OP_STATUS " '%s'",
4516 	                         history->id, history->rsc->id,
4517 	                         pcmk__node_name(history->node),
4518 	                         pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_OP_STATUS),
4519 	                                 ""));
4520 	        return pcmk_rc_unpack_error;
4521 	    }
4522 	    if ((pcmk__xe_get_int(history->xml, PCMK__XA_RC_CODE,
4523 	                          &(history->exit_status)) != pcmk_rc_ok)
4524 	        || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4525 	        pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4526 	                         "with invalid " PCMK__XA_RC_CODE " '%s'",
4527 	                         history->id, history->rsc->id,
4528 	                         pcmk__node_name(history->node),
4529 	                         pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_RC_CODE),
4530 	                                 ""));
4531 	        return pcmk_rc_unpack_error;
4532 	    }
4533 	    history->exit_reason = pcmk__xe_get(history->xml, PCMK_XA_EXIT_REASON);
4534 	    return pcmk_rc_ok;
4535 	}
4536 	
4537 	/*!
4538 	 * \internal
4539 	 * \brief Process an action history entry whose result expired
4540 	 *
4541 	 * \param[in,out] history           Parsed action history entry
4542 	 * \param[in]     orig_exit_status  Action exit status before remapping
4543 	 *
4544 	 * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4545 	 *         entry needs no further processing)
4546 	 */
4547 	static int
4548 	process_expired_result(struct action_history *history, int orig_exit_status)
4549 	{
4550 	    if (!pcmk__is_bundled(history->rsc)
4551 	        && pcmk_xe_mask_probe_failure(history->xml)
4552 	        && (orig_exit_status != history->expected_exit_status)) {
4553 	
4554 	        if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4555 	            history->rsc->priv->orig_role = pcmk_role_unknown;
4556 	        }
4557 	        pcmk__trace("Ignoring resource history entry %s for probe of %s on %s: "
4558 	                    "Masked failure expired",
4559 	                    history->id, history->rsc->id,
4560 	                    pcmk__node_name(history->node));
4561 	        return pcmk_rc_ok;
4562 	    }
4563 	
4564 	    if (history->exit_status == history->expected_exit_status) {
4565 	        return pcmk_rc_undetermined; // Only failures expire
4566 	    }
4567 	
4568 	    if (history->interval_ms == 0) {
4569 	        pcmk__notice("Ignoring resource history entry %s for %s of %s on %s: "
4570 	                     "Expired failure",
4571 	                     history->id, history->task, history->rsc->id,
4572 	                     pcmk__node_name(history->node));
4573 	        return pcmk_rc_ok;
4574 	    }
4575 	
4576 	    if (history->node->details->online && !history->node->details->unclean) {
4577 	        /* Reschedule the recurring action. schedule_cancel() won't work at
4578 	         * this stage, so as a hacky workaround, forcibly change the restart
4579 	         * digest so pcmk__check_action_config() does what we want later.
4580 	         *
4581 	         * @TODO We should skip this if there is a newer successful monitor.
4582 	         *       Also, this causes rescheduling only if the history entry
4583 	         *       has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4584 	         *       scheduler regression test doesn't, but that may not be a
4585 	         *       realistic scenario in production).
4586 	         */
4587 	        pcmk__notice("Rescheduling %s-interval %s of %s on %s after failure "
4588 	                     "expired",
4589 	                     pcmk__readable_interval(history->interval_ms),
4590 	                     history->task, history->rsc->id,
4591 	                     pcmk__node_name(history->node));
4592 	        pcmk__xe_set(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4593 	                     "calculated-failure-timeout");
4594 	        return pcmk_rc_ok;
4595 	    }
4596 	
4597 	    return pcmk_rc_undetermined;
4598 	}
4599 	
4600 	/*!
4601 	 * \internal
4602 	 * \brief Process a masked probe failure
4603 	 *
4604 	 * \param[in,out] history           Parsed action history entry
4605 	 * \param[in]     orig_exit_status  Action exit status before remapping
4606 	 * \param[in]     last_failure      Resource's last_failure entry, if known
4607 	 * \param[in,out] on_fail           Resource's current failure handling
4608 	 */
4609 	static void
4610 	mask_probe_failure(struct action_history *history, int orig_exit_status,
4611 	                   const xmlNode *last_failure,
4612 	                   enum pcmk__on_fail *on_fail)
4613 	{
4614 	    pcmk_resource_t *ban_rsc = history->rsc;
4615 	
4616 	    if (!pcmk__is_set(history->rsc->flags, pcmk__rsc_unique)) {
4617 	        ban_rsc = uber_parent(history->rsc);
4618 	    }
4619 	
4620 	    pcmk__notice("Treating probe result '%s' for %s on %s as 'not running'",
4621 	                 crm_exit_str(orig_exit_status), history->rsc->id,
4622 	                 pcmk__node_name(history->node));
4623 	    update_resource_state(history, history->expected_exit_status, last_failure,
4624 	                          on_fail);
4625 	    pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4626 	
4627 	    record_failed_op(history);
4628 	    resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4629 	                      "masked-probe-failure", ban_rsc->priv->scheduler);
4630 	}
4631 	
4632 	/*!
 * \internal
 * \brief Check whether a given failure is for a given pending action
4634 	 *
4635 	 * \param[in] history       Parsed history entry for pending action
4636 	 * \param[in] last_failure  Resource's last_failure entry, if known
4637 	 *
 * \return true if \p last_failure is the failure of the pending action in
 *         \p history, otherwise false
4640 	 * \note Both \p history and \p last_failure must come from the same
4641 	 *       \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4642 	 *       the same.
4643 	 */
4644 	static bool
4645 	failure_is_newer(const struct action_history *history,
4646 	                 const xmlNode *last_failure)
4647 	{
4648 	    guint failure_interval_ms = 0U;
4649 	    long long failure_change = 0LL;
4650 	    long long this_change = 0LL;
4651 	
4652 	    if (last_failure == NULL) {
4653 	        return false; // Resource has no last_failure entry
4654 	    }
4655 	
4656 	    if (!pcmk__str_eq(history->task,
4657 	                      pcmk__xe_get(last_failure, PCMK_XA_OPERATION),
4658 	                      pcmk__str_none)) {
4659 	        return false; // last_failure is for different action
4660 	    }
4661 	
4662 	    if ((pcmk__xe_get_guint(last_failure, PCMK_META_INTERVAL,
4663 	                            &failure_interval_ms) != pcmk_rc_ok)
4664 	        || (history->interval_ms != failure_interval_ms)) {
4665 	        return false; // last_failure is for action with different interval
4666 	    }
4667 	
4668 	    if ((pcmk__scan_ll(pcmk__xe_get(history->xml, PCMK_XA_LAST_RC_CHANGE),
4669 	                       &this_change, 0LL) != pcmk_rc_ok)
4670 	        || (pcmk__scan_ll(pcmk__xe_get(last_failure, PCMK_XA_LAST_RC_CHANGE),
4671 	                          &failure_change, 0LL) != pcmk_rc_ok)
4672 	        || (failure_change < this_change)) {
4673 	        return false; // Failure is not known to be newer
4674 	    }
4675 	
4676 	    return true;
4677 	}
4678 	
4679 	/*!
4680 	 * \internal
4681 	 * \brief Update a resource's role etc. for a pending action
4682 	 *
4683 	 * \param[in,out] history       Parsed history entry for pending action
4684 	 * \param[in]     last_failure  Resource's last_failure entry, if known
4685 	 */
4686 	static void
4687 	process_pending_action(struct action_history *history,
4688 	                       const xmlNode *last_failure)
4689 	{
4690 	    /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4691 	     * and there might be a RSC_monitor_INTERVAL entry with the last successful
4692 	     * or pending result.
4693 	     *
4694 	     * If last_failure contains the failure of the pending recurring monitor
4695 	     * we're processing here, and is newer, the action is no longer pending.
4696 	     * (Pending results have call ID -1, which sorts last, so the last failure
4697 	     * if any should be known.)
4698 	     */
4699 	    if (failure_is_newer(history, last_failure)) {
4700 	        return;
4701 	    }
4702 	
4703 	    if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4704 	        pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4705 	        set_active(history->rsc);
4706 	
4707 	    } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4708 	        history->rsc->priv->orig_role = pcmk_role_promoted;
4709 	
4710 	    } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4711 	               && history->node->details->unclean) {
        /* A migrate_to action is pending on an unclean source, so force a stop
4713 	         * on the target.
4714 	         */
4715 	        const char *migrate_target = NULL;
4716 	        pcmk_node_t *target = NULL;
4717 	
4718 	        migrate_target = pcmk__xe_get(history->xml, PCMK__META_MIGRATE_TARGET);
4719 	        target = pcmk_find_node(history->rsc->priv->scheduler,
4720 	                                migrate_target);
4721 	        if (target != NULL) {
4722 	            stop_action(history->rsc, target, FALSE);
4723 	        }
4724 	    }
4725 	
4726 	    if (history->rsc->priv->pending_action != NULL) {
4727 	        /* There should never be multiple pending actions, but as a failsafe,
4728 	         * just remember the first one processed for display purposes.
4729 	         */
4730 	        return;
4731 	    }
4732 	
4733 	    if (pcmk_is_probe(history->task, history->interval_ms)) {
4734 	        /* Pending probes are currently never displayed, even if pending
4735 	         * operations are requested. If we ever want to change that,
4736 	         * enable the below and the corresponding part of
4737 	         * native.c:native_pending_action().
4738 	         */
4739 	#if 0
        history->rsc->priv->pending_action = strdup("probe");
        history->rsc->priv->pending_node = history->node;
4742 	#endif
4743 	    } else {
4744 	        history->rsc->priv->pending_action = strdup(history->task);
4745 	        history->rsc->priv->pending_node = history->node;
4746 	    }
4747 	}
4748 	
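/*!
 * \internal
 * \brief Unpack a resource operation history entry, updating resource state
 *
 * A history entry looks roughly like this (illustrative only, with many
 * attributes omitted):
 *
 *   <lrm_rsc_op id="myrsc_last_0" operation="start" interval="0" call-id="5"
 *               rc-code="0" op-status="0"
 *               transition-key="3:0:0:00000000-0000-0000-0000-000000000000"/>
 *
 * \param[in,out] rsc           Resource that operation history is for
 * \param[in,out] node          Node that operation history is for
 * \param[in,out] xml_op        Operation history entry XML
 * \param[in,out] last_failure  Where resource's last_failure entry is tracked
 * \param[in,out] on_fail       Resource's current failure handling
 */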
4749 	static void
4750 	unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4751 	              xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4752 	{
4753 	    int old_rc = 0;
4754 	    bool expired = false;
4755 	    pcmk_resource_t *parent = rsc;
4756 	    enum rsc_role_e fail_role = pcmk_role_unknown;
4757 	    enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4758 	
4759 	    struct action_history history = {
4760 	        .rsc = rsc,
4761 	        .node = node,
4762 	        .xml = xml_op,
4763 	        .execution_status = PCMK_EXEC_UNKNOWN,
4764 	    };
4765 	
4766 	    CRM_CHECK(rsc && node && xml_op, return);
4767 	
4768 	    history.id = pcmk__xe_id(xml_op);
4769 	    if (history.id == NULL) {
4770 	        pcmk__config_err("Ignoring resource history entry for %s on %s "
4771 	                         "without ID", rsc->id, pcmk__node_name(node));
4772 	        return;
4773 	    }
4774 	
4775 	    // Task and interval
4776 	    history.task = pcmk__xe_get(xml_op, PCMK_XA_OPERATION);
4777 	    if (history.task == NULL) {
4778 	        pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4779 	                         "without " PCMK_XA_OPERATION,
4780 	                         history.id, rsc->id, pcmk__node_name(node));
4781 	        return;
4782 	    }
4783 	    pcmk__xe_get_guint(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4784 	    if (!can_affect_state(&history)) {
4785 	        pcmk__rsc_trace(rsc,
4786 	                        "Ignoring resource history entry %s for %s on %s "
4787 	                        "with irrelevant action '%s'",
4788 	                        history.id, rsc->id, pcmk__node_name(node),
4789 	                        history.task);
4790 	        return;
4791 	    }
4792 	
4793 	    if (unpack_action_result(&history) != pcmk_rc_ok) {
4794 	        return; // Error already logged
4795 	    }
4796 	
4797 	    history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4798 	    history.key = pcmk__xe_history_key(xml_op);
4799 	    pcmk__xe_get_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4800 	
4801 	    pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4802 	                    history.id, history.task, history.call_id,
4803 	                    pcmk__node_name(node),
4804 	                    pcmk_exec_status_str(history.execution_status),
4805 	                    crm_exit_str(history.exit_status));
4806 	
4807 	    if (node->details->unclean) {
4808 	        pcmk__rsc_trace(rsc,
4809 	                        "%s is running on %s, which is unclean (further action "
4810 	                        "depends on value of stop's on-fail attribute)",
4811 	                        rsc->id, pcmk__node_name(node));
4812 	    }
4813 	
4814 	    expired = check_operation_expiry(&history);
4815 	    old_rc = history.exit_status;
4816 	
4817 	    remap_operation(&history, on_fail, expired);
4818 	
4819 	    if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4820 	        goto done;
4821 	    }
4822 	
4823 	    if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4824 	        mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4825 	        goto done;
4826 	    }
4827 	
4828 	    if (!pcmk__is_set(rsc->flags, pcmk__rsc_unique)) {
4829 	        parent = uber_parent(rsc);
4830 	    }
4831 	
4832 	    switch (history.execution_status) {
4833 	        case PCMK_EXEC_PENDING:
4834 	            process_pending_action(&history, *last_failure);
4835 	            goto done;
4836 	
4837 	        case PCMK_EXEC_DONE:
4838 	            update_resource_state(&history, history.exit_status, *last_failure,
4839 	                                  on_fail);
4840 	            goto done;
4841 	
4842 	        case PCMK_EXEC_NOT_INSTALLED:
4843 	            unpack_failure_handling(&history, &failure_strategy, &fail_role);
4844 	            if (failure_strategy == pcmk__on_fail_ignore) {
4845 	                pcmk__warn("Cannot ignore failed %s of %s on %s: Resource "
4846 	                           "agent doesn't exist "
4847 	                           QB_XS " status=%d rc=%d id=%s",
4848 	                           history.task, rsc->id, pcmk__node_name(node),
4849 	                           history.execution_status, history.exit_status,
4850 	                           history.id);
                /* Also ensure the resource gets marked as pcmk__rsc_failed
                 * later, so that it is displayed as "FAILED"
                 */
4854 	                *on_fail = pcmk__on_fail_ban;
4855 	            }
4856 	            resource_location(parent, node, -PCMK_SCORE_INFINITY,
4857 	                              "hard-error", rsc->priv->scheduler);
4858 	            unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4859 	                                  last_failure, on_fail);
4860 	            goto done;
4861 	
4862 	        case PCMK_EXEC_NOT_CONNECTED:
4863 	            if (pcmk__is_pacemaker_remote_node(node)
4864 	                && pcmk__is_set(node->priv->remote->flags,
4865 	                                pcmk__rsc_managed)) {
4866 	                /* We should never get into a situation where a managed remote
4867 	                 * connection resource is considered OK but a resource action
4868 	                 * behind the connection gets a "not connected" status. But as a
4869 	                 * fail-safe in case a bug or unusual circumstances do lead to
4870 	                 * that, ensure the remote connection is considered failed.
4871 	                 */
4872 	                pcmk__set_rsc_flags(node->priv->remote,
4873 	                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
4874 	            }
4875 	            break; // Not done, do error handling
4876 	
4877 	        case PCMK_EXEC_ERROR:
4878 	        case PCMK_EXEC_ERROR_HARD:
4879 	        case PCMK_EXEC_ERROR_FATAL:
4880 	        case PCMK_EXEC_TIMEOUT:
4881 	        case PCMK_EXEC_NOT_SUPPORTED:
4882 	        case PCMK_EXEC_INVALID:
4883 	            break; // Not done, do error handling
4884 	
4885 	        default: // No other value should be possible at this point
4886 	            break;
4887 	    }
4888 	
4889 	    unpack_failure_handling(&history, &failure_strategy, &fail_role);
4890 	    if ((failure_strategy == pcmk__on_fail_ignore)
4891 	        || ((failure_strategy == pcmk__on_fail_restart_container)
4892 	            && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4893 	
4894 	        char *last_change_s = last_change_str(xml_op);
4895 	
4896 	        pcmk__warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4897 	                   QB_XS " %s",
4898 	                   history.task, crm_exit_str(history.exit_status),
4899 	                   (pcmk__str_empty(history.exit_reason)? "" : ": "),
4900 	                   pcmk__s(history.exit_reason, ""), rsc->id,
4901 	                   pcmk__node_name(node), last_change_s, history.id);
4902 	        free(last_change_s);
4903 	
4904 	        update_resource_state(&history, history.expected_exit_status,
4905 	                              *last_failure, on_fail);
4906 	        pcmk__xe_set(xml_op, PCMK_XA_UNAME, node->priv->name);
4907 	        pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);
4908 	
4909 	        record_failed_op(&history);
4910 	
4911 	        if ((failure_strategy == pcmk__on_fail_restart_container)
4912 	            && (*on_fail <= pcmk__on_fail_restart)) {
4913 	            *on_fail = failure_strategy;
4914 	        }
4915 	
4916 	    } else {
4917 	        unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4918 	                              last_failure, on_fail);
4919 	
4920 	        if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4921 	            uint8_t log_level = LOG_ERR;
4922 	
4923 	            if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4924 	                log_level = LOG_NOTICE;
4925 	            }
4926 	            do_crm_log(log_level,
4927 	                       "Preventing %s from restarting on %s because "
4928 	                       "of hard failure (%s%s%s) " QB_XS " %s",
4929 	                       parent->id, pcmk__node_name(node),
4930 	                       crm_exit_str(history.exit_status),
4931 	                       (pcmk__str_empty(history.exit_reason)? "" : ": "),
4932 	                       pcmk__s(history.exit_reason, ""), history.id);
4933 	            resource_location(parent, node, -PCMK_SCORE_INFINITY,
4934 	                              "hard-error", rsc->priv->scheduler);
4935 	
4936 	        } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4937 	            pcmk__sched_err(rsc->priv->scheduler,
4938 	                            "Preventing %s from restarting anywhere because "
4939 	                            "of fatal failure (%s%s%s) " QB_XS " %s",
4940 	                            parent->id, crm_exit_str(history.exit_status),
4941 	                            (pcmk__str_empty(history.exit_reason)? "" : ": "),
4942 	                            pcmk__s(history.exit_reason, ""), history.id);
4943 	            resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
4944 	                              "fatal-error", rsc->priv->scheduler);
4945 	        }
4946 	    }
4947 	
4948 	done:
4949 	    pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4950 	                    rsc->id, pcmk__node_name(node), history.id,
4951 	                    pcmk_role_text(rsc->priv->orig_role),
4952 	                    pcmk_role_text(rsc->priv->next_role));
4953 	}
4954 	
4955 	/*!
4956 	 * \internal
4957 	 * \brief Insert a node attribute with value into a \c GHashTable
4958 	 *
4959 	 * \param[in,out] key        Key to insert (either freed or owned by
4960 	 *                           \p user_data upon return)
4961 	 * \param[in]     value      Value to insert (owned by \p user_data upon return)
 * \param[in]     user_data  \c GHashTable to insert into
 *
 * \return \c TRUE, so that \c g_hash_table_foreach_steal() removes the entry
 *         from the source table
 */
4964 	static gboolean
4965 	insert_attr(gpointer key, gpointer value, gpointer user_data)
4966 	{
4967 	    GHashTable *table = user_data;
4968 	
4969 	    g_hash_table_insert(table, key, value);
4970 	    return TRUE;
4971 	}
4972 	
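/*!
 * \internal
 * \brief Unpack a node's attributes and utilization from its XML
 *
 * \param[in]     xml_obj    Node's XML (with instance attributes and
 *                           utilization)
 * \param[in,out] node       Node to add attributes to
 * \param[in]     overwrite  Whether unpacked values may overwrite existing ones
 * \param[in,out] scheduler  Scheduler data
 */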
4973 	static void
4974 	add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4975 	               pcmk_scheduler_t *scheduler)
4976 	{
4977 	    const char *cluster_name = NULL;
4978 	    const char *dc_id = pcmk__xe_get(scheduler->input, PCMK_XA_DC_UUID);
4979 	    const pcmk_rule_input_t rule_input = {
4980 	        .now = scheduler->priv->now,
4981 	    };
4982 	
4983 	    pcmk__insert_dup(node->priv->attrs,
4984 	                     CRM_ATTR_UNAME, node->priv->name);
4985 	
4986 	    pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);
4987 	
4988 	    if ((scheduler->dc_node == NULL)
4989 	        && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {
4990 	
4991 	        scheduler->dc_node = node;
4992 	        pcmk__insert_dup(node->priv->attrs,
4993 	                         CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
4994 	
4995 	    } else if (!pcmk__same_node(node, scheduler->dc_node)) {
4996 	        pcmk__insert_dup(node->priv->attrs,
4997 	                         CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
4998 	    }
4999 	
5000 	    cluster_name = g_hash_table_lookup(scheduler->priv->options,
5001 	                                       PCMK_OPT_CLUSTER_NAME);
5002 	    if (cluster_name) {
5003 	        pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
5004 	                         cluster_name);
5005 	    }
5006 	
5007 	    if (overwrite) {
5008 	        /* @TODO Try to reorder some unpacking so that we don't need the
5009 	         * overwrite argument or to unpack into a temporary table
5010 	         */
5011 	        GHashTable *unpacked = pcmk__strkey_table(free, free);
5012 	
5013 	        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
5014 	                                   &rule_input, unpacked, NULL, scheduler);
5015 	        g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
5016 	        g_hash_table_destroy(unpacked);
5017 	
5018 	    } else {
5019 	        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
5020 	                                   &rule_input, node->priv->attrs, NULL,
5021 	                                   scheduler);
5022 	    }
5023 	
5024 	    pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_input,
5025 	                               node->priv->utilization, NULL, scheduler);
5026 	
5027 	    if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5028 	                        pcmk__rsc_node_current) == NULL) {
5029 	        const char *site_name = pcmk__node_attr(node, "site-name", NULL,
5030 	                                                pcmk__rsc_node_current);
5031 	
5032 	        if (site_name) {
5033 	            pcmk__insert_dup(node->priv->attrs,
5034 	                             CRM_ATTR_SITE_NAME, site_name);
5035 	
5036 	        } else if (cluster_name) {
5037 	            /* Default to cluster-name if unset */
5038 	            pcmk__insert_dup(node->priv->attrs,
5039 	                             CRM_ATTR_SITE_NAME, cluster_name);
5040 	        }
5041 	    }
5042 	}
5043 	
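/*!
 * \internal
 * \brief Extract a resource's operation history entries from its XML
 *
 * \param[in]     node           Name of node that history is for
 * \param[in]     rsc            ID of resource that history is for
 * \param[in,out] rsc_entry      Resource's history XML (normally a
 *                               PCMK__XE_LRM_RESOURCE element)
 * \param[in]     active_filter  Whether to return only active operations
 *
 * \return History entries sorted by call ID, or only the active ones if
 *         \p active_filter is true
 */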
5044 	static GList *
extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry,
                   gboolean active_filter)
5046 	{
5047 	    int counter = -1;
5048 	    int stop_index = -1;
5049 	    int start_index = -1;
5050 	
5051 	    xmlNode *rsc_op = NULL;
5052 	
5053 	    GList *gIter = NULL;
5054 	    GList *op_list = NULL;
5055 	    GList *sorted_op_list = NULL;
5056 	
    /* extract operations */
5060 	
5061 	    for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
5062 	                                       NULL);
5063 	         rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5064 	
5065 	        pcmk__xe_set(rsc_op, PCMK_XA_RESOURCE, rsc);
5066 	        pcmk__xe_set(rsc_op, PCMK_XA_UNAME, node);
5067 	        op_list = g_list_prepend(op_list, rsc_op);
5068 	    }
5069 	
5070 	    if (op_list == NULL) {
5071 	        /* if there are no operations, there is nothing to do */
5072 	        return NULL;
5073 	    }
5074 	
5075 	    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5076 	
5077 	    /* create active recurring operations as optional */
5078 	    if (active_filter == FALSE) {
5079 	        return sorted_op_list;
5080 	    }
5081 	
5082 	    op_list = NULL;
5083 	
5084 	    calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5085 	
5086 	    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5087 	        xmlNode *rsc_op = (xmlNode *) gIter->data;
5088 	
5089 	        counter++;
5090 	
5091 	        if (start_index < stop_index) {
5092 	            pcmk__trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5093 	            break;
5094 	
5095 	        } else if (counter < start_index) {
5096 	            pcmk__trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5097 	            continue;
5098 	        }
5099 	        op_list = g_list_append(op_list, rsc_op);
5100 	    }
5101 	
5102 	    g_list_free(sorted_op_list);
5103 	    return op_list;
5104 	}
5105 	
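/*!
 * \brief Find operation history entries in the CIB status section
 *
 * \param[in]     rsc            If not NULL, include only history for the
 *                               resource with this ID
 * \param[in]     node           If not NULL, include only history for the
 *                               node with this name
 * \param[in]     active_filter  Whether to include only active operations
 * \param[in,out] scheduler      Scheduler data
 *
 * \return List of history entry XML (owned by \p scheduler->input; free the
 *         list with \c g_list_free() without freeing its contents)
 *
 * \note As a side effect, this updates the online status of each node whose
 *       history is examined.
 */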
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
                pcmk_scheduler_t *scheduler)
{
    GList *output = NULL;
    GList *intermediate = NULL;

    xmlNode *tmp = NULL;
    xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
                                           NULL, NULL);

    pcmk_node_t *this_node = NULL;

    xmlNode *node_state = NULL;

    CRM_CHECK(status != NULL, return NULL);

    for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
                                           NULL);
         node_state != NULL;
         node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {

        const char *uname = pcmk__xe_get(node_state, PCMK_XA_UNAME);

        if ((node != NULL) && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
            continue;
        }

        this_node = pcmk_find_node(scheduler, uname);
        if (this_node == NULL) {
            // Log the unexpected lookup failure, then skip this entry
            CRM_LOG_ASSERT(this_node != NULL);
            continue;

        } else if (pcmk__is_pacemaker_remote_node(this_node)) {
            determine_remote_online_status(scheduler, this_node);

        } else {
            determine_online_status(node_state, this_node, scheduler);
        }

        if (this_node->details->online
            || pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {

            /* Offline nodes run no resources if fencing is disabled. If
             * fencing is enabled, we need to ensure that resource start
             * events happen after the fencing event.
             */
            xmlNode *lrm_rsc = NULL;

            tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL);
            tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
                                       NULL);

            for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
                                                NULL, NULL);
                 lrm_rsc != NULL;
                 lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {

                const char *rsc_id = pcmk__xe_get(lrm_rsc, PCMK_XA_ID);

                if ((rsc != NULL)
                    && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
                    continue;
                }

                intermediate = extract_operations(uname, rsc_id, lrm_rsc,
                                                  active_filter);
                output = g_list_concat(output, intermediate);
            }
        }
    }

    return output;
}
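
/* A minimal usage sketch (hypothetical caller; "rsc1" and the populated
 * scheduler object are assumptions for illustration): collect every history
 * entry for resource "rsc1" across all nodes. The returned list borrows its
 * XML from scheduler->input, so only the list itself is freed:
 *
 *     GList *ops = find_operations("rsc1", NULL, TRUE, scheduler);
 *
 *     for (GList *iter = ops; iter != NULL; iter = iter->next) {
 *         xmlNode *rsc_op = iter->data;
 *         const char *task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
 *
 *         crm_info("Found history entry %s for action %s",
 *                  pcmk__xe_id(rsc_op), pcmk__s(task, "unknown"));
 *     }
 *     g_list_free(ops);
 */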