1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <time.h>
16
17 #include <glib.h>
18 #include <libxml/tree.h> // xmlNode
19 #include <libxml/xpath.h> // xmlXPathObject, etc.
20
21 #include <crm/crm.h>
22 #include <crm/services.h>
23 #include <crm/common/xml.h>
24
25 #include <crm/common/util.h>
26 #include <crm/pengine/internal.h>
27 #include <pe_status_private.h>
28
29 // A (parsed) resource action history entry
30 struct action_history {
31 pcmk_resource_t *rsc; // Resource that history is for
32 pcmk_node_t *node; // Node that history is for
33 xmlNode *xml; // History entry XML
34
35 // Parsed from entry XML
36 const char *id; // XML ID of history entry
37 const char *key; // Operation key of action
38 const char *task; // Action name
39 const char *exit_reason; // Exit reason given for result
40 guint interval_ms; // Action interval
41 int call_id; // Call ID of action
42 int expected_exit_status; // Expected exit status of action
43 int exit_status; // Actual exit status of action
44 int execution_status; // Execution status of action
45 };
46
47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
48 * pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags(), so that the
49 * flag is stringified more readably in log messages.
50 */
51 #define set_config_flag(scheduler, option, flag) do { \
52 GHashTable *config_hash = (scheduler)->priv->options; \
53 const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
54 \
55 if (scf_value != NULL) { \
56 if (pcmk__is_true(scf_value)) { \
57 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
58 LOG_TRACE, "Scheduler", \
59 crm_system_name, (scheduler)->flags, \
60 (flag), #flag); \
61 } else { \
62 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
63 LOG_TRACE, "Scheduler", \
64 crm_system_name, (scheduler)->flags, \
65 (flag), #flag); \
66 } \
67 } \
68 } while(0)
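/* Illustrative usage (a sketch; see the actual calls in unpack_config()
 * below):
 *
 *     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 *                     pcmk__sched_in_maintenance);
 *
 * This looks up the option in scheduler->priv->options and, if a value is
 * present, sets or clears pcmk__sched_in_maintenance in scheduler->flags
 * according to pcmk__is_true().
 */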
69
70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71 xmlNode *xml_op, xmlNode **last_failure,
72 enum pcmk__on_fail *failed);
73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74 pcmk_node_t *this_node);
75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76 bool overwrite, pcmk_scheduler_t *scheduler);
77 static void determine_online_status(const xmlNode *node_state,
78 pcmk_node_t *this_node,
79 pcmk_scheduler_t *scheduler);
80
81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
82 pcmk_scheduler_t *scheduler);
83
84
85 /*!
86 * \internal
87 * \brief Check whether a node is a dangling guest node
88 *
89 * \param[in] node Node to check
90 *
91 * \return true if \p node had a Pacemaker Remote connection resource with a
92 * launcher that was removed from the CIB, otherwise false.
93 */
94 static bool
95 is_dangling_guest_node(pcmk_node_t *node)
96 {
97 return pcmk__is_pacemaker_remote_node(node)
98 && (node->priv->remote != NULL)
99 && (node->priv->remote->priv->launcher == NULL)
100 && pcmk__is_set(node->priv->remote->flags,
101 pcmk__rsc_removed_launched);
102 }
103
104 /*!
105 * \brief Schedule a fence action for a node
106 *
107 * \param[in,out] scheduler Scheduler data
108 * \param[in,out] node Node to fence
109 * \param[in] reason Text description of why fencing is needed
110 * \param[in] priority_delay Whether to consider
111 * \c PCMK_OPT_PRIORITY_FENCING_DELAY
112 */
113 void
114 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
115 const char *reason, bool priority_delay)
116 {
117 CRM_CHECK(node, return);
118
119 if (pcmk__is_guest_or_bundle_node(node)) {
120 // Fence a guest or bundle node by marking its launcher as failed
121 pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
122
123 if (!pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
124 if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
125 pcmk__notice("Not fencing guest node %s (otherwise would "
126 "because %s): its guest resource %s is unmanaged",
127 pcmk__node_name(node), reason, rsc->id);
128 } else {
129 pcmk__sched_warn(scheduler,
130 "Guest node %s will be fenced "
131 "(by recovering its guest resource %s): %s",
132 pcmk__node_name(node), rsc->id, reason);
133
134 /* We don't mark the node as unclean because that would prevent the
135 * node from running resources. We want to allow it to run resources
136 * in this transition if the recovery succeeds.
137 */
138 pcmk__set_node_flags(node, pcmk__node_remote_reset);
139 pcmk__set_rsc_flags(rsc,
140 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
141 }
142 }
143
144 } else if (is_dangling_guest_node(node)) {
145 pcmk__info("Cleaning up dangling connection for guest node %s: fencing "
146 "was already done because %s, and guest resource no longer "
147 "exists",
148 pcmk__node_name(node), reason);
149 pcmk__set_rsc_flags(node->priv->remote,
150 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
151
152 } else if (pcmk__is_remote_node(node)) {
153 pcmk_resource_t *rsc = node->priv->remote;
154
155 if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
156 pcmk__notice("Not fencing remote node %s (otherwise would because "
157 "%s): connection is unmanaged",
158 pcmk__node_name(node), reason);
159 } else if (!pcmk__is_set(node->priv->flags, pcmk__node_remote_reset)) {
160 pcmk__set_node_flags(node, pcmk__node_remote_reset);
161 pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
162 pcmk__node_name(node),
163 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
164 reason);
165 }
166 node->details->unclean = TRUE;
167 // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
168 pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
169
170 } else if (node->details->unclean) {
171 const char *fenced_s = "also is unclean";
172
173 if (pe_can_fence(scheduler, node)) {
174 fenced_s = "would also be fenced";
175 }
176 pcmk__trace("Cluster node %s %s because %s",
177 pcmk__node_name(node), fenced_s, reason);
178
179 } else {
180 pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
181 pcmk__node_name(node),
182 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
183 reason);
184 node->details->unclean = TRUE;
185 pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
186 }
187 }
188
189 // @TODO xpaths can't handle templates, rules, or id-refs
190
191 // nvpair with provides or requires set to unfencing
192 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
193 "[(@" PCMK_XA_NAME "='" PCMK_FENCING_PROVIDES "'" \
194 "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
195 "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
196
197 // unfencing in rsc_defaults or any resource
198 #define XPATH_ENABLE_UNFENCING \
199 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
200 "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
201 "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
202 "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
203
204 static void
205 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
206 {
207 xmlXPathObject *result = NULL;
208
209 if (!pcmk__is_set(scheduler->flags, flag)) {
210 result = pcmk__xpath_search(scheduler->input->doc, xpath);
211 if (pcmk__xpath_num_results(result) > 0) {
212 pcmk__set_scheduler_flags(scheduler, flag);
213 }
214 xmlXPathFreeObject(result);
215 }
216 }
217
218 gboolean
219 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
220 {
221 const char *value = NULL;
222 GHashTable *config_hash = pcmk__strkey_table(free, free);
223
224 const pcmk_rule_input_t rule_input = {
225 .now = scheduler->priv->now,
226 };
227
228 scheduler->priv->options = config_hash;
229
230 pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET,
231 &rule_input, config_hash,
232 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, scheduler);
233
234 pcmk__validate_cluster_options(config_hash);
235
236 set_config_flag(scheduler, PCMK__OPT_ENABLE_STARTUP_PROBES,
237 pcmk__sched_probe_resources);
238 if (!pcmk__is_set(scheduler->flags, pcmk__sched_probe_resources)) {
239 pcmk__warn_once(pcmk__wo_enable_startup_probes,
240 "Support for the " PCMK__OPT_ENABLE_STARTUP_PROBES " "
241 "cluster property is deprecated and will be removed "
242 "(and behave as true) in a future release. Use a "
243 "location constraint with "
244 PCMK_XA_RESOURCE_DISCOVERY "=" PCMK_VALUE_NEVER " "
245 "instead to disable probes where desired.");
246 }
247
248 value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
249 if (pcmk__is_true(value)) {
250 pcmk__info("Watchdog-based self-fencing will be performed via SBD if "
251 "fencing is required and " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT
252 " is nonzero");
253 pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
254 }
255
256 /* Set certain flags via xpath here, so they can be used before the relevant
257 * configuration sections are unpacked.
258 */
259 set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
260 scheduler);
261
262 value = pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_TIMEOUT);
263 pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));
264
265 pcmk__debug("Default fencing action timeout: %s",
266 pcmk__readable_interval(scheduler->priv->fence_timeout_ms));
267
268 set_config_flag(scheduler, PCMK_OPT_FENCING_ENABLED,
269 pcmk__sched_fencing_enabled);
270 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
271 pcmk__debug("Fencing of failed nodes is enabled");
272 } else {
273 pcmk__debug("Fencing of failed nodes is disabled");
274 }
275
276 scheduler->priv->fence_action =
277 pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_ACTION);
278 pcmk__trace("Fencing will %s nodes", scheduler->priv->fence_action);
279
280 set_config_flag(scheduler, PCMK__OPT_CONCURRENT_FENCING,
281 pcmk__sched_concurrent_fencing);
282 if (pcmk__is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
283 pcmk__debug("Concurrent fencing is enabled");
284
285 } else {
286 pcmk__warn_once(pcmk__wo_concurrent_fencing,
287 "Support for the " PCMK__OPT_CONCURRENT_FENCING " "
288 "cluster property is deprecated and will be removed "
289 "(and behave as true) in a future release.");
290 }
291
292 value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
293 if (value) {
294 guint *delay_ms = &(scheduler->priv->priority_fencing_ms);
295
296 pcmk_parse_interval_spec(value, delay_ms);
297 pcmk__trace("Priority fencing delay is %s",
298 pcmk__readable_interval(*delay_ms));
299 }
300
301 set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
302 pcmk__sched_stop_all);
303 pcmk__debug("Stop all active resources: %s",
304 pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
305
306 set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
307 pcmk__sched_symmetric_cluster);
308 if (pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
309 pcmk__debug("Cluster is symmetric - resources can run anywhere by "
310 "default");
311 }
312
313 value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
314
315 if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
316 scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
317
318 } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
319 scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
320
321 } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
322 scheduler->no_quorum_policy = pcmk_no_quorum_demote;
323
324 } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
325 PCMK_VALUE_FENCE_LEGACY, NULL)) {
326 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
327 int do_panic = 0;
328
329 pcmk__xe_get_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
330 &do_panic);
331 if (do_panic
332 || pcmk__is_set(scheduler->flags, pcmk__sched_quorate)) {
333 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
334 } else {
335 pcmk__notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to "
336 "'" PCMK_VALUE_STOP "': cluster has never had "
337 "quorum");
338 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
339 }
340 } else {
341 pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
342 " to 'stop' because fencing is disabled");
343 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
344 }
345
346 } else {
347 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
348 }
349
350 switch (scheduler->no_quorum_policy) {
351 case pcmk_no_quorum_freeze:
352 pcmk__debug("On loss of quorum: Freeze resources that require "
353 "quorum");
354 break;
355 case pcmk_no_quorum_stop:
356 pcmk__debug("On loss of quorum: Stop resources that require "
357 "quorum");
358 break;
359 case pcmk_no_quorum_demote:
360 pcmk__debug("On loss of quorum: Demote promotable resources and "
361 "stop other resources");
362 break;
363 case pcmk_no_quorum_fence:
364 pcmk__notice("On loss of quorum: Fence all remaining nodes");
365 break;
366 case pcmk_no_quorum_ignore:
367 pcmk__notice("On loss of quorum: Ignore");
368 break;
369 }
370
371 set_config_flag(scheduler, PCMK__OPT_STOP_REMOVED_RESOURCES,
372 pcmk__sched_stop_removed_resources);
373 if (pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
374 pcmk__trace("Removed resources are stopped");
375 } else {
376 pcmk__warn_once(pcmk__wo_stop_removed_resources,
377 "Support for the " PCMK__OPT_STOP_REMOVED_RESOURCES " "
378 "cluster property is deprecated and will be removed "
379 "(and behave as true) in a future release.");
380 }
381
382 set_config_flag(scheduler, PCMK__OPT_CANCEL_REMOVED_ACTIONS,
383 pcmk__sched_cancel_removed_actions);
384 if (pcmk__is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
385 pcmk__trace("Removed resource actions are stopped");
386 } else {
387 pcmk__warn_once(pcmk__wo_cancel_removed_actions,
388 "Support for the " PCMK__OPT_CANCEL_REMOVED_ACTIONS " "
389 "cluster property is deprecated and will be removed "
390 "(and behave as true) in a future release.");
391 }
392
393 set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
394 pcmk__sched_in_maintenance);
395 pcmk__trace("Maintenance mode: %s",
396 pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
397
398 set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
399 pcmk__sched_start_failure_fatal);
400 if (pcmk__is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
401 pcmk__trace("Start failures are always fatal");
402 } else {
403 pcmk__trace("Start failures are handled by failcount");
404 }
405
406 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
407 set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
408 pcmk__sched_startup_fencing);
409 }
410 if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
411 pcmk__trace("Unseen nodes will be fenced");
412 } else {
413 pcmk__warn_once(pcmk__wo_blind,
414 "Blind faith: not fencing unseen nodes");
415 }
416
417 pe__unpack_node_health_scores(scheduler);
418
419 scheduler->priv->placement_strategy =
420 pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
421 pcmk__trace("Placement strategy: %s", scheduler->priv->placement_strategy);
422
423 set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
424 pcmk__sched_shutdown_lock);
425 if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
426 value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
427 pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
428 pcmk__trace("Resources will be locked to nodes that were cleanly "
429 "shut down (locks expire after %s)",
430 pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
431 } else {
432 pcmk__trace("Resources will not be locked to nodes that were cleanly "
433 "shut down");
434 }
435
436 value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
437 pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
438 if (scheduler->priv->node_pending_ms == 0U) {
439 pcmk__trace("Do not fence pending nodes");
440 } else {
441 pcmk__trace("Fence pending nodes after %s",
442 pcmk__readable_interval(scheduler->priv->node_pending_ms));
443 }
444
445 set_config_flag(scheduler, PCMK_OPT_FENCE_REMOTE_WITHOUT_QUORUM,
446 pcmk__sched_fence_remote_no_quorum);
447 if (pcmk__is_set(scheduler->flags, pcmk__sched_fence_remote_no_quorum)) {
448 pcmk__trace("Pacemaker Remote nodes may be fenced without quorum");
449
450 } else {
451 pcmk__trace("Pacemaker Remote nodes require quorum to be fenced");
452 }
453
454 return TRUE;
455 }
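/* A minimal cluster configuration section unpacked by this function might
 * look like the following (an illustrative sketch; IDs are hypothetical):
 *
 *     <crm_config>
 *       <cluster_property_set id="cib-bootstrap-options">
 *         <nvpair id="opt-nqp" name="no-quorum-policy" value="freeze"/>
 *         <nvpair id="opt-maint" name="maintenance-mode" value="false"/>
 *       </cluster_property_set>
 *     </crm_config>
 */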
456
457 /*!
458 * \internal
459 * \brief Create a new node object in scheduler data
460 *
461 * \param[in] id ID of new node
462 * \param[in] uname Name of new node
463 * \param[in] type Type of new node
464 * \param[in] score Score of new node
465 * \param[in,out] scheduler Scheduler data
466 *
467 * \return Newly created node object
468 * \note The returned object is part of the scheduler data and should not be
469 * freed separately.
470 */
471 pcmk_node_t *
472 pe__create_node(const char *id, const char *uname, const char *type, int score,
473 pcmk_scheduler_t *scheduler)
474 {
475 enum pcmk__node_variant variant = pcmk__node_variant_cluster;
476 pcmk_node_t *new_node = NULL;
477
478 if (pcmk_find_node(scheduler, uname) != NULL) {
479 pcmk__config_warn("More than one node entry has name '%s'", uname);
480 }
481
482 if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
483 pcmk__str_null_matches|pcmk__str_casei)) {
484 variant = pcmk__node_variant_cluster;
485
486 } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
487 variant = pcmk__node_variant_remote;
488
489 } else {
490 pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
491 pcmk__s(uname, "without name"), type);
492 return NULL;
493 }
494
495 new_node = pcmk__assert_alloc(1, sizeof(pcmk_node_t));
496 new_node->assign = pcmk__assert_alloc(1,
497 sizeof(struct pcmk__node_assignment));
498 new_node->details = pcmk__assert_alloc(1,
499 sizeof(struct pcmk__node_details));
500 new_node->priv = pcmk__assert_alloc(1, sizeof(pcmk__node_private_t));
501
502 pcmk__trace("Creating node for entry %s/%s", uname, id);
503 new_node->assign->score = score;
504 new_node->priv->id = pcmk__str_copy(id);
505 new_node->priv->name = pcmk__str_copy(uname);
506 new_node->priv->flags = pcmk__node_probes_allowed;
507 new_node->details->online = false;
508 new_node->details->shutdown = false;
509 new_node->details->running_rsc = NULL;
510 new_node->priv->scheduler = scheduler;
511 new_node->priv->variant = variant;
512 new_node->priv->attrs = pcmk__strkey_table(free, free);
513 new_node->priv->utilization = pcmk__strkey_table(free, free);
514 new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);
515
516 if (pcmk__is_pacemaker_remote_node(new_node)) {
517 pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
518 pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
519
520 } else {
521 pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
522 }
523
524 scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
525 pe__cmp_node_name);
526 return new_node;
527 }
528
529 static const char *
530 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
531 {
532 xmlNode *attr_set = NULL;
533 xmlNode *attr = NULL;
534
535 const char *container_id = pcmk__xe_id(xml_obj);
536 const char *remote_name = NULL;
537 const char *remote_server = NULL;
538 const char *remote_port = NULL;
539 const char *connect_timeout = "60s";
540 const char *remote_allow_migrate = NULL;
541 const char *is_managed = NULL;
542
543 // @TODO This doesn't handle rules or id-ref
544 for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
545 NULL, NULL);
546 attr_set != NULL;
547 attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {
548
549 for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
550 attr != NULL; attr = pcmk__xe_next(attr, NULL)) {
551
552 const char *value = pcmk__xe_get(attr, PCMK_XA_VALUE);
553 const char *name = pcmk__xe_get(attr, PCMK_XA_NAME);
554
555 if (name == NULL) { // Sanity
556 continue;
557 }
558
559 if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
560 remote_name = value;
561
562 } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
563 remote_server = value;
564
565 } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
566 remote_port = value;
567
568 } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
569 connect_timeout = value;
570
571 } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
572 remote_allow_migrate = value;
573
574 } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
575 is_managed = value;
576 }
577 }
578 }
579
580 if (remote_name == NULL) {
581 return NULL;
582 }
583
584 if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
585 return NULL;
586 }
587
588 pe_create_remote_xml(parent, remote_name, container_id,
589 remote_allow_migrate, is_managed,
590 connect_timeout, remote_server, remote_port);
591 return remote_name;
592 }
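/* As a sketch, a guest node is requested with meta-attributes like these
 * (illustrative names and values):
 *
 *     <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *       <meta_attributes id="vm1-meta">
 *         <nvpair id="vm1-rn" name="remote-node" value="guest1"/>
 *         <nvpair id="vm1-ra" name="remote-addr" value="192.168.122.11"/>
 *       </meta_attributes>
 *     </primitive>
 *
 * expand_remote_rsc_meta() would then inject an equivalent
 * ocf:pacemaker:remote primitive named "guest1" for the connection.
 */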
593
594 static void
595 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
596 {
597 if ((new_node->priv->variant == pcmk__node_variant_remote)
598 && (new_node->priv->remote == NULL)) {
599 /* Ignore fencing for remote nodes that don't have a connection resource
600 * associated with them. This happens when remote node entries get left
601 * in the nodes section after the connection resource is removed.
602 */
603 return;
604 }
605
606 if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
607 // All nodes are unclean until we've seen their status entry
608 new_node->details->unclean = TRUE;
609
610 } else {
611 // Blind faith ...
612 new_node->details->unclean = FALSE;
613 }
614 }
615
616 gboolean
617 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
618 {
619 xmlNode *xml_obj = NULL;
620 pcmk_node_t *new_node = NULL;
621 const char *id = NULL;
622 const char *uname = NULL;
623 const char *type = NULL;
624
625 for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
626 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {
627
628 int score = 0;
629 int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
630
631 new_node = NULL;
632
633 id = pcmk__xe_get(xml_obj, PCMK_XA_ID);
634 uname = pcmk__xe_get(xml_obj, PCMK_XA_UNAME);
635 type = pcmk__xe_get(xml_obj, PCMK_XA_TYPE);
636 pcmk__trace("Processing node %s/%s", uname, id);
637
638 if (id == NULL) {
639 pcmk__config_err("Ignoring <" PCMK_XE_NODE
640 "> entry in configuration without id");
641 continue;
642 }
643 if (rc != pcmk_rc_ok) {
644 // Not possible with schema validation enabled
645 pcmk__config_warn("Using 0 as score for node %s "
646 "because '%s' is not a valid score: %s",
647 pcmk__s(uname, "without name"),
648 pcmk__xe_get(xml_obj, PCMK_XA_SCORE),
649 pcmk_rc_str(rc));
650 }
651 new_node = pe__create_node(id, uname, type, score, scheduler);
652
653 if (new_node == NULL) {
654 return FALSE;
655 }
656
657 handle_startup_fencing(scheduler, new_node);
658
659 add_node_attrs(xml_obj, new_node, FALSE, scheduler);
660
661 pcmk__trace("Done with node %s", pcmk__xe_get(xml_obj, PCMK_XA_UNAME));
662 }
663
664 return TRUE;
665 }
666
667 static void
668 unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
669 {
670 const char *launcher_id = NULL;
671
672 if (rsc->priv->children != NULL) {
673 g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
674 scheduler);
675 return;
676 }
677
678 launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
679 if ((launcher_id != NULL)
680 && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
681 pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
682 launcher_id);
683
684 if (launcher != NULL) {
685 rsc->priv->launcher = launcher;
686 launcher->priv->launched =
687 g_list_append(launcher->priv->launched, rsc);
688 pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
689 rsc->id, launcher_id);
690 } else {
691 pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
692 rsc->id, launcher_id);
693 }
694 }
695 }
696
697 gboolean
698 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
699 {
700 xmlNode *xml_obj = NULL;
701
702 /* Create remote nodes and guest nodes from the resource configuration
703 * before unpacking resources.
704 */
705 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
706 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
707
708 const char *new_node_id = NULL;
709
710 /* Check for remote nodes, which are defined by ocf:pacemaker:remote
711 * primitives.
712 */
713 if (xml_contains_remote_node(xml_obj)) {
714 new_node_id = pcmk__xe_id(xml_obj);
715 /* The pcmk_find_node() check ensures we don't iterate over an
716 * expanded node that has already been added to the node list
717 */
718 if (new_node_id
719 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
720 pcmk__trace("Found remote node %s defined by resource %s",
721 new_node_id, pcmk__xe_id(xml_obj));
722 pe__create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, 0,
723 scheduler);
724 }
725 continue;
726 }
727
728 /* Check for guest nodes, which are defined by special meta-attributes
729 * of a primitive of any type (for example, VirtualDomain or Xen).
730 */
731 if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
732 /* This will add an ocf:pacemaker:remote primitive to the
733 * configuration for the guest node's connection, to be unpacked
734 * later.
735 */
736 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
737 scheduler);
738 if (new_node_id
739 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
740 pcmk__trace("Found guest node %s in resource %s",
741 new_node_id, pcmk__xe_id(xml_obj));
742 pe__create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, 0,
743 scheduler);
744 }
745 continue;
746 }
747
748 /* Check for guest nodes inside a group. Clones are currently not
749 * supported as guest nodes.
750 */
751 if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
752 xmlNode *xml_obj2 = NULL;
753 for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
754 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {
755
756 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
757 scheduler);
758
759 if (new_node_id
760 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
761 pcmk__trace("Found guest node %s in resource %s inside "
762 "group %s",
763 new_node_id, pcmk__xe_id(xml_obj2),
764 pcmk__xe_id(xml_obj));
765 pe__create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
766 0, scheduler);
767 }
768 }
769 }
770 }
771 return TRUE;
772 }
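/* A plain remote node (as opposed to a guest node) is defined by a connection
 * resource like this (illustrative ID):
 *
 *     <primitive id="remote1" class="ocf" provider="pacemaker" type="remote"/>
 *
 * which xml_contains_remote_node() recognizes, causing a node also named
 * "remote1" to be created above.
 */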
773
774 /* Call this after all the nodes and resources have been
775 * unpacked, but before the status section is read.
776 *
777 * A remote node's online status is reflected by the state
778 * of the remote node's connection resource. We need to link
779 * the remote node to this connection resource so we can have
780 * easy access to the connection resource during the scheduler calculations.
781 */
782 static void
783 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
784 {
785 pcmk_node_t *remote_node = NULL;
786
787 if (!pcmk__is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
788 return;
789 }
790
791 if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
792 /* remote_nodes and remote_resources are not linked in quick location calculations */
793 return;
794 }
795
796 remote_node = pcmk_find_node(scheduler, new_rsc->id);
797 CRM_CHECK(remote_node != NULL, return);
798
799 pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
800 new_rsc->id, pcmk__node_name(remote_node));
801 remote_node->priv->remote = new_rsc;
802
803 if (new_rsc->priv->launcher == NULL) {
804 /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
805 * the same as is done for cluster nodes.
806 */
807 handle_startup_fencing(scheduler, remote_node);
808
809 } else {
810 /* pe__create_node() marks the new node as "remote" or "cluster"; now
811 * that we know the node is a guest node, update it correctly.
812 */
813 pcmk__insert_dup(remote_node->priv->attrs,
814 CRM_ATTR_KIND, "container");
815 }
816 }
817
818 /*!
819 * \internal
820 * \brief Parse configuration XML for resource information
821 *
822 * \param[in] xml_resources Top of resource configuration XML
823 * \param[in,out] scheduler Scheduler data
824 *
825 * \return TRUE
826 *
827 * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
828 * be used when pe__unpack_resource() calls resource_location()
829 */
830 gboolean
831 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
832 {
833 xmlNode *xml_obj = NULL;
834 GList *gIter = NULL;
835
836 scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);
837
838 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
839 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
840
841 pcmk_resource_t *new_rsc = NULL;
842 const char *id = pcmk__xe_id(xml_obj);
843
844 if (pcmk__str_empty(id)) {
845 pcmk__config_err("Ignoring <%s> resource without ID",
846 xml_obj->name);
847 continue;
848 }
849
850 if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
851 if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
852 NULL, NULL) == FALSE) {
853 /* Record the template's ID anyway, so that we know it exists. */
854 pcmk__insert_dup(scheduler->priv->templates, id, NULL);
855 }
856 continue;
857 }
858
859 pcmk__trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
860
861 if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
862 scheduler) != pcmk_rc_ok) {
863
864 pcmk__config_err("Ignoring <%s> resource '%s' because "
865 "configuration is invalid", xml_obj->name, id);
866 continue;
867 }
868
869 scheduler->priv->resources = g_list_append(scheduler->priv->resources,
870 new_rsc);
871 pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
872 }
873
874 for (gIter = scheduler->priv->resources;
875 gIter != NULL; gIter = gIter->next) {
876
877 pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
878
879 unpack_launcher(rsc, scheduler);
880 link_rsc2remotenode(scheduler, rsc);
881 }
882
883 scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
884 pe__cmp_rsc_priority);
885 if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
886 /* Ignore */
887
888 } else if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
889 && !pcmk__is_set(scheduler->flags, pcmk__sched_have_fencing)) {
890
891 /* pcs's CI tests look for this specific error message. Confer with the
892 * pcs team before changing it. If the dependency still exists, bump the
893 * CRM_FEATURE_SET and inform the pcs maintainers.
894 *
895 * Also, ResyncCIB.errors_to_ignore() looks for this specific error
896 * message as well.
897 */
898 pcmk__config_err("Resource start-up disabled since no fencing "
899 "resources have been defined. Either configure some "
900 "or disable fencing with the "
901 PCMK_OPT_FENCING_ENABLED " option. NOTE: Clusters "
902 "with shared data need fencing to ensure data "
903 "integrity.");
904 }
905
906 return TRUE;
907 }
908
909 /*!
910 * \internal
911 * \brief Validate the levels in a fencing topology
912 *
913 * \param[in] xml \c PCMK_XE_FENCING_TOPOLOGY element
914 */
915 void
916 pcmk__validate_fencing_topology(const xmlNode *xml)
917 {
918 if (xml == NULL) {
919 return;
920 }
921
922 CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);
923
924 for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
925 NULL, NULL);
926 level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {
927
928 const char *id = pcmk__xe_id(level);
929 int index = 0;
930
931 if (pcmk__str_empty(id)) {
932 pcmk__config_err("Ignoring fencing level without ID");
933 continue;
934 }
935
936 if (pcmk__xe_get_int(level, PCMK_XA_INDEX, &index) != pcmk_rc_ok) {
937 pcmk__config_err("Ignoring fencing level %s with invalid index",
938 id);
939 continue;
940 }
941
942 if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
943 pcmk__config_err("Ignoring fencing level %s with out-of-range "
944 "index %d",
945 id, index);
946 }
947 }
948 }
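/* The levels validated here come from a section like this (illustrative
 * devices and IDs):
 *
 *     <fencing-topology>
 *       <fencing-level id="fl-1" target="node1" index="1" devices="ipmi1"/>
 *       <fencing-level id="fl-2" target="node1" index="2" devices="pdu1"/>
 *     </fencing-topology>
 *
 * Entries without an ID, or with an index outside
 * [ST__LEVEL_MIN, ST__LEVEL_MAX], are rejected with a configuration error.
 */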
949
950 gboolean
951 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
952 {
953 xmlNode *xml_tag = NULL;
954
955 scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);
956
957 for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
958 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {
959
960 xmlNode *xml_obj_ref = NULL;
961 const char *tag_id = pcmk__xe_id(xml_tag);
962
963 if (tag_id == NULL) {
964 pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
965 (const char *) xml_tag->name);
966 continue;
967 }
968
969 for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
970 NULL, NULL);
971 xml_obj_ref != NULL;
972 xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {
973
974 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
975
976 if (obj_ref == NULL) {
977 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
978 xml_obj_ref->name, tag_id);
979 continue;
980 }
981
982 pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
983 }
984 }
985
986 return TRUE;
987 }
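/* Tags group other configuration objects by ID, for example (illustrative):
 *
 *     <tags>
 *       <tag id="all-vms">
 *         <obj_ref id="vm1"/>
 *         <obj_ref id="vm2"/>
 *       </tag>
 *     </tags>
 *
 * Each obj_ref ID is recorded against its tag in scheduler->priv->tags.
 */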
988
989 /*!
990 * \internal
991 * \brief Unpack a ticket state entry
992 *
993 * \param[in] xml_ticket XML ticket state to unpack
994 * \param[in,out] userdata Scheduler data
995 *
996 * \return pcmk_rc_ok (to always continue unpacking further entries)
997 */
998 static int
999 unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
1000 {
1001 pcmk_scheduler_t *scheduler = userdata;
1002
1003 const char *ticket_id = NULL;
1004 const char *granted = NULL;
1005 const char *last_granted = NULL;
1006 const char *standby = NULL;
1007 xmlAttrPtr xIter = NULL;
1008
1009 pcmk__ticket_t *ticket = NULL;
1010
1011 ticket_id = pcmk__xe_id(xml_ticket);
1012 if (pcmk__str_empty(ticket_id)) {
1013 return pcmk_rc_ok;
1014 }
1015
1016 pcmk__trace("Processing ticket state for %s", ticket_id);
1017
1018 ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
1019 ticket_id);
1020 if (ticket == NULL) {
1021 ticket = ticket_new(ticket_id, scheduler);
1022 if (ticket == NULL) {
1023 return pcmk_rc_ok;
1024 }
1025 }
1026
1027 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1028 const char *prop_name = (const char *)xIter->name;
1029 const char *prop_value = pcmk__xml_attr_value(xIter);
1030
1031 if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1032 continue;
1033 }
1034 pcmk__insert_dup(ticket->state, prop_name, prop_value);
1035 }
1036
1037 granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1038 if (pcmk__is_true(granted)) {
1039 pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
1040 pcmk__info("We have ticket '%s'", ticket->id);
1041 } else {
1042 pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
1043 pcmk__info("We do not have ticket '%s'", ticket->id);
1044 }
1045
1046 last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1047 if (last_granted) {
1048 long long last_granted_ll = 0LL;
1049 int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1050
1051 if (rc != pcmk_rc_ok) {
1052 pcmk__warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1053 " value '%s' in state for ticket %s: %s",
1054 last_granted_ll, last_granted, ticket->id,
1055 pcmk_rc_str(rc));
1056 }
1057 ticket->last_granted = (time_t) last_granted_ll;
1058 }
1059
1060 standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1061 if (pcmk__is_true(standby)) {
1062 pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
1063 if (pcmk__is_set(ticket->flags, pcmk__ticket_granted)) {
1064 pcmk__info("Granted ticket '%s' is in standby-mode", ticket->id);
1065 }
1066 } else {
1067 pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
1068 }
1069
1070 pcmk__trace("Done with ticket state for %s", ticket_id);
1071
1072 return pcmk_rc_ok;
1073 }
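/* A ticket state entry unpacked here looks like this (illustrative time):
 *
 *     <tickets>
 *       <ticket_state id="ticketA" granted="true"
 *                     last-granted="1680000000"/>
 *     </tickets>
 *
 * All attributes other than the ID are copied into ticket->state, with the
 * granted/last-granted/standby values getting the special handling above.
 */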
1074
1075 static void
1076 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1077 pcmk_scheduler_t *scheduler)
1078 {
1079 const char *discovery = NULL;
1080 const xmlNode *attrs = NULL;
1081 pcmk_resource_t *rsc = NULL;
1082 int maint = 0;
1083
1084 if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1085 return;
1086 }
1087
1088 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1089 return;
1090 }
1091 pcmk__trace("Processing Pacemaker Remote node %s",
1092 pcmk__node_name(this_node));
1093
1094 pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_IN_MAINTENANCE),
1095 &maint, 0);
1096 if (maint) {
1097 pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
1098 } else {
1099 pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
1100 }
1101
1102 rsc = this_node->priv->remote;
1103 if (!pcmk__is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
1104 this_node->details->unclean = FALSE;
1105 pcmk__set_node_flags(this_node, pcmk__node_seen);
1106 }
1107 attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1108 NULL);
1109 add_node_attrs(attrs, this_node, TRUE, scheduler);
1110
1111 if (pe__shutdown_requested(this_node)) {
1112 pcmk__info("%s is shutting down", pcmk__node_name(this_node));
1113 this_node->details->shutdown = TRUE;
1114 }
1115
1116 if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1117 pcmk__rsc_node_current))) {
1118 pcmk__info("%s is in standby mode", pcmk__node_name(this_node));
1119 pcmk__set_node_flags(this_node, pcmk__node_standby);
1120 }
1121
1122 if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE,
1123 NULL, pcmk__rsc_node_current))
1124 || ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed))) {
1125 pcmk__info("%s is in maintenance mode", pcmk__node_name(this_node));
1126 this_node->details->maintenance = TRUE;
1127 }
1128
1129 discovery = pcmk__node_attr(this_node,
1130 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1131 NULL, pcmk__rsc_node_current);
1132 if ((discovery != NULL) && !pcmk__is_true(discovery)) {
1133 pcmk__warn_once(pcmk__wo_rdisc_enabled,
1134 "Support for the "
1135 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1136 " node attribute is deprecated and will be removed"
1137 " (and behave as 'true') in a future release.");
1138
1139 if (pcmk__is_remote_node(this_node)
1140 && !pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1141 pcmk__config_warn("Ignoring "
1142 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1143 " attribute on Pacemaker Remote node %s"
1144 " because fencing is disabled",
1145 pcmk__node_name(this_node));
1146 } else {
1147 /* This is either a remote node with fencing enabled, or a guest
1148 * node. We don't care whether fencing is enabled when fencing guest
1149 * nodes, because they are "fenced" by recovering their containing
1150 * resource.
1151 */
1152 pcmk__info("%s has resource discovery disabled",
1153 pcmk__node_name(this_node));
1154 pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
1155 }
1156 }
1157 }
1158
1159 /*!
1160 * \internal
1161 * \brief Unpack a cluster node's transient attributes
1162 *
1163 * \param[in] state CIB node state XML
1164 * \param[in,out] node Cluster node whose attributes are being unpacked
1165 * \param[in,out] scheduler Scheduler data
1166 */
1167 static void
1168 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1169 pcmk_scheduler_t *scheduler)
1170 {
1171 const char *discovery = NULL;
1172 const xmlNode *attrs = pcmk__xe_first_child(state,
1173 PCMK__XE_TRANSIENT_ATTRIBUTES,
1174 NULL, NULL);
1175
1176 add_node_attrs(attrs, node, TRUE, scheduler);
1177
1178 if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1179 pcmk__rsc_node_current))) {
1180 pcmk__info("%s is in standby mode", pcmk__node_name(node));
1181 pcmk__set_node_flags(node, pcmk__node_standby);
1182 }
1183
1184 if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1185 pcmk__rsc_node_current))) {
1186 pcmk__info("%s is in maintenance mode", pcmk__node_name(node));
1187 node->details->maintenance = TRUE;
1188 }
1189
1190 discovery = pcmk__node_attr(node,
1191 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1192 NULL, pcmk__rsc_node_current);
1193 if ((discovery != NULL) && !pcmk__is_true(discovery)) {
1194 pcmk__config_warn("Ignoring "
1195 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1196 " attribute for %s because disabling resource"
1197 " discovery is not allowed for cluster nodes",
1198 pcmk__node_name(node));
1199 }
1200 }
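/* The attributes unpacked here come from a status fragment such as this
 * (an illustrative sketch):
 *
 *     <transient_attributes id="1">
 *       <instance_attributes id="status-1">
 *         <nvpair id="status-1-standby" name="standby" value="true"/>
 *       </instance_attributes>
 *     </transient_attributes>
 */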
1201
1202 /*!
1203 * \internal
1204 * \brief Unpack a node state entry (first pass)
1205 *
1206 * Unpack one node state entry from status. This unpacks information from the
1207 * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
1208 * the resource history inside it. Multiple passes through the status are needed
1209 * to fully unpack everything.
1210 *
1211 * \param[in] state CIB node state XML
1212 * \param[in,out] scheduler Scheduler data
1213 */
1214 static void
1215 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1216 {
1217 const char *id = NULL;
1218 const char *uname = NULL;
1219 pcmk_node_t *this_node = NULL;
1220
1221 id = pcmk__xe_get(state, PCMK_XA_ID);
1222 if (id == NULL) {
1223 pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1224 PCMK_XA_ID);
1225 pcmk__log_xml_info(state, "missing-id");
1226 return;
1227 }
1228
1229 uname = pcmk__xe_get(state, PCMK_XA_UNAME);
1230 if (uname == NULL) {
1231 /* If a joining peer makes the cluster acquire the quorum from Corosync
1232 * but has not joined the controller CPG membership yet, it's possible
1233 * that the created PCMK__XE_NODE_STATE entry doesn't have a
1234 * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
1235 * join CPG.
1236 */
1237 pcmk__trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1238 "without " PCMK_XA_UNAME,
1239 id);
1240 }
1241
1242 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1243 if (this_node == NULL) {
1244 pcmk__notice("Ignoring recorded state for removed node with name %s "
1245 "and " PCMK_XA_ID " %s",
1246 pcmk__s(uname, "unknown"), id);
1247 return;
1248 }
1249
1250 if (pcmk__is_pacemaker_remote_node(this_node)) {
1251 int remote_fenced = 0;
1252
1253 /* We can't determine the online status of Pacemaker Remote nodes until
1254 * after all resource history has been unpacked. In this first pass, we
1255 * do need to mark whether the node has been fenced, as this plays a
1256 * role during unpacking cluster node resource state.
1257 */
1258 pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_FENCED),
1259 &remote_fenced, 0);
1260 if (remote_fenced) {
1261 pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
1262 } else {
1263 pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
1264 }
1265 return;
1266 }
1267
1268 unpack_transient_attributes(state, this_node, scheduler);
1269
1270 /* Provisionally mark this cluster node as clean. We have at least seen it
1271 * in the current cluster's lifetime.
1272 */
1273 this_node->details->unclean = FALSE;
1274 pcmk__set_node_flags(this_node, pcmk__node_seen);
1275
1276 pcmk__trace("Determining online status of cluster node %s (id %s)",
1277 pcmk__node_name(this_node), id);
1278 determine_online_status(state, this_node, scheduler);
1279
1280 if (!pcmk__is_set(scheduler->flags, pcmk__sched_quorate)
1281 && this_node->details->online
1282 && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1283 /* Everything else should flow from this automatically
1284 * (at least until the scheduler becomes able to migrate off
1285 * healthy resources)
1286 */
1287 pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1288 FALSE);
1289 }
1290 }
1291
1292 /*!
1293 * \internal
1294 * \brief Unpack nodes' resource history as much as possible
1295 *
1296 * Unpack as many nodes' resource history as possible in one pass through the
1297 * status. We need to process Pacemaker Remote nodes' connections/containers
1298 * before unpacking their history; the connection/container history will be
1299 * in another node's history, so it might take multiple passes to unpack
1300 * everything.
1301 *
1302 * \param[in] status CIB XML status section
1303 * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
1304 * \param[in,out] scheduler Scheduler data
1305 *
1306 * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1307 * or EAGAIN if more unpacking remains to be done)
1308 */
1309 static int
1310 unpack_node_history(const xmlNode *status, bool fence,
1311 pcmk_scheduler_t *scheduler)
1312 {
1313 int rc = pcmk_rc_ok;
1314
1315 // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1316 for (const xmlNode *state = pcmk__xe_first_child(status,
1317 PCMK__XE_NODE_STATE, NULL,
1318 NULL);
1319 state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {
1320
1321 const char *id = pcmk__xe_id(state);
1322 const char *uname = pcmk__xe_get(state, PCMK_XA_UNAME);
1323 pcmk_node_t *this_node = NULL;
1324
1325 if ((id == NULL) || (uname == NULL)) {
1326 // Warning already logged in first pass through status section
1327 pcmk__trace("Not unpacking resource history from malformed "
1328 PCMK__XE_NODE_STATE " without id and/or uname");
1329 continue;
1330 }
1331
1332 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1333 if (this_node == NULL) {
1334 // Warning already logged in first pass through status section
1335 pcmk__trace("Not unpacking resource history for node %s because "
1336 "no longer in configuration",
1337 id);
1338 continue;
1339 }
1340
1341 if (pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1342 pcmk__trace("Not unpacking resource history for node %s because "
1343 "already unpacked",
1344 id);
1345 continue;
1346 }
1347
1348 if (fence) {
1349 // We're processing all remaining nodes
1350
1351 } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1352 /* We can unpack a guest node's history only after we've unpacked
1353 * other resource history to the point that we know that the node's
1354 * connection and containing resource are both up.
1355 */
1356 const pcmk_resource_t *remote = this_node->priv->remote;
1357 const pcmk_resource_t *launcher = remote->priv->launcher;
1358
1359 if ((remote->priv->orig_role != pcmk_role_started)
1360 || (launcher->priv->orig_role != pcmk_role_started)) {
1361 pcmk__trace("Not unpacking resource history for guest node %s "
1362 "because launcher and connection are not known to "
1363 "be up",
1364 id);
1365 continue;
1366 }
1367
1368 } else if (pcmk__is_remote_node(this_node)) {
1369 /* We can unpack a remote node's history only after we've unpacked
1370 * other resource history to the point that we know that the node's
1371 * connection is up, with the exception of when shutdown locks are
1372 * in use.
1373 */
1374 pcmk_resource_t *rsc = this_node->priv->remote;
1375
1376 if ((rsc == NULL)
1377 || (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1378 && (rsc->priv->orig_role != pcmk_role_started))) {
1379 pcmk__trace("Not unpacking resource history for remote node %s "
1380 "because connection is not known to be up",
1381 id);
1382 continue;
1383 }
1384
1385 /* If fencing and shutdown locks are disabled and we're not processing
1386 * unseen nodes, then we don't want to unpack offline nodes until online
1387 * nodes have been unpacked. This allows us to number active clone
1388 * instances first.
1389 */
1390 } else if (!pcmk__any_flags_set(scheduler->flags,
1391 pcmk__sched_fencing_enabled
1392 |pcmk__sched_shutdown_lock)
1393 && !this_node->details->online) {
1394 pcmk__trace("Not unpacking resource history for offline "
1395 "cluster node %s",
1396 id);
1397 continue;
1398 }
1399
1400 if (pcmk__is_pacemaker_remote_node(this_node)) {
1401 determine_remote_online_status(scheduler, this_node);
1402 unpack_handle_remote_attrs(this_node, state, scheduler);
1403 }
1404
1405 pcmk__trace("Unpacking resource history for %snode %s",
1406 (fence? "unseen " : ""), id);
1407
1408 pcmk__set_node_flags(this_node, pcmk__node_unpacked);
1409 unpack_node_lrm(this_node, state, scheduler);
1410
1411 rc = EAGAIN; // Other node histories might depend on this one
1412 }
1413 return rc;
1414 }
1415
1416 /* remove nodes that are down, stopping */
1417 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1418 /* anything else? */
1419 gboolean
1420 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1421 {
1422 xmlNode *state = NULL;
1423
1424 pcmk__trace("Beginning unpack");
1425
1426 if (scheduler->priv->ticket_constraints == NULL) {
1427 scheduler->priv->ticket_constraints =
1428 pcmk__strkey_table(free, destroy_ticket);
1429 }
1430
1431 for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1432 state = pcmk__xe_next(state, NULL)) {
1433
1434 if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1435 pcmk__xe_foreach_child(state, PCMK__XE_TICKET_STATE,
1436 unpack_ticket_state, scheduler);
1437
1438 } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1439 unpack_node_state(state, scheduler);
1440 }
1441 }
1442
1443 while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1444 pcmk__trace("Another pass through node resource histories is needed");
1445 }
1446
1447 // Now catch any nodes we didn't see
1448 unpack_node_history(status,
1449 pcmk__is_set(scheduler->flags,
1450 pcmk__sched_fencing_enabled),
1451 scheduler);
1452
1453 /* Now that we know where resources are, we can schedule stops of containers
1454 * with failed bundle connections
1455 */
1456 if (scheduler->priv->stop_needed != NULL) {
1457 for (GList *item = scheduler->priv->stop_needed;
1458 item != NULL; item = item->next) {
1459
1460 pcmk_resource_t *container = item->data;
1461 pcmk_node_t *node = pcmk__current_node(container);
1462
1463 if (node) {
1464 stop_action(container, node, FALSE);
1465 }
1466 }
1467
1468 g_clear_pointer(&scheduler->priv->stop_needed, g_list_free);
1469 }
1470
1471 /* Now that we know status of all Pacemaker Remote connections and nodes,
1472 * we can stop connections for node shutdowns, and check the online status
1473 * of remote/guest nodes that didn't have any node history to unpack.
1474 */
1475 for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1476 pcmk_node_t *this_node = gIter->data;
1477
1478 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1479 continue;
1480 }
1481 if (this_node->details->shutdown
1482 && (this_node->priv->remote != NULL)) {
1483 pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
1484 "remote shutdown");
1485 }
1486 if (!pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1487 determine_remote_online_status(scheduler, this_node);
1488 }
1489 }
1490
1491 return TRUE;
1492 }
1493
1494 /*!
1495 * \internal
1496 * \brief Unpack node's time when it became a member at the cluster layer
1497 *
1498 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
1499 * \param[in,out] scheduler Scheduler data
1500 *
1501 * \return Epoch time when node became a cluster member
1502 * (or scheduler effective time for legacy entries) if a member,
1503 * 0 if not a member, or -1 if no valid information available
1504 */
1505 static long long
1506 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1507 {
1508 const char *member_time = pcmk__xe_get(node_state, PCMK__XA_IN_CCM);
1509 bool is_member = false;
1510
1511 if (member_time == NULL) {
1512 return -1LL;
1513 }
1514
1515 if (pcmk__parse_bool(member_time, &is_member) != pcmk_rc_ok) {
1516 long long when_member = 0LL;
1517
1518 if ((pcmk__scan_ll(member_time, &when_member,
1519 0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1520 pcmk__warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in "
1521 PCMK__XE_NODE_STATE " entry",
1522 member_time);
1523 return -1LL;
1524 }
1525 return when_member;
1526 }
1527
1528 /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1529 * recorded as a boolean for a DC < 2.1.7, or the node is pending shutdown
1530 * and has left the CPG, in which case it was set to 1 to avoid fencing for
1531 * PCMK_OPT_NODE_PENDING_TIMEOUT.
1532 *
1533 * We return the effective time for in_ccm=1 because what's important to
1534 * avoid fencing is that effective time minus this value is less than the
1535 * pending node timeout.
1536 */
1537 return is_member? (long long) pcmk__scheduler_epoch_time(scheduler) : 0LL;
1538 }
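/* Sketch of the values handled above: in_ccm="0" yields 0 (not a member);
 * in_ccm="true" (a legacy boolean from a DC < 2.1.7) yields the scheduler's
 * effective time; in_ccm="1680000000" (illustrative epoch) yields that
 * timestamp; anything unparsable yields -1.
 */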
1539
1540 /*!
1541 * \internal
1542 * \brief Unpack node's time when it became online in process group
1543 *
1544 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
1545 *
1546 * \return Epoch time when node became online in process group (or 0 if not
1547 * online, or 1 for legacy online entries)
1548 */
1549 static long long
1550 unpack_node_online(const xmlNode *node_state)
1551 {
1552 const char *peer_time = pcmk__xe_get(node_state, PCMK_XA_CRMD);
1553
1554 // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1555 if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1556 pcmk__str_casei|pcmk__str_null_matches)) {
1557 return 0LL;
1558
1559 } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1560 return 1LL;
1561
1562 } else {
1563 long long when_online = 0LL;
1564
1565 if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1566 || (when_online < 0)) {
1567 pcmk__warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1568 PCMK__XE_NODE_STATE " entry, assuming offline",
1569 peer_time);
1570 return 0LL;
1571 }
1572 return when_online;
1573 }
1574 }
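/* Similarly for the controller group: crmd="offline" (or unset) yields 0,
 * legacy crmd="online" yields 1, and a timestamp such as crmd="1680000000"
 * (illustrative) yields when the controller joined the group.
 */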
1575
1576 /*!
1577 * \internal
1578 * \brief Unpack node attribute for user-requested fencing
1579 *
1580 * \param[in] node Node to check
1581 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status
1582 *
1583 * \return \c true if fencing has been requested for \p node, otherwise \c false
1584 */
1585 static bool
1586 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1587 {
1588 bool value_b = false;
1589 long long value_ll = 0LL;
1590 int rc = pcmk_rc_ok;
1591 const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1592 NULL, pcmk__rsc_node_current);
1593
1594 // Value may be boolean or an epoch time
1595 if ((value_s != NULL)
1596 && (pcmk__parse_bool(value_s, &value_b) == pcmk_rc_ok)) {
1597 return value_b;
1598 }
1599
1600 rc = pcmk__scan_ll(value_s, &value_ll, 0LL);
1601 if (rc == pcmk_rc_ok) {
1602 return (value_ll > 0);
1603 }
1604 pcmk__warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1605 "node attribute for %s: %s",
1606 value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1607 return false;
1608 }
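/* As parsed above, the terminate node attribute may be a boolean ("true"
 * requests fencing) or an epoch time (any value greater than 0 requests
 * fencing); for example, terminate="1680000000" (an illustrative timestamp)
 * would request it.
 */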
1609
1610 static gboolean
1611 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1612 const xmlNode *node_state,
1613 pcmk_node_t *this_node)
1614 {
1615 gboolean online = FALSE;
1616 const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
1617 const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1618 long long when_member = unpack_node_member(node_state, scheduler);
1619 long long when_online = unpack_node_online(node_state);
1620
1621 if (when_member <= 0) {
1622 pcmk__trace("Node %s is %sdown", pcmk__node_name(this_node),
1623 ((when_member < 0)? "presumed " : ""));
1624
1625 } else if (when_online > 0) {
1626 if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1627 online = TRUE;
1628 } else {
1629 pcmk__debug("Node %s is not ready to run resources: %s",
1630 pcmk__node_name(this_node), join);
1631 }
1632
1633 } else if (!pcmk__is_set(this_node->priv->flags,
1634 pcmk__node_expected_up)) {
1635 pcmk__trace("Node %s controller is down: "
1636 "member@%lld online@%lld join=%s expected=%s",
1637 pcmk__node_name(this_node), when_member, when_online,
1638 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1639
1640 } else {
1641 /* mark it unclean */
1642 pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1643 pcmk__info("Node %s member@%lld online@%lld join=%s expected=%s",
1644 pcmk__node_name(this_node), when_member, when_online,
1645 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1646 }
1647 return online;
1648 }
1649
1650 /*!
1651 * \internal
1652 * \brief Check whether a node has taken too long to join controller group
1653 *
1654 * \param[in,out] scheduler Scheduler data
1655 * \param[in] node Node to check
1656 * \param[in] when_member Epoch time when node became a cluster member
1657 * \param[in] when_online Epoch time when node joined controller group
1658 *
1659 * \return true if node has been pending (on the way up) longer than
1660 * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1661 * \note This will also update the cluster's recheck time if appropriate.
1662 */
1663 static inline bool
1664 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1665 long long when_member, long long when_online)
1666 {
1667 if ((scheduler->priv->node_pending_ms > 0U)
1668 && (when_member > 0) && (when_online <= 0)) {
1669 // There is a timeout on pending nodes, and node is pending
1670
1671 time_t timeout = when_member
1672 + pcmk__timeout_ms2s(scheduler->priv->node_pending_ms);
1673
1674 if (pcmk__scheduler_epoch_time(node->priv->scheduler) >= timeout) {
1675 return true; // Node has timed out
1676 }
1677
1678 // Node is pending, but still has time
1679 pcmk__update_recheck_time(timeout, scheduler, "pending node timeout");
1680 }
1681 return false;
1682 }
1683
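/*!
 * \internal
 * \brief Determine a cluster node's online status when fencing is enabled
 *
 * \param[in,out] scheduler   Scheduler data
 * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
 * \param[in,out] this_node   Node whose status is being determined
 *
 * \return true if node should be considered online, otherwise false
 */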
1684 static bool
1685 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1686 const xmlNode *node_state,
1687 pcmk_node_t *this_node)
1688 {
1689 bool termination_requested = unpack_node_terminate(this_node, node_state);
1690 const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
1691 const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1692 long long when_member = unpack_node_member(node_state, scheduler);
1693 long long when_online = unpack_node_online(node_state);
1694
1695 /*
1696 - PCMK__XA_JOIN ::= member|down|pending|banned
1697 - PCMK_XA_EXPECTED ::= member|down
1698
1699 @COMPAT with entries recorded for DCs < 2.1.7
1700 - PCMK__XA_IN_CCM ::= true|false
1701 - PCMK_XA_CRMD ::= online|offline
1702
1703 Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1704 - PCMK__XA_IN_CCM ::= <timestamp>|0
      Since when the node has been a cluster member. A value of 0 means the
      node is not a cluster member.

      - PCMK_XA_CRMD ::= <timestamp>|0
      Since when the peer has been online in CPG. A value of 0 means the peer
      is offline in CPG.
1711 */
1712
1713 pcmk__trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1714 pcmk__node_name(this_node), when_member, when_online,
1715 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1716 (termination_requested? " (termination requested)" : ""));
1717
1718 if (this_node->details->shutdown) {
1719 pcmk__debug("%s is shutting down", pcmk__node_name(this_node));
1720
1721 /* Slightly different criteria since we can't shut down a dead peer */
1722 return (when_online > 0);
1723 }
1724
1725 if (when_member < 0) {
1726 pe_fence_node(scheduler, this_node,
1727 "peer has not been seen by the cluster", FALSE);
1728 return false;
1729 }
1730
1731 if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1732 pe_fence_node(scheduler, this_node,
1733 "peer failed Pacemaker membership criteria", FALSE);
1734
1735 } else if (termination_requested) {
1736 if ((when_member <= 0) && (when_online <= 0)
1737 && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1738 pcmk__info("%s was fenced as requested",
1739 pcmk__node_name(this_node));
1740 return false;
1741 }
1742 pe_fence_node(scheduler, this_node, "fencing was requested", false);
1743
1744 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1745 pcmk__str_null_matches)) {
1746
1747 if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1748 pe_fence_node(scheduler, this_node,
1749 "peer pending timed out on joining the process group",
1750 FALSE);
1751
1752 } else if ((when_member > 0) || (when_online > 0)) {
1753 pcmk__info("- %s is not ready to run resources",
1754 pcmk__node_name(this_node));
1755 pcmk__set_node_flags(this_node, pcmk__node_standby);
1756 this_node->details->pending = TRUE;
1757
1758 } else {
1759 pcmk__trace("%s is down or still coming up",
1760 pcmk__node_name(this_node));
1761 }
1762
1763 } else if (when_member <= 0) {
1764 // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1765 pe_fence_node(scheduler, this_node,
1766 "peer is no longer part of the cluster", TRUE);
1767
1768 } else if (when_online <= 0) {
1769 pe_fence_node(scheduler, this_node,
1770 "peer process is no longer available", FALSE);
1771
1772 /* Everything is running at this point, now check join state */
1773
1774 } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1775 pcmk__info("%s is active", pcmk__node_name(this_node));
1776
1777 } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1778 CRMD_JOINSTATE_DOWN, NULL)) {
1779 pcmk__info("%s is not ready to run resources",
1780 pcmk__node_name(this_node));
1781 pcmk__set_node_flags(this_node, pcmk__node_standby);
1782 this_node->details->pending = TRUE;
1783
1784 } else {
1785 pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1786 FALSE);
1787 }
1788
1789 return (when_member > 0);
1790 }
1791
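/*!
 * \internal
 * \brief Determine online status of a Pacemaker Remote (or guest) node
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in,out] this_node  Node whose status is being determined
 */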
1792 static void
1793 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1794 pcmk_node_t *this_node)
1795 {
1796 pcmk_resource_t *rsc = this_node->priv->remote;
1797 pcmk_resource_t *launcher = NULL;
1798 pcmk_node_t *host = NULL;
1799 const char *node_type = "Remote";
1800
1801 if (rsc == NULL) {
1802 /* This is a leftover node state entry for a former Pacemaker Remote
1803 * node whose connection resource was removed. Consider it offline.
1804 */
1805 pcmk__trace("Pacemaker Remote node %s is considered OFFLINE because "
1806 "its connection resource has been removed from the CIB",
1807 this_node->priv->id);
1808 this_node->details->online = FALSE;
1809 return;
1810 }
1811
1812 launcher = rsc->priv->launcher;
1813 if (launcher != NULL) {
1814 node_type = "Guest";
1815 if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1816 host = rsc->priv->active_nodes->data;
1817 }
1818 }
1819
1820 /* If the resource is currently started, mark it online. */
1821 if (rsc->priv->orig_role == pcmk_role_started) {
1822 this_node->details->online = TRUE;
1823 }
1824
1825 /* consider this node shutting down if transitioning start->stop */
1826 if ((rsc->priv->orig_role == pcmk_role_started)
1827 && (rsc->priv->next_role == pcmk_role_stopped)) {
1828
1829 pcmk__trace("%s node %s shutting down because connection resource is "
1830 "stopping",
1831 node_type, this_node->priv->id);
1832 this_node->details->shutdown = TRUE;
1833 }
1834
1835 /* Now check all the failure conditions. */
1836 if ((launcher != NULL) && pcmk__is_set(launcher->flags, pcmk__rsc_failed)) {
1837 pcmk__trace("Guest node %s UNCLEAN because guest resource failed",
1838 this_node->priv->id);
1839 this_node->details->online = FALSE;
1840 pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1841
1842 } else if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
1843 pcmk__trace("%s node %s OFFLINE because connection resource failed",
1844 node_type, this_node->priv->id);
1845 this_node->details->online = FALSE;
1846
1847 } else if ((rsc->priv->orig_role == pcmk_role_stopped)
1848 || ((launcher != NULL)
1849 && (launcher->priv->orig_role == pcmk_role_stopped))) {
1850
1851 pcmk__trace("%s node %s OFFLINE because its resource is stopped",
1852 node_type, this_node->priv->id);
1853 this_node->details->online = FALSE;
1854 pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);
1855
    } else if ((host != NULL) && !host->details->online
               && host->details->unclean) {
1858 pcmk__trace("Guest node %s UNCLEAN because host is unclean",
1859 this_node->priv->id);
1860 this_node->details->online = FALSE;
1861 pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1862
1863 } else {
1864 pcmk__trace("%s node %s is %s",
1865 node_type, this_node->priv->id,
1866 (this_node->details->online? "ONLINE" : "OFFLINE"));
1867 }
1868 }
1869
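/*!
 * \internal
 * \brief Determine a node's online status and log the result
 *
 * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
 * \param[in,out] this_node   Node whose status is being determined
 * \param[in,out] scheduler   Scheduler data
 */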
1870 static void
1871 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1872 pcmk_scheduler_t *scheduler)
1873 {
1874 gboolean online = FALSE;
1875 const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1876
1877 CRM_CHECK(this_node != NULL, return);
1878
1879 this_node->details->shutdown = FALSE;
1880
1881 if (pe__shutdown_requested(this_node)) {
1882 this_node->details->shutdown = TRUE;
1883
1884 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1885 pcmk__set_node_flags(this_node, pcmk__node_expected_up);
1886 }
1887
1888 if (!pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1889 online = determine_online_status_no_fencing(scheduler, node_state,
1890 this_node);
1891
1892 } else {
1893 online = determine_online_status_fencing(scheduler, node_state,
1894 this_node);
1895 }
1896
1897 if (online) {
1898 this_node->details->online = TRUE;
1899
1900 } else {
1901 /* remove node from contention */
1902 this_node->assign->score = -PCMK_SCORE_INFINITY;
1903 }
1904
1905 if (online && this_node->details->shutdown) {
1906 /* don't run resources here */
1907 this_node->assign->score = -PCMK_SCORE_INFINITY;
1908 }
1909
1910 if (this_node->details->unclean) {
1911 pcmk__sched_warn(scheduler, "%s is unclean",
1912 pcmk__node_name(this_node));
1913
1914 } else if (!this_node->details->online) {
1915 pcmk__trace("%s is offline", pcmk__node_name(this_node));
1916
1917 } else if (this_node->details->shutdown) {
1918 pcmk__info("%s is shutting down", pcmk__node_name(this_node));
1919
1920 } else if (this_node->details->pending) {
1921 pcmk__info("%s is pending", pcmk__node_name(this_node));
1922
1923 } else if (pcmk__is_set(this_node->priv->flags, pcmk__node_standby)) {
1924 pcmk__info("%s is in standby", pcmk__node_name(this_node));
1925
1926 } else if (this_node->details->maintenance) {
1927 pcmk__info("%s is in maintenance", pcmk__node_name(this_node));
1928
1929 } else {
1930 pcmk__info("%s is online", pcmk__node_name(this_node));
1931 }
1932 }
1933
1934 /*!
1935 * \internal
1936 * \brief Find the end of a resource's name, excluding any clone suffix
1937 *
1938 * \param[in] id Resource ID to check
1939 *
1940 * \return Pointer to last character of resource's base name
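 *
 * \note For example, given "myrsc:10" or "myrsc", this returns a pointer to
 *       the final 'c' in "myrsc".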
1941 */
1942 const char *
1943 pe_base_name_end(const char *id)
1944 {
1945 if (!pcmk__str_empty(id)) {
1946 const char *end = id + strlen(id) - 1;
1947
1948 for (const char *s = end; s > id; --s) {
1949 switch (*s) {
1950 case '0':
1951 case '1':
1952 case '2':
1953 case '3':
1954 case '4':
1955 case '5':
1956 case '6':
1957 case '7':
1958 case '8':
1959 case '9':
1960 break;
1961 case ':':
1962 return (s == end)? s : (s - 1);
1963 default:
1964 return end;
1965 }
1966 }
1967 return end;
1968 }
1969 return NULL;
1970 }
1971
1972 /*!
1973 * \internal
1974 * \brief Get a resource name excluding any clone suffix
1975 *
1976 * \param[in] last_rsc_id Resource ID to check
1977 *
1978 * \return Pointer to newly allocated string with resource's base name
1979 * \note It is the caller's responsibility to free() the result.
1980 * This asserts on error, so callers can assume result is not NULL.
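 *       For example, clone_strip("myrsc:10") returns "myrsc".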
1981 */
1982 char *
1983 clone_strip(const char *last_rsc_id)
1984 {
1985 const char *end = pe_base_name_end(last_rsc_id);
1986 char *basename = NULL;
1987
1988 pcmk__assert(end != NULL);
1989 basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1990 pcmk__assert(basename != NULL);
1991 return basename;
1992 }
1993
1994 /*!
1995 * \internal
1996 * \brief Get the name of the first instance of a cloned resource
1997 *
1998 * \param[in] last_rsc_id Resource ID to check
1999 *
2000 * \return Pointer to newly allocated string with resource's base name plus :0
2001 * \note It is the caller's responsibility to free() the result.
2002 * This asserts on error, so callers can assume result is not NULL.
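 *       For example, clone_zero("myrsc:10") returns "myrsc:0".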
2003 */
2004 char *
2005 clone_zero(const char *last_rsc_id)
2006 {
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    pcmk__assert(end != NULL);
    base_name_len = end - last_rsc_id + 1;

    // Allocate room for the base name, ":0", and a terminating NUL
    zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
2013 memcpy(zero, last_rsc_id, base_name_len);
2014 zero[base_name_len] = ':';
2015 zero[base_name_len + 1] = '0';
2016 return zero;
2017 }
2018
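/*!
 * \internal
 * \brief Create a resource to represent history of a removed resource
 *
 * \param[in]     rsc_id     ID of removed resource
 * \param[in]     rsc_entry  Resource history XML to copy attributes from
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Newly created resource, or NULL if it could not be unpacked
 */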
2019 static pcmk_resource_t *
2020 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
2021 pcmk_scheduler_t *scheduler)
2022 {
2023 pcmk_resource_t *rsc = NULL;
2024 xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
2025
2026 pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
2027 pcmk__xe_set(xml_rsc, PCMK_XA_ID, rsc_id);
2028 pcmk__log_xml_debug(xml_rsc, "Removed resource");
2029
2030 if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2031 return NULL;
2032 }
2033
2034 if (xml_contains_remote_node(xml_rsc)) {
2035 pcmk_node_t *node;
2036
2037 pcmk__debug("Detected removed remote node %s", rsc_id);
2038 node = pcmk_find_node(scheduler, rsc_id);
2039 if (node == NULL) {
2040 node = pe__create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2041 scheduler);
2042 }
2043 link_rsc2remotenode(scheduler, rsc);
2044
2045 if (node) {
2046 pcmk__trace("Setting node %s as shutting down due to removed "
2047 "connection resource", rsc_id);
2048 node->details->shutdown = TRUE;
2049 }
2050 }
2051
2052 if (pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER)) {
2053 // This removed resource needs to be mapped to a launcher
2054 pcmk__trace("Launched resource %s was removed from the configuration",
2055 rsc_id);
2056 pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
2057 }
2058 pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
2059 scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
2060 return rsc;
2061 }
2062
2063 /*!
2064 * \internal
2065 * \brief Create "removed" instance for anonymous clone resource history
2066 *
2067 * \param[in,out] parent Clone resource that instance will be added to
2068 * \param[in] rsc_id Instance's resource ID
2069 * \param[in] node Where instance is active (for logging only)
2070 * \param[in,out] scheduler Scheduler data
2071 *
2072 * \return Newly created "removed" instance of \p parent
2073 */
2074 static pcmk_resource_t *
2075 create_anonymous_removed_instance(pcmk_resource_t *parent, const char *rsc_id,
2076 const pcmk_node_t *node,
2077 pcmk_scheduler_t *scheduler)
2078 {
2079 pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2080 pcmk_resource_t *instance = NULL;
2081
2082 // find_rsc() because we might be a cloned group
2083 instance = top->priv->fns->find_rsc(top, rsc_id, NULL,
2084 pcmk_rsc_match_clone_only);
2085
2086 pcmk__rsc_debug(parent, "Created \"removed\" instance %s for %s: %s on %s",
2087 top->id, parent->id, rsc_id, pcmk__node_name(node));
2088 return instance;
2089 }
2090
2091 /*!
2092 * \internal
2093 * \brief Check a node for an instance of an anonymous clone
2094 *
2095 * Return a child instance of the specified anonymous clone, in order of
2096 * preference: (1) the instance running on the specified node, if any;
2097 * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
 * instances); (3) a newly created "removed" instance (used when all
 * \c PCMK_META_CLONE_MAX instances are already active).
2100 *
2101 * \param[in,out] scheduler Scheduler data
2102 * \param[in] node Node on which to check for instance
2103 * \param[in,out] parent Clone to check
2104 * \param[in] rsc_id Name of cloned resource in history (no instance)
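 *
 * \return Clone instance to use for \p rsc_id on \p node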
2105 */
2106 static pcmk_resource_t *
2107 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2108 pcmk_resource_t *parent, const char *rsc_id)
2109 {
2110 GList *rIter = NULL;
2111 pcmk_resource_t *rsc = NULL;
2112 pcmk_resource_t *inactive_instance = NULL;
2113 gboolean skip_inactive = FALSE;
2114
2115 pcmk__assert(pcmk__is_anonymous_clone(parent));
2116
2117 // Check for active (or partially active, for cloned groups) instance
2118 pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2119 rsc_id, pcmk__node_name(node), parent->id);
2120
2121 for (rIter = parent->priv->children;
2122 (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
2123
2124 GList *locations = NULL;
2125 pcmk_resource_t *child = rIter->data;
2126
2127 /* Check whether this instance is already known to be active or pending
2128 * anywhere, at this stage of unpacking. Because this function is called
2129 * for a resource before the resource's individual operation history
2130 * entries are unpacked, locations will generally not contain the
2131 * desired node.
2132 *
2133 * However, there are three exceptions:
2134 * (1) when child is a cloned group and we have already unpacked the
2135 * history of another member of the group on the same node;
2136 * (2) when we've already unpacked the history of another numbered
2137 * instance on the same node (which can happen if
2138 * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2139 * (3) when we re-run calculations on the same scheduler data as part of
2140 * a simulation.
2141 */
2142 child->priv->fns->location(child, &locations, pcmk__rsc_node_current
2143 |pcmk__rsc_node_pending);
2144 if (locations) {
2145 /* We should never associate the same numbered anonymous clone
2146 * instance with multiple nodes, and clone instances can't migrate,
2147 * so there must be only one location, regardless of history.
2148 */
2149 CRM_LOG_ASSERT(locations->next == NULL);
2150
2151 if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2152 /* This child instance is active on the requested node, so check
2153 * for a corresponding configured resource. We use find_rsc()
2154 * instead of child because child may be a cloned group, and we
2155 * need the particular member corresponding to rsc_id.
2156 *
2157 * If the history entry represents a removed instance, rsc will
2158 * be NULL.
2159 */
2160 rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
2161 pcmk_rsc_match_clone_only);
2162 if (rsc) {
2163 /* If there are multiple instance history entries for an
2164 * anonymous clone in a single node's history (which can
2165 * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2166 * to false), we want to consider the instances beyond the
2167 * first as removed, even if there are inactive instance
2168 * numbers available.
2169 */
2170 if (rsc->priv->active_nodes != NULL) {
2171 pcmk__notice("Active (now-)anonymous clone %s has "
2172 "multiple \"removed\" instance histories "
2173 "on %s",
2174 parent->id, pcmk__node_name(node));
2175 skip_inactive = TRUE;
2176 rsc = NULL;
2177 } else {
2178 pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2179 }
2180 }
2181 }
2182 g_list_free(locations);
2183
2184 } else {
2185 pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2186 if (!skip_inactive && !inactive_instance
2187 && !pcmk__is_set(child->flags, pcmk__rsc_blocked)) {
2188 // Remember one inactive instance in case we don't find active
2189 inactive_instance =
2190 parent->priv->fns->find_rsc(child, rsc_id, NULL,
2191 pcmk_rsc_match_clone_only);
2192
2193 /* ... but don't use it if it was already associated with a
2194 * pending action on another node
2195 */
2196 if (inactive_instance != NULL) {
2197 const pcmk_node_t *pending_node = NULL;
2198
2199 pending_node = inactive_instance->priv->pending_node;
2200 if ((pending_node != NULL)
2201 && !pcmk__same_node(pending_node, node)) {
2202 inactive_instance = NULL;
2203 }
2204 }
2205 }
2206 }
2207 }
2208
2209 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2210 pcmk__rsc_trace(parent, "Resource %s, empty slot",
2211 inactive_instance->id);
2212 rsc = inactive_instance;
2213 }
2214
2215 /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2216 * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2217 * don't want to consume a valid instance number for unclean nodes. Such
2218 * instances may appear to be active according to the history, but should be
2219 * considered inactive, so we can start an instance elsewhere. Treat such
2220 * instances as removed.
2221 *
2222 * An exception is instances running on guest nodes -- since guest node
2223 * "fencing" is actually just a resource stop, requires shouldn't apply.
2224 *
2225 * @TODO Ideally, we'd use an inactive instance number if it is not needed
2226 * for any clean instances. However, we don't know that at this point.
2227 */
2228 if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_needs_fencing)
2229 && (!node->details->online || node->details->unclean)
2230 && !pcmk__is_guest_or_bundle_node(node)
2231 && !pe__is_universal_clone(parent, scheduler)) {
2232
2233 rsc = NULL;
2234 }
2235
2236 if (rsc == NULL) {
2237 rsc = create_anonymous_removed_instance(parent, rsc_id, node,
2238 scheduler);
2239 pcmk__rsc_trace(parent, "Resource %s, removed", rsc->id);
2240 }
2241 return rsc;
2242 }
2243
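/*!
 * \internal
 * \brief Find the resource that a history entry applies to
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in]     node       Node that history entry is on
 * \param[in]     rsc_id     Resource ID from history entry
 *
 * \return Resource corresponding to \p rsc_id, or NULL if the history should
 *         be treated as belonging to a removed resource
 */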
2244 static pcmk_resource_t *
2245 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2246 const char *rsc_id)
2247 {
2248 pcmk_resource_t *rsc = NULL;
2249 pcmk_resource_t *parent = NULL;
2250
2251 pcmk__trace("looking for %s", rsc_id);
2252 rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2253
2254 if (rsc == NULL) {
2255 /* If we didn't find the resource by its name in the operation history,
2256 * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2257 * we create a single :0 "removed" instance to match against here.
2258 */
2259 char *clone0_id = clone_zero(rsc_id);
2260 pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
2261 clone0_id);
2262
2263 if ((clone0 != NULL)
2264 && !pcmk__is_set(clone0->flags, pcmk__rsc_unique)) {
2265
2266 rsc = clone0;
2267 parent = uber_parent(clone0);
2268 pcmk__trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2269 } else {
2270 pcmk__trace("%s is not known as %s either (removed)", rsc_id,
2271 clone0_id);
2272 }
2273 free(clone0_id);
2274
2275 } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
2276 pcmk__trace("Resource history for %s is considered removed "
2277 "because it is no longer primitive", rsc_id);
2278 return NULL;
2279
2280 } else {
2281 parent = uber_parent(rsc);
2282 }
2283
2284 if (pcmk__is_anonymous_clone(parent)) {
2285
2286 if (pcmk__is_bundled(parent)) {
2287 rsc = pe__find_bundle_replica(parent->priv->parent, node);
2288 } else {
2289 char *base = clone_strip(rsc_id);
2290
2291 rsc = find_anonymous_clone(scheduler, node, parent, base);
2292 free(base);
2293 pcmk__assert(rsc != NULL);
2294 }
2295 }
2296
2297 if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2298 && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
2299
2300 const bool removed = pcmk__is_set(rsc->flags, pcmk__rsc_removed);
2301
2302 pcmk__str_update(&(rsc->priv->history_id), rsc_id);
2303 pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2304 rsc_id, pcmk__node_name(node), rsc->id,
2305 (removed? " (removed)" : ""));
2306 }
2307 return rsc;
2308 }
2309
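/*!
 * \internal
 * \brief Unpack a removed resource's history entry
 *
 * \param[in]     rsc_entry  \c PCMK__XE_LRM_RESOURCE entry being unpacked
 * \param[in]     node       Node that history entry is on
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Newly created "removed" resource, or NULL on error
 */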
2310 static pcmk_resource_t *
2311 process_removed_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2312 pcmk_scheduler_t *scheduler)
2313 {
2314 pcmk_resource_t *rsc = NULL;
2315 const char *rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
2316
2317 pcmk__debug("Detected removed resource %s on %s", rsc_id,
2318 pcmk__node_name(node));
2319 rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2320 if (rsc == NULL) {
2321 return NULL;
2322 }
2323
2324 if (!pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
2325 pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2326
2327 } else {
2328 CRM_CHECK(rsc != NULL, return NULL);
2329 pcmk__rsc_trace(rsc, "Added \"removed\" resource %s", rsc->id);
2330 resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2331 "__removed_do_not_run__", scheduler);
2332 }
2333 return rsc;
2334 }
2335
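/*!
 * \internal
 * \brief Update resource and node state based on unpacked resource history
 *
 * \param[in,out] rsc      Resource whose history was unpacked
 * \param[in,out] node     Node that history applies to
 * \param[in]     on_fail  Failure handling determined from the history
 */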
2336 static void
2337 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2338 enum pcmk__on_fail on_fail)
2339 {
2340 pcmk_node_t *tmpnode = NULL;
2341 char *reason = NULL;
2342 enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
2343 pcmk_scheduler_t *scheduler = NULL;
2344 bool known_active = false;
2345
2346 pcmk__assert(rsc != NULL);
2347 scheduler = rsc->priv->scheduler;
2348 known_active = (rsc->priv->orig_role > pcmk_role_stopped);
2349 pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2350 rsc->id, pcmk_role_text(rsc->priv->orig_role),
2351 pcmk__node_name(node), pcmk__on_fail_text(on_fail));
2352
2353 /* process current state */
2354 if (rsc->priv->orig_role != pcmk_role_unknown) {
2355 pcmk_resource_t *iter = rsc;
2356
2357 while (iter) {
2358 if (g_hash_table_lookup(iter->priv->probed_nodes,
2359 node->priv->id) == NULL) {
2360 pcmk_node_t *n = pe__copy_node(node);
2361
2362 pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
2363 rsc->id,
2364 pcmk__s(rsc->priv->history_id, "the same"),
2365 pcmk__node_name(n));
2366 g_hash_table_insert(iter->priv->probed_nodes,
2367 (gpointer) n->priv->id, n);
2368 }
2369 if (pcmk__is_set(iter->flags, pcmk__rsc_unique)) {
2370 break;
2371 }
2372 iter = iter->priv->parent;
2373 }
2374 }
2375
2376 /* If a managed resource is believed to be running, but node is down ... */
2377 if (known_active && !node->details->online && !node->details->maintenance
2378 && pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
2379
2380 gboolean should_fence = FALSE;
2381
2382 /* If this is a guest node, fence it (regardless of whether fencing is
2383 * enabled, because guest node fencing is done by recovery of the
2384 * container resource rather than by the fencer). Mark the resource
2385 * we're processing as failed. When the guest comes back up, its
2386 * operation history in the CIB will be cleared, freeing the affected
2387 * resource to run again once we are sure we know its state.
2388 */
2389 if (pcmk__is_guest_or_bundle_node(node)) {
2390 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2391 should_fence = TRUE;
2392
2393 } else if (pcmk__is_set(scheduler->flags,
2394 pcmk__sched_fencing_enabled)) {
2395 if (pcmk__is_remote_node(node)
2396 && (node->priv->remote != NULL)
2397 && !pcmk__is_set(node->priv->remote->flags,
2398 pcmk__rsc_failed)) {
2399
2400 /* Setting unseen means that fencing of the remote node will
2401 * occur only if the connection resource is not going to start
2402 * somewhere. This allows connection resources on a failed
2403 * cluster node to move to another node without requiring the
2404 * remote nodes to be fenced as well.
2405 */
2406 pcmk__clear_node_flags(node, pcmk__node_seen);
2407 reason = pcmk__assert_asprintf("%s is active there (fencing "
2408 "will be revoked if remote "
2409 "connection can be "
2410 "re-established elsewhere)",
2411 rsc->id);
2412 }
2413 should_fence = TRUE;
2414 }
2415
2416 if (should_fence) {
2417 if (reason == NULL) {
2418 reason = pcmk__assert_asprintf("%s is thought to be active "
2419 "there",
2420 rsc->id);
2421 }
2422 pe_fence_node(scheduler, node, reason, FALSE);
2423 }
2424 free(reason);
2425 }
2426
    /* In order to calculate priority_fencing_delay correctly, save the failure
     * information and pass it to native_add_running()
     */
2428 save_on_fail = on_fail;
2429
2430 if (node->details->unclean) {
2431 /* No extra processing needed
2432 * Also allows resources to be started again after a node is shot
2433 */
2434 on_fail = pcmk__on_fail_ignore;
2435 }
2436
2437 switch (on_fail) {
2438 case pcmk__on_fail_ignore:
2439 /* nothing to do */
2440 break;
2441
2442 case pcmk__on_fail_demote:
2443 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2444 demote_action(rsc, node, FALSE);
2445 break;
2446
2447 case pcmk__on_fail_fence_node:
2448 /* treat it as if it is still running
2449 * but also mark the node as unclean
2450 */
2451 reason = pcmk__assert_asprintf("%s failed there", rsc->id);
2452 pe_fence_node(scheduler, node, reason, FALSE);
2453 free(reason);
2454 break;
2455
2456 case pcmk__on_fail_standby_node:
2457 pcmk__set_node_flags(node,
2458 pcmk__node_standby|pcmk__node_fail_standby);
2459 break;
2460
2461 case pcmk__on_fail_block:
2462 /* is_managed == FALSE will prevent any
2463 * actions being sent for the resource
2464 */
2465 pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2466 pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
2467 break;
2468
2469 case pcmk__on_fail_ban:
2470 /* make sure it comes up somewhere else
2471 * or not at all
2472 */
2473 resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2474 "__action_migration_auto__", scheduler);
2475 break;
2476
2477 case pcmk__on_fail_stop:
2478 pe__set_next_role(rsc, pcmk_role_stopped,
2479 PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2480 break;
2481
2482 case pcmk__on_fail_restart:
2483 if (known_active) {
2484 pcmk__set_rsc_flags(rsc,
2485 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2486 stop_action(rsc, node, FALSE);
2487 }
2488 break;
2489
2490 case pcmk__on_fail_restart_container:
2491 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2492 if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
2493 /* A bundle's remote connection can run on a different node than
2494 * the bundle's container. We don't necessarily know where the
2495 * container is running yet, so remember it and add a stop
2496 * action for it later.
2497 */
2498 scheduler->priv->stop_needed =
2499 g_list_prepend(scheduler->priv->stop_needed,
2500 rsc->priv->launcher);
2501 } else if (rsc->priv->launcher != NULL) {
2502 stop_action(rsc->priv->launcher, node, FALSE);
2503 } else if (known_active) {
2504 stop_action(rsc, node, FALSE);
2505 }
2506 break;
2507
2508 case pcmk__on_fail_reset_remote:
2509 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2510 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2511 tmpnode = NULL;
2512 if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
2513 tmpnode = pcmk_find_node(scheduler, rsc->id);
2514 }
2515 if (pcmk__is_remote_node(tmpnode)
2516 && !pcmk__is_set(tmpnode->priv->flags,
2517 pcmk__node_remote_fenced)) {
2518 /* The remote connection resource failed in a way that
2519 * should result in fencing the remote node.
2520 */
2521 pe_fence_node(scheduler, tmpnode,
2522 "remote connection is unrecoverable", FALSE);
2523 }
2524 }
2525
            /* Require the stop action regardless of whether fencing is
             * occurring
             */
2527 if (known_active) {
2528 stop_action(rsc, node, FALSE);
2529 }
2530
            /* If reconnect delay is in use, prevent the connection from
             * exiting the "STOPPED" role until the failure is cleared by the
             * delay timeout
             */
2533 if (rsc->priv->remote_reconnect_ms > 0U) {
2534 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2535 }
2536 break;
2537 }
2538
2539 /* Ensure a remote connection failure forces an unclean Pacemaker Remote
2540 * node to be fenced. By marking the node as seen, the failure will result
2541 * in a fencing operation regardless if we're going to attempt to reconnect
2542 * in this transition.
2543 */
2544 if (pcmk__all_flags_set(rsc->flags,
2545 pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
2546 tmpnode = pcmk_find_node(scheduler, rsc->id);
2547 if (tmpnode && tmpnode->details->unclean) {
2548 pcmk__set_node_flags(tmpnode, pcmk__node_seen);
2549 }
2550 }
2551
2552 if (known_active) {
2553 if (pcmk__is_set(rsc->flags, pcmk__rsc_removed)) {
2554 if (pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
2555 pcmk__notice("Removed resource %s is active on %s and will be "
2556 "stopped when possible",
2557 rsc->id, pcmk__node_name(node));
2558
2559 } else {
2560 pcmk__notice("Removed resource %s must be stopped manually on "
2561 "%s because " PCMK__OPT_STOP_REMOVED_RESOURCES
2562 " is set to false",
2563 rsc->id, pcmk__node_name(node));
2564 }
2565 }
2566
2567 native_add_running(rsc, node, scheduler,
2568 (save_on_fail != pcmk__on_fail_ignore));
2569 switch (on_fail) {
2570 case pcmk__on_fail_ignore:
2571 break;
2572 case pcmk__on_fail_demote:
2573 case pcmk__on_fail_block:
2574 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2575 break;
2576 default:
2577 pcmk__set_rsc_flags(rsc,
2578 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2579 break;
2580 }
2581
2582 } else if ((rsc->priv->history_id != NULL)
2583 && (strchr(rsc->priv->history_id, ':') != NULL)) {
2584 /* @COMPAT This is for older (<1.1.8) status sections that included
2585 * instance numbers, otherwise stopped instances are considered removed.
2586 *
2587 * @TODO We should be able to drop this, but some old regression tests
2588 * will need to be updated. Double-check that this is not still needed
2589 * for unique clones (which may have been later converted to anonymous).
2590 */
2591 pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
2592 rsc->priv->history_id, rsc->id);
2593 g_clear_pointer(&rsc->priv->history_id, free);
2594
2595 } else {
2596 GList *possible_matches = pe__resource_actions(rsc, node,
2597 PCMK_ACTION_STOP, FALSE);
2598 GList *gIter = possible_matches;
2599
2600 for (; gIter != NULL; gIter = gIter->next) {
2601 pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2602
2603 pcmk__set_action_flags(stop, pcmk__action_optional);
2604 }
2605
2606 g_list_free(possible_matches);
2607 }
2608
2609 /* A successful stop after migrate_to on the migration source doesn't make
2610 * the partially migrated resource stopped on the migration target.
2611 */
2612 if ((rsc->priv->orig_role == pcmk_role_stopped)
2613 && (rsc->priv->active_nodes != NULL)
2614 && (rsc->priv->partial_migration_target != NULL)
2615 && pcmk__same_node(rsc->priv->partial_migration_source, node)) {
2616
2617 rsc->priv->orig_role = pcmk_role_started;
2618 }
2619 }
2620
2621 /* create active recurring operations as optional */
2622 static void
2623 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2624 int start_index, int stop_index,
2625 GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2626 {
2627 int counter = -1;
2628 const char *task = NULL;
2629 const char *status = NULL;
2630 GList *gIter = sorted_op_list;
2631
2632 pcmk__assert(rsc != NULL);
    pcmk__rsc_trace(rsc, "%s: start index = %d, stop index = %d",
                    rsc->id, start_index, stop_index);
2635
2636 for (; gIter != NULL; gIter = gIter->next) {
2637 xmlNode *rsc_op = (xmlNode *) gIter->data;
2638
2639 guint interval_ms = 0;
2640 char *key = NULL;
2641 const char *id = pcmk__xe_id(rsc_op);
2642
2643 counter++;
2644
2645 if (node->details->online == FALSE) {
2646 pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2647 rsc->id, pcmk__node_name(node));
2648 break;
2649
2650 /* Need to check if there's a monitor for role="Stopped" */
2651 } else if (start_index < stop_index && counter <= stop_index) {
2652 pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2653 id, pcmk__node_name(node));
2654 continue;
2655
2656 } else if (counter < start_index) {
2657 pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2658 id, pcmk__node_name(node), counter);
2659 continue;
2660 }
2661
2662 pcmk__xe_get_guint(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2663 if (interval_ms == 0) {
2664 pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2665 id, pcmk__node_name(node));
2666 continue;
2667 }
2668
2669 status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
2670 if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2671 pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2672 id, pcmk__node_name(node));
2673 continue;
2674 }
2675 task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
2676 /* create the action */
2677 key = pcmk__op_key(rsc->id, task, interval_ms);
2678 pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2679 custom_action(rsc, key, task, node, TRUE, scheduler);
2680 }
2681 }
2682
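/*!
 * \internal
 * \brief Locate a resource's most recent stop and start in sorted history
 *
 * Promote and demote actions, or a monitor that found the resource running
 * after the last stop, count as implied starts when no explicit start or
 * migrate_from entry exists.
 *
 * \param[in]  sorted_op_list  Operation history entries, sorted by call ID
 * \param[out] start_index     Where to store index of last start (or -1)
 * \param[out] stop_index      Where to store index of last stop (or -1)
 */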
2683 void
2684 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2685 int *stop_index)
2686 {
2687 int counter = -1;
2688 int implied_monitor_start = -1;
2689 int implied_clone_start = -1;
2690 const char *task = NULL;
2691 const char *status = NULL;
2692
2693 *stop_index = -1;
2694 *start_index = -1;
2695
2696 for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2697 const xmlNode *rsc_op = (const xmlNode *) iter->data;
2698
2699 counter++;
2700
2701 task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
2702 status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
2703
2704 if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2705 && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2706 *stop_index = counter;
2707
2708 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2709 PCMK_ACTION_MIGRATE_FROM, NULL)) {
2710 *start_index = counter;
2711
2712 } else if ((implied_monitor_start <= *stop_index)
2713 && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2714 pcmk__str_casei)) {
2715 const char *rc = pcmk__xe_get(rsc_op, PCMK__XA_RC_CODE);
2716
2717 if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2718 implied_monitor_start = counter;
2719 }
2720 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2721 PCMK_ACTION_DEMOTE, NULL)) {
2722 implied_clone_start = counter;
2723 }
2724 }
2725
2726 if (*start_index == -1) {
2727 if (implied_clone_start != -1) {
2728 *start_index = implied_clone_start;
2729 } else if (implied_monitor_start != -1) {
2730 *start_index = implied_monitor_start;
2731 }
2732 }
2733 }
2734
2735 // If resource history entry has shutdown lock, remember lock node and time
2736 static void
2737 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2738 const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2739 {
2740 time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2741 time_t sched_time = 0;
2742 guint shutdown_lock_ms = scheduler->priv->shutdown_lock_ms;
2743
2744 pcmk__xe_get_time(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time);
2745 if (lock_time == 0) {
2746 return;
2747 }
2748
2749 sched_time = pcmk__scheduler_epoch_time(scheduler);
2750 if ((shutdown_lock_ms > 0U)
2751 && (sched_time > (lock_time + pcmk__timeout_ms2s(shutdown_lock_ms)))) {
2752
2753 pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2754 rsc->id, pcmk__node_name(node));
2755 pe__clear_resource_history(rsc, node);
2756
2757 } else {
2758 rsc->priv->lock_node = node;
2759 rsc->priv->lock_time = lock_time;
2760 }
2761 }
2762
2763 /*!
2764 * \internal
2765 * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2766 *
2767 * \param[in,out] node Node whose status is being unpacked
 * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2769 * \param[in,out] scheduler Scheduler data
2770 *
2771 * \return Resource corresponding to the entry, or NULL if no operation history
2772 */
2773 static pcmk_resource_t *
2774 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2775 pcmk_scheduler_t *scheduler)
2776 {
2777 GList *gIter = NULL;
2778 int stop_index = -1;
2779 int start_index = -1;
2780 enum rsc_role_e req_role = pcmk_role_unknown;
2781
2782 const char *rsc_id = pcmk__xe_id(lrm_resource);
2783
2784 pcmk_resource_t *rsc = NULL;
2785 GList *op_list = NULL;
2786 GList *sorted_op_list = NULL;
2787
2788 xmlNode *rsc_op = NULL;
2789 xmlNode *last_failure = NULL;
2790
2791 enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
2792 enum rsc_role_e saved_role = pcmk_role_unknown;
2793
2794 if (rsc_id == NULL) {
2795 pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2796 " entry: No " PCMK_XA_ID);
2797 pcmk__log_xml_info(lrm_resource, "missing-id");
2798 return NULL;
2799 }
2800 pcmk__trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id,
2801 pcmk__node_name(node));
2802
2803 /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2804 * them
2805 */
2806 for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2807 NULL);
2808 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
2809
2810 op_list = g_list_prepend(op_list, rsc_op);
2811 }
2812
2813 if (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2814 if (op_list == NULL) {
2815 // If there are no operations, there is nothing to do
2816 return NULL;
2817 }
2818 }
2819
2820 /* find the resource */
2821 rsc = unpack_find_resource(scheduler, node, rsc_id);
2822 if (rsc == NULL) {
2823 if (op_list == NULL) {
2824 // If there are no operations, there is nothing to do
2825 return NULL;
2826 } else {
2827 rsc = process_removed_resource(lrm_resource, node, scheduler);
2828 }
2829 }
2830 pcmk__assert(rsc != NULL);
2831
2832 // Check whether the resource is "shutdown-locked" to this node
2833 if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2834 unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2835 }
2836
2837 /* process operations */
2838 saved_role = rsc->priv->orig_role;
2839 rsc->priv->orig_role = pcmk_role_unknown;
2840 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2841
2842 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2843 xmlNode *rsc_op = (xmlNode *) gIter->data;
2844
2845 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2846 }
2847
2848 /* create active recurring operations as optional */
2849 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2850 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2851 scheduler);
2852
2853 /* no need to free the contents */
2854 g_list_free(sorted_op_list);
2855
2856 process_rsc_state(rsc, node, on_fail);
2857
2858 if (get_target_role(rsc, &req_role)) {
2859 if ((rsc->priv->next_role == pcmk_role_unknown)
2860 || (req_role < rsc->priv->next_role)) {
2861
2862 pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2863
2864 } else if (req_role > rsc->priv->next_role) {
2865 pcmk__rsc_info(rsc,
2866 "%s: Not overwriting calculated next role %s"
2867 " with requested next role %s",
2868 rsc->id, pcmk_role_text(rsc->priv->next_role),
2869 pcmk_role_text(req_role));
2870 }
2871 }
2872
2873 if (saved_role > rsc->priv->orig_role) {
2874 rsc->priv->orig_role = saved_role;
2875 }
2876
2877 return rsc;
2878 }
2879
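/*!
 * \internal
 * \brief Map removed launched resources in a node's history to their launchers
 *
 * \param[in]     lrm_rsc_list  \c PCMK__XE_LRM_RESOURCES XML for the node
 * \param[in,out] scheduler     Scheduler data
 */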
2880 static void
2881 handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
2882 pcmk_scheduler_t *scheduler)
2883 {
2884 for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
2885 PCMK__XE_LRM_RESOURCE,
2886 NULL, NULL);
2887 rsc_entry != NULL;
2888 rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2889
2890 pcmk_resource_t *rsc;
2891 pcmk_resource_t *launcher = NULL;
2892 const char *rsc_id;
2893 const char *launcher_id = NULL;
2894
2895 launcher_id = pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER);
2896 rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
2897 if ((launcher_id == NULL) || (rsc_id == NULL)) {
2898 continue;
2899 }
2900
2901 launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
2902 if (launcher == NULL) {
2903 continue;
2904 }
2905
2906 rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2907 if ((rsc == NULL) || (rsc->priv->launcher != NULL)
2908 || !pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2909 continue;
2910 }
2911
2912 pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
2913 rsc->id, launcher_id);
2914 rsc->priv->launcher = launcher;
2915 launcher->priv->launched = g_list_append(launcher->priv->launched,
2916 rsc);
2917 }
2918 }
2919
2920 /*!
2921 * \internal
2922 * \brief Unpack one node's lrm status section
2923 *
2924 * \param[in,out] node Node whose status is being unpacked
2925 * \param[in] xml CIB node state XML
2926 * \param[in,out] scheduler Scheduler data
2927 */
2928 static void
2929 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2930 pcmk_scheduler_t *scheduler)
2931 {
2932 bool found_removed_launched_resource = false;
2933
2934 // Drill down to PCMK__XE_LRM_RESOURCES section
2935 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2936 if (xml == NULL) {
2937 return;
2938 }
2939 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2940 if (xml == NULL) {
2941 return;
2942 }
2943
2944 // Unpack each PCMK__XE_LRM_RESOURCE entry
2945 for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2946 PCMK__XE_LRM_RESOURCE,
2947 NULL, NULL);
2948 rsc_entry != NULL;
2949 rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2950
2951 pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2952
2953 if ((rsc != NULL)
2954 && pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2955 found_removed_launched_resource = true;
2956 }
2957 }
2958
2959 /* Now that all resource state has been unpacked for this node, map any
2960 * removed launched resources to their launchers.
2961 */
2962 if (found_removed_launched_resource) {
2963 handle_removed_launched_resources(xml, scheduler);
2964 }
2965 }
2966
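// Set a resource's current role to started (unpromoted if promotable)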
2967 static void
2968 set_active(pcmk_resource_t *rsc)
2969 {
2970 const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2971
2972 if ((top != NULL) && pcmk__is_set(top->flags, pcmk__rsc_promotable)) {
2973 rsc->priv->orig_role = pcmk_role_unpromoted;
2974 } else {
2975 rsc->priv->orig_role = pcmk_role_started;
2976 }
2977 }
2978
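// GHFunc to set a node's assignment score to the int pointed to by user_data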
2979 static void
2980 set_node_score(gpointer key, gpointer value, gpointer user_data)
2981 {
2982 pcmk_node_t *node = value;
2983 int *score = user_data;
2984
2985 node->assign->score = *score;
2986 }
2987
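// XPath components for finding resource history entries in CIB status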
2988 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2989 "/" PCMK__XE_NODE_STATE
2990 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
2991 "/" PCMK__XE_LRM_RESOURCES \
2992 "/" PCMK__XE_LRM_RESOURCE
2993 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2994
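/* Find a PCMK__XE_LRM_RSC_OP entry in CIB status matching the given criteria.
 * For example, find_lrm_op("rsc1", PCMK_ACTION_MONITOR, "node1", NULL, -1,
 * scheduler) searches via (roughly) the XPath
 *
 *   /cib/status/node_state[@uname='node1']/lrm/lrm_resources
 *       /lrm_resource[@id='rsc1']/lrm_rsc_op[@operation='monitor']
 *
 * A non-negative target_rc additionally requires the entry to be a completed
 * operation with that return code.
 */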
2995 static xmlNode *
2996 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2997 int target_rc, pcmk_scheduler_t *scheduler)
2998 {
2999 GString *xpath = NULL;
3000 xmlNode *xml = NULL;
3001
3002 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
3003 return NULL);
3004
3005 xpath = g_string_sized_new(256);
3006 pcmk__g_strcat(xpath,
3007 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
3008 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
3009 SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
3010 NULL);
3011
3012 /* Need to check against transition_magic too? */
3013 if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
3014 pcmk__g_strcat(xpath,
3015 " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
3016 NULL);
3017
3018 } else if ((source != NULL)
3019 && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
3020 pcmk__g_strcat(xpath,
3021 " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
3022 NULL);
3023 } else {
3024 g_string_append_c(xpath, ']');
3025 }
3026
3027 xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3028 g_string_free(xpath, TRUE);
3029
3030 if (xml && target_rc >= 0) {
3031 int rc = PCMK_OCF_UNKNOWN_ERROR;
3032 int status = PCMK_EXEC_ERROR;
3033
3034 pcmk__xe_get_int(xml, PCMK__XA_RC_CODE, &rc);
3035 pcmk__xe_get_int(xml, PCMK__XA_OP_STATUS, &status);
3036 if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
3037 return NULL;
3038 }
3039 }
3040 return xml;
3041 }
3042
3043 static xmlNode *
3044 find_lrm_resource(const char *rsc_id, const char *node_name,
3045 pcmk_scheduler_t *scheduler)
3046 {
3047 GString *xpath = NULL;
3048 xmlNode *xml = NULL;
3049
3050 CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
3051
3052 xpath = g_string_sized_new(256);
3053 pcmk__g_strcat(xpath,
3054 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
3055 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
3056 NULL);
3057
3058 xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3059
3060 g_string_free(xpath, TRUE);
3061 return xml;
3062 }
3063
3064 /*!
3065 * \internal
3066 * \brief Check whether a resource has no completed action history on a node
3067 *
3068 * \param[in,out] rsc Resource to check
3069 * \param[in] node_name Node to check
3070 *
3071 * \return true if \p rsc_id is unknown on \p node_name, otherwise false
3072 */
3073 static bool
3074 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3075 {
3076 bool result = false;
3077 xmlXPathObject *search;
3078 char *xpath = NULL;
3079
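    /* Exclude entries with rc-code PCMK_OCF_UNKNOWN, which is used to record
     * operations that have not yet completed
     */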
3080 xpath = pcmk__assert_asprintf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3081 SUB_XPATH_LRM_RESOURCE
3082 "[@" PCMK_XA_ID "='%s']"
3083 SUB_XPATH_LRM_RSC_OP
3084 "[@" PCMK__XA_RC_CODE "!='%d']",
3085 node_name, rsc->id, PCMK_OCF_UNKNOWN);
3086
3087 search = pcmk__xpath_search(rsc->priv->scheduler->input->doc, xpath);
3088 result = (pcmk__xpath_num_results(search) == 0);
3089 xmlXPathFreeObject(search);
3090 free(xpath);
3091 return result;
3092 }
3093
3094 /*!
3095 * \internal
3096 * \brief Check whether a probe/monitor indicating the resource was not running
3097 * on a node happened after some event
3098 *
3099 * \param[in] rsc_id Resource being checked
3100 * \param[in] node_name Node being checked
3101 * \param[in] xml_op Event that monitor is being compared to
3102 * \param[in,out] scheduler Scheduler data
3103 *
3104 * \return true if such a monitor happened after event, false otherwise
3105 */
3106 static bool
3107 monitor_not_running_after(const char *rsc_id, const char *node_name,
3108 const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3109 {
3110 /* Any probe/monitor operation on the node indicating it was not running
3111 * there
3112 */
3113 xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3114 NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3115
3116 return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
3117 }
3118
3119 /*!
3120 * \internal
3121 * \brief Check whether any non-monitor operation on a node happened after some
3122 * event
3123 *
3124 * \param[in] rsc_id Resource being checked
3125 * \param[in] node_name Node being checked
3126 * \param[in] xml_op Event that non-monitor is being compared to
3127 * \param[in,out] scheduler Scheduler data
3128 *
 * \return true if such an operation happened after the event, false otherwise
3130 */
3131 static bool
3132 non_monitor_after(const char *rsc_id, const char *node_name,
3133 const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3134 {
3135 xmlNode *lrm_resource = NULL;
3136
3137 lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3138 if (lrm_resource == NULL) {
3139 return false;
3140 }
3141
3142 for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3143 NULL, NULL);
3144 op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {
3145
        const char *task = NULL;
3147
3148 if (op == xml_op) {
3149 continue;
3150 }
3151
3152 task = pcmk__xe_get(op, PCMK_XA_OPERATION);
3153
3154 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3155 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3156 NULL)
3157 && pe__is_newer_op(op, xml_op) > 0) {
3158 return true;
3159 }
3160 }
3161
3162 return false;
3163 }
3164
3165 /*!
3166 * \internal
3167 * \brief Check whether the resource has newer state on a node after a migration
3168 * attempt
3169 *
3170 * \param[in] rsc_id Resource being checked
3171 * \param[in] node_name Node being checked
3172 * \param[in] migrate_to Any migrate_to event that is being compared to
3173 * \param[in] migrate_from Any migrate_from event that is being compared to
3174 * \param[in,out] scheduler Scheduler data
3175 *
 * \return true if such an operation happened after the event, false otherwise
3177 */
3178 static bool
3179 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3180 const xmlNode *migrate_to,
3181 const xmlNode *migrate_from,
3182 pcmk_scheduler_t *scheduler)
3183 {
3184 const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
3185 const char *source = pcmk__xe_get(xml_op, PCMK__META_MIGRATE_SOURCE);
3186
    /* Prefer comparing to a migration event on the same node, if one exists,
     * since call IDs are more reliable there
     */
3190 if ((xml_op != migrate_to) && (migrate_to != NULL)
3191 && pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3192
3193 xml_op = migrate_to;
3194 }
3195
3196 /* If there's any newer non-monitor operation on the node, or any newer
3197 * probe/monitor operation on the node indicating it was not running there,
3198 * the migration events potentially no longer matter for the node.
3199 */
3200 return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
3201 || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
3202 }
3203
3204 /*!
3205 * \internal
3206 * \brief Parse migration source and target node names from history entry
3207 *
3208 * \param[in] entry Resource history entry for a migration action
3209 * \param[in] source_node If not NULL, source must match this node
3210 * \param[in] target_node If not NULL, target must match this node
3211 * \param[out] source_name Where to store migration source node name
3212 * \param[out] target_name Where to store migration target node name
3213 *
3214 * \return Standard Pacemaker return code
3215 */
3216 static int
3217 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3218 const pcmk_node_t *target_node,
3219 const char **source_name, const char **target_name)
3220 {
3221 *source_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_SOURCE);
3222 *target_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_TARGET);
3223 if ((*source_name == NULL) || (*target_name == NULL)) {
3224 pcmk__config_err("Ignoring resource history entry %s without "
3225 PCMK__META_MIGRATE_SOURCE " and "
3226 PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3227 return pcmk_rc_unpack_error;
3228 }
3229
3230 if ((source_node != NULL)
3231 && !pcmk__str_eq(*source_name, source_node->priv->name,
3232 pcmk__str_casei|pcmk__str_null_matches)) {
3233 pcmk__config_err("Ignoring resource history entry %s because "
3234 PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3235 pcmk__xe_id(entry), *source_name,
3236 pcmk__node_name(source_node));
3237 return pcmk_rc_unpack_error;
3238 }
3239
3240 if ((target_node != NULL)
3241 && !pcmk__str_eq(*target_name, target_node->priv->name,
3242 pcmk__str_casei|pcmk__str_null_matches)) {
3243 pcmk__config_err("Ignoring resource history entry %s because "
3244 PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3245 pcmk__xe_id(entry), *target_name,
3246 pcmk__node_name(target_node));
3247 return pcmk_rc_unpack_error;
3248 }
3249
3250 return pcmk_rc_ok;
3251 }
3252
/*!
3254 * \internal
3255 * \brief Add a migration source to a resource's list of dangling migrations
3256 *
3257 * If the migrate_to and migrate_from actions in a live migration both
3258 * succeeded, but there is no stop on the source, the migration is considered
3259 * "dangling." Add the source to the resource's dangling migration list, which
3260 * will be used to schedule a stop on the source without affecting the target.
3261 *
3262 * \param[in,out] rsc Resource involved in migration
3263 * \param[in] node Migration source
3264 */
3265 static void
3266 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3267 {
3268 pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3269 rsc->id, pcmk__node_name(node));
3270 rsc->priv->orig_role = pcmk_role_stopped;
3271 rsc->priv->dangling_migration_sources =
3272 g_list_prepend(rsc->priv->dangling_migration_sources,
3273 (gpointer) node);
3274 }
3275
3276 /*!
3277 * \internal
3278 * \brief Update resource role etc. after a successful migrate_to action
3279 *
3280 * \param[in,out] history Parsed action result history
3281 */
3282 static void
3283 unpack_migrate_to_success(struct action_history *history)
3284 {
3285 /* A complete migration sequence is:
3286 * 1. migrate_to on source node (which succeeded if we get to this function)
3287 * 2. migrate_from on target node
3288 * 3. stop on source node
3289 *
3290 * If no migrate_from has happened, the migration is considered to be
3291 * "partial". If the migrate_from succeeded but no stop has happened, the
3292 * migration is considered to be "dangling".
3293 *
3294 * If a successful migrate_to and stop have happened on the source node, we
3295 * still need to check for a partial migration, due to scenarios (easier to
3296 * produce with batch-limit=1) like:
3297 *
3298 * - A resource is migrating from node1 to node2, and a migrate_to is
3299 * initiated for it on node1.
3300 *
3301 * - node2 goes into standby mode while the migrate_to is pending, which
3302 * aborts the transition.
3303 *
3304 * - Upon completion of the migrate_to, a new transition schedules a stop
3305 * on both nodes and a start on node1.
3306 *
3307 * - If the new transition is aborted for any reason while the resource is
3308 * stopping on node1, the transition after that stop completes will see
3309 * the migrate_to and stop on the source, but it's still a partial
3310 * migration, and the resource must be stopped on node2 because it is
3311 * potentially active there due to the migrate_to.
3312 *
3313 * We also need to take into account that either node's history may be
3314 * cleared at any point in the migration process.
3315 */
3316 int from_rc = PCMK_OCF_OK;
3317 int from_status = PCMK_EXEC_PENDING;
3318 pcmk_node_t *target_node = NULL;
3319 xmlNode *migrate_from = NULL;
3320 const char *source = NULL;
3321 const char *target = NULL;
3322 bool source_newer_op = false;
3323 bool target_newer_state = false;
3324 bool active_on_target = false;
3325 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3326
3327 // Get source and target node names from XML
3328 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3329 &target) != pcmk_rc_ok) {
3330 return;
3331 }
3332
3333 // Check for newer state on the source
3334 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3335 scheduler);
3336
3337 // Check for a migrate_from action from this source on the target
3338 migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3339 target, source, -1, scheduler);
3340 if (migrate_from != NULL) {
3341 if (source_newer_op) {
3342 /* There's a newer non-monitor operation on the source and a
3343 * migrate_from on the target, so this migrate_to is irrelevant to
3344 * the resource's state.
3345 */
3346 return;
3347 }
3348 pcmk__xe_get_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3349 pcmk__xe_get_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3350 }
3351
3352 /* If the resource has newer state on both the source and target after the
3353 * migration events, this migrate_to is irrelevant to the resource's state.
3354 */
3355 target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3356 history->xml, migrate_from,
3357 scheduler);
3358 if (source_newer_op && target_newer_state) {
3359 return;
3360 }
3361
3362 /* Check for dangling migration (migrate_from succeeded but stop not done).
3363 * We know there's no stop because we already returned if the target has a
3364 * migrate_from and the source has any newer non-monitor operation.
3365 */
3366 if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3367 add_dangling_migration(history->rsc, history->node);
3368 return;
3369 }
3370
3371 /* Without newer state, this migrate_to implies the resource is active.
3372 * (Clones are not allowed to migrate, so role can't be promoted.)
3373 */
3374 history->rsc->priv->orig_role = pcmk_role_started;
3375
3376 target_node = pcmk_find_node(scheduler, target);
3377 active_on_target = !target_newer_state && (target_node != NULL)
3378 && target_node->details->online;
3379
3380 if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3381 if (active_on_target) {
3382 native_add_running(history->rsc, target_node, scheduler, TRUE);
3383 } else {
3384 // Mark resource as failed, require recovery, and prevent migration
3385 pcmk__set_rsc_flags(history->rsc,
3386 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3387 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3388 }
3389 return;
3390 }
3391
3392 // The migrate_from is pending, complete but erased, or to be scheduled
3393
3394 /* If there is no history at all for the resource on an online target, then
3395 * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3396 * have the probe result, it will be reflected in target_newer_state.
3397 */
3398 if ((target_node != NULL) && target_node->details->online
3399 && unknown_on_node(history->rsc, target)) {
3400 return;
3401 }
3402
3403 if (active_on_target) {
3404 pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3405
3406 native_add_running(history->rsc, target_node, scheduler, FALSE);
3407 if ((source_node != NULL) && source_node->details->online) {
3408 /* This is a partial migration: the migrate_to completed
3409 * successfully on the source, but the migrate_from has not
3410 * completed. Remember the source and target; if the newly
3411 * chosen target remains the same when we schedule actions
3412 * later, we may continue with the migration.
3413 */
3414 history->rsc->priv->partial_migration_target = target_node;
3415 history->rsc->priv->partial_migration_source = source_node;
3416 }
3417
3418 } else if (!source_newer_op) {
3419 // Mark resource as failed, require recovery, and prevent migration
3420 pcmk__set_rsc_flags(history->rsc,
3421 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3422 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3423 }
3424 }
3425
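/* Informal summary of the outcomes above (derived from the code, not
 * normative):
 *
 * - A newer source operation plus a recorded migrate_from, or newer state on
 *   both nodes, makes the entry irrelevant: nothing is updated.
 * - A successful migrate_from with no later stop is a dangling migration: a
 *   stop will be scheduled on the source.
 * - A failed migrate_from leaves the resource active on an online target;
 *   otherwise the resource is marked failed and non-migratable.
 * - A pending or erased migrate_from with both nodes online is a partial
 *   migration that may be continued when actions are scheduled later.
 */
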
3426 /*!
3427 * \internal
3428 * \brief Update resource role etc. after a failed migrate_to action
3429 *
3430 * \param[in,out] history Parsed action result history
3431 */
3432 static void
3433 unpack_migrate_to_failure(struct action_history *history)
3434 {
3435 xmlNode *target_migrate_from = NULL;
3436 const char *source = NULL;
3437 const char *target = NULL;
3438 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3439
3440 // Get source and target node names from XML
3441 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3442 &target) != pcmk_rc_ok) {
3443 return;
3444 }
3445
3446 /* If a migration failed, we have to assume the resource is active. Clones
3447 * are not allowed to migrate, so role can't be promoted.
3448 */
3449 history->rsc->priv->orig_role = pcmk_role_started;
3450
3451 // Check for migrate_from on the target
3452 target_migrate_from = find_lrm_op(history->rsc->id,
3453 PCMK_ACTION_MIGRATE_FROM, target, source,
3454 PCMK_OCF_OK, scheduler);
3455
3456 if (/* If the resource state is unknown on the target, it will likely be
3457 * probed there.
3458 * Don't just consider it running there. We will get back here anyway in
3459 * case the probe detects it's running there.
3460 */
3461 !unknown_on_node(history->rsc, target)
3462 /* If the resource has newer state on the target after the migration
3463 * events, this migrate_to no longer matters for the target.
3464 */
3465 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3466 target_migrate_from, scheduler)) {
        /* The resource has no newer state on the target, so assume it's still
         * active there (if it is up).
         */
3471 pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3472
3473 if (target_node && target_node->details->online) {
3474 native_add_running(history->rsc, target_node, scheduler, FALSE);
3475 }
3476
3477 } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3478 scheduler)) {
3479 /* We know the resource has newer state on the target, but this
3480 * migrate_to still matters for the source as long as there's no newer
3481 * non-monitor operation there.
3482 */
3483
3484 // Mark node as having dangling migration so we can force a stop later
3485 history->rsc->priv->dangling_migration_sources =
3486 g_list_prepend(history->rsc->priv->dangling_migration_sources,
3487 (gpointer) history->node);
3488 }
3489 }
3490
3491 /*!
3492 * \internal
3493 * \brief Update resource role etc. after a failed migrate_from action
3494 *
3495 * \param[in,out] history Parsed action result history
3496 */
3497 static void
3498 unpack_migrate_from_failure(struct action_history *history)
3499 {
3500 xmlNode *source_migrate_to = NULL;
3501 const char *source = NULL;
3502 const char *target = NULL;
3503 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3504
3505 // Get source and target node names from XML
3506 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3507 &target) != pcmk_rc_ok) {
3508 return;
3509 }
3510
3511 /* If a migration failed, we have to assume the resource is active. Clones
3512 * are not allowed to migrate, so role can't be promoted.
3513 */
3514 history->rsc->priv->orig_role = pcmk_role_started;
3515
3516 // Check for a migrate_to on the source
3517 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3518 source, target, PCMK_OCF_OK, scheduler);
3519
3520 if (/* If the resource state is unknown on the source, it will likely be
3521 * probed there.
3522 * Don't just consider it running there. We will get back here anyway in
3523 * case the probe detects it's running there.
3524 */
3525 !unknown_on_node(history->rsc, source)
3526 /* If the resource has newer state on the source after the migration
3527 * events, this migrate_from no longer matters for the source.
3528 */
3529 && !newer_state_after_migrate(history->rsc->id, source,
3530 source_migrate_to, history->xml,
3531 scheduler)) {
3532 /* The resource has no newer state on the source, so assume it's still
3533 * active there (if it is up).
3534 */
3535 pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3536
3537 if (source_node && source_node->details->online) {
3538 native_add_running(history->rsc, source_node, scheduler, TRUE);
3539 }
3540 }
3541 }
3542
3543 /*!
3544 * \internal
3545 * \brief Add an action to cluster's list of failed actions
3546 *
3547 * \param[in,out] history Parsed action result history
3548 */
3549 static void
3550 record_failed_op(struct action_history *history)
3551 {
3552 const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3553
3554 if (!(history->node->details->online)) {
3555 return;
3556 }
3557
3558 for (const xmlNode *xIter = scheduler->priv->failed->children;
3559 xIter != NULL; xIter = xIter->next) {
3560
3561 const char *key = pcmk__xe_history_key(xIter);
3562 const char *uname = pcmk__xe_get(xIter, PCMK_XA_UNAME);
3563
3564 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3565 && pcmk__str_eq(uname, history->node->priv->name,
3566 pcmk__str_casei)) {
3567 pcmk__trace("Skipping duplicate entry %s on %s", history->key,
3568 pcmk__node_name(history->node));
3569 return;
3570 }
3571 }
3572
3573 pcmk__trace("Adding entry for %s on %s to failed action list",
3574 history->key, pcmk__node_name(history->node));
3575 pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3576 pcmk__xe_set(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3577 pcmk__xml_copy(scheduler->priv->failed, history->xml);
3578 }
3579
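/* Illustrative result (attribute values hypothetical): after
 * record_failed_op(), the scheduler's failed-action list holds a copy of the
 * history entry stamped with the node and resource, roughly
 *
 *   <lrm_rsc_op id="rsc1_monitor_10000" operation="monitor" rc-code="7"
 *               uname="node1" rsc-id="rsc1" ... />
 *
 * Entries with the same operation key and node are skipped, so each failure
 * appears in the list at most once.
 */
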
3580 static char *
3581 last_change_str(const xmlNode *xml_op)
3582 {
3583 time_t when;
3584 char *result = NULL;
3585
3586 if (pcmk__xe_get_time(xml_op, PCMK_XA_LAST_RC_CHANGE,
3587 &when) == pcmk_rc_ok) {
3588 char *when_s = pcmk__epoch2str(&when, 0);
3589 const char *p = strchr(when_s, ' ');
3590
3591 // Skip day of week to make message shorter
3592 if ((p != NULL) && (*(++p) != '\0')) {
3593 result = pcmk__str_copy(p);
3594 }
3595 free(when_s);
3596 }
3597
3598 if (result == NULL) {
3599 result = pcmk__str_copy("unknown_time");
3600 }
3601
3602 return result;
3603 }
3604
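/* For example (illustrative): if PCMK_XA_LAST_RC_CHANGE decodes to a
 * ctime(3)-style string such as "Wed Jan  1 12:00:00 2025", last_change_str()
 * returns a copy of "Jan  1 12:00:00 2025" (the day of week is skipped); if
 * the attribute is missing or unparsable, it returns "unknown_time". Either
 * way, the caller must free the result.
 */
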
3605 /*!
3606 * \internal
3607 * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3608 *
3609 * \param[in,out] rsc Resource to ban
3610 */
3611 static void
3612 ban_from_all_nodes(pcmk_resource_t *rsc)
3613 {
3614 int score = -PCMK_SCORE_INFINITY;
3615 const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3616
3617 if (rsc->priv->parent != NULL) {
3618 pcmk_resource_t *parent = uber_parent(rsc);
3619
3620 if (pcmk__is_anonymous_clone(parent)) {
3621 /* For anonymous clones, if an operation with
3622 * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3623 * entire clone must stop.
3624 */
3625 rsc = parent;
3626 }
3627 }
3628
3629 // Ban the resource from all nodes
3630 pcmk__notice("%s will not be started under current conditions", rsc->id);
3631 g_clear_pointer(&rsc->priv->allowed_nodes, g_hash_table_destroy);
3632 rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
3633 g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3634 }
3635
3636 /*!
3637 * \internal
3638 * \brief Get configured failure handling and role after failure for an action
3639 *
3640 * \param[in,out] history Unpacked action history entry
3641 * \param[out] on_fail Where to set configured failure handling
 * \param[out] fail_role Where to set role to assume after failure
3643 */
3644 static void
3645 unpack_failure_handling(struct action_history *history,
3646 enum pcmk__on_fail *on_fail,
3647 enum rsc_role_e *fail_role)
3648 {
3649 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3650 history->interval_ms, true);
3651
3652 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3653 history->task,
3654 history->interval_ms, config);
3655
3656 const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3657
3658 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3659 history->interval_ms, on_fail_str);
3660 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3661 meta);
3662 g_hash_table_destroy(meta);
3663 }
3664
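/* Illustrative configuration (hypothetical resource and IDs): given an
 * operation definition like
 *
 *   <op id="rsc1-monitor-10s" name="monitor" interval="10s"
 *       on-fail="restart"/>
 *
 * unpack_failure_handling() for a 10-second monitor of rsc1 would set
 * *on_fail to pcmk__on_fail_restart and *fail_role to whatever
 * pcmk__role_after_failure() computes for that handling.
 */
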
3665 /*!
3666 * \internal
3667 * \brief Update resource role, failure handling, etc., after a failed action
3668 *
3669 * \param[in,out] history Parsed action result history
3670 * \param[in] config_on_fail Action failure handling from configuration
3671 * \param[in] fail_role Resource's role after failure of this action
 * \param[out] last_failure Where to store this entry as the latest failure
3673 * \param[in,out] on_fail Actual handling of action result
3674 */
3675 static void
3676 unpack_rsc_op_failure(struct action_history *history,
3677 enum pcmk__on_fail config_on_fail,
3678 enum rsc_role_e fail_role, xmlNode **last_failure,
3679 enum pcmk__on_fail *on_fail)
3680 {
3681 bool is_probe = false;
3682 char *last_change_s = NULL;
3683 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3684
3685 *last_failure = history->xml;
3686
3687 is_probe = pcmk_xe_is_probe(history->xml);
3688 last_change_s = last_change_str(history->xml);
3689
3690 if (!pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
3691 && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3692 pcmk__trace("Unexpected result (%s%s%s) was recorded for "
3693 "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3694 crm_exit_str(history->exit_status),
3695 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3696 pcmk__s(history->exit_reason, ""),
3697 (is_probe? "probe" : history->task), history->rsc->id,
3698 pcmk__node_name(history->node), last_change_s,
3699 history->exit_status, history->id);
3700 } else {
3701 pcmk__sched_warn(scheduler,
3702 "Unexpected result (%s%s%s) was recorded for %s of "
3703 "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3704 crm_exit_str(history->exit_status),
3705 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3706 pcmk__s(history->exit_reason, ""),
3707 (is_probe? "probe" : history->task), history->rsc->id,
3708 pcmk__node_name(history->node), last_change_s,
3709 history->exit_status, history->id);
3710
3711 if (is_probe && (history->exit_status != PCMK_OCF_OK)
3712 && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3713 && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3714
3715 /* A failed (not just unexpected) probe result could mean the user
3716 * didn't know resources will be probed even where they can't run.
3717 */
3718 pcmk__notice("If it is not possible for %s to run on %s, see the "
3719 PCMK_XA_RESOURCE_DISCOVERY " option for location "
3720 "constraints",
3721 history->rsc->id, pcmk__node_name(history->node));
3722 }
3723
3724 record_failed_op(history);
3725 }
3726
3727 free(last_change_s);
3728
3729 if (*on_fail < config_on_fail) {
3730 pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3731 pcmk__on_fail_text(*on_fail),
3732 pcmk__on_fail_text(config_on_fail), history->key);
3733 *on_fail = config_on_fail;
3734 }
3735
3736 if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3737 resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3738 "__stop_fail__", scheduler);
3739
3740 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3741 unpack_migrate_to_failure(history);
3742
3743 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3744 unpack_migrate_from_failure(history);
3745
3746 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3747 history->rsc->priv->orig_role = pcmk_role_promoted;
3748
3749 } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3750 if (config_on_fail == pcmk__on_fail_block) {
3751 history->rsc->priv->orig_role = pcmk_role_promoted;
3752 pe__set_next_role(history->rsc, pcmk_role_stopped,
3753 "demote with " PCMK_META_ON_FAIL "=block");
3754
3755 } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3756 history->rsc->priv->orig_role = pcmk_role_stopped;
3757
3758 } else {
3759 /* Staying in the promoted role would put the scheduler and
3760 * controller into a loop. Setting the role to unpromoted is not
3761 * dangerous because the resource will be stopped as part of
3762 * recovery, and any promotion will be ordered after that stop.
3763 */
3764 history->rsc->priv->orig_role = pcmk_role_unpromoted;
3765 }
3766 }
3767
3768 if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3769 /* leave stopped */
3770 pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3771 history->rsc->priv->orig_role = pcmk_role_stopped;
3772
3773 } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3774 pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3775 set_active(history->rsc);
3776 }
3777
3778 pcmk__rsc_trace(history->rsc,
3779 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3780 history->rsc->id,
3781 pcmk_role_text(history->rsc->priv->orig_role),
3782 pcmk__btoa(history->node->details->unclean),
3783 pcmk__on_fail_text(config_on_fail),
3784 pcmk_role_text(fail_role));
3785
3786 if ((fail_role != pcmk_role_started)
3787 && (history->rsc->priv->next_role < fail_role)) {
3788 pe__set_next_role(history->rsc, fail_role, "failure");
3789 }
3790
3791 if (fail_role == pcmk_role_stopped) {
3792 ban_from_all_nodes(history->rsc);
3793 }
3794 }
3795
3796 /*!
3797 * \internal
3798 * \brief Block a resource with a failed action if it cannot be recovered
3799 *
3800 * If resource action is a failed stop and fencing is not possible, mark the
3801 * resource as unmanaged and blocked, since recovery cannot be done.
3802 *
3803 * \param[in,out] history Parsed action history entry
3804 */
3805 static void
3806 block_if_unrecoverable(struct action_history *history)
3807 {
3808 char *last_change_s = NULL;
3809
3810 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3811 return; // All actions besides stop are always recoverable
3812 }
3813 if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3814 return; // Failed stops are recoverable via fencing
3815 }
3816
3817 last_change_s = last_change_str(history->xml);
3818 pcmk__sched_err(history->node->priv->scheduler,
3819 "No further recovery can be attempted for %s "
3820 "because %s on %s failed (%s%s%s) at %s "
3821 QB_XS " rc=%d id=%s",
3822 history->rsc->id, history->task,
3823 pcmk__node_name(history->node),
3824 crm_exit_str(history->exit_status),
3825 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3826 pcmk__s(history->exit_reason, ""),
3827 last_change_s, history->exit_status, history->id);
3828
3829 free(last_change_s);
3830
3831 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
3832 pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3833 }
3834
3835 /*!
3836 * \internal
3837 * \brief Update action history's execution status and why
3838 *
3839 * \param[in,out] history Parsed action history entry
3840 * \param[out] why Where to store reason for update
3841 * \param[in] value New value
3842 * \param[in] reason Description of why value was changed
3843 */
3844 static inline void
3845 remap_because(struct action_history *history, const char **why, int value,
3846 const char *reason)
3847 {
3848 if (history->execution_status != value) {
3849 history->execution_status = value;
3850 *why = reason;
3851 }
3852 }
3853
3854 /*!
3855 * \internal
3856 * \brief Remap informational monitor results and operation status
3857 *
 * For monitor results, certain OCF codes provide extended information to
 * the user about services that are not failed but not entirely healthy
 * either; Pacemaker must treat these as the "normal" result.
 *
 * For operation status, the action result can be used to determine an
 * appropriate status for responding to the action, since the status the
 * executor reports does not account for what result was expected.
3865 *
3866 * \param[in,out] history Parsed action history entry
3867 * \param[in,out] on_fail What should be done about the result
3868 * \param[in] expired Whether result is expired
3869 *
3870 * \note If the result is remapped and the node is not shutting down or failed,
3871 * the operation will be recorded in the scheduler data's list of failed
3872 * operations to highlight it for the user.
3873 *
3874 * \note This may update the resource's current and next role.
3875 */
3876 static void
3877 remap_operation(struct action_history *history,
3878 enum pcmk__on_fail *on_fail, bool expired)
3879 {
3880 /* @TODO It would probably also be a good idea to map an exit status of
3881 * CRM_EX_PROMOTED or CRM_EX_DEGRADED_PROMOTED to CRM_EX_OK for promote
3882 * actions
3883 */
3884
3885 bool is_probe = false;
3886 int orig_exit_status = history->exit_status;
3887 int orig_exec_status = history->execution_status;
3888 const char *why = NULL;
3889 const char *task = history->task;
3890
3891 // Remap degraded results to their successful counterparts
3892 history->exit_status = pcmk__effective_rc(history->exit_status);
3893 if (history->exit_status != orig_exit_status) {
3894 why = "degraded result";
3895 if (!expired && (!history->node->details->shutdown
3896 || history->node->details->online)) {
3897 record_failed_op(history);
3898 }
3899 }
3900
3901 if (!pcmk__is_bundled(history->rsc)
3902 && pcmk_xe_mask_probe_failure(history->xml)
3903 && ((history->execution_status != PCMK_EXEC_DONE)
3904 || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3905 history->execution_status = PCMK_EXEC_DONE;
3906 history->exit_status = PCMK_OCF_NOT_RUNNING;
3907 why = "equivalent probe result";
3908 }
3909
3910 /* If the executor reported an execution status of anything but done or
3911 * error, consider that final. But for done or error, we know better whether
3912 * it should be treated as a failure or not, because we know the expected
3913 * result.
3914 */
3915 switch (history->execution_status) {
3916 case PCMK_EXEC_DONE:
3917 case PCMK_EXEC_ERROR:
3918 break;
3919
3920 // These should be treated as node-fatal
3921 case PCMK_EXEC_NO_FENCE_DEVICE:
3922 case PCMK_EXEC_NO_SECRETS:
3923 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3924 "node-fatal error");
3925 goto remap_done;
3926
3927 default:
3928 goto remap_done;
3929 }
3930
3931 is_probe = pcmk_xe_is_probe(history->xml);
3932 if (is_probe) {
3933 task = "probe";
3934 }
3935
3936 if (history->expected_exit_status < 0) {
3937 /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3938 * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3939 * expected exit status in the transition key, which (along with the
3940 * similar case of a corrupted transition key in the CIB) will be
3941 * reported to this function as -1. Pacemaker 2.0+ does not support
3942 * rolling upgrades from those versions or processing of saved CIB files
3943 * from those versions, so we do not need to care much about this case.
3944 */
3945 remap_because(history, &why, PCMK_EXEC_ERROR,
3946 "obsolete history format");
3947 pcmk__config_warn("Expected result not found for %s on %s "
3948 "(corrupt or obsolete CIB?)",
3949 history->key, pcmk__node_name(history->node));
3950
3951 } else if (history->exit_status == history->expected_exit_status) {
3952 remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3953
3954 } else {
3955 remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3956 pcmk__rsc_debug(history->rsc,
3957 "%s on %s: expected %d (%s), got %d (%s%s%s)",
3958 history->key, pcmk__node_name(history->node),
3959 history->expected_exit_status,
3960 crm_exit_str(history->expected_exit_status),
3961 history->exit_status,
3962 crm_exit_str(history->exit_status),
3963 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3964 pcmk__s(history->exit_reason, ""));
3965 }
3966
3967 switch (history->exit_status) {
3968 case PCMK_OCF_OK:
3969 if (is_probe
3970 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3971 char *last_change_s = last_change_str(history->xml);
3972
3973 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3974 pcmk__rsc_info(history->rsc,
3975 "Probe found %s active on %s at %s",
3976 history->rsc->id, pcmk__node_name(history->node),
3977 last_change_s);
3978 free(last_change_s);
3979 }
3980 break;
3981
3982 case PCMK_OCF_NOT_RUNNING:
3983 if (is_probe
3984 || (history->expected_exit_status == history->exit_status)
3985 || !pcmk__is_set(history->rsc->flags, pcmk__rsc_managed)) {
3986
3987 /* For probes, recurring monitors for the Stopped role, and
3988 * unmanaged resources, "not running" is not considered a
3989 * failure.
3990 */
3991 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3992 history->rsc->priv->orig_role = pcmk_role_stopped;
3993 *on_fail = pcmk__on_fail_ignore;
3994 pe__set_next_role(history->rsc, pcmk_role_unknown,
3995 "not running");
3996 }
3997 break;
3998
3999 case PCMK_OCF_RUNNING_PROMOTED:
4000 if (is_probe
4001 && (history->exit_status != history->expected_exit_status)) {
4002 char *last_change_s = last_change_str(history->xml);
4003
4004 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4005 pcmk__rsc_info(history->rsc,
4006 "Probe found %s active and promoted on %s at %s",
4007 history->rsc->id,
4008 pcmk__node_name(history->node), last_change_s);
4009 free(last_change_s);
4010 }
4011 if (!expired
4012 || (history->exit_status == history->expected_exit_status)) {
4013 history->rsc->priv->orig_role = pcmk_role_promoted;
4014 }
4015 break;
4016
4017 case PCMK_OCF_FAILED_PROMOTED:
4018 if (!expired) {
4019 history->rsc->priv->orig_role = pcmk_role_promoted;
4020 }
4021 remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4022 break;
4023
4024 case PCMK_OCF_NOT_CONFIGURED:
4025 remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4026 break;
4027
4028 case PCMK_OCF_UNIMPLEMENT_FEATURE:
4029 {
4030 guint interval_ms = 0;
4031 pcmk__xe_get_guint(history->xml, PCMK_META_INTERVAL,
4032 &interval_ms);
4033
4034 if (interval_ms == 0) {
4035 if (!expired) {
4036 block_if_unrecoverable(history);
4037 }
4038 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4039 "exit status");
4040 } else {
4041 remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4042 "exit status");
4043 }
4044 }
4045 break;
4046
4047 case PCMK_OCF_NOT_INSTALLED:
4048 case PCMK_OCF_INVALID_PARAM:
4049 case PCMK_OCF_INSUFFICIENT_PRIV:
4050 if (!expired) {
4051 block_if_unrecoverable(history);
4052 }
4053 remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4054 break;
4055
4056 default:
4057 if (history->execution_status == PCMK_EXEC_DONE) {
4058 char *last_change_s = last_change_str(history->xml);
4059
4060 pcmk__info("Treating unknown exit status %d from %s of %s on "
4061 "%s at %s as failure",
4062 history->exit_status, task, history->rsc->id,
4063 pcmk__node_name(history->node), last_change_s);
4064 remap_because(history, &why, PCMK_EXEC_ERROR,
4065 "unknown exit status");
4066 free(last_change_s);
4067 }
4068 break;
4069 }
4070
4071 remap_done:
4072 if (why != NULL) {
4073 pcmk__rsc_trace(history->rsc,
4074 "Remapped %s result from [%s: %s] to [%s: %s] "
4075 "because of %s",
4076 history->key, pcmk_exec_status_str(orig_exec_status),
4077 crm_exit_str(orig_exit_status),
4078 pcmk_exec_status_str(history->execution_status),
4079 crm_exit_str(history->exit_status), why);
4080 }
4081 }
4082
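/* Worked example (informal, traced from the code above): a recurring monitor
 * expected to return PCMK_OCF_OK (0) that instead reports PCMK_OCF_DEGRADED
 * is first remapped by pcmk__effective_rc() to PCMK_OCF_OK and, if the
 * result is unexpired and the node available, recorded as a failed op for
 * visibility; the exit status then matches the expected one, so the
 * execution status is remapped to PCMK_EXEC_DONE ("expected result") and
 * the action is not treated as a failure.
 */
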
// Return TRUE if a start or monitor last failure's parameters have changed
4084 static bool
4085 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4086 pcmk_resource_t *rsc, pcmk_node_t *node)
4087 {
4088 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4089 if (pe__bundle_needs_remote_name(rsc)) {
4090 /* We haven't allocated resources yet, so we can't reliably
4091 * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4092 * When that's needed, defer the check until later.
4093 */
4094 pcmk__add_param_check(xml_op, rsc, node, pcmk__check_last_failure);
4095
4096 } else {
4097 pcmk__op_digest_t *digest_data = NULL;
4098
4099 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4100 rsc->priv->scheduler);
4101 switch (digest_data->rc) {
4102 case pcmk__digest_unknown:
4103 pcmk__trace("Resource %s history entry %s on %s"
4104 " has no digest to compare",
4105 rsc->id, pcmk__xe_history_key(xml_op),
4106 node->priv->id);
4107 break;
4108 case pcmk__digest_match:
4109 break;
4110 default:
4111 return TRUE;
4112 }
4113 }
4114 }
4115 return FALSE;
4116 }
4117
4118 // Order action after fencing of remote node, given connection rsc
4119 static void
4120 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4121 pcmk_scheduler_t *scheduler)
4122 {
4123 pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4124
4125 if (remote_node) {
4126 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4127 FALSE, scheduler);
4128
4129 order_actions(fence, action, pcmk__ar_first_implies_then);
4130 }
4131 }
4132
4133 static bool
4134 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4135 guint interval_ms, bool is_last_failure)
4136 {
4137 /* Clearing failures of recurring monitors has special concerns. The
4138 * executor reports only changes in the monitor result, so if the
4139 * monitor is still active and still getting the same failure result,
4140 * that will go undetected after the failure is cleared.
4141 *
4142 * Also, the operation history will have the time when the recurring
4143 * monitor result changed to the given code, not the time when the
4144 * result last happened.
4145 *
4146 * @TODO We probably should clear such failures only when the failure
4147 * timeout has passed since the last occurrence of the failed result.
4148 * However we don't record that information. We could maybe approximate
4149 * that by clearing only if there is a more recent successful monitor or
4150 * stop result, but we don't even have that information at this point
4151 * since we are still unpacking the resource's operation history.
4152 *
4153 * This is especially important for remote connection resources with a
4154 * reconnect interval, so in that case, we skip clearing failures
4155 * if the remote node hasn't been fenced.
4156 */
4157 if ((rsc->priv->remote_reconnect_ms > 0U)
4158 && pcmk__is_set(rsc->priv->scheduler->flags,
4159 pcmk__sched_fencing_enabled)
4160 && (interval_ms != 0)
4161 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4162
4163 pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4164 rsc->id);
4165
4166 if (remote_node && !pcmk__is_set(remote_node->priv->flags,
4167 pcmk__node_remote_fenced)) {
4168 if (is_last_failure) {
4169 pcmk__info("Waiting to clear monitor failure for remote node %s"
4170 " until fencing has occurred",
4171 rsc->id);
4172 }
4173 return TRUE;
4174 }
4175 }
4176 return FALSE;
4177 }
4178
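/* Illustrative configuration (hypothetical IDs): the guard above targets a
 * remote connection resource with a reconnect interval, e.g.
 *
 *   <primitive id="remote1" class="ocf" provider="pacemaker" type="remote">
 *     <instance_attributes id="remote1-attrs">
 *       <nvpair id="remote1-reconnect" name="reconnect_interval"
 *               value="60s"/>
 *     </instance_attributes>
 *   </primitive>
 *
 * Recurring monitor failures for remote1 are then not cleared until the
 * remote node has been fenced, so a still-failing monitor cannot go
 * undetected once its failure would otherwise expire.
 */
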
4179 /*!
4180 * \internal
4181 * \brief Check operation age and schedule failure clearing when appropriate
4182 *
4183 * This function has two distinct purposes. The first is to check whether an
4184 * operation history entry is expired (i.e. the resource has a failure timeout,
4185 * the entry is older than the timeout, and the resource either has no fail
4186 * count or its fail count is entirely older than the timeout). The second is to
4187 * schedule fail count clearing when appropriate (i.e. the operation is expired
4188 * and either the resource has an expired fail count or the operation is a
4189 * last_failure for a remote connection resource with a reconnect interval,
4190 * or the operation is a last_failure for a start or monitor operation and the
4191 * resource's parameters have changed since the operation).
4192 *
4193 * \param[in,out] history Parsed action result history
4194 *
4195 * \return true if operation history entry is expired, otherwise false
4196 */
4197 static bool
4198 check_operation_expiry(struct action_history *history)
4199 {
4200 bool expired = false;
4201 bool is_last_failure = (history->id != NULL)
4202 && g_str_has_suffix(history->id, "_last_failure_0");
4203 time_t last_run = 0;
4204 int unexpired_fail_count = 0;
4205 const char *clear_reason = NULL;
4206 const guint expiration_sec =
4207 pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4208 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4209
4210 if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4211 pcmk__rsc_trace(history->rsc,
4212 "Resource history entry %s on %s is not expired: "
4213 "Not Installed does not expire",
4214 history->id, pcmk__node_name(history->node));
4215 return false; // "Not installed" must always be cleared manually
4216 }
4217
4218 if ((expiration_sec > 0)
4219 && (pcmk__xe_get_time(history->xml, PCMK_XA_LAST_RC_CHANGE,
4220 &last_run) == pcmk_rc_ok)) {
4221
4222 /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4223 * timestamp
4224 */
4225
4226 time_t now = pcmk__scheduler_epoch_time(scheduler);
4227 time_t last_failure = 0;
4228
4229 // Is this particular operation history older than the failure timeout?
4230 if ((now >= (last_run + expiration_sec))
4231 && !should_ignore_failure_timeout(history->rsc, history->task,
4232 history->interval_ms,
4233 is_last_failure)) {
4234 expired = true;
4235 }
4236
4237 // Does the resource as a whole have an unexpired fail count?
4238 unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4239 &last_failure,
4240 pcmk__fc_effective,
4241 history->xml);
4242
4243 // Update scheduler recheck time according to *last* failure
4244 pcmk__trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4245 "expiration=%s last-failure@%lld",
4246 history->id, (long long) last_run, (expired? "" : "not "),
4247 (long long) now, unexpired_fail_count,
4248 pcmk__readable_interval(expiration_sec * 1000),
4249 (long long) last_failure);
4250 last_failure += expiration_sec + 1;
4251 if (unexpired_fail_count && (now < last_failure)) {
4252 pcmk__update_recheck_time(last_failure, scheduler,
4253 "fail count expiration");
4254 }
4255 }
4256
4257 if (expired) {
4258 if (pe_get_failcount(history->node, history->rsc, NULL,
4259 pcmk__fc_default, history->xml)) {
4260 // There is a fail count ignoring timeout
4261
4262 if (unexpired_fail_count == 0) {
4263 // There is no fail count considering timeout
4264 clear_reason = "it expired";
4265
4266 } else {
4267 /* This operation is old, but there is an unexpired fail count.
4268 * In a properly functioning cluster, this should only be
4269 * possible if this operation is not a failure (otherwise the
4270 * fail count should be expired too), so this is really just a
4271 * failsafe.
4272 */
4273 pcmk__rsc_trace(history->rsc,
4274 "Resource history entry %s on %s is not "
4275 "expired: Unexpired fail count",
4276 history->id, pcmk__node_name(history->node));
4277 expired = false;
4278 }
4279
4280 } else if (is_last_failure
4281 && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4282 /* Clear any expired last failure when reconnect interval is set,
4283 * even if there is no fail count.
4284 */
4285 clear_reason = "reconnect interval is set";
4286 }
4287 }
4288
4289 if (!expired && is_last_failure
4290 && should_clear_for_param_change(history->xml, history->task,
4291 history->rsc, history->node)) {
4292 clear_reason = "resource parameters have changed";
4293 }
4294
4295 if (clear_reason != NULL) {
4296 pcmk_action_t *clear_op = NULL;
4297
4298 // Schedule clearing of the fail count
4299 clear_op = pe__clear_failcount(history->rsc, history->node,
4300 clear_reason, scheduler);
4301
4302 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4303 && (history->rsc->priv->remote_reconnect_ms > 0)) {
4304 /* If we're clearing a remote connection due to a reconnect
4305 * interval, we want to wait until any scheduled fencing
4306 * completes.
4307 *
4308 * We could limit this to remote_node->details->unclean, but at
4309 * this point, that's always true (it won't be reliable until
4310 * after unpack_node_history() is done).
4311 */
4312 pcmk__info("Clearing %s failure will wait until any scheduled "
4313 "fencing of %s completes",
4314 history->task, history->rsc->id);
4315 order_after_remote_fencing(clear_op, history->rsc, scheduler);
4316 }
4317 }
4318
4319 if (expired && (history->interval_ms == 0)
4320 && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4321 switch (history->exit_status) {
4322 case PCMK_OCF_OK:
4323 case PCMK_OCF_NOT_RUNNING:
4324 case PCMK_OCF_RUNNING_PROMOTED:
4325 case PCMK_OCF_DEGRADED:
4326 case PCMK_OCF_DEGRADED_PROMOTED:
4327 // Don't expire probes that return these values
4328 pcmk__rsc_trace(history->rsc,
4329 "Resource history entry %s on %s is not "
4330 "expired: Probe result",
4331 history->id, pcmk__node_name(history->node));
4332 expired = false;
4333 break;
4334 }
4335 }
4336
4337 return expired;
4338 }
4339
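/* Worked example (hypothetical values): with PCMK_META_FAILURE_TIMEOUT
 * yielding expiration_sec=600 (10 minutes), a failure whose
 * PCMK_XA_LAST_RC_CHANGE is 12:00:00 is considered expired from 12:10:00
 * onward, unless should_ignore_failure_timeout() applies. While the fail
 * count is still unexpired, a cluster recheck is requested for one second
 * after the last failure's own expiry (12:10:01 here) so the failure is
 * cleared promptly.
 */
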
4340 int
4341 pe__target_rc_from_xml(const xmlNode *xml_op)
4342 {
4343 int target_rc = 0;
4344 const char *key = pcmk__xe_get(xml_op, PCMK__XA_TRANSITION_KEY);
4345
4346 if (key == NULL) {
4347 return -1;
4348 }
4349 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4350 return target_rc;
4351 }
4352
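/* For example (informal): a PCMK__XA_TRANSITION_KEY value has the form
 * <action>:<transition>:<expected-rc>:<uuid>, so for a key like
 * "5:12:0:01234567-89ab-cdef-0123-456789abcdef" this function returns 0
 * (PCMK_OCF_OK); if the attribute is missing entirely, it returns -1.
 */
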
4353 /*!
4354 * \internal
4355 * \brief Update a resource's state for an action result
4356 *
4357 * \param[in,out] history Parsed action history entry
4358 * \param[in] exit_status Exit status to base new state on
4359 * \param[in] last_failure Resource's last_failure entry, if known
4360 * \param[in,out] on_fail Resource's current failure handling
4361 */
4362 static void
4363 update_resource_state(struct action_history *history, int exit_status,
4364 const xmlNode *last_failure,
4365 enum pcmk__on_fail *on_fail)
4366 {
4367 bool clear_past_failure = false;
4368
4369 if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4370 || (!pcmk__is_bundled(history->rsc)
4371 && pcmk_xe_mask_probe_failure(history->xml))) {
4372 history->rsc->priv->orig_role = pcmk_role_stopped;
4373
4374 } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4375 clear_past_failure = true;
4376
4377 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4378 pcmk__str_none)) {
4379 if ((last_failure != NULL)
4380 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4381 pcmk__str_none)) {
4382 clear_past_failure = true;
4383 }
4384 if (history->rsc->priv->orig_role < pcmk_role_started) {
4385 set_active(history->rsc);
4386 }
4387
4388 } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4389 history->rsc->priv->orig_role = pcmk_role_started;
4390 clear_past_failure = true;
4391
4392 } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4393 history->rsc->priv->orig_role = pcmk_role_stopped;
4394 clear_past_failure = true;
4395
4396 } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4397 pcmk__str_none)) {
4398 history->rsc->priv->orig_role = pcmk_role_promoted;
4399 clear_past_failure = true;
4400
4401 } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4402 pcmk__str_none)) {
4403 if (*on_fail == pcmk__on_fail_demote) {
4404 /* Demote clears an error only if
4405 * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4406 */
4407 clear_past_failure = true;
4408 }
4409 history->rsc->priv->orig_role = pcmk_role_unpromoted;
4410
4411 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4412 pcmk__str_none)) {
4413 history->rsc->priv->orig_role = pcmk_role_started;
4414 clear_past_failure = true;
4415
4416 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4417 pcmk__str_none)) {
4418 unpack_migrate_to_success(history);
4419
4420 } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4421 pcmk__rsc_trace(history->rsc, "%s active on %s",
4422 history->rsc->id, pcmk__node_name(history->node));
4423 set_active(history->rsc);
4424 }
4425
4426 if (!clear_past_failure) {
4427 return;
4428 }
4429
4430 switch (*on_fail) {
4431 case pcmk__on_fail_stop:
4432 case pcmk__on_fail_ban:
4433 case pcmk__on_fail_standby_node:
4434 case pcmk__on_fail_fence_node:
4435 pcmk__rsc_trace(history->rsc,
4436 "%s (%s) is not cleared by a completed %s",
4437 history->rsc->id, pcmk__on_fail_text(*on_fail),
4438 history->task);
4439 break;
4440
4441 case pcmk__on_fail_block:
4442 case pcmk__on_fail_ignore:
4443 case pcmk__on_fail_demote:
4444 case pcmk__on_fail_restart:
4445 case pcmk__on_fail_restart_container:
4446 *on_fail = pcmk__on_fail_ignore;
4447 pe__set_next_role(history->rsc, pcmk_role_unknown,
4448 "clear past failures");
4449 break;
4450
4451 case pcmk__on_fail_reset_remote:
4452 if (history->rsc->priv->remote_reconnect_ms == 0U) {
4453 /* With no reconnect interval, the connection is allowed to
4454 * start again after the remote node is fenced and
4455 * completely stopped. (With a reconnect interval, we wait
4456 * for the failure to be cleared entirely before attempting
4457 * to reconnect.)
4458 */
4459 *on_fail = pcmk__on_fail_ignore;
4460 pe__set_next_role(history->rsc, pcmk_role_unknown,
4461 "clear past failures and reset remote");
4462 }
4463 break;
4464 }
4465 }
4466
4467 /*!
4468 * \internal
4469 * \brief Check whether a given history entry matters for resource state
4470 *
4471 * \param[in] history Parsed action history entry
4472 *
4473 * \return true if action can affect resource state, otherwise false
4474 */
4475 static inline bool
4476 can_affect_state(struct action_history *history)
4477 {
4478 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4479 PCMK_ACTION_START, PCMK_ACTION_STOP,
4480 PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4481 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4482 "asyncmon", NULL);
4483 }
4484
4485 /*!
4486 * \internal
4487 * \brief Unpack execution/exit status and exit reason from a history entry
4488 *
4489 * \param[in,out] history Action history entry to unpack
4490 *
4491 * \return Standard Pacemaker return code
4492 */
4493 static int
4494 unpack_action_result(struct action_history *history)
4495 {
4496 if ((pcmk__xe_get_int(history->xml, PCMK__XA_OP_STATUS,
4497 &(history->execution_status)) != pcmk_rc_ok)
4498 || (history->execution_status < PCMK_EXEC_PENDING)
4499 || (history->execution_status > PCMK_EXEC_MAX)
4500 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4501 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4502 "with invalid " PCMK__XA_OP_STATUS " '%s'",
4503 history->id, history->rsc->id,
4504 pcmk__node_name(history->node),
4505 pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_OP_STATUS),
4506 ""));
4507 return pcmk_rc_unpack_error;
4508 }
4509 if ((pcmk__xe_get_int(history->xml, PCMK__XA_RC_CODE,
4510 &(history->exit_status)) != pcmk_rc_ok)
4511 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4512 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4513 "with invalid " PCMK__XA_RC_CODE " '%s'",
4514 history->id, history->rsc->id,
4515 pcmk__node_name(history->node),
4516 pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_RC_CODE),
4517 ""));
4518 return pcmk_rc_unpack_error;
4519 }
4520 history->exit_reason = pcmk__xe_get(history->xml, PCMK_XA_EXIT_REASON);
4521 return pcmk_rc_ok;
4522 }
4523
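/* Illustrative history entry (hypothetical values): unpack_action_result()
 * reads the executor's verdict from attributes along the lines of
 *
 *   <lrm_rsc_op id="rsc1_start_0" op-status="0" rc-code="0"
 *               exit-reason="" ... />
 *
 * where op-status (PCMK__XA_OP_STATUS) must parse as a known execution
 * status other than "cancelled", and rc-code (PCMK__XA_RC_CODE) as a
 * plausible exit status; anything else causes the entire entry to be
 * ignored as a configuration error.
 */
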
4524 /*!
4525 * \internal
4526 * \brief Process an action history entry whose result expired
4527 *
4528 * \param[in,out] history Parsed action history entry
4529 * \param[in] orig_exit_status Action exit status before remapping
4530 *
4531 * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4532 * entry needs no further processing)
4533 */
4534 static int
4535 process_expired_result(struct action_history *history, int orig_exit_status)
4536 {
4537 if (!pcmk__is_bundled(history->rsc)
4538 && pcmk_xe_mask_probe_failure(history->xml)
4539 && (orig_exit_status != history->expected_exit_status)) {
4540
4541 if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4542 history->rsc->priv->orig_role = pcmk_role_unknown;
4543 }
4544 pcmk__trace("Ignoring resource history entry %s for probe of %s on %s: "
4545 "Masked failure expired",
4546 history->id, history->rsc->id,
4547 pcmk__node_name(history->node));
4548 return pcmk_rc_ok;
4549 }
4550
4551 if (history->exit_status == history->expected_exit_status) {
4552 return pcmk_rc_undetermined; // Only failures expire
4553 }
4554
4555 if (history->interval_ms == 0) {
4556 pcmk__notice("Ignoring resource history entry %s for %s of %s on %s: "
4557 "Expired failure",
4558 history->id, history->task, history->rsc->id,
4559 pcmk__node_name(history->node));
4560 return pcmk_rc_ok;
4561 }
4562
4563 if (history->node->details->online && !history->node->details->unclean) {
4564 /* Reschedule the recurring action. schedule_cancel() won't work at
4565 * this stage, so as a hacky workaround, forcibly change the restart
4566 * digest so pcmk__check_action_config() does what we want later.
4567 *
4568 * @TODO We should skip this if there is a newer successful monitor.
4569 * Also, this causes rescheduling only if the history entry
4570 * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4571 * scheduler regression test doesn't, but that may not be a
4572 * realistic scenario in production).
4573 */
4574 pcmk__notice("Rescheduling %s-interval %s of %s on %s after failure "
4575 "expired",
4576 pcmk__readable_interval(history->interval_ms),
4577 history->task, history->rsc->id,
4578 pcmk__node_name(history->node));
4579 pcmk__xe_set(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4580 "calculated-failure-timeout");
4581 return pcmk_rc_ok;
4582 }
4583
4584 return pcmk_rc_undetermined;
4585 }
4586
4587 /*!
4588 * \internal
4589 * \brief Process a masked probe failure
4590 *
4591 * \param[in,out] history Parsed action history entry
4592 * \param[in] orig_exit_status Action exit status before remapping
4593 * \param[in] last_failure Resource's last_failure entry, if known
4594 * \param[in,out] on_fail Resource's current failure handling
4595 */
4596 static void
4597 mask_probe_failure(struct action_history *history, int orig_exit_status,
4598 const xmlNode *last_failure,
4599 enum pcmk__on_fail *on_fail)
4600 {
4601 pcmk_resource_t *ban_rsc = history->rsc;
4602
4603 if (!pcmk__is_set(history->rsc->flags, pcmk__rsc_unique)) {
4604 ban_rsc = uber_parent(history->rsc);
4605 }
4606
4607 pcmk__notice("Treating probe result '%s' for %s on %s as 'not running'",
4608 crm_exit_str(orig_exit_status), history->rsc->id,
4609 pcmk__node_name(history->node));
4610 update_resource_state(history, history->expected_exit_status, last_failure,
4611 on_fail);
4612 pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4613
4614 record_failed_op(history);
4615 resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4616 "masked-probe-failure", ban_rsc->priv->scheduler);
4617 }
4618
4619 /*!
 * \internal
 * \brief Check whether a given failure is for a given pending action
4621 *
4622 * \param[in] history Parsed history entry for pending action
4623 * \param[in] last_failure Resource's last_failure entry, if known
4624 *
4625 * \return true if \p last_failure is failure of pending action in \p history,
4626 * otherwise false
4627 * \note Both \p history and \p last_failure must come from the same
4628 * \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4629 * the same.
4630 */
4631 static bool
4632 failure_is_newer(const struct action_history *history,
4633 const xmlNode *last_failure)
4634 {
4635 guint failure_interval_ms = 0U;
4636 long long failure_change = 0LL;
4637 long long this_change = 0LL;
4638
4639 if (last_failure == NULL) {
4640 return false; // Resource has no last_failure entry
4641 }
4642
4643 if (!pcmk__str_eq(history->task,
4644 pcmk__xe_get(last_failure, PCMK_XA_OPERATION),
4645 pcmk__str_none)) {
4646 return false; // last_failure is for different action
4647 }
4648
4649 if ((pcmk__xe_get_guint(last_failure, PCMK_META_INTERVAL,
4650 &failure_interval_ms) != pcmk_rc_ok)
4651 || (history->interval_ms != failure_interval_ms)) {
4652 return false; // last_failure is for action with different interval
4653 }
4654
4655 if ((pcmk__scan_ll(pcmk__xe_get(history->xml, PCMK_XA_LAST_RC_CHANGE),
4656 &this_change, 0LL) != pcmk_rc_ok)
4657 || (pcmk__scan_ll(pcmk__xe_get(last_failure, PCMK_XA_LAST_RC_CHANGE),
4658 &failure_change, 0LL) != pcmk_rc_ok)
4659 || (failure_change < this_change)) {
4660 return false; // Failure is not known to be newer
4661 }
4662
4663 return true;
4664 }
4665
4666 /*!
4667 * \internal
4668 * \brief Update a resource's role etc. for a pending action
4669 *
4670 * \param[in,out] history Parsed history entry for pending action
4671 * \param[in] last_failure Resource's last_failure entry, if known
4672 */
4673 static void
4674 process_pending_action(struct action_history *history,
4675 const xmlNode *last_failure)
4676 {
4677 /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4678 * and there might be a RSC_monitor_INTERVAL entry with the last successful
4679 * or pending result.
4680 *
4681 * If last_failure contains the failure of the pending recurring monitor
4682 * we're processing here, and is newer, the action is no longer pending.
4683 * (Pending results have call ID -1, which sorts last, so the last failure
4684 * if any should be known.)
4685 */
4686 if (failure_is_newer(history, last_failure)) {
4687 return;
4688 }
4689
4690 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4691 pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4692 set_active(history->rsc);
4693
4694 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4695 history->rsc->priv->orig_role = pcmk_role_promoted;
4696
4697 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4698 && history->node->details->unclean) {
        /* A migrate_to action is pending on an unclean source, so force a stop
4700 * on the target.
4701 */
4702 const char *migrate_target = NULL;
4703 pcmk_node_t *target = NULL;
4704
4705 migrate_target = pcmk__xe_get(history->xml, PCMK__META_MIGRATE_TARGET);
4706 target = pcmk_find_node(history->rsc->priv->scheduler,
4707 migrate_target);
4708 if (target != NULL) {
4709 stop_action(history->rsc, target, FALSE);
4710 }
4711 }
4712
4713 if (history->rsc->priv->pending_action != NULL) {
4714 /* There should never be multiple pending actions, but as a failsafe,
4715 * just remember the first one processed for display purposes.
4716 */
4717 return;
4718 }
4719
4720 if (pcmk_is_probe(history->task, history->interval_ms)) {
4721 /* Pending probes are currently never displayed, even if pending
4722 * operations are requested. If we ever want to change that,
4723 * enable the below and the corresponding part of
4724 * native.c:native_pending_action().
4725 */
4726 #if 0
        history->rsc->priv->pending_action = strdup("probe");
        history->rsc->priv->pending_node = history->node;
4729 #endif
4730 } else {
4731 history->rsc->priv->pending_action = strdup(history->task);
4732 history->rsc->priv->pending_node = history->node;
4733 }
4734 }
4735
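/*!
 * \internal
 * \brief Unpack one resource action history entry and update resource state
 *
 * \param[in,out] rsc           Resource that the history entry is for
 * \param[in,out] node          Node that the history entry is for
 * \param[in,out] xml_op        History entry XML to unpack
 * \param[in,out] last_failure  Where to track the resource's last failure
 *                              entry seen so far (may be updated)
 * \param[in,out] on_fail       Where to track failure handling so far
 */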
4736 static void
4737 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4738 xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4739 {
4740 int old_rc = 0;
4741 bool expired = false;
4742 pcmk_resource_t *parent = rsc;
4743 enum rsc_role_e fail_role = pcmk_role_unknown;
4744 enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4745
4746 struct action_history history = {
4747 .rsc = rsc,
4748 .node = node,
4749 .xml = xml_op,
4750 .execution_status = PCMK_EXEC_UNKNOWN,
4751 };
4752
4753 CRM_CHECK(rsc && node && xml_op, return);
4754
4755 history.id = pcmk__xe_id(xml_op);
4756 if (history.id == NULL) {
4757 pcmk__config_err("Ignoring resource history entry for %s on %s "
4758 "without ID", rsc->id, pcmk__node_name(node));
4759 return;
4760 }
4761
4762 // Task and interval
4763 history.task = pcmk__xe_get(xml_op, PCMK_XA_OPERATION);
4764 if (history.task == NULL) {
4765 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4766 "without " PCMK_XA_OPERATION,
4767 history.id, rsc->id, pcmk__node_name(node));
4768 return;
4769 }
4770 pcmk__xe_get_guint(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4771 if (!can_affect_state(&history)) {
4772 pcmk__rsc_trace(rsc,
4773 "Ignoring resource history entry %s for %s on %s "
4774 "with irrelevant action '%s'",
4775 history.id, rsc->id, pcmk__node_name(node),
4776 history.task);
4777 return;
4778 }
4779
4780 if (unpack_action_result(&history) != pcmk_rc_ok) {
4781 return; // Error already logged
4782 }
4783
4784 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4785 history.key = pcmk__xe_history_key(xml_op);
4786 pcmk__xe_get_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4787
4788 pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4789 history.id, history.task, history.call_id,
4790 pcmk__node_name(node),
4791 pcmk_exec_status_str(history.execution_status),
4792 crm_exit_str(history.exit_status));
4793
4794 if (node->details->unclean) {
4795 pcmk__rsc_trace(rsc,
4796 "%s is running on %s, which is unclean (further action "
4797 "depends on value of stop's on-fail attribute)",
4798 rsc->id, pcmk__node_name(node));
4799 }
4800
4801 expired = check_operation_expiry(&history);
4802 old_rc = history.exit_status;
4803
4804 remap_operation(&history, on_fail, expired);
4805
4806 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4807 goto done;
4808 }
4809
4810 if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4811 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4812 goto done;
4813 }
4814
4815 if (!pcmk__is_set(rsc->flags, pcmk__rsc_unique)) {
4816 parent = uber_parent(rsc);
4817 }
4818
4819 switch (history.execution_status) {
4820 case PCMK_EXEC_PENDING:
4821 process_pending_action(&history, *last_failure);
4822 goto done;
4823
4824 case PCMK_EXEC_DONE:
4825 update_resource_state(&history, history.exit_status, *last_failure,
4826 on_fail);
4827 goto done;
4828
4829 case PCMK_EXEC_NOT_INSTALLED:
4830 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4831 if (failure_strategy == pcmk__on_fail_ignore) {
4832 pcmk__warn("Cannot ignore failed %s of %s on %s: Resource "
4833 "agent doesn't exist "
4834 QB_XS " status=%d rc=%d id=%s",
4835 history.task, rsc->id, pcmk__node_name(node),
4836 history.execution_status, history.exit_status,
4837 history.id);
4838 /* Also for printing it as "FAILED" by marking it as
4839 * pcmk__rsc_failed later
4840 */
                *on_fail = pcmk__on_fail_ban;
            }
            resource_location(parent, node, -PCMK_SCORE_INFINITY,
                              "hard-error", rsc->priv->scheduler);
            unpack_rsc_op_failure(&history, failure_strategy, fail_role,
                                  last_failure, on_fail);
            goto done;

        case PCMK_EXEC_NOT_CONNECTED:
            if (pcmk__is_pacemaker_remote_node(node)
                && pcmk__is_set(node->priv->remote->flags,
                                pcmk__rsc_managed)) {
                /* We should never get into a situation where a managed remote
                 * connection resource is considered OK but a resource action
                 * behind the connection gets a "not connected" status. But as a
                 * fail-safe in case a bug or unusual circumstances do lead to
                 * that, ensure the remote connection is considered failed.
                 */
                pcmk__set_rsc_flags(node->priv->remote,
                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            }
            break; // Not done, do error handling

        case PCMK_EXEC_ERROR:
        case PCMK_EXEC_ERROR_HARD:
        case PCMK_EXEC_ERROR_FATAL:
        case PCMK_EXEC_TIMEOUT:
        case PCMK_EXEC_NOT_SUPPORTED:
        case PCMK_EXEC_INVALID:
            break; // Not done, do error handling

        default: // No other value should be possible at this point
            break;
    }

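    // The action failed, so check how the failure should be handled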
    unpack_failure_handling(&history, &failure_strategy, &fail_role);
    if ((failure_strategy == pcmk__on_fail_ignore)
        || ((failure_strategy == pcmk__on_fail_restart_container)
            && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {

        char *last_change_s = last_change_str(xml_op);

        pcmk__warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
                   QB_XS " %s",
                   history.task, crm_exit_str(history.exit_status),
                   (pcmk__str_empty(history.exit_reason)? "" : ": "),
                   pcmk__s(history.exit_reason, ""), rsc->id,
                   pcmk__node_name(node), last_change_s, history.id);
        free(last_change_s);

        update_resource_state(&history, history.expected_exit_status,
                              *last_failure, on_fail);
        pcmk__xe_set(xml_op, PCMK_XA_UNAME, node->priv->name);
        pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);

        record_failed_op(&history);

        if ((failure_strategy == pcmk__on_fail_restart_container)
            && (*on_fail <= pcmk__on_fail_restart)) {
            *on_fail = failure_strategy;
        }

    } else {
        unpack_rsc_op_failure(&history, failure_strategy, fail_role,
                              last_failure, on_fail);

        if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
            uint8_t log_level = LOG_ERR;

            if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
                log_level = LOG_NOTICE;
            }
            do_crm_log(log_level,
                       "Preventing %s from restarting on %s because "
                       "of hard failure (%s%s%s) " QB_XS " %s",
                       parent->id, pcmk__node_name(node),
                       crm_exit_str(history.exit_status),
                       (pcmk__str_empty(history.exit_reason)? "" : ": "),
                       pcmk__s(history.exit_reason, ""), history.id);
            resource_location(parent, node, -PCMK_SCORE_INFINITY,
                              "hard-error", rsc->priv->scheduler);

        } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
            pcmk__sched_err(rsc->priv->scheduler,
                            "Preventing %s from restarting anywhere because "
                            "of fatal failure (%s%s%s) " QB_XS " %s",
                            parent->id, crm_exit_str(history.exit_status),
                            (pcmk__str_empty(history.exit_reason)? "" : ": "),
                            pcmk__s(history.exit_reason, ""), history.id);
            resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
                              "fatal-error", rsc->priv->scheduler);
        }
    }

done:
    pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
                    rsc->id, pcmk__node_name(node), history.id,
                    pcmk_role_text(rsc->priv->orig_role),
                    pcmk_role_text(rsc->priv->next_role));
}

/*!
 * \internal
 * \brief Insert a node attribute with value into a \c GHashTable
 *
 * This is suitable for use with \c g_hash_table_foreach_steal().
 *
 * \param[in,out] key        Key to insert (either freed or owned by
 *                           \p user_data upon return)
 * \param[in]     value      Value to insert (owned by \p user_data upon
 *                           return)
 * \param[in]     user_data  \c GHashTable to insert into
 *
 * \return \c TRUE (so that \c g_hash_table_foreach_steal() removes the entry
 *         from the source table)
 */
static gboolean
insert_attr(gpointer key, gpointer value, gpointer user_data)
{
    GHashTable *table = user_data;

    g_hash_table_insert(table, key, value);
    return TRUE;
}

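/*!
 * \internal
 * \brief Add built-in and configured attributes to a node's attribute table
 *
 * Insert the node's name, ID, DC status, and cluster name as built-in
 * attributes, then unpack any instance attributes and utilization attributes
 * from the node's XML configuration, defaulting the site name to the cluster
 * name if not otherwise set.
 *
 * \param[in]     xml_obj    Node's XML configuration element
 * \param[in,out] node       Node to add attributes to
 * \param[in]     overwrite  Whether unpacked attributes should replace any
 *                           existing values
 * \param[in,out] scheduler  Scheduler data
 */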
static void
add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
               pcmk_scheduler_t *scheduler)
{
    const char *cluster_name = NULL;
    const char *dc_id = pcmk__xe_get(scheduler->input, PCMK_XA_DC_UUID);
    const pcmk_rule_input_t rule_input = {
        .now = scheduler->priv->now,
    };

    pcmk__insert_dup(node->priv->attrs,
                     CRM_ATTR_UNAME, node->priv->name);

    pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);

    if ((scheduler->dc_node == NULL)
        && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {

        scheduler->dc_node = node;
        pcmk__insert_dup(node->priv->attrs,
                         CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);

    } else if (!pcmk__same_node(node, scheduler->dc_node)) {
        pcmk__insert_dup(node->priv->attrs,
                         CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
    }

    cluster_name = g_hash_table_lookup(scheduler->priv->options,
                                       PCMK_OPT_CLUSTER_NAME);
    if (cluster_name) {
        pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
                         cluster_name);
    }

    if (overwrite) {
        /* @TODO Try to reorder some unpacking so that we don't need the
         * overwrite argument or to unpack into a temporary table
         */
        GHashTable *unpacked = pcmk__strkey_table(free, free);

        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
                                   &rule_input, unpacked, NULL, scheduler);
        g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
        g_hash_table_destroy(unpacked);

    } else {
        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
                                   &rule_input, node->priv->attrs, NULL,
                                   scheduler);
    }

    pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_input,
                               node->priv->utilization, NULL, scheduler);

    if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
                        pcmk__rsc_node_current) == NULL) {
        const char *site_name = pcmk__node_attr(node, "site-name", NULL,
                                                pcmk__rsc_node_current);

        if (site_name) {
            pcmk__insert_dup(node->priv->attrs,
                             CRM_ATTR_SITE_NAME, site_name);

        } else if (cluster_name) {
            /* Default to cluster-name if unset */
            pcmk__insert_dup(node->priv->attrs,
                             CRM_ATTR_SITE_NAME, cluster_name);
        }
    }
}

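/*!
 * \internal
 * \brief Extract a resource's history entries from its CIB status XML
 *
 * \param[in]     node           Name of node that history is for
 * \param[in]     rsc            ID of resource that history is for
 * \param[in,out] rsc_entry      Resource's \c PCMK__XE_LRM_RESOURCE XML
 * \param[in]     active_filter  If \c TRUE, return only entries from the
 *                               resource's most recent start onward (and
 *                               nothing if it was stopped after that)
 *
 * \return List of matching history entries (which the caller should free with
 *         \c g_list_free(), without freeing the XML)
 */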
static GList *
extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry,
                   gboolean active_filter)
{
    int counter = -1;
    int stop_index = -1;
    int start_index = -1;

    xmlNode *rsc_op = NULL;

    GList *gIter = NULL;
    GList *op_list = NULL;
    GList *sorted_op_list = NULL;

    /* extract operations */
    for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
                                       NULL);
         rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {

        pcmk__xe_set(rsc_op, PCMK_XA_RESOURCE, rsc);
        pcmk__xe_set(rsc_op, PCMK_XA_UNAME, node);
        op_list = g_list_prepend(op_list, rsc_op);
    }

    if (op_list == NULL) {
        /* if there are no operations, there is nothing to do */
        return NULL;
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);

    /* without an active filter, return all operations */
    if (active_filter == FALSE) {
        return sorted_op_list;
    }

    op_list = NULL;

    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        counter++;

        if (start_index < stop_index) {
            /* The most recent stop follows the most recent start, so the
             * resource is inactive and none of its history is relevant
             */
            pcmk__trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
            break;

        } else if (counter < start_index) {
            // Ignore entries that precede the most recent start
            pcmk__trace("Skipping %s: old", pcmk__xe_id(rsc_op));
            continue;
        }
        op_list = g_list_append(op_list, rsc_op);
    }

    g_list_free(sorted_op_list);
    return op_list;
}

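/*!
 * \brief Find resource history entries in the CIB status section
 *
 * \param[in]     rsc            If not \c NULL, include only this resource's
 *                               history
 * \param[in]     node           If not \c NULL, include only this node's
 *                               history
 * \param[in]     active_filter  If \c TRUE, include only entries relevant to
 *                               each resource's current activity
 * \param[in,out] scheduler      Scheduler data
 *
 * \return List of matching history entries (which the caller should free with
 *         \c g_list_free(), without freeing the XML)
 *
 * A minimal usage sketch (assuming \p scheduler has already been populated
 * with CIB input):
 * \code
 * GList *ops = find_operations(NULL, NULL, TRUE, scheduler);
 *
 * for (GList *iter = ops; iter != NULL; iter = iter->next) {
 *     xmlNode *op = iter->data;
 *
 *     crm_debug("Found history entry %s", pcmk__xe_id(op));
 * }
 * g_list_free(ops);
 * \endcode
 */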
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
                pcmk_scheduler_t *scheduler)
{
    GList *output = NULL;
    GList *intermediate = NULL;

    xmlNode *tmp = NULL;
    xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
                                           NULL, NULL);

    pcmk_node_t *this_node = NULL;

    xmlNode *node_state = NULL;

    CRM_CHECK(status != NULL, return NULL);

    for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
                                           NULL);
         node_state != NULL;
         node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {

        const char *uname = pcmk__xe_get(node_state, PCMK_XA_UNAME);

        if ((node != NULL) && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
            continue;
        }

        this_node = pcmk_find_node(scheduler, uname);
        if (this_node == NULL) {
            CRM_LOG_ASSERT(this_node != NULL);
            continue;

        } else if (pcmk__is_pacemaker_remote_node(this_node)) {
            determine_remote_online_status(scheduler, this_node);

        } else {
            determine_online_status(node_state, this_node, scheduler);
        }

        if (this_node->details->online
            || pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {

            /* Offline nodes run no resources if fencing is disabled. If
             * fencing is enabled, we need to ensure that resource start events
             * happen after the fencing event.
             */
            xmlNode *lrm_rsc = NULL;

            tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL);
            tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
                                       NULL);

            for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
                                                NULL, NULL);
                 lrm_rsc != NULL;
                 lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {

                const char *rsc_id = pcmk__xe_get(lrm_rsc, PCMK_XA_ID);

                if ((rsc != NULL)
                    && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
                    continue;
                }

                intermediate = extract_operations(uname, rsc_id, lrm_rsc,
                                                  active_filter);
                output = g_list_concat(output, intermediate);
            }
        }
    }

    return output;
}