1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <time.h>
16
17 #include <glib.h>
18 #include <libxml/tree.h> // xmlNode
19 #include <libxml/xpath.h> // xmlXPathObject, etc.
20
21 #include <crm/crm.h>
22 #include <crm/services.h>
23 #include <crm/common/xml.h>
24
25 #include <crm/common/util.h>
26 #include <crm/pengine/internal.h>
27 #include <pe_status_private.h>
28
29 // A (parsed) resource action history entry
30 struct action_history {
31 pcmk_resource_t *rsc; // Resource that history is for
32 pcmk_node_t *node; // Node that history is for
33 xmlNode *xml; // History entry XML
34
35 // Parsed from entry XML
36 const char *id; // XML ID of history entry
37 const char *key; // Operation key of action
38 const char *task; // Action name
39 const char *exit_reason; // Exit reason given for result
40 guint interval_ms; // Action interval
41 int call_id; // Call ID of action
42 int expected_exit_status; // Expected exit status of action
43 int exit_status; // Actual exit status of action
44 int execution_status; // Execution status of action
45 };
46
47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
48 * pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags(), so that the
49 * flag is stringified more readably in log messages.
50 */
51 #define set_config_flag(scheduler, option, flag) do { \
52 GHashTable *config_hash = (scheduler)->priv->options; \
53 const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
54 \
55 if (scf_value != NULL) { \
56 if (pcmk__is_true(scf_value)) { \
57 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
58 LOG_TRACE, "Scheduler", \
59 crm_system_name, (scheduler)->flags, \
60 (flag), #flag); \
61 } else { \
62 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
63 LOG_TRACE, "Scheduler", \
64 crm_system_name, (scheduler)->flags, \
65 (flag), #flag); \
66 } \
67 } \
68 } while(0)
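/* Illustrative usage (a sketch; see the actual calls in unpack_config()
 * below):
 *
 *     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 *                     pcmk__sched_in_maintenance);
 *
 * This looks up the option in scheduler->priv->options and, if a value is
 * present, sets or clears pcmk__sched_in_maintenance in scheduler->flags
 * according to pcmk__is_true().
 */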
69
70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71 xmlNode *xml_op, xmlNode **last_failure,
72 enum pcmk__on_fail *failed);
73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74 pcmk_node_t *this_node);
75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76 bool overwrite, pcmk_scheduler_t *scheduler);
77 static void determine_online_status(const xmlNode *node_state,
78 pcmk_node_t *this_node,
79 pcmk_scheduler_t *scheduler);
80
81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
82 pcmk_scheduler_t *scheduler);
83
84
85 /*!
86 * \internal
87 * \brief Check whether a node is a dangling guest node
88 *
89 * \param[in] node Node to check
90 *
91 * \return true if \p node had a Pacemaker Remote connection resource with a
92 * launcher that was removed from the CIB, otherwise false.
93 */
94 static bool
95 is_dangling_guest_node(pcmk_node_t *node)
96 {
97 return pcmk__is_pacemaker_remote_node(node)
98 && (node->priv->remote != NULL)
99 && (node->priv->remote->priv->launcher == NULL)
100 && pcmk__is_set(node->priv->remote->flags,
101 pcmk__rsc_removed_launched);
102 }
103
104 /*!
105 * \brief Schedule a fence action for a node
106 *
107 * \param[in,out] scheduler Scheduler data
108 * \param[in,out] node Node to fence
109 * \param[in] reason Text description of why fencing is needed
110 * \param[in] priority_delay Whether to consider
111 * \c PCMK_OPT_PRIORITY_FENCING_DELAY
112 */
113 void
114 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
115 const char *reason, bool priority_delay)
116 {
117 CRM_CHECK(node, return);
118
119 if (pcmk__is_guest_or_bundle_node(node)) {
120 // Fence a guest or bundle node by marking its launcher as failed
121 pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
122
123 if (!pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
124 if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
125 pcmk__notice("Not fencing guest node %s (otherwise would "
126 "because %s): its guest resource %s is unmanaged",
127 pcmk__node_name(node), reason, rsc->id);
128 } else {
129 pcmk__sched_warn(scheduler,
130 "Guest node %s will be fenced "
131 "(by recovering its guest resource %s): %s",
132 pcmk__node_name(node), rsc->id, reason);
133
134 /* We don't mark the node as unclean because that would prevent the
135 * node from running resources. We want to allow it to run resources
136 * in this transition if the recovery succeeds.
137 */
138 pcmk__set_node_flags(node, pcmk__node_remote_reset);
139 pcmk__set_rsc_flags(rsc,
140 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
141 }
142 }
143
144 } else if (is_dangling_guest_node(node)) {
145 pcmk__info("Cleaning up dangling connection for guest node %s: fencing "
146 "was already done because %s, and guest resource no longer "
147 "exists",
148 pcmk__node_name(node), reason);
149 pcmk__set_rsc_flags(node->priv->remote,
150 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
151
152 } else if (pcmk__is_remote_node(node)) {
153 pcmk_resource_t *rsc = node->priv->remote;
154
155 if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
156 pcmk__notice("Not fencing remote node %s (otherwise would because "
157 "%s): connection is unmanaged",
158 pcmk__node_name(node), reason);
159 } else if (!pcmk__is_set(node->priv->flags, pcmk__node_remote_reset)) {
160 pcmk__set_node_flags(node, pcmk__node_remote_reset);
161 pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
162 pcmk__node_name(node),
163 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
164 reason);
165 }
166 node->details->unclean = TRUE;
167 // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
168 pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
169
170 } else if (node->details->unclean) {
171 const char *fenced_s = "also is unclean";
172
173 if (pe_can_fence(scheduler, node)) {
174 fenced_s = "would also be fenced";
175 }
176 pcmk__trace("Cluster node %s %s because %s",
177 pcmk__node_name(node), fenced_s, reason);
178
179 } else {
180 pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
181 pcmk__node_name(node),
182 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
183 reason);
184 node->details->unclean = TRUE;
185 pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
186 }
187 }
188
189 // @TODO xpaths can't handle templates, rules, or id-refs
190
191 // nvpair with provides or requires set to unfencing
192 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
193 "[(@" PCMK_XA_NAME "='" PCMK_FENCING_PROVIDES "'" \
194 "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
195 "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
196
197 // unfencing in rsc_defaults or any resource
198 #define XPATH_ENABLE_UNFENCING \
199 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
200 "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
201 "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
202 "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
203
204 static void
205 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
206 {
207 xmlXPathObject *result = NULL;
208
209 if (!pcmk__is_set(scheduler->flags, flag)) {
210 result = pcmk__xpath_search(scheduler->input->doc, xpath);
211 if (pcmk__xpath_num_results(result) > 0) {
212 pcmk__set_scheduler_flags(scheduler, flag);
213 }
214 xmlXPathFreeObject(result);
215 }
216 }
217
218 gboolean
219 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
220 {
221 const char *value = NULL;
222 GHashTable *config_hash = pcmk__strkey_table(free, free);
223
224 const pcmk_rule_input_t rule_input = {
225 .now = scheduler->priv->now,
226 };
227
228 scheduler->priv->options = config_hash;
229
230 pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET,
231 &rule_input, config_hash,
232 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, scheduler);
233
234 pcmk__validate_cluster_options(config_hash);
235
236 set_config_flag(scheduler, PCMK__OPT_ENABLE_STARTUP_PROBES,
237 pcmk__sched_probe_resources);
238 if (!pcmk__is_set(scheduler->flags, pcmk__sched_probe_resources)) {
239 pcmk__warn_once(pcmk__wo_enable_startup_probes,
240 "Support for the " PCMK__OPT_ENABLE_STARTUP_PROBES " "
241 "cluster property is deprecated and will be removed "
242 "(and behave as true) in a future release. Use a "
243 "location constraint with "
244 PCMK_XA_RESOURCE_DISCOVERY "=" PCMK_VALUE_NEVER " "
245 "instead to disable probes where desired.");
246 }
247
248 value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
249 if (pcmk__is_true(value)) {
250 pcmk__info("Watchdog-based self-fencing will be performed via SBD if "
251 "fencing is required and " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT
252 " is nonzero");
253 pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
254 }
255
256 /* Set certain flags via xpath here, so they can be used before the relevant
257 * configuration sections are unpacked.
258 */
259 set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
260 scheduler);
261
262 value = pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_TIMEOUT);
263 pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));
264
265 pcmk__debug("Default fencing action timeout: %s",
266 pcmk__readable_interval(scheduler->priv->fence_timeout_ms));
267
268 set_config_flag(scheduler, PCMK_OPT_FENCING_ENABLED,
269 pcmk__sched_fencing_enabled);
270 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
271 pcmk__debug("Fencing of failed nodes is enabled");
272 } else {
273 pcmk__debug("Fencing of failed nodes is disabled");
274 }
275
276 scheduler->priv->fence_action =
277 pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_ACTION);
278 pcmk__trace("Fencing will %s nodes", scheduler->priv->fence_action);
279
280 set_config_flag(scheduler, PCMK__OPT_CONCURRENT_FENCING,
281 pcmk__sched_concurrent_fencing);
282 if (pcmk__is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
283 pcmk__debug("Concurrent fencing is enabled");
284
285 } else {
286 pcmk__warn_once(pcmk__wo_concurrent_fencing,
287 "Support for the " PCMK__OPT_CONCURRENT_FENCING " "
288 "cluster property is deprecated and will be removed "
289 "(and behave as true) in a future release.");
290 }
291
292 value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
293 if (value) {
294 guint *delay_ms = &(scheduler->priv->priority_fencing_ms);
295
296 pcmk_parse_interval_spec(value, delay_ms);
297 pcmk__trace("Priority fencing delay is %s",
298 pcmk__readable_interval(*delay_ms));
299 }
300
301 set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
302 pcmk__sched_stop_all);
303 pcmk__debug("Stop all active resources: %s",
304 pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
305
306 set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
307 pcmk__sched_symmetric_cluster);
308 if (pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
309 pcmk__debug("Cluster is symmetric - resources can run anywhere by "
310 "default");
311 }
312
313 value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
314
315 if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
316 scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
317
318 } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
319 scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
320
321 } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
322 scheduler->no_quorum_policy = pcmk_no_quorum_demote;
323
324 } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
325 PCMK_VALUE_FENCE_LEGACY, NULL)) {
326 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
327 int do_panic = 0;
328
329 pcmk__xe_get_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
330 &do_panic);
331 if (do_panic
332 || pcmk__is_set(scheduler->flags, pcmk__sched_quorate)) {
333 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
334 } else {
335 pcmk__notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to "
336 "'" PCMK_VALUE_STOP "': cluster has never had "
337 "quorum");
338 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
339 }
340 } else {
341 pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
342 " to 'stop' because fencing is disabled");
343 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
344 }
345
346 } else {
347 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
348 }
349
350 switch (scheduler->no_quorum_policy) {
351 case pcmk_no_quorum_freeze:
352 pcmk__debug("On loss of quorum: Freeze resources that require "
353 "quorum");
354 break;
355 case pcmk_no_quorum_stop:
356 pcmk__debug("On loss of quorum: Stop resources that require "
357 "quorum");
358 break;
359 case pcmk_no_quorum_demote:
360 pcmk__debug("On loss of quorum: Demote promotable resources and "
361 "stop other resources");
362 break;
363 case pcmk_no_quorum_fence:
364 pcmk__notice("On loss of quorum: Fence all remaining nodes");
365 break;
366 case pcmk_no_quorum_ignore:
367 pcmk__notice("On loss of quorum: Ignore");
368 break;
369 }
370
371 set_config_flag(scheduler, PCMK__OPT_STOP_REMOVED_RESOURCES,
372 pcmk__sched_stop_removed_resources);
373 if (pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
374 pcmk__trace("Removed resources are stopped");
375 } else {
376 pcmk__warn_once(pcmk__wo_stop_removed_resources,
377 "Support for the " PCMK__OPT_STOP_REMOVED_RESOURCES " "
378 "cluster property is deprecated and will be removed "
379 "(and behave as true) in a future release.");
380 }
381
382 set_config_flag(scheduler, PCMK__OPT_CANCEL_REMOVED_ACTIONS,
383 pcmk__sched_cancel_removed_actions);
384 if (pcmk__is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
385 pcmk__trace("Removed resource actions are stopped");
386 } else {
387 pcmk__warn_once(pcmk__wo_cancel_removed_actions,
388 "Support for the " PCMK__OPT_CANCEL_REMOVED_ACTIONS " "
389 "cluster property is deprecated and will be removed "
390 "(and behave as true) in a future release.");
391 }
392
393 set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
394 pcmk__sched_in_maintenance);
395 pcmk__trace("Maintenance mode: %s",
396 pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
397
398 set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
399 pcmk__sched_start_failure_fatal);
400 if (pcmk__is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
401 pcmk__trace("Start failures are always fatal");
402 } else {
403 pcmk__trace("Start failures are handled by failcount");
404 }
405
406 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
407 set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
408 pcmk__sched_startup_fencing);
409 }
410 if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
411 pcmk__trace("Unseen nodes will be fenced");
412 } else {
413 pcmk__warn_once(pcmk__wo_blind,
414 "Blind faith: not fencing unseen nodes");
415 }
416
417 pe__unpack_node_health_scores(scheduler);
418
419 scheduler->priv->placement_strategy =
420 pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
421 pcmk__trace("Placement strategy: %s", scheduler->priv->placement_strategy);
422
423 set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
424 pcmk__sched_shutdown_lock);
425 if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
426 value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
427 pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
428 pcmk__trace("Resources will be locked to nodes that were cleanly "
429 "shut down (locks expire after %s)",
430 pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
431 } else {
432 pcmk__trace("Resources will not be locked to nodes that were cleanly "
433 "shut down");
434 }
435
436 value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
437 pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
438 if (scheduler->priv->node_pending_ms == 0U) {
439 pcmk__trace("Do not fence pending nodes");
440 } else {
441 pcmk__trace("Fence pending nodes after %s",
442 pcmk__readable_interval(scheduler->priv->node_pending_ms));
443 }
444
445 set_config_flag(scheduler, PCMK_OPT_FENCE_REMOTE_WITHOUT_QUORUM,
446 pcmk__sched_fence_remote_no_quorum);
447 if (pcmk__is_set(scheduler->flags, pcmk__sched_fence_remote_no_quorum)) {
448 pcmk__trace("Pacemaker Remote nodes may be fenced without quorum");
449
450 } else {
451 pcmk__trace("Pacemaker Remote nodes require quorum to be fenced");
452 }
453
454 return TRUE;
455 }
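/* A minimal cluster configuration section unpacked by this function might
 * look like the following (an illustrative sketch; IDs are hypothetical):
 *
 *     <crm_config>
 *       <cluster_property_set id="cib-bootstrap-options">
 *         <nvpair id="opt-nqp" name="no-quorum-policy" value="freeze"/>
 *         <nvpair id="opt-maint" name="maintenance-mode" value="false"/>
 *       </cluster_property_set>
 *     </crm_config>
 */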
456
457 /*!
458 * \internal
459 * \brief Create a new node object in scheduler data
460 *
461 * \param[in] id ID of new node
462 * \param[in] uname Name of new node
463 * \param[in] type Type of new node
464 * \param[in] score Score of new node
465 * \param[in,out] scheduler Scheduler data
466 *
467 * \return Newly created node object
468 * \note The returned object is part of the scheduler data and should not be
469 * freed separately.
470 */
471 pcmk_node_t *
472 pe__create_node(const char *id, const char *uname, const char *type, int score,
473 pcmk_scheduler_t *scheduler)
474 {
475 enum pcmk__node_variant variant = pcmk__node_variant_cluster;
476 pcmk_node_t *new_node = NULL;
477
478 if (pcmk_find_node(scheduler, uname) != NULL) {
479 pcmk__config_warn("More than one node entry has name '%s'", uname);
480 }
481
482 if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
483 pcmk__str_null_matches|pcmk__str_casei)) {
484 variant = pcmk__node_variant_cluster;
485
486 } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
487 variant = pcmk__node_variant_remote;
488
489 } else {
490 pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
491 pcmk__s(uname, "without name"), type);
492 return NULL;
493 }
494
495 new_node = pcmk__assert_alloc(1, sizeof(pcmk_node_t));
496 new_node->assign = pcmk__assert_alloc(1,
497 sizeof(struct pcmk__node_assignment));
498 new_node->details = pcmk__assert_alloc(1,
499 sizeof(struct pcmk__node_details));
500 new_node->priv = pcmk__assert_alloc(1, sizeof(pcmk__node_private_t));
501
502 pcmk__trace("Creating node for entry %s/%s", uname, id);
503 new_node->assign->score = score;
504 new_node->priv->id = pcmk__str_copy(id);
505 new_node->priv->name = pcmk__str_copy(uname);
506 new_node->priv->flags = pcmk__node_probes_allowed;
507 new_node->details->online = false;
508 new_node->details->shutdown = false;
509 new_node->details->running_rsc = NULL;
510 new_node->priv->scheduler = scheduler;
511 new_node->priv->variant = variant;
512 new_node->priv->attrs = pcmk__strkey_table(free, free);
513 new_node->priv->utilization = pcmk__strkey_table(free, free);
514 new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);
515
516 if (pcmk__is_pacemaker_remote_node(new_node)) {
517 pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
518 pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
519
520 } else {
521 pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
522 }
523
524 scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
525 pe__cmp_node_name);
526 return new_node;
527 }
528
529 static const char *
530 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
531 {
532 xmlNode *attr_set = NULL;
533 xmlNode *attr = NULL;
534
535 const char *container_id = pcmk__xe_id(xml_obj);
536 const char *remote_name = NULL;
537 const char *remote_server = NULL;
538 const char *remote_port = NULL;
539 const char *connect_timeout = "60s";
540 const char *remote_allow_migrate = NULL;
541 const char *is_managed = NULL;
542
543 // @TODO This doesn't handle rules or id-ref
544 for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
545 NULL, NULL);
546 attr_set != NULL;
547 attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {
548
549 for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
550 attr != NULL; attr = pcmk__xe_next(attr, NULL)) {
551
552 const char *value = pcmk__xe_get(attr, PCMK_XA_VALUE);
553 const char *name = pcmk__xe_get(attr, PCMK_XA_NAME);
554
555 if (name == NULL) { // Sanity
556 continue;
557 }
558
559 if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
560 remote_name = value;
561
562 } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
563 remote_server = value;
564
565 } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
566 remote_port = value;
567
568 } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
569 connect_timeout = value;
570
571 } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
572 remote_allow_migrate = value;
573
574 } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
575 is_managed = value;
576 }
577 }
578 }
579
580 if (remote_name == NULL) {
581 return NULL;
582 }
583
584 if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
585 return NULL;
586 }
587
588 pe_create_remote_xml(parent, remote_name, container_id,
589 remote_allow_migrate, is_managed,
590 connect_timeout, remote_server, remote_port);
591 return remote_name;
592 }
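/* As a sketch, a guest node is requested with meta-attributes like these
 * (illustrative names and values):
 *
 *     <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *       <meta_attributes id="vm1-meta">
 *         <nvpair id="vm1-rn" name="remote-node" value="guest1"/>
 *         <nvpair id="vm1-ra" name="remote-addr" value="192.168.122.11"/>
 *       </meta_attributes>
 *     </primitive>
 *
 * expand_remote_rsc_meta() would then inject an equivalent
 * ocf:pacemaker:remote primitive named "guest1" for the connection.
 */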
593
594 static void
595 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
596 {
597 if ((new_node->priv->variant == pcmk__node_variant_remote)
598 && (new_node->priv->remote == NULL)) {
599 /* Ignore fencing for remote nodes that don't have a connection resource
600 * associated with them. This happens when remote node entries get left
601 * in the nodes section after the connection resource is removed.
602 */
603 return;
604 }
605
606 if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
607 // All nodes are unclean until we've seen their status entry
608 new_node->details->unclean = TRUE;
609
610 } else {
611 // Blind faith ...
612 new_node->details->unclean = FALSE;
613 }
614 }
615
616 gboolean
617 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
618 {
619 xmlNode *xml_obj = NULL;
620 pcmk_node_t *new_node = NULL;
621 const char *id = NULL;
622 const char *uname = NULL;
623 const char *type = NULL;
624
625 for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
626 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {
627
628 int score = 0;
629 int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
630
631 new_node = NULL;
632
633 id = pcmk__xe_get(xml_obj, PCMK_XA_ID);
634 uname = pcmk__xe_get(xml_obj, PCMK_XA_UNAME);
635 type = pcmk__xe_get(xml_obj, PCMK_XA_TYPE);
636 pcmk__trace("Processing node %s/%s", uname, id);
637
638 if (id == NULL) {
639 pcmk__config_err("Ignoring <" PCMK_XE_NODE
640 "> entry in configuration without id");
641 continue;
642 }
643 if (rc != pcmk_rc_ok) {
644 // Not possible with schema validation enabled
645 pcmk__config_warn("Using 0 as score for node %s "
646 "because '%s' is not a valid score: %s",
647 pcmk__s(uname, "without name"),
648 pcmk__xe_get(xml_obj, PCMK_XA_SCORE),
649 pcmk_rc_str(rc));
650 }
651 new_node = pe__create_node(id, uname, type, score, scheduler);
652
653 if (new_node == NULL) {
654 return FALSE;
655 }
656
657 handle_startup_fencing(scheduler, new_node);
658
659 add_node_attrs(xml_obj, new_node, FALSE, scheduler);
660
661 pcmk__trace("Done with node %s", pcmk__xe_get(xml_obj, PCMK_XA_UNAME));
662 }
663
664 return TRUE;
665 }
666
667 static void
668 unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
669 {
670 const char *launcher_id = NULL;
671
672 if (rsc->priv->children != NULL) {
673 g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
674 scheduler);
675 return;
676 }
677
678 launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
679 if ((launcher_id != NULL)
680 && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
681 pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
682 launcher_id);
683
684 if (launcher != NULL) {
685 rsc->priv->launcher = launcher;
686 launcher->priv->launched =
687 g_list_append(launcher->priv->launched, rsc);
688 pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
689 rsc->id, launcher_id);
690 } else {
691 pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
692 rsc->id, launcher_id);
693 }
694 }
695 }
696
697 gboolean
698 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
699 {
700 xmlNode *xml_obj = NULL;
701
702 /* Create remote nodes and guest nodes from the resource configuration
703 * before unpacking resources.
704 */
705 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
706 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
707
708 const char *new_node_id = NULL;
709
710 /* Check for remote nodes, which are defined by ocf:pacemaker:remote
711 * primitives.
712 */
713 if (xml_contains_remote_node(xml_obj)) {
714 new_node_id = pcmk__xe_id(xml_obj);
715 /* The pcmk_find_node() check ensures we don't iterate over an
716 * expanded node that has already been added to the node list
717 */
718 if (new_node_id
719 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
720 pcmk__trace("Found remote node %s defined by resource %s",
721 new_node_id, pcmk__xe_id(xml_obj));
722 pe__create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, 0,
723 scheduler);
724 }
725 continue;
726 }
727
728 /* Check for guest nodes, which are defined by special meta-attributes
729 * of a primitive of any type (for example, VirtualDomain or Xen).
730 */
731 if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
732 /* This will add an ocf:pacemaker:remote primitive to the
733 * configuration for the guest node's connection, to be unpacked
734 * later.
735 */
736 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
737 scheduler);
738 if (new_node_id
739 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
740 pcmk__trace("Found guest node %s in resource %s",
741 new_node_id, pcmk__xe_id(xml_obj));
742 pe__create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, 0,
743 scheduler);
744 }
745 continue;
746 }
747
748 /* Check for guest nodes inside a group. Clones are currently not
749 * supported as guest nodes.
750 */
751 if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
752 xmlNode *xml_obj2 = NULL;
753 for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
754 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {
755
756 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
757 scheduler);
758
759 if (new_node_id
760 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
761 pcmk__trace("Found guest node %s in resource %s inside "
762 "group %s",
763 new_node_id, pcmk__xe_id(xml_obj2),
764 pcmk__xe_id(xml_obj));
765 pe__create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
766 0, scheduler);
767 }
768 }
769 }
770 }
771 return TRUE;
772 }
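/* A plain remote node (as opposed to a guest node) is defined by a connection
 * resource like this (illustrative ID):
 *
 *     <primitive id="remote1" class="ocf" provider="pacemaker" type="remote"/>
 *
 * which xml_contains_remote_node() recognizes, causing a node also named
 * "remote1" to be created above.
 */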
773
774 /* Call this after all the nodes and resources have been
775 * unpacked, but before the status section is read.
776 *
777 * A remote node's online status is reflected by the state
778 * of the remote node's connection resource. We need to link
779 * the remote node to this connection resource so we can have
780 * easy access to the connection resource during the scheduler calculations.
781 */
782 static void
783 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
784 {
785 pcmk_node_t *remote_node = NULL;
786
787 if (!pcmk__is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
788 return;
789 }
790
791 if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
792 /* remote_nodes and remote_resources are not linked in quick location calculations */
793 return;
794 }
795
796 remote_node = pcmk_find_node(scheduler, new_rsc->id);
797 CRM_CHECK(remote_node != NULL, return);
798
799 pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
800 new_rsc->id, pcmk__node_name(remote_node));
801 remote_node->priv->remote = new_rsc;
802
803 if (new_rsc->priv->launcher == NULL) {
804 /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
805 * the same as is done for cluster nodes.
806 */
807 handle_startup_fencing(scheduler, remote_node);
808
809 } else {
810 /* pe__create_node() marks the new node as "remote" or "cluster"; now
811 * that we know the node is a guest node, update it correctly.
812 */
813 pcmk__insert_dup(remote_node->priv->attrs,
814 CRM_ATTR_KIND, "container");
815 }
816 }
817
818 /*!
819 * \internal
820 * \brief Parse configuration XML for resource information
821 *
822 * \param[in] xml_resources Top of resource configuration XML
823 * \param[in,out] scheduler Scheduler data
824 *
825 * \return TRUE
826 *
827 * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
828 * be used when pe__unpack_resource() calls resource_location()
829 */
830 gboolean
831 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
832 {
833 xmlNode *xml_obj = NULL;
834 GList *gIter = NULL;
835
836 scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);
837
838 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
839 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
840
841 pcmk_resource_t *new_rsc = NULL;
842 const char *id = pcmk__xe_id(xml_obj);
843
844 if (pcmk__str_empty(id)) {
845 pcmk__config_err("Ignoring <%s> resource without ID",
846 xml_obj->name);
847 continue;
848 }
849
850 if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
851 if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
852 NULL, NULL) == FALSE) {
853 /* Record the template's ID anyway, so that we know it exists. */
854 pcmk__insert_dup(scheduler->priv->templates, id, NULL);
855 }
856 continue;
857 }
858
859 pcmk__trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
860
861 if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
862 scheduler) != pcmk_rc_ok) {
863
864 pcmk__config_err("Ignoring <%s> resource '%s' because "
865 "configuration is invalid", xml_obj->name, id);
866 continue;
867 }
868
869 scheduler->priv->resources = g_list_append(scheduler->priv->resources,
870 new_rsc);
871 pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
872 }
873
874 for (gIter = scheduler->priv->resources;
875 gIter != NULL; gIter = gIter->next) {
876
877 pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
878
879 unpack_launcher(rsc, scheduler);
880 link_rsc2remotenode(scheduler, rsc);
881 }
882
883 scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
884 pe__cmp_rsc_priority);
885 if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
886 /* Ignore */
887
888 } else if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
889 && !pcmk__is_set(scheduler->flags, pcmk__sched_have_fencing)) {
890
891 /* pcs's CI tests look for this specific error message. Confer with the
892 * pcs team before changing it. If the dependency still exists, bump the
893 * CRM_FEATURE_SET and inform the pcs maintainers.
894 *
895 * Also, ResyncCIB.errors_to_ignore() looks for this specific error
896 * message as well.
897 */
898 pcmk__config_err("Resource start-up disabled since no fencing "
899 "resources have been defined. Either configure some "
900 "or disable fencing with the "
901 PCMK_OPT_FENCING_ENABLED " option. NOTE: Clusters "
902 "with shared data need fencing to ensure data "
903 "integrity.");
904 }
905
906 return TRUE;
907 }
908
909 /*!
910 * \internal
911 * \brief Validate the levels in a fencing topology
912 *
913 * \param[in] xml \c PCMK_XE_FENCING_TOPOLOGY element
914 */
915 void
916 pcmk__validate_fencing_topology(const xmlNode *xml)
917 {
918 if (xml == NULL) {
919 return;
920 }
921
922 CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);
923
924 for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
925 NULL, NULL);
926 level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {
927
928 const char *id = pcmk__xe_id(level);
929 int index = 0;
930
931 if (pcmk__str_empty(id)) {
932 pcmk__config_err("Ignoring fencing level without ID");
933 continue;
934 }
935
936 if (pcmk__xe_get_int(level, PCMK_XA_INDEX, &index) != pcmk_rc_ok) {
937 pcmk__config_err("Ignoring fencing level %s with invalid index",
938 id);
939 continue;
940 }
941
942 if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
943 pcmk__config_err("Ignoring fencing level %s with out-of-range "
944 "index %d",
945 id, index);
946 }
947 }
948 }
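/* The levels validated here come from a section like this (illustrative
 * devices and IDs):
 *
 *     <fencing-topology>
 *       <fencing-level id="fl-1" target="node1" index="1" devices="ipmi1"/>
 *       <fencing-level id="fl-2" target="node1" index="2" devices="pdu1"/>
 *     </fencing-topology>
 *
 * Entries without an ID, or with an index outside
 * [ST__LEVEL_MIN, ST__LEVEL_MAX], are rejected with a configuration error.
 */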
949
950 gboolean
951 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
952 {
953 xmlNode *xml_tag = NULL;
954
955 scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);
956
957 for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
958 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {
959
960 xmlNode *xml_obj_ref = NULL;
961 const char *tag_id = pcmk__xe_id(xml_tag);
962
963 if (tag_id == NULL) {
964 pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
965 (const char *) xml_tag->name);
966 continue;
967 }
968
969 for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
970 NULL, NULL);
971 xml_obj_ref != NULL;
972 xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {
973
974 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
975
976 if (obj_ref == NULL) {
977 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
978 xml_obj_ref->name, tag_id);
979 continue;
980 }
981
982 pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
983 }
984 }
985
986 return TRUE;
987 }
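/* Tags group other configuration objects by ID, for example (illustrative):
 *
 *     <tags>
 *       <tag id="all-vms">
 *         <obj_ref id="vm1"/>
 *         <obj_ref id="vm2"/>
 *       </tag>
 *     </tags>
 *
 * Each obj_ref ID is recorded against its tag in scheduler->priv->tags.
 */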
988
989 /*!
990 * \internal
991 * \brief Unpack a ticket state entry
992 *
993 * \param[in] xml_ticket XML ticket state to unpack
994 * \param[in,out] userdata Scheduler data
995 *
996 * \return pcmk_rc_ok (to always continue unpacking further entries)
997 */
998 static int
999 unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
1000 {
1001 pcmk_scheduler_t *scheduler = userdata;
1002
1003 const char *ticket_id = NULL;
1004 const char *granted = NULL;
1005 const char *last_granted = NULL;
1006 const char *standby = NULL;
1007 xmlAttrPtr xIter = NULL;
1008
1009 pcmk__ticket_t *ticket = NULL;
1010
1011 ticket_id = pcmk__xe_id(xml_ticket);
1012 if (pcmk__str_empty(ticket_id)) {
1013 return pcmk_rc_ok;
1014 }
1015
1016 pcmk__trace("Processing ticket state for %s", ticket_id);
1017
1018 ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
1019 ticket_id);
1020 if (ticket == NULL) {
1021 ticket = ticket_new(ticket_id, scheduler);
1022 if (ticket == NULL) {
1023 return pcmk_rc_ok;
1024 }
1025 }
1026
1027 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1028 const char *prop_name = (const char *)xIter->name;
1029 const char *prop_value = pcmk__xml_attr_value(xIter);
1030
1031 if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1032 continue;
1033 }
1034 pcmk__insert_dup(ticket->state, prop_name, prop_value);
1035 }
1036
1037 granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1038 if (pcmk__is_true(granted)) {
1039 pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
1040 pcmk__info("We have ticket '%s'", ticket->id);
1041 } else {
1042 pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
1043 pcmk__info("We do not have ticket '%s'", ticket->id);
1044 }
1045
1046 last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1047 if (last_granted) {
1048 long long last_granted_ll = 0LL;
1049 int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1050
1051 if (rc != pcmk_rc_ok) {
1052 pcmk__warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1053 " value '%s' in state for ticket %s: %s",
1054 last_granted_ll, last_granted, ticket->id,
1055 pcmk_rc_str(rc));
1056 }
1057 ticket->last_granted = (time_t) last_granted_ll;
1058 }
1059
1060 standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1061 if (pcmk__is_true(standby)) {
1062 pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
1063 if (pcmk__is_set(ticket->flags, pcmk__ticket_granted)) {
1064 pcmk__info("Granted ticket '%s' is in standby-mode", ticket->id);
1065 }
1066 } else {
1067 pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
1068 }
1069
1070 pcmk__trace("Done with ticket state for %s", ticket_id);
1071
1072 return pcmk_rc_ok;
1073 }
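/* A ticket state entry unpacked here looks like this (illustrative time):
 *
 *     <tickets>
 *       <ticket_state id="ticketA" granted="true"
 *                     last-granted="1680000000"/>
 *     </tickets>
 *
 * All attributes other than the ID are copied into ticket->state, with the
 * granted/last-granted/standby values getting the special handling above.
 */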
1074
1075 static void
1076 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1077 pcmk_scheduler_t *scheduler)
1078 {
1079 const char *discovery = NULL;
1080 const xmlNode *attrs = NULL;
1081 pcmk_resource_t *rsc = NULL;
1082 int maint = 0;
1083
1084 if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1085 return;
1086 }
1087
1088 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1089 return;
1090 }
1091 pcmk__trace("Processing Pacemaker Remote node %s",
1092 pcmk__node_name(this_node));
1093
1094 pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_IN_MAINTENANCE),
1095 &maint, 0);
1096 if (maint) {
1097 pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
1098 } else {
1099 pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
1100 }
1101
1102 rsc = this_node->priv->remote;
1103 if (!pcmk__is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
1104 this_node->details->unclean = FALSE;
1105 pcmk__set_node_flags(this_node, pcmk__node_seen);
1106 }
1107 attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1108 NULL);
1109 add_node_attrs(attrs, this_node, TRUE, scheduler);
1110
1111 if (pe__shutdown_requested(this_node)) {
1112 pcmk__info("%s is shutting down", pcmk__node_name(this_node));
1113 this_node->details->shutdown = TRUE;
1114 }
1115
1116 if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1117 pcmk__rsc_node_current))) {
1118 pcmk__info("%s is in standby mode", pcmk__node_name(this_node));
1119 pcmk__set_node_flags(this_node, pcmk__node_standby);
1120 }
1121
1122 if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE,
1123 NULL, pcmk__rsc_node_current))
1124 || ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed))) {
1125 pcmk__info("%s is in maintenance mode", pcmk__node_name(this_node));
1126 this_node->details->maintenance = TRUE;
1127 }
1128
1129 discovery = pcmk__node_attr(this_node,
1130 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1131 NULL, pcmk__rsc_node_current);
1132 if ((discovery != NULL) && !pcmk__is_true(discovery)) {
1133 pcmk__warn_once(pcmk__wo_rdisc_enabled,
1134 "Support for the "
1135 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1136 " node attribute is deprecated and will be removed"
1137 " (and behave as 'true') in a future release.");
1138
1139 if (pcmk__is_remote_node(this_node)
1140 && !pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1141 pcmk__config_warn("Ignoring "
1142 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1143 " attribute on Pacemaker Remote node %s"
1144 " because fencing is disabled",
1145 pcmk__node_name(this_node));
1146 } else {
1147 /* This is either a remote node with fencing enabled, or a guest
1148 * node. We don't care whether fencing is enabled when fencing guest
1149 * nodes, because they are "fenced" by recovering their containing
1150 * resource.
1151 */
1152 pcmk__info("%s has resource discovery disabled",
1153 pcmk__node_name(this_node));
1154 pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
1155 }
1156 }
1157 }
1158
1159 /*!
1160 * \internal
1161 * \brief Unpack a cluster node's transient attributes
1162 *
1163 * \param[in] state CIB node state XML
1164 * \param[in,out] node Cluster node whose attributes are being unpacked
1165 * \param[in,out] scheduler Scheduler data
1166 */
1167 static void
1168 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1169 pcmk_scheduler_t *scheduler)
1170 {
1171 const char *discovery = NULL;
1172 const xmlNode *attrs = pcmk__xe_first_child(state,
1173 PCMK__XE_TRANSIENT_ATTRIBUTES,
1174 NULL, NULL);
1175
1176 add_node_attrs(attrs, node, TRUE, scheduler);
1177
1178 if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1179 pcmk__rsc_node_current))) {
1180 pcmk__info("%s is in standby mode", pcmk__node_name(node));
1181 pcmk__set_node_flags(node, pcmk__node_standby);
1182 }
1183
1184 if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1185 pcmk__rsc_node_current))) {
1186 pcmk__info("%s is in maintenance mode", pcmk__node_name(node));
1187 node->details->maintenance = TRUE;
1188 }
1189
1190 discovery = pcmk__node_attr(node,
1191 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1192 NULL, pcmk__rsc_node_current);
1193 if ((discovery != NULL) && !pcmk__is_true(discovery)) {
1194 pcmk__config_warn("Ignoring "
1195 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1196 " attribute for %s because disabling resource"
1197 " discovery is not allowed for cluster nodes",
1198 pcmk__node_name(node));
1199 }
1200 }
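/* The attributes unpacked here come from a status fragment such as this
 * (an illustrative sketch):
 *
 *     <transient_attributes id="1">
 *       <instance_attributes id="status-1">
 *         <nvpair id="status-1-standby" name="standby" value="true"/>
 *       </instance_attributes>
 *     </transient_attributes>
 */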
1201
1202 /*!
1203 * \internal
1204 * \brief Unpack a node state entry (first pass)
1205 *
1206 * Unpack one node state entry from status. This unpacks information from the
1207 * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
1208 * the resource history inside it. Multiple passes through the status are needed
1209 * to fully unpack everything.
1210 *
1211 * \param[in] state CIB node state XML
1212 * \param[in,out] scheduler Scheduler data
1213 */
1214 static void
1215 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1216 {
1217 const char *id = NULL;
1218 const char *uname = NULL;
1219 pcmk_node_t *this_node = NULL;
1220
1221 id = pcmk__xe_get(state, PCMK_XA_ID);
1222 if (id == NULL) {
1223 pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1224 PCMK_XA_ID);
1225 pcmk__log_xml_info(state, "missing-id");
1226 return;
1227 }
1228
1229 uname = pcmk__xe_get(state, PCMK_XA_UNAME);
1230 if (uname == NULL) {
1231 /* If a joining peer makes the cluster acquire the quorum from Corosync
1232 * but has not joined the controller CPG membership yet, it's possible
1233 * that the created PCMK__XE_NODE_STATE entry doesn't have a
1234 * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
1235 * join CPG.
1236 */
1237 pcmk__trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1238 "without " PCMK_XA_UNAME,
1239 id);
1240 }
1241
1242 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1243 if (this_node == NULL) {
1244 pcmk__notice("Ignoring recorded state for removed node with name %s "
1245 "and " PCMK_XA_ID " %s",
1246 pcmk__s(uname, "unknown"), id);
1247 return;
1248 }
1249
1250 if (pcmk__is_pacemaker_remote_node(this_node)) {
1251 int remote_fenced = 0;
1252
1253 /* We can't determine the online status of Pacemaker Remote nodes until
1254 * after all resource history has been unpacked. In this first pass, we
1255 * do need to mark whether the node has been fenced, as this plays a
1256 * role during unpacking cluster node resource state.
1257 */
1258 pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_FENCED),
1259 &remote_fenced, 0);
1260 if (remote_fenced) {
1261 pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
1262 } else {
1263 pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
1264 }
1265 return;
1266 }
1267
1268 unpack_transient_attributes(state, this_node, scheduler);
1269
1270 /* Provisionally mark this cluster node as clean. We have at least seen it
1271 * in the current cluster's lifetime.
1272 */
1273 this_node->details->unclean = FALSE;
1274 pcmk__set_node_flags(this_node, pcmk__node_seen);
1275
1276 pcmk__trace("Determining online status of cluster node %s (id %s)",
1277 pcmk__node_name(this_node), id);
1278 determine_online_status(state, this_node, scheduler);
1279
1280 if (!pcmk__is_set(scheduler->flags, pcmk__sched_quorate)
1281 && this_node->details->online
1282 && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1283 /* Everything else should flow from this automatically
1284 * (at least until the scheduler becomes able to migrate off
1285 * healthy resources)
1286 */
1287 pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1288 FALSE);
1289 }
1290 }
1291
1292 /*!
1293 * \internal
1294 * \brief Unpack nodes' resource history as much as possible
1295 *
1296 * Unpack as many nodes' resource history as possible in one pass through the
1297 * status. We need to process Pacemaker Remote nodes' connections/containers
1298 * before unpacking their history; the connection/container history will be
1299 * in another node's history, so it might take multiple passes to unpack
1300 * everything.
1301 *
1302 * \param[in] status CIB XML status section
1303 * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
1304 * \param[in,out] scheduler Scheduler data
1305 *
1306 * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1307 * or EAGAIN if more unpacking remains to be done)
1308 */
1309 static int
1310 unpack_node_history(const xmlNode *status, bool fence,
1311 pcmk_scheduler_t *scheduler)
1312 {
1313 int rc = pcmk_rc_ok;
1314
1315 // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1316 for (const xmlNode *state = pcmk__xe_first_child(status,
1317 PCMK__XE_NODE_STATE, NULL,
1318 NULL);
1319 state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {
1320
1321 const char *id = pcmk__xe_id(state);
1322 const char *uname = pcmk__xe_get(state, PCMK_XA_UNAME);
1323 pcmk_node_t *this_node = NULL;
1324
1325 if ((id == NULL) || (uname == NULL)) {
1326 // Warning already logged in first pass through status section
1327 pcmk__trace("Not unpacking resource history from malformed "
1328 PCMK__XE_NODE_STATE " without id and/or uname");
1329 continue;
1330 }
1331
1332 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1333 if (this_node == NULL) {
1334 // Warning already logged in first pass through status section
1335 pcmk__trace("Not unpacking resource history for node %s because "
1336 "no longer in configuration",
1337 id);
1338 continue;
1339 }
1340
1341 if (pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1342 pcmk__trace("Not unpacking resource history for node %s because "
1343 "already unpacked",
1344 id);
1345 continue;
1346 }
1347
1348 if (fence) {
1349 // We're processing all remaining nodes
1350
1351 } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1352 /* We can unpack a guest node's history only after we've unpacked
1353 * other resource history to the point that we know that the node's
1354 * connection and containing resource are both up.
1355 */
1356 const pcmk_resource_t *remote = this_node->priv->remote;
1357 const pcmk_resource_t *launcher = remote->priv->launcher;
1358
1359 if ((remote->priv->orig_role != pcmk_role_started)
1360 || (launcher->priv->orig_role != pcmk_role_started)) {
1361 pcmk__trace("Not unpacking resource history for guest node %s "
1362 "because launcher and connection are not known to "
1363 "be up",
1364 id);
1365 continue;
1366 }
1367
1368 } else if (pcmk__is_remote_node(this_node)) {
1369 /* We can unpack a remote node's history only after we've unpacked
1370 * other resource history to the point that we know that the node's
1371 * connection is up, with the exception of when shutdown locks are
1372 * in use.
1373 */
1374 pcmk_resource_t *rsc = this_node->priv->remote;
1375
1376 if ((rsc == NULL)
1377 || (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1378 && (rsc->priv->orig_role != pcmk_role_started))) {
1379 pcmk__trace("Not unpacking resource history for remote node %s "
1380 "because connection is not known to be up",
1381 id);
1382 continue;
1383 }
1384
1385 /* If fencing and shutdown locks are disabled and we're not processing
1386 * unseen nodes, then we don't want to unpack offline nodes until online
1387 * nodes have been unpacked. This allows us to number active clone
1388 * instances first.
1389 */
1390 } else if (!pcmk__any_flags_set(scheduler->flags,
1391 pcmk__sched_fencing_enabled
1392 |pcmk__sched_shutdown_lock)
1393 && !this_node->details->online) {
1394 pcmk__trace("Not unpacking resource history for offline "
1395 "cluster node %s",
1396 id);
1397 continue;
1398 }
1399
1400 if (pcmk__is_pacemaker_remote_node(this_node)) {
1401 determine_remote_online_status(scheduler, this_node);
1402 unpack_handle_remote_attrs(this_node, state, scheduler);
1403 }
1404
1405 pcmk__trace("Unpacking resource history for %snode %s",
1406 (fence? "unseen " : ""), id);
1407
1408 pcmk__set_node_flags(this_node, pcmk__node_unpacked);
1409 unpack_node_lrm(this_node, state, scheduler);
1410
1411 rc = EAGAIN; // Other node histories might depend on this one
1412 }
1413 return rc;
1414 }
1415
1416 /* remove nodes that are down, stopping */
1417 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1418 /* anything else? */
1419 gboolean
1420 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1421 {
1422 xmlNode *state = NULL;
1423
1424 pcmk__trace("Beginning unpack");
1425
1426 if (scheduler->priv->ticket_constraints == NULL) {
1427 scheduler->priv->ticket_constraints =
1428 pcmk__strkey_table(free, destroy_ticket);
1429 }
1430
1431 for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1432 state = pcmk__xe_next(state, NULL)) {
1433
1434 if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1435 pcmk__xe_foreach_child(state, PCMK__XE_TICKET_STATE,
1436 unpack_ticket_state, scheduler);
1437
1438 } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1439 unpack_node_state(state, scheduler);
1440 }
1441 }
1442
1443 while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1444 pcmk__trace("Another pass through node resource histories is needed");
1445 }
1446
1447 // Now catch any nodes we didn't see
1448 unpack_node_history(status,
1449 pcmk__is_set(scheduler->flags,
1450 pcmk__sched_fencing_enabled),
1451 scheduler);
1452
1453 /* Now that we know where resources are, we can schedule stops of containers
1454 * with failed bundle connections
1455 */
1456 if (scheduler->priv->stop_needed != NULL) {
1457 for (GList *item = scheduler->priv->stop_needed;
1458 item != NULL; item = item->next) {
1459
1460 pcmk_resource_t *container = item->data;
1461 pcmk_node_t *node = pcmk__current_node(container);
1462
1463 if (node) {
1464 stop_action(container, node, FALSE);
1465 }
1466 }
1467
1468 g_clear_pointer(&scheduler->priv->stop_needed, g_list_free);
1469 }
1470
1471 /* Now that we know status of all Pacemaker Remote connections and nodes,
1472 * we can stop connections for node shutdowns, and check the online status
1473 * of remote/guest nodes that didn't have any node history to unpack.
1474 */
1475 for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1476 pcmk_node_t *this_node = gIter->data;
1477
1478 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1479 continue;
1480 }
1481 if (this_node->details->shutdown
1482 && (this_node->priv->remote != NULL)) {
1483 pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
1484 "remote shutdown");
1485 }
1486 if (!pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1487 determine_remote_online_status(scheduler, this_node);
1488 }
1489 }
1490
1491 return TRUE;
1492 }
1493
1494 /*!
1495 * \internal
1496 * \brief Unpack node's time when it became a member at the cluster layer
1497 *
1498 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
1499 * \param[in,out] scheduler Scheduler data
1500 *
1501 * \return Epoch time when node became a cluster member
1502 * (or scheduler effective time for legacy entries) if a member,
1503 * 0 if not a member, or -1 if no valid information available
1504 */
1505 static long long
1506 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1507 {
1508 const char *member_time = pcmk__xe_get(node_state, PCMK__XA_IN_CCM);
1509 bool is_member = false;
1510
1511 if (member_time == NULL) {
1512 return -1LL;
1513 }
1514
1515 if (pcmk__parse_bool(member_time, &is_member) != pcmk_rc_ok) {
1516 long long when_member = 0LL;
1517
1518 if ((pcmk__scan_ll(member_time, &when_member,
1519 0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1520 pcmk__warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in "
1521 PCMK__XE_NODE_STATE " entry",
1522 member_time);
1523 return -1LL;
1524 }
1525 return when_member;
1526 }
1527
1528 /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1529 * recorded as a boolean for a DC < 2.1.7, or the node is pending shutdown
1530 * and has left the CPG, in which case it was set to 1 to avoid fencing for
1531 * PCMK_OPT_NODE_PENDING_TIMEOUT.
1532 *
1533 * We return the effective time for in_ccm=1 because what's important to
1534 * avoid fencing is that effective time minus this value is less than the
1535 * pending node timeout.
1536 */
1537 return is_member? (long long) pcmk__scheduler_epoch_time(scheduler) : 0LL;
1538 }
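/* Sketch of the values handled above: in_ccm="0" yields 0 (not a member);
 * in_ccm="true" (a legacy boolean from a DC < 2.1.7) yields the scheduler's
 * effective time; in_ccm="1680000000" (illustrative epoch) yields that
 * timestamp; anything unparsable yields -1.
 */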
1539
1540 /*!
1541 * \internal
1542 * \brief Unpack node's time when it became online in process group
1543 *
1544 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
1545 *
1546 * \return Epoch time when node became online in process group (or 0 if not
1547 * online, or 1 for legacy online entries)
1548 */
1549 static long long
1550 unpack_node_online(const xmlNode *node_state)
1551 {
1552 const char *peer_time = pcmk__xe_get(node_state, PCMK_XA_CRMD);
1553
1554 // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1555 if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1556 pcmk__str_casei|pcmk__str_null_matches)) {
1557 return 0LL;
1558
1559 } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1560 return 1LL;
1561
1562 } else {
1563 long long when_online = 0LL;
1564
1565 if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1566 || (when_online < 0)) {
1567 pcmk__warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1568 PCMK__XE_NODE_STATE " entry, assuming offline",
1569 peer_time);
1570 return 0LL;
1571 }
1572 return when_online;
1573 }
1574 }
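/* Similarly for the controller group: crmd="offline" (or unset) yields 0,
 * legacy crmd="online" yields 1, and a timestamp such as crmd="1680000000"
 * (illustrative) yields when the controller joined the group.
 */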
1575
1576 /*!
1577 * \internal
1578 * \brief Unpack node attribute for user-requested fencing
1579 *
1580 * \param[in] node Node to check
1581 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status
1582 *
1583 * \return \c true if fencing has been requested for \p node, otherwise \c false
1584 */
1585 static bool
1586 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1587 {
1588 bool value_b = false;
1589 long long value_ll = 0LL;
1590 int rc = pcmk_rc_ok;
1591 const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1592 NULL, pcmk__rsc_node_current);
1593
1594 // Value may be boolean or an epoch time
1595 if ((value_s != NULL)
1596 && (pcmk__parse_bool(value_s, &value_b) == pcmk_rc_ok)) {
1597 return value_b;
1598 }
1599
1600 rc = pcmk__scan_ll(value_s, &value_ll, 0LL);
1601 if (rc == pcmk_rc_ok) {
1602 return (value_ll > 0);
1603 }
1604 pcmk__warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1605 "node attribute for %s: %s",
1606 value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1607 return false;
1608 }
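/* As parsed above, the terminate node attribute may be a boolean ("true"
 * requests fencing) or an epoch time (any value greater than 0 requests
 * fencing); for example, terminate="1680000000" (an illustrative timestamp)
 * would request it.
 */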
1609
1610 static gboolean
1611 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1612 const xmlNode *node_state,
1613 pcmk_node_t *this_node)
1614 {
1615 gboolean online = FALSE;
1616 const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
1617 const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1618 long long when_member = unpack_node_member(node_state, scheduler);
1619 long long when_online = unpack_node_online(node_state);
1620
1621 if (when_member <= 0) {
1622 pcmk__trace("Node %s is %sdown", pcmk__node_name(this_node),
1623 ((when_member < 0)? "presumed " : ""));
1624
1625 } else if (when_online > 0) {
1626 if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1627 online = TRUE;
1628 } else {
1629 pcmk__debug("Node %s is not ready to run resources: %s",
1630 pcmk__node_name(this_node), join);
1631 }
1632
1633 } else if (!pcmk__is_set(this_node->priv->flags,
1634 pcmk__node_expected_up)) {
1635 pcmk__trace("Node %s controller is down: "
1636 "member@%lld online@%lld join=%s expected=%s",
1637 pcmk__node_name(this_node), when_member, when_online,
1638 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1639
1640 } else {
1641 /* mark it unclean */
1642 pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1643 pcmk__info("Node %s member@%lld online@%lld join=%s expected=%s",
1644 pcmk__node_name(this_node), when_member, when_online,
1645 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1646 }
1647 return online;
1648 }
1649
1650 /*!
1651 * \internal
1652 * \brief Check whether a node has taken too long to join controller group
1653 *
1654 * \param[in,out] scheduler Scheduler data
1655 * \param[in] node Node to check
1656 * \param[in] when_member Epoch time when node became a cluster member
1657 * \param[in] when_online Epoch time when node joined controller group
1658 *
1659 * \return true if node has been pending (on the way up) longer than
1660 * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1661 * \note This will also update the cluster's recheck time if appropriate.
1662 */
1663 static inline bool
1664 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1665 long long when_member, long long when_online)
1666 {
1667 if ((scheduler->priv->node_pending_ms > 0U)
1668 && (when_member > 0) && (when_online <= 0)) {
1669 // There is a timeout on pending nodes, and node is pending
1670
1671 time_t timeout = when_member
1672 + pcmk__timeout_ms2s(scheduler->priv->node_pending_ms);
1673
1674 if (pcmk__scheduler_epoch_time(node->priv->scheduler) >= timeout) {
1675 return true; // Node has timed out
1676 }
1677
1678 // Node is pending, but still has time
1679 pcmk__update_recheck_time(timeout, scheduler, "pending node timeout");
1680 }
1681 return false;
1682 }
1683
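/*!
 * \internal
 * \brief Determine a cluster node's online status when fencing is enabled
 *
 * \param[in,out] scheduler   Scheduler data
 * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
 * \param[in,out] this_node   Node whose status is being determined
 *
 * \return true if node should be considered online, otherwise false
 */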
1684 static bool
1685 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1686 const xmlNode *node_state,
1687 pcmk_node_t *this_node)
1688 {
1689 bool termination_requested = unpack_node_terminate(this_node, node_state);
1690 const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
1691 const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1692 long long when_member = unpack_node_member(node_state, scheduler);
1693 long long when_online = unpack_node_online(node_state);
1694
1695 /*
1696 - PCMK__XA_JOIN ::= member|down|pending|banned
1697 - PCMK_XA_EXPECTED ::= member|down
1698
1699 @COMPAT with entries recorded for DCs < 2.1.7
1700 - PCMK__XA_IN_CCM ::= true|false
1701 - PCMK_XA_CRMD ::= online|offline
1702
1703 Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1704 - PCMK__XA_IN_CCM ::= <timestamp>|0
      Since when the node has been a cluster member. A value of 0 means the
      node is not a cluster member.

      - PCMK_XA_CRMD ::= <timestamp>|0
      Since when the peer has been online in CPG. A value of 0 means the peer
      is offline in CPG.
1711 */
1712
1713 pcmk__trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1714 pcmk__node_name(this_node), when_member, when_online,
1715 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1716 (termination_requested? " (termination requested)" : ""));
1717
1718 if (this_node->details->shutdown) {
1719 pcmk__debug("%s is shutting down", pcmk__node_name(this_node));
1720
1721 /* Slightly different criteria since we can't shut down a dead peer */
1722 return (when_online > 0);
1723 }
1724
1725 if (when_member < 0) {
1726 pe_fence_node(scheduler, this_node,
1727 "peer has not been seen by the cluster", FALSE);
1728 return false;
1729 }
1730
1731 if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1732 pe_fence_node(scheduler, this_node,
1733 "peer failed Pacemaker membership criteria", FALSE);
1734
1735 } else if (termination_requested) {
1736 if ((when_member <= 0) && (when_online <= 0)
1737 && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1738 pcmk__info("%s was fenced as requested",
1739 pcmk__node_name(this_node));
1740 return false;
1741 }
1742 pe_fence_node(scheduler, this_node, "fencing was requested", false);
1743
1744 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1745 pcmk__str_null_matches)) {
1746
1747 if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1748 pe_fence_node(scheduler, this_node,
1749 "peer pending timed out on joining the process group",
1750 FALSE);
1751
1752 } else if ((when_member > 0) || (when_online > 0)) {
1753 pcmk__info("- %s is not ready to run resources",
1754 pcmk__node_name(this_node));
1755 pcmk__set_node_flags(this_node, pcmk__node_standby);
1756 this_node->details->pending = TRUE;
1757
1758 } else {
1759 pcmk__trace("%s is down or still coming up",
1760 pcmk__node_name(this_node));
1761 }
1762
1763 } else if (when_member <= 0) {
1764 // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1765 pe_fence_node(scheduler, this_node,
1766 "peer is no longer part of the cluster", TRUE);
1767
1768 } else if (when_online <= 0) {
1769 pe_fence_node(scheduler, this_node,
1770 "peer process is no longer available", FALSE);
1771
1772 /* Everything is running at this point, now check join state */
1773
1774 } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1775 pcmk__info("%s is active", pcmk__node_name(this_node));
1776
1777 } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1778 CRMD_JOINSTATE_DOWN, NULL)) {
1779 pcmk__info("%s is not ready to run resources",
1780 pcmk__node_name(this_node));
1781 pcmk__set_node_flags(this_node, pcmk__node_standby);
1782 this_node->details->pending = TRUE;
1783
1784 } else {
1785 pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1786 FALSE);
1787 }
1788
1789 return (when_member > 0);
1790 }
1791
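/*!
 * \internal
 * \brief Determine online status of a Pacemaker Remote (or guest) node
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in,out] this_node  Node whose status is being determined
 */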
1792 static void
1793 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1794 pcmk_node_t *this_node)
1795 {
1796 pcmk_resource_t *rsc = this_node->priv->remote;
1797 pcmk_resource_t *launcher = NULL;
1798 pcmk_node_t *host = NULL;
1799 const char *node_type = "Remote";
1800
1801 if (rsc == NULL) {
1802 /* This is a leftover node state entry for a former Pacemaker Remote
1803 * node whose connection resource was removed. Consider it offline.
1804 */
1805 pcmk__trace("Pacemaker Remote node %s is considered OFFLINE because "
1806 "its connection resource has been removed from the CIB",
1807 this_node->priv->id);
1808 this_node->details->online = FALSE;
1809 return;
1810 }
1811
1812 launcher = rsc->priv->launcher;
1813 if (launcher != NULL) {
1814 node_type = "Guest";
1815 if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1816 host = rsc->priv->active_nodes->data;
1817 }
1818 }
1819
1820 /* If the resource is currently started, mark it online. */
1821 if (rsc->priv->orig_role == pcmk_role_started) {
1822 this_node->details->online = TRUE;
1823 }
1824
1825 /* consider this node shutting down if transitioning start->stop */
1826 if ((rsc->priv->orig_role == pcmk_role_started)
1827 && (rsc->priv->next_role == pcmk_role_stopped)) {
1828
1829 pcmk__trace("%s node %s shutting down because connection resource is "
1830 "stopping",
1831 node_type, this_node->priv->id);
1832 this_node->details->shutdown = TRUE;
1833 }
1834
1835 /* Now check all the failure conditions. */
1836 if ((launcher != NULL) && pcmk__is_set(launcher->flags, pcmk__rsc_failed)) {
1837 pcmk__trace("Guest node %s UNCLEAN because guest resource failed",
1838 this_node->priv->id);
1839 this_node->details->online = FALSE;
1840 pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1841
1842 } else if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
1843 pcmk__trace("%s node %s OFFLINE because connection resource failed",
1844 node_type, this_node->priv->id);
1845 this_node->details->online = FALSE;
1846
1847 } else if ((rsc->priv->orig_role == pcmk_role_stopped)
1848 || ((launcher != NULL)
1849 && (launcher->priv->orig_role == pcmk_role_stopped))) {
1850
1851 pcmk__trace("%s node %s OFFLINE because its resource is stopped",
1852 node_type, this_node->priv->id);
1853 this_node->details->online = FALSE;
1854 pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);
1855
    } else if ((host != NULL) && !host->details->online
               && host->details->unclean) {
1858 pcmk__trace("Guest node %s UNCLEAN because host is unclean",
1859 this_node->priv->id);
1860 this_node->details->online = FALSE;
1861 pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1862
1863 } else {
1864 pcmk__trace("%s node %s is %s",
1865 node_type, this_node->priv->id,
1866 (this_node->details->online? "ONLINE" : "OFFLINE"));
1867 }
1868 }
1869
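/*!
 * \internal
 * \brief Determine a node's online status and log the result
 *
 * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
 * \param[in,out] this_node   Node whose status is being determined
 * \param[in,out] scheduler   Scheduler data
 */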
1870 static void
1871 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1872 pcmk_scheduler_t *scheduler)
1873 {
1874 gboolean online = FALSE;
1875 const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
1876
1877 CRM_CHECK(this_node != NULL, return);
1878
1879 this_node->details->shutdown = FALSE;
1880
1881 if (pe__shutdown_requested(this_node)) {
1882 this_node->details->shutdown = TRUE;
1883
1884 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1885 pcmk__set_node_flags(this_node, pcmk__node_expected_up);
1886 }
1887
1888 if (!pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1889 online = determine_online_status_no_fencing(scheduler, node_state,
1890 this_node);
1891
1892 } else {
1893 online = determine_online_status_fencing(scheduler, node_state,
1894 this_node);
1895 }
1896
1897 if (online) {
1898 this_node->details->online = TRUE;
1899
1900 } else {
1901 /* remove node from contention */
1902 this_node->assign->score = -PCMK_SCORE_INFINITY;
1903 }
1904
1905 if (online && this_node->details->shutdown) {
1906 /* don't run resources here */
1907 this_node->assign->score = -PCMK_SCORE_INFINITY;
1908 }
1909
1910 if (this_node->details->unclean) {
1911 pcmk__sched_warn(scheduler, "%s is unclean",
1912 pcmk__node_name(this_node));
1913
1914 } else if (!this_node->details->online) {
1915 pcmk__trace("%s is offline", pcmk__node_name(this_node));
1916
1917 } else if (this_node->details->shutdown) {
1918 pcmk__info("%s is shutting down", pcmk__node_name(this_node));
1919
1920 } else if (this_node->details->pending) {
1921 pcmk__info("%s is pending", pcmk__node_name(this_node));
1922
1923 } else if (pcmk__is_set(this_node->priv->flags, pcmk__node_standby)) {
1924 pcmk__info("%s is in standby", pcmk__node_name(this_node));
1925
1926 } else if (this_node->details->maintenance) {
1927 pcmk__info("%s is in maintenance", pcmk__node_name(this_node));
1928
1929 } else {
1930 pcmk__info("%s is online", pcmk__node_name(this_node));
1931 }
1932 }
1933
1934 /*!
1935 * \internal
1936 * \brief Find the end of a resource's name, excluding any clone suffix
1937 *
1938 * \param[in] id Resource ID to check
1939 *
1940 * \return Pointer to last character of resource's base name
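 *
 * \note For example, given "myrsc:10" or "myrsc", this returns a pointer to
 *       the final 'c' in "myrsc".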
1941 */
1942 const char *
1943 pe_base_name_end(const char *id)
1944 {
1945 if (!pcmk__str_empty(id)) {
1946 const char *end = id + strlen(id) - 1;
1947
1948 for (const char *s = end; s > id; --s) {
1949 switch (*s) {
1950 case '0':
1951 case '1':
1952 case '2':
1953 case '3':
1954 case '4':
1955 case '5':
1956 case '6':
1957 case '7':
1958 case '8':
1959 case '9':
1960 break;
1961 case ':':
1962 return (s == end)? s : (s - 1);
1963 default:
1964 return end;
1965 }
1966 }
1967 return end;
1968 }
1969 return NULL;
1970 }
1971
1972 /*!
1973 * \internal
1974 * \brief Get a resource name excluding any clone suffix
1975 *
1976 * \param[in] last_rsc_id Resource ID to check
1977 *
1978 * \return Pointer to newly allocated string with resource's base name
1979 * \note It is the caller's responsibility to free() the result.
1980 * This asserts on error, so callers can assume result is not NULL.
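 *       For example, clone_strip("myrsc:10") returns "myrsc".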
1981 */
1982 char *
1983 clone_strip(const char *last_rsc_id)
1984 {
1985 const char *end = pe_base_name_end(last_rsc_id);
1986 char *basename = NULL;
1987
1988 pcmk__assert(end != NULL);
1989 basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1990 pcmk__assert(basename != NULL);
1991 return basename;
1992 }
1993
1994 /*!
1995 * \internal
1996 * \brief Get the name of the first instance of a cloned resource
1997 *
1998 * \param[in] last_rsc_id Resource ID to check
1999 *
2000 * \return Pointer to newly allocated string with resource's base name plus :0
2001 * \note It is the caller's responsibility to free() the result.
2002 * This asserts on error, so callers can assume result is not NULL.
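 *       For example, clone_zero("myrsc:10") returns "myrsc:0".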
2003 */
2004 char *
2005 clone_zero(const char *last_rsc_id)
2006 {
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    pcmk__assert(end != NULL);
    base_name_len = end - last_rsc_id + 1;

    // Allocate room for the base name, ":0", and a terminating NUL
    zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
2013 memcpy(zero, last_rsc_id, base_name_len);
2014 zero[base_name_len] = ':';
2015 zero[base_name_len + 1] = '0';
2016 return zero;
2017 }
2018
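/*!
 * \internal
 * \brief Create a resource to represent history of a removed resource
 *
 * \param[in]     rsc_id     ID of removed resource
 * \param[in]     rsc_entry  Resource history XML to copy attributes from
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Newly created resource, or NULL if it could not be unpacked
 */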
2019 static pcmk_resource_t *
2020 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
2021 pcmk_scheduler_t *scheduler)
2022 {
2023 pcmk_resource_t *rsc = NULL;
2024 xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
2025
2026 pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
2027 pcmk__xe_set(xml_rsc, PCMK_XA_ID, rsc_id);
2028 pcmk__log_xml_debug(xml_rsc, "Removed resource");
2029
2030 if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2031 return NULL;
2032 }
2033
2034 if (xml_contains_remote_node(xml_rsc)) {
2035 pcmk_node_t *node;
2036
2037 pcmk__debug("Detected removed remote node %s", rsc_id);
2038 node = pcmk_find_node(scheduler, rsc_id);
2039 if (node == NULL) {
2040 node = pe__create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2041 scheduler);
2042 }
2043 link_rsc2remotenode(scheduler, rsc);
2044
2045 if (node) {
2046 pcmk__trace("Setting node %s as shutting down due to removed "
2047 "connection resource", rsc_id);
2048 node->details->shutdown = TRUE;
2049 }
2050 }
2051
2052 if (pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER)) {
2053 // This removed resource needs to be mapped to a launcher
2054 pcmk__trace("Launched resource %s was removed from the configuration",
2055 rsc_id);
2056 pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
2057 }
2058 pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
2059 scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
2060 return rsc;
2061 }
2062
2063 /*!
2064 * \internal
2065 * \brief Create "removed" instance for anonymous clone resource history
2066 *
2067 * \param[in,out] parent Clone resource that instance will be added to
2068 * \param[in] rsc_id Instance's resource ID
2069 * \param[in] node Where instance is active (for logging only)
2070 * \param[in,out] scheduler Scheduler data
2071 *
2072 * \return Newly created "removed" instance of \p parent
2073 */
2074 static pcmk_resource_t *
2075 create_anonymous_removed_instance(pcmk_resource_t *parent, const char *rsc_id,
2076 const pcmk_node_t *node,
2077 pcmk_scheduler_t *scheduler)
2078 {
2079 pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2080 pcmk_resource_t *instance = NULL;
2081
2082 // find_rsc() because we might be a cloned group
2083 instance = top->priv->fns->find_rsc(top, rsc_id, NULL,
2084 pcmk_rsc_match_clone_only);
2085
2086 pcmk__rsc_debug(parent, "Created \"removed\" instance %s for %s: %s on %s",
2087 top->id, parent->id, rsc_id, pcmk__node_name(node));
2088 return instance;
2089 }
2090
2091 /*!
2092 * \internal
2093 * \brief Check a node for an instance of an anonymous clone
2094 *
2095 * Return a child instance of the specified anonymous clone, in order of
2096 * preference: (1) the instance running on the specified node, if any;
2097 * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
 * instances); (3) a newly created "removed" instance (used when all
 * \c PCMK_META_CLONE_MAX instances are already active).
2100 *
2101 * \param[in,out] scheduler Scheduler data
2102 * \param[in] node Node on which to check for instance
2103 * \param[in,out] parent Clone to check
2104 * \param[in] rsc_id Name of cloned resource in history (no instance)
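 *
 * \return Clone instance to use for \p rsc_id on \p node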
2105 */
2106 static pcmk_resource_t *
2107 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2108 pcmk_resource_t *parent, const char *rsc_id)
2109 {
2110 GList *rIter = NULL;
2111 pcmk_resource_t *rsc = NULL;
2112 pcmk_resource_t *inactive_instance = NULL;
2113 gboolean skip_inactive = FALSE;
2114
2115 pcmk__assert(pcmk__is_anonymous_clone(parent));
2116
2117 // Check for active (or partially active, for cloned groups) instance
2118 pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2119 rsc_id, pcmk__node_name(node), parent->id);
2120
2121 for (rIter = parent->priv->children;
2122 (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
2123
2124 GList *locations = NULL;
2125 pcmk_resource_t *child = rIter->data;
2126
2127 /* Check whether this instance is already known to be active or pending
2128 * anywhere, at this stage of unpacking. Because this function is called
2129 * for a resource before the resource's individual operation history
2130 * entries are unpacked, locations will generally not contain the
2131 * desired node.
2132 *
2133 * However, there are three exceptions:
2134 * (1) when child is a cloned group and we have already unpacked the
2135 * history of another member of the group on the same node;
2136 * (2) when we've already unpacked the history of another numbered
2137 * instance on the same node (which can happen if
2138 * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2139 * (3) when we re-run calculations on the same scheduler data as part of
2140 * a simulation.
2141 */
2142 child->priv->fns->location(child, &locations, pcmk__rsc_node_current
2143 |pcmk__rsc_node_pending);
2144 if (locations) {
2145 /* We should never associate the same numbered anonymous clone
2146 * instance with multiple nodes, and clone instances can't migrate,
2147 * so there must be only one location, regardless of history.
2148 */
2149 CRM_LOG_ASSERT(locations->next == NULL);
2150
2151 if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2152 /* This child instance is active on the requested node, so check
2153 * for a corresponding configured resource. We use find_rsc()
2154 * instead of child because child may be a cloned group, and we
2155 * need the particular member corresponding to rsc_id.
2156 *
2157 * If the history entry represents a removed instance, rsc will
2158 * be NULL.
2159 */
2160 rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
2161 pcmk_rsc_match_clone_only);
2162 if (rsc) {
2163 /* If there are multiple instance history entries for an
2164 * anonymous clone in a single node's history (which can
2165 * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2166 * to false), we want to consider the instances beyond the
2167 * first as removed, even if there are inactive instance
2168 * numbers available.
2169 */
2170 if (rsc->priv->active_nodes != NULL) {
2171 pcmk__notice("Active (now-)anonymous clone %s has "
2172 "multiple \"removed\" instance histories "
2173 "on %s",
2174 parent->id, pcmk__node_name(node));
2175 skip_inactive = TRUE;
2176 rsc = NULL;
2177 } else {
2178 pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2179 }
2180 }
2181 }
2182 g_list_free(locations);
2183
2184 } else {
2185 pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2186 if (!skip_inactive && !inactive_instance
2187 && !pcmk__is_set(child->flags, pcmk__rsc_blocked)) {
2188 // Remember one inactive instance in case we don't find active
2189 inactive_instance =
2190 parent->priv->fns->find_rsc(child, rsc_id, NULL,
2191 pcmk_rsc_match_clone_only);
2192
2193 /* ... but don't use it if it was already associated with a
2194 * pending action on another node
2195 */
2196 if (inactive_instance != NULL) {
2197 const pcmk_node_t *pending_node = NULL;
2198
2199 pending_node = inactive_instance->priv->pending_node;
2200 if ((pending_node != NULL)
2201 && !pcmk__same_node(pending_node, node)) {
2202 inactive_instance = NULL;
2203 }
2204 }
2205 }
2206 }
2207 }
2208
2209 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2210 pcmk__rsc_trace(parent, "Resource %s, empty slot",
2211 inactive_instance->id);
2212 rsc = inactive_instance;
2213 }
2214
2215 /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2216 * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2217 * don't want to consume a valid instance number for unclean nodes. Such
2218 * instances may appear to be active according to the history, but should be
2219 * considered inactive, so we can start an instance elsewhere. Treat such
2220 * instances as removed.
2221 *
2222 * An exception is instances running on guest nodes -- since guest node
2223 * "fencing" is actually just a resource stop, requires shouldn't apply.
2224 *
2225 * @TODO Ideally, we'd use an inactive instance number if it is not needed
2226 * for any clean instances. However, we don't know that at this point.
2227 */
2228 if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_needs_fencing)
2229 && (!node->details->online || node->details->unclean)
2230 && !pcmk__is_guest_or_bundle_node(node)
2231 && !pe__is_universal_clone(parent, scheduler)) {
2232
2233 rsc = NULL;
2234 }
2235
2236 if (rsc == NULL) {
2237 rsc = create_anonymous_removed_instance(parent, rsc_id, node,
2238 scheduler);
2239 pcmk__rsc_trace(parent, "Resource %s, removed", rsc->id);
2240 }
2241 return rsc;
2242 }
2243
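/*!
 * \internal
 * \brief Find the resource that a history entry applies to
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in]     node       Node that history entry is on
 * \param[in]     rsc_id     Resource ID from history entry
 *
 * \return Resource corresponding to \p rsc_id, or NULL if the history should
 *         be treated as belonging to a removed resource
 */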
2244 static pcmk_resource_t *
2245 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2246 const char *rsc_id)
2247 {
2248 pcmk_resource_t *rsc = NULL;
2249 pcmk_resource_t *parent = NULL;
2250
2251 pcmk__trace("looking for %s", rsc_id);
2252 rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2253
2254 if (rsc == NULL) {
2255 /* If we didn't find the resource by its name in the operation history,
2256 * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2257 * we create a single :0 "removed" instance to match against here.
2258 */
2259 char *clone0_id = clone_zero(rsc_id);
2260 pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
2261 clone0_id);
2262
2263 if ((clone0 != NULL)
2264 && !pcmk__is_set(clone0->flags, pcmk__rsc_unique)) {
2265
2266 rsc = clone0;
2267 parent = uber_parent(clone0);
2268 pcmk__trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2269 } else {
2270 pcmk__trace("%s is not known as %s either (removed)", rsc_id,
2271 clone0_id);
2272 }
2273 free(clone0_id);
2274
2275 } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
2276 pcmk__trace("Resource history for %s is considered removed "
2277 "because it is no longer primitive", rsc_id);
2278 return NULL;
2279
2280 } else {
2281 parent = uber_parent(rsc);
2282 }
2283
2284 if (pcmk__is_anonymous_clone(parent)) {
2285
2286 if (pcmk__is_bundled(parent)) {
2287 rsc = pe__find_bundle_replica(parent->priv->parent, node);
2288 } else {
2289 char *base = clone_strip(rsc_id);
2290
2291 rsc = find_anonymous_clone(scheduler, node, parent, base);
2292 free(base);
2293 pcmk__assert(rsc != NULL);
2294 }
2295 }
2296
2297 if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2298 && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
2299
2300 const bool removed = pcmk__is_set(rsc->flags, pcmk__rsc_removed);
2301
2302 pcmk__str_update(&(rsc->priv->history_id), rsc_id);
2303 pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2304 rsc_id, pcmk__node_name(node), rsc->id,
2305 (removed? " (removed)" : ""));
2306 }
2307 return rsc;
2308 }
2309
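/*!
 * \internal
 * \brief Unpack a removed resource's history entry
 *
 * \param[in]     rsc_entry  \c PCMK__XE_LRM_RESOURCE entry being unpacked
 * \param[in]     node       Node that history entry is on
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Newly created "removed" resource, or NULL on error
 */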
2310 static pcmk_resource_t *
2311 process_removed_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2312 pcmk_scheduler_t *scheduler)
2313 {
2314 pcmk_resource_t *rsc = NULL;
2315 const char *rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
2316
2317 pcmk__debug("Detected removed resource %s on %s", rsc_id,
2318 pcmk__node_name(node));
2319 rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2320 if (rsc == NULL) {
2321 return NULL;
2322 }
2323
2324 if (!pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
2325 pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2326
2327 } else {
2328 CRM_CHECK(rsc != NULL, return NULL);
2329 pcmk__rsc_trace(rsc, "Added \"removed\" resource %s", rsc->id);
2330 resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2331 "__removed_do_not_run__", scheduler);
2332 }
2333 return rsc;
2334 }
2335
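/*!
 * \internal
 * \brief Update resource and node state based on unpacked resource history
 *
 * \param[in,out] rsc      Resource whose history was unpacked
 * \param[in,out] node     Node that history applies to
 * \param[in]     on_fail  Failure handling determined from the history
 */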
2336 static void
2337 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2338 enum pcmk__on_fail on_fail)
2339 {
2340 pcmk_node_t *tmpnode = NULL;
2341 char *reason = NULL;
2342 enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
2343 pcmk_scheduler_t *scheduler = NULL;
2344 bool known_active = false;
2345
2346 pcmk__assert(rsc != NULL);
2347 scheduler = rsc->priv->scheduler;
2348 known_active = (rsc->priv->orig_role > pcmk_role_stopped);
2349 pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2350 rsc->id, pcmk_role_text(rsc->priv->orig_role),
2351 pcmk__node_name(node), pcmk__on_fail_text(on_fail));
2352
2353 /* process current state */
2354 if (rsc->priv->orig_role != pcmk_role_unknown) {
2355 pcmk_resource_t *iter = rsc;
2356
2357 while (iter) {
2358 if (g_hash_table_lookup(iter->priv->probed_nodes,
2359 node->priv->id) == NULL) {
2360 pcmk_node_t *n = pe__copy_node(node);
2361
2362 pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
2363 rsc->id,
2364 pcmk__s(rsc->priv->history_id, "the same"),
2365 pcmk__node_name(n));
2366 g_hash_table_insert(iter->priv->probed_nodes,
2367 (gpointer) n->priv->id, n);
2368 }
2369 if (pcmk__is_set(iter->flags, pcmk__rsc_unique)) {
2370 break;
2371 }
2372 iter = iter->priv->parent;
2373 }
2374 }
2375
2376 /* If a managed resource is believed to be running, but node is down ... */
2377 if (known_active && !node->details->online && !node->details->maintenance
2378 && pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
2379
2380 gboolean should_fence = FALSE;
2381
2382 /* If this is a guest node, fence it (regardless of whether fencing is
2383 * enabled, because guest node fencing is done by recovery of the
2384 * container resource rather than by the fencer). Mark the resource
2385 * we're processing as failed. When the guest comes back up, its
2386 * operation history in the CIB will be cleared, freeing the affected
2387 * resource to run again once we are sure we know its state.
2388 */
2389 if (pcmk__is_guest_or_bundle_node(node)) {
2390 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2391 should_fence = TRUE;
2392
2393 } else if (pcmk__is_set(scheduler->flags,
2394 pcmk__sched_fencing_enabled)) {
2395 if (pcmk__is_remote_node(node)
2396 && (node->priv->remote != NULL)
2397 && !pcmk__is_set(node->priv->remote->flags,
2398 pcmk__rsc_failed)) {
2399
2400 /* Setting unseen means that fencing of the remote node will
2401 * occur only if the connection resource is not going to start
2402 * somewhere. This allows connection resources on a failed
2403 * cluster node to move to another node without requiring the
2404 * remote nodes to be fenced as well.
2405 */
2406 pcmk__clear_node_flags(node, pcmk__node_seen);
2407 reason = pcmk__assert_asprintf("%s is active there (fencing "
2408 "will be revoked if remote "
2409 "connection can be "
2410 "re-established elsewhere)",
2411 rsc->id);
2412 }
2413 should_fence = TRUE;
2414 }
2415
2416 if (should_fence) {
2417 if (reason == NULL) {
2418 reason = pcmk__assert_asprintf("%s is thought to be active "
2419 "there",
2420 rsc->id);
2421 }
2422 pe_fence_node(scheduler, node, reason, FALSE);
2423 }
2424 free(reason);
2425 }
2426
    /* In order to calculate priority_fencing_delay correctly, save the failure
     * information and pass it to native_add_running()
     */
2428 save_on_fail = on_fail;
2429
2430 if (node->details->unclean) {
2431 /* No extra processing needed
2432 * Also allows resources to be started again after a node is shot
2433 */
2434 on_fail = pcmk__on_fail_ignore;
2435 }
2436
2437 switch (on_fail) {
2438 case pcmk__on_fail_ignore:
2439 /* nothing to do */
2440 break;
2441
2442 case pcmk__on_fail_demote:
2443 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2444 demote_action(rsc, node, FALSE);
2445 break;
2446
2447 case pcmk__on_fail_fence_node:
2448 /* treat it as if it is still running
2449 * but also mark the node as unclean
2450 */
2451 reason = pcmk__assert_asprintf("%s failed there", rsc->id);
2452 pe_fence_node(scheduler, node, reason, FALSE);
2453 free(reason);
2454 break;
2455
2456 case pcmk__on_fail_standby_node:
2457 pcmk__set_node_flags(node,
2458 pcmk__node_standby|pcmk__node_fail_standby);
2459 break;
2460
2461 case pcmk__on_fail_block:
2462 /* is_managed == FALSE will prevent any
2463 * actions being sent for the resource
2464 */
2465 pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2466 pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
2467 break;
2468
2469 case pcmk__on_fail_ban:
2470 /* make sure it comes up somewhere else
2471 * or not at all
2472 */
2473 resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2474 "__action_migration_auto__", scheduler);
2475 break;
2476
2477 case pcmk__on_fail_stop:
2478 pe__set_next_role(rsc, pcmk_role_stopped,
2479 PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2480 break;
2481
2482 case pcmk__on_fail_restart:
2483 if (known_active) {
2484 pcmk__set_rsc_flags(rsc,
2485 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2486 stop_action(rsc, node, FALSE);
2487 }
2488 break;
2489
2490 case pcmk__on_fail_restart_container:
2491 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2492 if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
2493 /* A bundle's remote connection can run on a different node than
2494 * the bundle's container. We don't necessarily know where the
2495 * container is running yet, so remember it and add a stop
2496 * action for it later.
2497 */
2498 scheduler->priv->stop_needed =
2499 g_list_prepend(scheduler->priv->stop_needed,
2500 rsc->priv->launcher);
2501 } else if (rsc->priv->launcher != NULL) {
2502 stop_action(rsc->priv->launcher, node, FALSE);
2503 } else if (known_active) {
2504 stop_action(rsc, node, FALSE);
2505 }
2506 break;
2507
2508 case pcmk__on_fail_reset_remote:
2509 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2510 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2511 tmpnode = NULL;
2512 if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
2513 tmpnode = pcmk_find_node(scheduler, rsc->id);
2514 }
2515 if (pcmk__is_remote_node(tmpnode)
2516 && !pcmk__is_set(tmpnode->priv->flags,
2517 pcmk__node_remote_fenced)) {
2518 /* The remote connection resource failed in a way that
2519 * should result in fencing the remote node.
2520 */
2521 pe_fence_node(scheduler, tmpnode,
2522 "remote connection is unrecoverable", FALSE);
2523 }
2524 }
2525
            /* Require the stop action regardless of whether fencing is
             * occurring
             */
2527 if (known_active) {
2528 stop_action(rsc, node, FALSE);
2529 }
2530
            /* If reconnect delay is in use, prevent the connection from
             * exiting the "STOPPED" role until the failure is cleared by the
             * delay timeout
             */
2533 if (rsc->priv->remote_reconnect_ms > 0U) {
2534 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2535 }
2536 break;
2537 }
2538
2539 /* Ensure a remote connection failure forces an unclean Pacemaker Remote
2540 * node to be fenced. By marking the node as seen, the failure will result
2541 * in a fencing operation regardless if we're going to attempt to reconnect
2542 * in this transition.
2543 */
2544 if (pcmk__all_flags_set(rsc->flags,
2545 pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
2546 tmpnode = pcmk_find_node(scheduler, rsc->id);
2547 if (tmpnode && tmpnode->details->unclean) {
2548 pcmk__set_node_flags(tmpnode, pcmk__node_seen);
2549 }
2550 }
2551
2552 if (known_active) {
2553 if (pcmk__is_set(rsc->flags, pcmk__rsc_removed)) {
2554 if (pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
2555 pcmk__notice("Removed resource %s is active on %s and will be "
2556 "stopped when possible",
2557 rsc->id, pcmk__node_name(node));
2558
2559 } else {
2560 pcmk__notice("Removed resource %s must be stopped manually on "
2561 "%s because " PCMK__OPT_STOP_REMOVED_RESOURCES
2562 " is set to false",
2563 rsc->id, pcmk__node_name(node));
2564 }
2565 }
2566
2567 native_add_running(rsc, node, scheduler,
2568 (save_on_fail != pcmk__on_fail_ignore));
2569 switch (on_fail) {
2570 case pcmk__on_fail_ignore:
2571 break;
2572 case pcmk__on_fail_demote:
2573 case pcmk__on_fail_block:
2574 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2575 break;
2576 default:
2577 pcmk__set_rsc_flags(rsc,
2578 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2579 break;
2580 }
2581
2582 } else if ((rsc->priv->history_id != NULL)
2583 && (strchr(rsc->priv->history_id, ':') != NULL)) {
2584 /* @COMPAT This is for older (<1.1.8) status sections that included
2585 * instance numbers, otherwise stopped instances are considered removed.
2586 *
2587 * @TODO We should be able to drop this, but some old regression tests
2588 * will need to be updated. Double-check that this is not still needed
2589 * for unique clones (which may have been later converted to anonymous).
2590 */
2591 pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
2592 rsc->priv->history_id, rsc->id);
2593 g_clear_pointer(&rsc->priv->history_id, free);
2594
2595 } else {
2596 GList *possible_matches = pe__resource_actions(rsc, node,
2597 PCMK_ACTION_STOP, FALSE);
2598 GList *gIter = possible_matches;
2599
2600 for (; gIter != NULL; gIter = gIter->next) {
2601 pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2602
2603 pcmk__set_action_flags(stop, pcmk__action_optional);
2604 }
2605
2606 g_list_free(possible_matches);
2607 }
2608
2609 /* A successful stop after migrate_to on the migration source doesn't make
2610 * the partially migrated resource stopped on the migration target.
2611 */
2612 if ((rsc->priv->orig_role == pcmk_role_stopped)
2613 && (rsc->priv->active_nodes != NULL)
2614 && (rsc->priv->partial_migration_target != NULL)
2615 && pcmk__same_node(rsc->priv->partial_migration_source, node)) {
2616
2617 rsc->priv->orig_role = pcmk_role_started;
2618 }
2619 }
2620
2621 /* create active recurring operations as optional */
2622 static void
2623 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2624 int start_index, int stop_index,
2625 GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2626 {
2627 int counter = -1;
2628 const char *task = NULL;
2629 const char *status = NULL;
2630 GList *gIter = sorted_op_list;
2631
2632 pcmk__assert(rsc != NULL);
    pcmk__rsc_trace(rsc, "%s: start index = %d, stop index = %d",
                    rsc->id, start_index, stop_index);
2635
2636 for (; gIter != NULL; gIter = gIter->next) {
2637 xmlNode *rsc_op = (xmlNode *) gIter->data;
2638
2639 guint interval_ms = 0;
2640 char *key = NULL;
2641 const char *id = pcmk__xe_id(rsc_op);
2642
2643 counter++;
2644
2645 if (node->details->online == FALSE) {
2646 pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2647 rsc->id, pcmk__node_name(node));
2648 break;
2649
2650 /* Need to check if there's a monitor for role="Stopped" */
2651 } else if (start_index < stop_index && counter <= stop_index) {
2652 pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2653 id, pcmk__node_name(node));
2654 continue;
2655
2656 } else if (counter < start_index) {
2657 pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2658 id, pcmk__node_name(node), counter);
2659 continue;
2660 }
2661
2662 pcmk__xe_get_guint(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2663 if (interval_ms == 0) {
2664 pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2665 id, pcmk__node_name(node));
2666 continue;
2667 }
2668
2669 status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
2670 if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2671 pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2672 id, pcmk__node_name(node));
2673 continue;
2674 }
2675 task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
2676 /* create the action */
2677 key = pcmk__op_key(rsc->id, task, interval_ms);
2678 pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2679 custom_action(rsc, key, task, node, TRUE, scheduler);
2680 }
2681 }
2682
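/*!
 * \internal
 * \brief Locate a resource's most recent stop and start in sorted history
 *
 * Promote and demote actions, or a monitor that found the resource running
 * after the last stop, count as implied starts when no explicit start or
 * migrate_from entry exists.
 *
 * \param[in]  sorted_op_list  Operation history entries, sorted by call ID
 * \param[out] start_index     Where to store index of last start (or -1)
 * \param[out] stop_index      Where to store index of last stop (or -1)
 */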
2683 void
2684 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2685 int *stop_index)
2686 {
2687 int counter = -1;
2688 int implied_monitor_start = -1;
2689 int implied_clone_start = -1;
2690 const char *task = NULL;
2691 const char *status = NULL;
2692
2693 *stop_index = -1;
2694 *start_index = -1;
2695
2696 for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2697 const xmlNode *rsc_op = (const xmlNode *) iter->data;
2698
2699 counter++;
2700
2701 task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
2702 status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
2703
2704 if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2705 && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2706 *stop_index = counter;
2707
2708 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2709 PCMK_ACTION_MIGRATE_FROM, NULL)) {
2710 *start_index = counter;
2711
2712 } else if ((implied_monitor_start <= *stop_index)
2713 && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2714 pcmk__str_casei)) {
2715 const char *rc = pcmk__xe_get(rsc_op, PCMK__XA_RC_CODE);
2716
2717 if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2718 implied_monitor_start = counter;
2719 }
2720 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2721 PCMK_ACTION_DEMOTE, NULL)) {
2722 implied_clone_start = counter;
2723 }
2724 }
2725
2726 if (*start_index == -1) {
2727 if (implied_clone_start != -1) {
2728 *start_index = implied_clone_start;
2729 } else if (implied_monitor_start != -1) {
2730 *start_index = implied_monitor_start;
2731 }
2732 }
2733 }
2734
2735 // If resource history entry has shutdown lock, remember lock node and time
2736 static void
2737 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2738 const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2739 {
2740 time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2741 time_t sched_time = 0;
2742 guint shutdown_lock_ms = scheduler->priv->shutdown_lock_ms;
2743
2744 pcmk__xe_get_time(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time);
2745 if (lock_time == 0) {
2746 return;
2747 }
2748
2749 sched_time = pcmk__scheduler_epoch_time(scheduler);
2750 if ((shutdown_lock_ms > 0U)
2751 && (sched_time > (lock_time + pcmk__timeout_ms2s(shutdown_lock_ms)))) {
2752
2753 pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2754 rsc->id, pcmk__node_name(node));
2755 pe__clear_resource_history(rsc, node);
2756
2757 } else {
2758 rsc->priv->lock_node = node;
2759 rsc->priv->lock_time = lock_time;
2760 }
2761 }
2762
2763 /*!
2764 * \internal
2765 * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2766 *
2767 * \param[in,out] node Node whose status is being unpacked
 * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2769 * \param[in,out] scheduler Scheduler data
2770 *
2771 * \return Resource corresponding to the entry, or NULL if no operation history
2772 */
2773 static pcmk_resource_t *
2774 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2775 pcmk_scheduler_t *scheduler)
2776 {
2777 GList *gIter = NULL;
2778 int stop_index = -1;
2779 int start_index = -1;
2780 enum rsc_role_e req_role = pcmk_role_unknown;
2781
2782 const char *rsc_id = pcmk__xe_id(lrm_resource);
2783
2784 pcmk_resource_t *rsc = NULL;
2785 GList *op_list = NULL;
2786 GList *sorted_op_list = NULL;
2787
2788 xmlNode *rsc_op = NULL;
2789 xmlNode *last_failure = NULL;
2790
2791 enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
2792 enum rsc_role_e saved_role = pcmk_role_unknown;
2793
2794 if (rsc_id == NULL) {
2795 pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2796 " entry: No " PCMK_XA_ID);
2797 pcmk__log_xml_info(lrm_resource, "missing-id");
2798 return NULL;
2799 }
2800 pcmk__trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id,
2801 pcmk__node_name(node));
2802
2803 /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2804 * them
2805 */
2806 for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2807 NULL);
2808 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
2809
2810 op_list = g_list_prepend(op_list, rsc_op);
2811 }
2812
2813 if (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2814 if (op_list == NULL) {
2815 // If there are no operations, there is nothing to do
2816 return NULL;
2817 }
2818 }
2819
2820 /* find the resource */
2821 rsc = unpack_find_resource(scheduler, node, rsc_id);
2822 if (rsc == NULL) {
2823 if (op_list == NULL) {
2824 // If there are no operations, there is nothing to do
2825 return NULL;
2826 } else {
2827 rsc = process_removed_resource(lrm_resource, node, scheduler);
2828 }
2829 }
2830 pcmk__assert(rsc != NULL);
2831
2832 // Check whether the resource is "shutdown-locked" to this node
2833 if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2834 unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2835 }
2836
2837 /* process operations */
2838 saved_role = rsc->priv->orig_role;
2839 rsc->priv->orig_role = pcmk_role_unknown;
2840 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2841
2842 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2843 xmlNode *rsc_op = (xmlNode *) gIter->data;
2844
2845 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2846 }
2847
2848 /* create active recurring operations as optional */
2849 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2850 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2851 scheduler);
2852
2853 /* no need to free the contents */
2854 g_list_free(sorted_op_list);
2855
2856 process_rsc_state(rsc, node, on_fail);
2857
2858 if (get_target_role(rsc, &req_role)) {
2859 if ((rsc->priv->next_role == pcmk_role_unknown)
2860 || (req_role < rsc->priv->next_role)) {
2861
2862 pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2863
2864 } else if (req_role > rsc->priv->next_role) {
2865 pcmk__rsc_info(rsc,
2866 "%s: Not overwriting calculated next role %s"
2867 " with requested next role %s",
2868 rsc->id, pcmk_role_text(rsc->priv->next_role),
2869 pcmk_role_text(req_role));
2870 }
2871 }
2872
2873 if (saved_role > rsc->priv->orig_role) {
2874 rsc->priv->orig_role = saved_role;
2875 }
2876
2877 return rsc;
2878 }
2879
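/*!
 * \internal
 * \brief Map removed launched resources in a node's history to their launchers
 *
 * \param[in]     lrm_rsc_list  \c PCMK__XE_LRM_RESOURCES XML for the node
 * \param[in,out] scheduler     Scheduler data
 */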
2880 static void
2881 handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
2882 pcmk_scheduler_t *scheduler)
2883 {
2884 for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
2885 PCMK__XE_LRM_RESOURCE,
2886 NULL, NULL);
2887 rsc_entry != NULL;
2888 rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2889
2890 pcmk_resource_t *rsc;
2891 pcmk_resource_t *launcher = NULL;
2892 const char *rsc_id;
2893 const char *launcher_id = NULL;
2894
2895 launcher_id = pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER);
2896 rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
2897 if ((launcher_id == NULL) || (rsc_id == NULL)) {
2898 continue;
2899 }
2900
2901 launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
2902 if (launcher == NULL) {
2903 continue;
2904 }
2905
2906 rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2907 if ((rsc == NULL) || (rsc->priv->launcher != NULL)
2908 || !pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2909 continue;
2910 }
2911
2912 pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
2913 rsc->id, launcher_id);
2914 rsc->priv->launcher = launcher;
2915 launcher->priv->launched = g_list_append(launcher->priv->launched,
2916 rsc);
2917 }
2918 }
2919
2920 /*!
2921 * \internal
2922 * \brief Unpack one node's lrm status section
2923 *
2924 * \param[in,out] node Node whose status is being unpacked
2925 * \param[in] xml CIB node state XML
2926 * \param[in,out] scheduler Scheduler data
2927 */
2928 static void
2929 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2930 pcmk_scheduler_t *scheduler)
2931 {
2932 bool found_removed_launched_resource = false;
2933
2934 // Drill down to PCMK__XE_LRM_RESOURCES section
2935 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2936 if (xml == NULL) {
2937 return;
2938 }
2939 xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2940 if (xml == NULL) {
2941 return;
2942 }
2943
2944 // Unpack each PCMK__XE_LRM_RESOURCE entry
2945 for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2946 PCMK__XE_LRM_RESOURCE,
2947 NULL, NULL);
2948 rsc_entry != NULL;
2949 rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2950
2951 pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2952
2953 if ((rsc != NULL)
2954 && pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2955 found_removed_launched_resource = true;
2956 }
2957 }
2958
2959 /* Now that all resource state has been unpacked for this node, map any
2960 * removed launched resources to their launchers.
2961 */
2962 if (found_removed_launched_resource) {
2963 handle_removed_launched_resources(xml, scheduler);
2964 }
2965 }
2966
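// Set a resource's current role to started (unpromoted if promotable)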
2967 static void
2968 set_active(pcmk_resource_t *rsc)
2969 {
2970 const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2971
2972 if ((top != NULL) && pcmk__is_set(top->flags, pcmk__rsc_promotable)) {
2973 rsc->priv->orig_role = pcmk_role_unpromoted;
2974 } else {
2975 rsc->priv->orig_role = pcmk_role_started;
2976 }
2977 }
2978
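// GHFunc to set a node's assignment score to the int pointed to by user_data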
2979 static void
2980 set_node_score(gpointer key, gpointer value, gpointer user_data)
2981 {
2982 pcmk_node_t *node = value;
2983 int *score = user_data;
2984
2985 node->assign->score = *score;
2986 }
2987
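// XPath components for finding resource history entries in CIB status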
2988 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2989 "/" PCMK__XE_NODE_STATE
2990 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
2991 "/" PCMK__XE_LRM_RESOURCES \
2992 "/" PCMK__XE_LRM_RESOURCE
2993 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2994
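/* Find a PCMK__XE_LRM_RSC_OP entry in CIB status matching the given criteria.
 * For example, find_lrm_op("rsc1", PCMK_ACTION_MONITOR, "node1", NULL, -1,
 * scheduler) searches via (roughly) the XPath
 *
 *   /cib/status/node_state[@uname='node1']/lrm/lrm_resources
 *       /lrm_resource[@id='rsc1']/lrm_rsc_op[@operation='monitor']
 *
 * A non-negative target_rc additionally requires the entry to be a completed
 * operation with that return code.
 */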
2995 static xmlNode *
2996 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2997 int target_rc, pcmk_scheduler_t *scheduler)
2998 {
2999 GString *xpath = NULL;
3000 xmlNode *xml = NULL;
3001
3002 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
3003 return NULL);
3004
3005 xpath = g_string_sized_new(256);
3006 pcmk__g_strcat(xpath,
3007 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
3008 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
3009 SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
3010 NULL);
3011
3012 /* Need to check against transition_magic too? */
3013 if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
3014 pcmk__g_strcat(xpath,
3015 " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
3016 NULL);
3017
3018 } else if ((source != NULL)
3019 && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
3020 pcmk__g_strcat(xpath,
3021 " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
3022 NULL);
3023 } else {
3024 g_string_append_c(xpath, ']');
3025 }
3026
3027 xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3028 g_string_free(xpath, TRUE);
3029
3030 if (xml && target_rc >= 0) {
3031 int rc = PCMK_OCF_UNKNOWN_ERROR;
3032 int status = PCMK_EXEC_ERROR;
3033
3034 pcmk__xe_get_int(xml, PCMK__XA_RC_CODE, &rc);
3035 pcmk__xe_get_int(xml, PCMK__XA_OP_STATUS, &status);
3036 if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
3037 return NULL;
3038 }
3039 }
3040 return xml;
3041 }
3042
3043 static xmlNode *
3044 find_lrm_resource(const char *rsc_id, const char *node_name,
3045 pcmk_scheduler_t *scheduler)
3046 {
3047 GString *xpath = NULL;
3048 xmlNode *xml = NULL;
3049
3050 CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
3051
3052 xpath = g_string_sized_new(256);
3053 pcmk__g_strcat(xpath,
3054 XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
3055 SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
3056 NULL);
3057
3058 xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3059
3060 g_string_free(xpath, TRUE);
3061 return xml;
3062 }
3063
3064 /*!
3065 * \internal
3066 * \brief Check whether a resource has no completed action history on a node
3067 *
3068 * \param[in,out] rsc Resource to check
3069 * \param[in] node_name Node to check
3070 *
3071 * \return true if \p rsc_id is unknown on \p node_name, otherwise false
3072 */
3073 static bool
3074 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3075 {
3076 bool result = false;
3077 xmlXPathObject *search;
3078 char *xpath = NULL;
3079
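    /* Exclude entries with rc-code PCMK_OCF_UNKNOWN, which is used to record
     * operations that have not yet completed
     */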
3080 xpath = pcmk__assert_asprintf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3081 SUB_XPATH_LRM_RESOURCE
3082 "[@" PCMK_XA_ID "='%s']"
3083 SUB_XPATH_LRM_RSC_OP
3084 "[@" PCMK__XA_RC_CODE "!='%d']",
3085 node_name, rsc->id, PCMK_OCF_UNKNOWN);
3086
3087 search = pcmk__xpath_search(rsc->priv->scheduler->input->doc, xpath);
3088 result = (pcmk__xpath_num_results(search) == 0);
3089 xmlXPathFreeObject(search);
3090 free(xpath);
3091 return result;
3092 }
3093
3094 /*!
3095 * \internal
3096 * \brief Check whether a probe/monitor indicating the resource was not running
3097 * on a node happened after some event
3098 *
3099 * \param[in] rsc_id Resource being checked
3100 * \param[in] node_name Node being checked
3101 * \param[in] xml_op Event that monitor is being compared to
3102 * \param[in,out] scheduler Scheduler data
3103 *
3104 * \return true if such a monitor happened after event, false otherwise
3105 */
3106 static bool
3107 monitor_not_running_after(const char *rsc_id, const char *node_name,
3108 const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3109 {
3110 /* Any probe/monitor operation on the node indicating it was not running
3111 * there
3112 */
3113 xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3114 NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3115
3116 return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
3117 }
3118
3119 /*!
3120 * \internal
3121 * \brief Check whether any non-monitor operation on a node happened after some
3122 * event
3123 *
3124 * \param[in] rsc_id Resource being checked
3125 * \param[in] node_name Node being checked
3126 * \param[in] xml_op Event that non-monitor is being compared to
3127 * \param[in,out] scheduler Scheduler data
3128 *
 * \return true if such an operation happened after the event, false otherwise
3130 */
3131 static bool
3132 non_monitor_after(const char *rsc_id, const char *node_name,
3133 const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3134 {
3135 xmlNode *lrm_resource = NULL;
3136
3137 lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3138 if (lrm_resource == NULL) {
3139 return false;
3140 }
3141
3142 for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3143 NULL, NULL);
3144 op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {
3145
        const char *task = NULL;
3147
3148 if (op == xml_op) {
3149 continue;
3150 }
3151
3152 task = pcmk__xe_get(op, PCMK_XA_OPERATION);
3153
3154 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3155 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3156 NULL)
3157 && pe__is_newer_op(op, xml_op) > 0) {
3158 return true;
3159 }
3160 }
3161
3162 return false;
3163 }
3164
3165 /*!
3166 * \internal
3167 * \brief Check whether the resource has newer state on a node after a migration
3168 * attempt
3169 *
3170 * \param[in] rsc_id Resource being checked
3171 * \param[in] node_name Node being checked
3172 * \param[in] migrate_to Any migrate_to event that is being compared to
3173 * \param[in] migrate_from Any migrate_from event that is being compared to
3174 * \param[in,out] scheduler Scheduler data
3175 *
 * \return true if such an operation happened after the event, false otherwise
3177 */
3178 static bool
3179 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3180 const xmlNode *migrate_to,
3181 const xmlNode *migrate_from,
3182 pcmk_scheduler_t *scheduler)
3183 {
3184 const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
3185 const char *source = pcmk__xe_get(xml_op, PCMK__META_MIGRATE_SOURCE);
3186
    /* Prefer comparing to a migration event on the same node, if one exists,
     * since call IDs are more reliable there
     */
3190 if ((xml_op != migrate_to) && (migrate_to != NULL)
3191 && pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3192
3193 xml_op = migrate_to;
3194 }
3195
3196 /* If there's any newer non-monitor operation on the node, or any newer
3197 * probe/monitor operation on the node indicating it was not running there,
3198 * the migration events potentially no longer matter for the node.
3199 */
3200 return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
3201 || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
3202 }
3203
3204 /*!
3205 * \internal
3206 * \brief Parse migration source and target node names from history entry
3207 *
3208 * \param[in] entry Resource history entry for a migration action
3209 * \param[in] source_node If not NULL, source must match this node
3210 * \param[in] target_node If not NULL, target must match this node
3211 * \param[out] source_name Where to store migration source node name
3212 * \param[out] target_name Where to store migration target node name
3213 *
3214 * \return Standard Pacemaker return code
3215 */
3216 static int
3217 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3218 const pcmk_node_t *target_node,
3219 const char **source_name, const char **target_name)
3220 {
3221 *source_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_SOURCE);
3222 *target_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_TARGET);
3223 if ((*source_name == NULL) || (*target_name == NULL)) {
3224 pcmk__config_err("Ignoring resource history entry %s without "
3225 PCMK__META_MIGRATE_SOURCE " and "
3226 PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3227 return pcmk_rc_unpack_error;
3228 }
3229
3230 if ((source_node != NULL)
3231 && !pcmk__str_eq(*source_name, source_node->priv->name,
3232 pcmk__str_casei|pcmk__str_null_matches)) {
3233 pcmk__config_err("Ignoring resource history entry %s because "
3234 PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3235 pcmk__xe_id(entry), *source_name,
3236 pcmk__node_name(source_node));
3237 return pcmk_rc_unpack_error;
3238 }
3239
3240 if ((target_node != NULL)
3241 && !pcmk__str_eq(*target_name, target_node->priv->name,
3242 pcmk__str_casei|pcmk__str_null_matches)) {
3243 pcmk__config_err("Ignoring resource history entry %s because "
3244 PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3245 pcmk__xe_id(entry), *target_name,
3246 pcmk__node_name(target_node));
3247 return pcmk_rc_unpack_error;
3248 }
3249
3250 return pcmk_rc_ok;
3251 }
3252
/*!
3254 * \internal
3255 * \brief Add a migration source to a resource's list of dangling migrations
3256 *
3257 * If the migrate_to and migrate_from actions in a live migration both
3258 * succeeded, but there is no stop on the source, the migration is considered
3259 * "dangling." Add the source to the resource's dangling migration list, which
3260 * will be used to schedule a stop on the source without affecting the target.
3261 *
3262 * \param[in,out] rsc Resource involved in migration
3263 * \param[in] node Migration source
3264 */
3265 static void
3266 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3267 {
3268 pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3269 rsc->id, pcmk__node_name(node));
3270 rsc->priv->orig_role = pcmk_role_stopped;
3271 rsc->priv->dangling_migration_sources =
3272 g_list_prepend(rsc->priv->dangling_migration_sources,
3273 (gpointer) node);
3274 }
3275
3276 /*!
3277 * \internal
3278 * \brief Update resource role etc. after a successful migrate_to action
3279 *
3280 * \param[in,out] history Parsed action result history
3281 */
3282 static void
3283 unpack_migrate_to_success(struct action_history *history)
3284 {
3285 /* A complete migration sequence is:
3286 * 1. migrate_to on source node (which succeeded if we get to this function)
3287 * 2. migrate_from on target node
3288 * 3. stop on source node
3289 *
3290 * If no migrate_from has happened, the migration is considered to be
3291 * "partial". If the migrate_from succeeded but no stop has happened, the
3292 * migration is considered to be "dangling".
3293 *
3294 * If a successful migrate_to and stop have happened on the source node, we
3295 * still need to check for a partial migration, due to scenarios (easier to
3296 * produce with batch-limit=1) like:
3297 *
3298 * - A resource is migrating from node1 to node2, and a migrate_to is
3299 * initiated for it on node1.
3300 *
3301 * - node2 goes into standby mode while the migrate_to is pending, which
3302 * aborts the transition.
3303 *
3304 * - Upon completion of the migrate_to, a new transition schedules a stop
3305 * on both nodes and a start on node1.
3306 *
3307 * - If the new transition is aborted for any reason while the resource is
3308 * stopping on node1, the transition after that stop completes will see
3309 * the migrate_to and stop on the source, but it's still a partial
3310 * migration, and the resource must be stopped on node2 because it is
3311 * potentially active there due to the migrate_to.
3312 *
3313 * We also need to take into account that either node's history may be
3314 * cleared at any point in the migration process.
3315 */
3316 int from_rc = PCMK_OCF_OK;
3317 int from_status = PCMK_EXEC_PENDING;
3318 pcmk_node_t *target_node = NULL;
3319 xmlNode *migrate_from = NULL;
3320 const char *source = NULL;
3321 const char *target = NULL;
3322 bool source_newer_op = false;
3323 bool target_newer_state = false;
3324 bool active_on_target = false;
3325 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3326
3327 // Get source and target node names from XML
3328 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3329 &target) != pcmk_rc_ok) {
3330 return;
3331 }
3332
3333 // Check for newer state on the source
3334 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3335 scheduler);
3336
3337 // Check for a migrate_from action from this source on the target
3338 migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3339 target, source, -1, scheduler);
3340 if (migrate_from != NULL) {
3341 if (source_newer_op) {
3342 /* There's a newer non-monitor operation on the source and a
3343 * migrate_from on the target, so this migrate_to is irrelevant to
3344 * the resource's state.
3345 */
3346 return;
3347 }
3348 pcmk__xe_get_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3349 pcmk__xe_get_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3350 }
3351
3352 /* If the resource has newer state on both the source and target after the
3353 * migration events, this migrate_to is irrelevant to the resource's state.
3354 */
3355 target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3356 history->xml, migrate_from,
3357 scheduler);
3358 if (source_newer_op && target_newer_state) {
3359 return;
3360 }
3361
3362 /* Check for dangling migration (migrate_from succeeded but stop not done).
3363 * We know there's no stop because we already returned if the target has a
3364 * migrate_from and the source has any newer non-monitor operation.
3365 */
3366 if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3367 add_dangling_migration(history->rsc, history->node);
3368 return;
3369 }
3370
3371 /* Without newer state, this migrate_to implies the resource is active.
3372 * (Clones are not allowed to migrate, so role can't be promoted.)
3373 */
3374 history->rsc->priv->orig_role = pcmk_role_started;
3375
3376 target_node = pcmk_find_node(scheduler, target);
3377 active_on_target = !target_newer_state && (target_node != NULL)
3378 && target_node->details->online;
3379
3380 if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3381 if (active_on_target) {
3382 native_add_running(history->rsc, target_node, scheduler, TRUE);
3383 } else {
3384 // Mark resource as failed, require recovery, and prevent migration
3385 pcmk__set_rsc_flags(history->rsc,
3386 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3387 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3388 }
3389 return;
3390 }
3391
3392 // The migrate_from is pending, complete but erased, or to be scheduled
3393
3394 /* If there is no history at all for the resource on an online target, then
3395 * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3396 * have the probe result, it will be reflected in target_newer_state.
3397 */
3398 if ((target_node != NULL) && target_node->details->online
3399 && unknown_on_node(history->rsc, target)) {
3400 return;
3401 }
3402
3403 if (active_on_target) {
3404 pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3405
3406 native_add_running(history->rsc, target_node, scheduler, FALSE);
3407 if ((source_node != NULL) && source_node->details->online) {
3408 /* This is a partial migration: the migrate_to completed
3409 * successfully on the source, but the migrate_from has not
3410 * completed. Remember the source and target; if the newly
3411 * chosen target remains the same when we schedule actions
3412 * later, we may continue with the migration.
3413 */
3414 history->rsc->priv->partial_migration_target = target_node;
3415 history->rsc->priv->partial_migration_source = source_node;
3416 }
3417
3418 } else if (!source_newer_op) {
3419 // Mark resource as failed, require recovery, and prevent migration
3420 pcmk__set_rsc_flags(history->rsc,
3421 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3422 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3423 }
3424 }
3425
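/* Informal summary of the outcomes above (derived from the code, not
 * normative):
 *
 * - A newer source operation plus a recorded migrate_from, or newer state on
 *   both nodes, makes the entry irrelevant: nothing is updated.
 * - A successful migrate_from with no later stop is a dangling migration: a
 *   stop will be scheduled on the source.
 * - A failed migrate_from leaves the resource active on an online target;
 *   otherwise the resource is marked failed and non-migratable.
 * - A pending or erased migrate_from with both nodes online is a partial
 *   migration that may be continued when actions are scheduled later.
 */
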
3426 /*!
3427 * \internal
3428 * \brief Update resource role etc. after a failed migrate_to action
3429 *
3430 * \param[in,out] history Parsed action result history
3431 */
3432 static void
3433 unpack_migrate_to_failure(struct action_history *history)
3434 {
3435 xmlNode *target_migrate_from = NULL;
3436 const char *source = NULL;
3437 const char *target = NULL;
3438 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3439
3440 // Get source and target node names from XML
3441 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3442 &target) != pcmk_rc_ok) {
3443 return;
3444 }
3445
3446 /* If a migration failed, we have to assume the resource is active. Clones
3447 * are not allowed to migrate, so role can't be promoted.
3448 */
3449 history->rsc->priv->orig_role = pcmk_role_started;
3450
3451 // Check for migrate_from on the target
3452 target_migrate_from = find_lrm_op(history->rsc->id,
3453 PCMK_ACTION_MIGRATE_FROM, target, source,
3454 PCMK_OCF_OK, scheduler);
3455
3456 if (/* If the resource state is unknown on the target, it will likely be
3457 * probed there.
3458 * Don't just consider it running there. We will get back here anyway in
3459 * case the probe detects it's running there.
3460 */
3461 !unknown_on_node(history->rsc, target)
3462 /* If the resource has newer state on the target after the migration
3463 * events, this migrate_to no longer matters for the target.
3464 */
3465 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3466 target_migrate_from, scheduler)) {
        /* The resource has no newer state on the target, so assume it's still
         * active there (if it is up).
         */
3471 pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3472
3473 if (target_node && target_node->details->online) {
3474 native_add_running(history->rsc, target_node, scheduler, FALSE);
3475 }
3476
3477 } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3478 scheduler)) {
3479 /* We know the resource has newer state on the target, but this
3480 * migrate_to still matters for the source as long as there's no newer
3481 * non-monitor operation there.
3482 */
3483
3484 // Mark node as having dangling migration so we can force a stop later
3485 history->rsc->priv->dangling_migration_sources =
3486 g_list_prepend(history->rsc->priv->dangling_migration_sources,
3487 (gpointer) history->node);
3488 }
3489 }
3490
3491 /*!
3492 * \internal
3493 * \brief Update resource role etc. after a failed migrate_from action
3494 *
3495 * \param[in,out] history Parsed action result history
3496 */
3497 static void
3498 unpack_migrate_from_failure(struct action_history *history)
3499 {
3500 xmlNode *source_migrate_to = NULL;
3501 const char *source = NULL;
3502 const char *target = NULL;
3503 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3504
3505 // Get source and target node names from XML
3506 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3507 &target) != pcmk_rc_ok) {
3508 return;
3509 }
3510
3511 /* If a migration failed, we have to assume the resource is active. Clones
3512 * are not allowed to migrate, so role can't be promoted.
3513 */
3514 history->rsc->priv->orig_role = pcmk_role_started;
3515
3516 // Check for a migrate_to on the source
3517 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3518 source, target, PCMK_OCF_OK, scheduler);
3519
3520 if (/* If the resource state is unknown on the source, it will likely be
3521 * probed there.
3522 * Don't just consider it running there. We will get back here anyway in
3523 * case the probe detects it's running there.
3524 */
3525 !unknown_on_node(history->rsc, source)
3526 /* If the resource has newer state on the source after the migration
3527 * events, this migrate_from no longer matters for the source.
3528 */
3529 && !newer_state_after_migrate(history->rsc->id, source,
3530 source_migrate_to, history->xml,
3531 scheduler)) {
3532 /* The resource has no newer state on the source, so assume it's still
3533 * active there (if it is up).
3534 */
3535 pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3536
3537 if (source_node && source_node->details->online) {
3538 native_add_running(history->rsc, source_node, scheduler, TRUE);
3539 }
3540 }
3541 }
3542
3543 /*!
3544 * \internal
3545 * \brief Add an action to cluster's list of failed actions
3546 *
3547 * \param[in,out] history Parsed action result history
3548 */
3549 static void
3550 record_failed_op(struct action_history *history)
3551 {
3552 const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3553
3554 if (!(history->node->details->online)) {
3555 return;
3556 }
3557
3558 for (const xmlNode *xIter = scheduler->priv->failed->children;
3559 xIter != NULL; xIter = xIter->next) {
3560
3561 const char *key = pcmk__xe_history_key(xIter);
3562 const char *uname = pcmk__xe_get(xIter, PCMK_XA_UNAME);
3563
3564 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3565 && pcmk__str_eq(uname, history->node->priv->name,
3566 pcmk__str_casei)) {
3567 pcmk__trace("Skipping duplicate entry %s on %s", history->key,
3568 pcmk__node_name(history->node));
3569 return;
3570 }
3571 }
3572
3573 pcmk__trace("Adding entry for %s on %s to failed action list",
3574 history->key, pcmk__node_name(history->node));
3575 pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3576 pcmk__xe_set(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3577 pcmk__xml_copy(scheduler->priv->failed, history->xml);
3578 }
3579
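/* Illustrative result (attribute values hypothetical): after
 * record_failed_op(), the scheduler's failed-action list holds a copy of the
 * history entry stamped with the node and resource, roughly
 *
 *   <lrm_rsc_op id="rsc1_monitor_10000" operation="monitor" rc-code="7"
 *               uname="node1" rsc-id="rsc1" ... />
 *
 * Entries with the same operation key and node are skipped, so each failure
 * appears in the list at most once.
 */
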
3580 static char *
3581 last_change_str(const xmlNode *xml_op)
3582 {
3583 time_t when;
3584 char *result = NULL;
3585
3586 if (pcmk__xe_get_time(xml_op, PCMK_XA_LAST_RC_CHANGE,
3587 &when) == pcmk_rc_ok) {
3588 char *when_s = pcmk__epoch2str(&when, 0);
3589 const char *p = strchr(when_s, ' ');
3590
3591 // Skip day of week to make message shorter
3592 if ((p != NULL) && (*(++p) != '\0')) {
3593 result = pcmk__str_copy(p);
3594 }
3595 free(when_s);
3596 }
3597
3598 if (result == NULL) {
3599 result = pcmk__str_copy("unknown_time");
3600 }
3601
3602 return result;
3603 }
3604
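/* For example (illustrative): if PCMK_XA_LAST_RC_CHANGE decodes to a
 * ctime(3)-style string such as "Wed Jan  1 12:00:00 2025", last_change_str()
 * returns a copy of "Jan  1 12:00:00 2025" (the day of week is skipped); if
 * the attribute is missing or unparsable, it returns "unknown_time". Either
 * way, the caller must free the result.
 */
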
3605 /*!
3606 * \internal
3607 * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3608 *
3609 * \param[in,out] rsc Resource to ban
3610 */
3611 static void
3612 ban_from_all_nodes(pcmk_resource_t *rsc)
3613 {
3614 int score = -PCMK_SCORE_INFINITY;
3615 const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3616
3617 if (rsc->priv->parent != NULL) {
3618 pcmk_resource_t *parent = uber_parent(rsc);
3619
3620 if (pcmk__is_anonymous_clone(parent)) {
3621 /* For anonymous clones, if an operation with
3622 * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3623 * entire clone must stop.
3624 */
3625 rsc = parent;
3626 }
3627 }
3628
3629 // Ban the resource from all nodes
3630 pcmk__notice("%s will not be started under current conditions", rsc->id);
3631 g_clear_pointer(&rsc->priv->allowed_nodes, g_hash_table_destroy);
3632 rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
3633 g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3634 }
3635
3636 /*!
3637 * \internal
3638 * \brief Get configured failure handling and role after failure for an action
3639 *
3640 * \param[in,out] history Unpacked action history entry
3641 * \param[out] on_fail Where to set configured failure handling
 * \param[out] fail_role Where to set role to assume after failure
3643 */
3644 static void
3645 unpack_failure_handling(struct action_history *history,
3646 enum pcmk__on_fail *on_fail,
3647 enum rsc_role_e *fail_role)
3648 {
3649 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3650 history->interval_ms, true);
3651
3652 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3653 history->task,
3654 history->interval_ms, config);
3655
3656 const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3657
3658 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3659 history->interval_ms, on_fail_str);
3660 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3661 meta);
3662 g_hash_table_destroy(meta);
3663 }
3664
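/* Illustrative configuration (hypothetical resource and IDs): given an
 * operation definition like
 *
 *   <op id="rsc1-monitor-10s" name="monitor" interval="10s"
 *       on-fail="restart"/>
 *
 * unpack_failure_handling() for a 10-second monitor of rsc1 would set
 * *on_fail to pcmk__on_fail_restart and *fail_role to whatever
 * pcmk__role_after_failure() computes for that handling.
 */
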
3665 /*!
3666 * \internal
3667 * \brief Update resource role, failure handling, etc., after a failed action
3668 *
3669 * \param[in,out] history Parsed action result history
3670 * \param[in] config_on_fail Action failure handling from configuration
3671 * \param[in] fail_role Resource's role after failure of this action
 * \param[out] last_failure Where to store this entry as the latest failure
3673 * \param[in,out] on_fail Actual handling of action result
3674 */
3675 static void
3676 unpack_rsc_op_failure(struct action_history *history,
3677 enum pcmk__on_fail config_on_fail,
3678 enum rsc_role_e fail_role, xmlNode **last_failure,
3679 enum pcmk__on_fail *on_fail)
3680 {
3681 bool is_probe = false;
3682 char *last_change_s = NULL;
3683 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3684
3685 *last_failure = history->xml;
3686
3687 is_probe = pcmk_xe_is_probe(history->xml);
3688 last_change_s = last_change_str(history->xml);
3689
3690 if (!pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
3691 && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3692 pcmk__trace("Unexpected result (%s%s%s) was recorded for "
3693 "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3694 crm_exit_str(history->exit_status),
3695 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3696 pcmk__s(history->exit_reason, ""),
3697 (is_probe? "probe" : history->task), history->rsc->id,
3698 pcmk__node_name(history->node), last_change_s,
3699 history->exit_status, history->id);
3700 } else {
3701 pcmk__sched_warn(scheduler,
3702 "Unexpected result (%s%s%s) was recorded for %s of "
3703 "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3704 crm_exit_str(history->exit_status),
3705 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3706 pcmk__s(history->exit_reason, ""),
3707 (is_probe? "probe" : history->task), history->rsc->id,
3708 pcmk__node_name(history->node), last_change_s,
3709 history->exit_status, history->id);
3710
3711 if (is_probe && (history->exit_status != PCMK_OCF_OK)
3712 && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3713 && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3714
3715 /* A failed (not just unexpected) probe result could mean the user
3716 * didn't know resources will be probed even where they can't run.
3717 */
3718 pcmk__notice("If it is not possible for %s to run on %s, see the "
3719 PCMK_XA_RESOURCE_DISCOVERY " option for location "
3720 "constraints",
3721 history->rsc->id, pcmk__node_name(history->node));
3722 }
3723
3724 record_failed_op(history);
3725 }
3726
3727 free(last_change_s);
3728
3729 if (*on_fail < config_on_fail) {
3730 pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3731 pcmk__on_fail_text(*on_fail),
3732 pcmk__on_fail_text(config_on_fail), history->key);
3733 *on_fail = config_on_fail;
3734 }
3735
3736 if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3737 resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3738 "__stop_fail__", scheduler);
3739
3740 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3741 unpack_migrate_to_failure(history);
3742
3743 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3744 unpack_migrate_from_failure(history);
3745
3746 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3747 history->rsc->priv->orig_role = pcmk_role_promoted;
3748
3749 } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3750 if (config_on_fail == pcmk__on_fail_block) {
3751 history->rsc->priv->orig_role = pcmk_role_promoted;
3752 pe__set_next_role(history->rsc, pcmk_role_stopped,
3753 "demote with " PCMK_META_ON_FAIL "=block");
3754
3755 } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3756 history->rsc->priv->orig_role = pcmk_role_stopped;
3757
3758 } else {
3759 /* Staying in the promoted role would put the scheduler and
3760 * controller into a loop. Setting the role to unpromoted is not
3761 * dangerous because the resource will be stopped as part of
3762 * recovery, and any promotion will be ordered after that stop.
3763 */
3764 history->rsc->priv->orig_role = pcmk_role_unpromoted;
3765 }
3766 }
3767
3768 if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3769 /* leave stopped */
3770 pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3771 history->rsc->priv->orig_role = pcmk_role_stopped;
3772
3773 } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3774 pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3775 set_active(history->rsc);
3776 }
3777
3778 pcmk__rsc_trace(history->rsc,
3779 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3780 history->rsc->id,
3781 pcmk_role_text(history->rsc->priv->orig_role),
3782 pcmk__btoa(history->node->details->unclean),
3783 pcmk__on_fail_text(config_on_fail),
3784 pcmk_role_text(fail_role));
3785
3786 if ((fail_role != pcmk_role_started)
3787 && (history->rsc->priv->next_role < fail_role)) {
3788 pe__set_next_role(history->rsc, fail_role, "failure");
3789 }
3790
3791 if (fail_role == pcmk_role_stopped) {
3792 ban_from_all_nodes(history->rsc);
3793 }
3794 }
3795
3796 /*!
3797 * \internal
3798 * \brief Block a resource with a failed action if it cannot be recovered
3799 *
3800 * If resource action is a failed stop and fencing is not possible, mark the
3801 * resource as unmanaged and blocked, since recovery cannot be done.
3802 *
3803 * \param[in,out] history Parsed action history entry
3804 */
3805 static void
3806 block_if_unrecoverable(struct action_history *history)
3807 {
3808 char *last_change_s = NULL;
3809
3810 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3811 return; // All actions besides stop are always recoverable
3812 }
3813 if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3814 return; // Failed stops are recoverable via fencing
3815 }
3816
3817 last_change_s = last_change_str(history->xml);
3818 pcmk__sched_err(history->node->priv->scheduler,
3819 "No further recovery can be attempted for %s "
3820 "because %s on %s failed (%s%s%s) at %s "
3821 QB_XS " rc=%d id=%s",
3822 history->rsc->id, history->task,
3823 pcmk__node_name(history->node),
3824 crm_exit_str(history->exit_status),
3825 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3826 pcmk__s(history->exit_reason, ""),
3827 last_change_s, history->exit_status, history->id);
3828
3829 free(last_change_s);
3830
3831 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
3832 pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3833 }
3834
3835 /*!
3836 * \internal
3837 * \brief Update action history's execution status and why
3838 *
3839 * \param[in,out] history Parsed action history entry
3840 * \param[out] why Where to store reason for update
3841 * \param[in] value New value
3842 * \param[in] reason Description of why value was changed
3843 */
3844 static inline void
3845 remap_because(struct action_history *history, const char **why, int value,
3846 const char *reason)
3847 {
3848 if (history->execution_status != value) {
3849 history->execution_status = value;
3850 *why = reason;
3851 }
3852 }
3853
3854 /*!
3855 * \internal
3856 * \brief Remap informational monitor results and operation status
3857 *
 * For monitor results, certain OCF codes provide extended information to
 * the user about services that are not failed but not entirely healthy
 * either; Pacemaker must treat these as the "normal" result.
 *
 * For operation status, the action result can be used to determine an
 * appropriate status for responding to the action, since the status the
 * executor reports does not account for what result was expected.
3865 *
3866 * \param[in,out] history Parsed action history entry
3867 * \param[in,out] on_fail What should be done about the result
3868 * \param[in] expired Whether result is expired
3869 *
3870 * \note If the result is remapped and the node is not shutting down or failed,
3871 * the operation will be recorded in the scheduler data's list of failed
3872 * operations to highlight it for the user.
3873 *
3874 * \note This may update the resource's current and next role.
3875 */
3876 static void
3877 remap_operation(struct action_history *history,
3878 enum pcmk__on_fail *on_fail, bool expired)
3879 {
3880 /* @TODO It would probably also be a good idea to map an exit status of
3881 * CRM_EX_PROMOTED or CRM_EX_DEGRADED_PROMOTED to CRM_EX_OK for promote
3882 * actions
3883 */
3884
3885 bool is_probe = false;
3886 int orig_exit_status = history->exit_status;
3887 int orig_exec_status = history->execution_status;
3888 const char *why = NULL;
3889 const char *task = history->task;
3890
3891 // Remap degraded results to their successful counterparts
3892 history->exit_status = pcmk__effective_rc(history->exit_status);
3893 if (history->exit_status != orig_exit_status) {
3894 why = "degraded result";
3895 if (!expired && (!history->node->details->shutdown
3896 || history->node->details->online)) {
3897 record_failed_op(history);
3898 }
3899 }
3900
3901 if (!pcmk__is_bundled(history->rsc)
3902 && pcmk_xe_mask_probe_failure(history->xml)
3903 && ((history->execution_status != PCMK_EXEC_DONE)
3904 || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3905 history->execution_status = PCMK_EXEC_DONE;
3906 history->exit_status = PCMK_OCF_NOT_RUNNING;
3907 why = "equivalent probe result";
3908 }
3909
3910 /* If the executor reported an execution status of anything but done or
3911 * error, consider that final. But for done or error, we know better whether
3912 * it should be treated as a failure or not, because we know the expected
3913 * result.
3914 */
3915 switch (history->execution_status) {
3916 case PCMK_EXEC_DONE:
3917 case PCMK_EXEC_ERROR:
3918 break;
3919
3920 // These should be treated as node-fatal
3921 case PCMK_EXEC_NO_FENCE_DEVICE:
3922 case PCMK_EXEC_NO_SECRETS:
3923 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3924 "node-fatal error");
3925 goto remap_done;
3926
3927 default:
3928 goto remap_done;
3929 }
3930
3931 is_probe = pcmk_xe_is_probe(history->xml);
3932 if (is_probe) {
3933 task = "probe";
3934 }
3935
3936 if (history->expected_exit_status < 0) {
3937 /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3938 * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3939 * expected exit status in the transition key, which (along with the
3940 * similar case of a corrupted transition key in the CIB) will be
3941 * reported to this function as -1. Pacemaker 2.0+ does not support
3942 * rolling upgrades from those versions or processing of saved CIB files
3943 * from those versions, so we do not need to care much about this case.
3944 */
3945 remap_because(history, &why, PCMK_EXEC_ERROR,
3946 "obsolete history format");
3947 pcmk__config_warn("Expected result not found for %s on %s "
3948 "(corrupt or obsolete CIB?)",
3949 history->key, pcmk__node_name(history->node));
3950
3951 } else if (history->exit_status == history->expected_exit_status) {
3952 remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3953
3954 } else {
3955 remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3956 pcmk__rsc_debug(history->rsc,
3957 "%s on %s: expected %d (%s), got %d (%s%s%s)",
3958 history->key, pcmk__node_name(history->node),
3959 history->expected_exit_status,
3960 crm_exit_str(history->expected_exit_status),
3961 history->exit_status,
3962 crm_exit_str(history->exit_status),
3963 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3964 pcmk__s(history->exit_reason, ""));
3965 }
3966
3967 switch (history->exit_status) {
3968 case PCMK_OCF_OK:
3969 if (is_probe
3970 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3971 char *last_change_s = last_change_str(history->xml);
3972
3973 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3974 pcmk__rsc_info(history->rsc,
3975 "Probe found %s active on %s at %s",
3976 history->rsc->id, pcmk__node_name(history->node),
3977 last_change_s);
3978 free(last_change_s);
3979 }
3980 break;
3981
3982 case PCMK_OCF_NOT_RUNNING:
3983 if (is_probe
3984 || (history->expected_exit_status == history->exit_status)
3985 || !pcmk__is_set(history->rsc->flags, pcmk__rsc_managed)) {
3986
3987 /* For probes, recurring monitors for the Stopped role, and
3988 * unmanaged resources, "not running" is not considered a
3989 * failure.
3990 */
3991 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3992 history->rsc->priv->orig_role = pcmk_role_stopped;
3993 *on_fail = pcmk__on_fail_ignore;
3994 pe__set_next_role(history->rsc, pcmk_role_unknown,
3995 "not running");
3996 }
3997 break;
3998
3999 case PCMK_OCF_RUNNING_PROMOTED:
4000 if (is_probe
4001 && (history->exit_status != history->expected_exit_status)) {
4002 char *last_change_s = last_change_str(history->xml);
4003
4004 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4005 pcmk__rsc_info(history->rsc,
4006 "Probe found %s active and promoted on %s at %s",
4007 history->rsc->id,
4008 pcmk__node_name(history->node), last_change_s);
4009 free(last_change_s);
4010 }
4011 if (!expired
4012 || (history->exit_status == history->expected_exit_status)) {
4013 history->rsc->priv->orig_role = pcmk_role_promoted;
4014 }
4015 break;
4016
4017 case PCMK_OCF_FAILED_PROMOTED:
4018 if (!expired) {
4019 history->rsc->priv->orig_role = pcmk_role_promoted;
4020 }
4021 remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4022 break;
4023
4024 case PCMK_OCF_NOT_CONFIGURED:
4025 remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4026 break;
4027
4028 case PCMK_OCF_UNIMPLEMENT_FEATURE:
4029 {
4030 guint interval_ms = 0;
4031 pcmk__xe_get_guint(history->xml, PCMK_META_INTERVAL,
4032 &interval_ms);
4033
4034 if (interval_ms == 0) {
4035 if (!expired) {
4036 block_if_unrecoverable(history);
4037 }
4038 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4039 "exit status");
4040 } else {
4041 remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4042 "exit status");
4043 }
4044 }
4045 break;
4046
4047 case PCMK_OCF_NOT_INSTALLED:
4048 case PCMK_OCF_INVALID_PARAM:
4049 case PCMK_OCF_INSUFFICIENT_PRIV:
4050 if (!expired) {
4051 block_if_unrecoverable(history);
4052 }
4053 remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4054 break;
4055
4056 default:
4057 if (history->execution_status == PCMK_EXEC_DONE) {
4058 char *last_change_s = last_change_str(history->xml);
4059
4060 pcmk__info("Treating unknown exit status %d from %s of %s on "
4061 "%s at %s as failure",
4062 history->exit_status, task, history->rsc->id,
4063 pcmk__node_name(history->node), last_change_s);
4064 remap_because(history, &why, PCMK_EXEC_ERROR,
4065 "unknown exit status");
4066 free(last_change_s);
4067 }
4068 break;
4069 }
4070
4071 remap_done:
4072 if (why != NULL) {
4073 pcmk__rsc_trace(history->rsc,
4074 "Remapped %s result from [%s: %s] to [%s: %s] "
4075 "because of %s",
4076 history->key, pcmk_exec_status_str(orig_exec_status),
4077 crm_exit_str(orig_exit_status),
4078 pcmk_exec_status_str(history->execution_status),
4079 crm_exit_str(history->exit_status), why);
4080 }
4081 }
4082
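/* Worked example (informal, traced from the code above): a recurring monitor
 * expected to return PCMK_OCF_OK (0) that instead reports PCMK_OCF_DEGRADED
 * is first remapped by pcmk__effective_rc() to PCMK_OCF_OK and, if the
 * result is unexpired and the node available, recorded as a failed op for
 * visibility; the exit status then matches the expected one, so the
 * execution status is remapped to PCMK_EXEC_DONE ("expected result") and
 * the action is not treated as a failure.
 */
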
// Return TRUE if a start or monitor last failure's parameters have changed
4084 static bool
4085 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4086 pcmk_resource_t *rsc, pcmk_node_t *node)
4087 {
4088 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4089 if (pe__bundle_needs_remote_name(rsc)) {
4090 /* We haven't allocated resources yet, so we can't reliably
4091 * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4092 * When that's needed, defer the check until later.
4093 */
4094 pcmk__add_param_check(xml_op, rsc, node, pcmk__check_last_failure);
4095
4096 } else {
4097 pcmk__op_digest_t *digest_data = NULL;
4098
4099 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4100 rsc->priv->scheduler);
4101 switch (digest_data->rc) {
4102 case pcmk__digest_unknown:
4103 pcmk__trace("Resource %s history entry %s on %s"
4104 " has no digest to compare",
4105 rsc->id, pcmk__xe_history_key(xml_op),
4106 node->priv->id);
4107 break;
4108 case pcmk__digest_match:
4109 break;
4110 default:
4111 return TRUE;
4112 }
4113 }
4114 }
4115 return FALSE;
4116 }
4117
4118 // Order action after fencing of remote node, given connection rsc
4119 static void
4120 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4121 pcmk_scheduler_t *scheduler)
4122 {
4123 pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4124
4125 if (remote_node) {
4126 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4127 FALSE, scheduler);
4128
4129 order_actions(fence, action, pcmk__ar_first_implies_then);
4130 }
4131 }
4132
4133 static bool
4134 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4135 guint interval_ms, bool is_last_failure)
4136 {
4137 /* Clearing failures of recurring monitors has special concerns. The
4138 * executor reports only changes in the monitor result, so if the
4139 * monitor is still active and still getting the same failure result,
4140 * that will go undetected after the failure is cleared.
4141 *
4142 * Also, the operation history will have the time when the recurring
4143 * monitor result changed to the given code, not the time when the
4144 * result last happened.
4145 *
4146 * @TODO We probably should clear such failures only when the failure
4147 * timeout has passed since the last occurrence of the failed result.
4148 * However we don't record that information. We could maybe approximate
4149 * that by clearing only if there is a more recent successful monitor or
4150 * stop result, but we don't even have that information at this point
4151 * since we are still unpacking the resource's operation history.
4152 *
4153 * This is especially important for remote connection resources with a
4154 * reconnect interval, so in that case, we skip clearing failures
4155 * if the remote node hasn't been fenced.
4156 */
4157 if ((rsc->priv->remote_reconnect_ms > 0U)
4158 && pcmk__is_set(rsc->priv->scheduler->flags,
4159 pcmk__sched_fencing_enabled)
4160 && (interval_ms != 0)
4161 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4162
4163 pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4164 rsc->id);
4165
4166 if (remote_node && !pcmk__is_set(remote_node->priv->flags,
4167 pcmk__node_remote_fenced)) {
4168 if (is_last_failure) {
4169 pcmk__info("Waiting to clear monitor failure for remote node %s"
4170 " until fencing has occurred",
4171 rsc->id);
4172 }
4173 return TRUE;
4174 }
4175 }
4176 return FALSE;
4177 }
4178
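/* Illustrative configuration (hypothetical IDs): the guard above targets a
 * remote connection resource with a reconnect interval, e.g.
 *
 *   <primitive id="remote1" class="ocf" provider="pacemaker" type="remote">
 *     <instance_attributes id="remote1-attrs">
 *       <nvpair id="remote1-reconnect" name="reconnect_interval"
 *               value="60s"/>
 *     </instance_attributes>
 *   </primitive>
 *
 * Recurring monitor failures for remote1 are then not cleared until the
 * remote node has been fenced, so a still-failing monitor cannot go
 * undetected once its failure would otherwise expire.
 */
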
4179 /*!
4180 * \internal
4181 * \brief Check operation age and schedule failure clearing when appropriate
4182 *
4183 * This function has two distinct purposes. The first is to check whether an
4184 * operation history entry is expired (i.e. the resource has a failure timeout,
4185 * the entry is older than the timeout, and the resource either has no fail
4186 * count or its fail count is entirely older than the timeout). The second is to
4187 * schedule fail count clearing when appropriate (i.e. the operation is expired
4188 * and either the resource has an expired fail count or the operation is a
4189 * last_failure for a remote connection resource with a reconnect interval,
4190 * or the operation is a last_failure for a start or monitor operation and the
4191 * resource's parameters have changed since the operation).
4192 *
4193 * \param[in,out] history Parsed action result history
4194 *
4195 * \return true if operation history entry is expired, otherwise false
4196 */
4197 static bool
4198 check_operation_expiry(struct action_history *history)
4199 {
4200 bool expired = false;
4201 bool is_last_failure = (history->id != NULL)
4202 && g_str_has_suffix(history->id, "_last_failure_0");
4203 time_t last_run = 0;
4204 int unexpired_fail_count = 0;
4205 const char *clear_reason = NULL;
4206 const guint expiration_sec =
4207 pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4208 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4209
4210 if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4211 pcmk__rsc_trace(history->rsc,
4212 "Resource history entry %s on %s is not expired: "
4213 "Not Installed does not expire",
4214 history->id, pcmk__node_name(history->node));
4215 return false; // "Not installed" must always be cleared manually
4216 }
4217
4218 if ((expiration_sec > 0)
4219 && (pcmk__xe_get_time(history->xml, PCMK_XA_LAST_RC_CHANGE,
4220 &last_run) == pcmk_rc_ok)) {
4221
4222 /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4223 * timestamp
4224 */
4225
4226 time_t now = pcmk__scheduler_epoch_time(scheduler);
4227 time_t last_failure = 0;
4228
4229 // Is this particular operation history older than the failure timeout?
4230 if ((now >= (last_run + expiration_sec))
4231 && !should_ignore_failure_timeout(history->rsc, history->task,
4232 history->interval_ms,
4233 is_last_failure)) {
4234 expired = true;
4235 }
4236
4237 // Does the resource as a whole have an unexpired fail count?
4238 unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4239 &last_failure,
4240 pcmk__fc_effective,
4241 history->xml);
4242
4243 // Update scheduler recheck time according to *last* failure
4244 pcmk__trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4245 "expiration=%s last-failure@%lld",
4246 history->id, (long long) last_run, (expired? "" : "not "),
4247 (long long) now, unexpired_fail_count,
4248 pcmk__readable_interval(expiration_sec * 1000),
4249 (long long) last_failure);
4250 last_failure += expiration_sec + 1;
4251 if (unexpired_fail_count && (now < last_failure)) {
4252 pcmk__update_recheck_time(last_failure, scheduler,
4253 "fail count expiration");
4254 }
4255 }
4256
4257 if (expired) {
4258 if (pe_get_failcount(history->node, history->rsc, NULL,
4259 pcmk__fc_default, history->xml)) {
4260 // There is a fail count ignoring timeout
4261
4262 if (unexpired_fail_count == 0) {
4263 // There is no fail count considering timeout
4264 clear_reason = "it expired";
4265
4266 } else {
4267 /* This operation is old, but there is an unexpired fail count.
4268 * In a properly functioning cluster, this should only be
4269 * possible if this operation is not a failure (otherwise the
4270 * fail count should be expired too), so this is really just a
4271 * failsafe.
4272 */
4273 pcmk__rsc_trace(history->rsc,
4274 "Resource history entry %s on %s is not "
4275 "expired: Unexpired fail count",
4276 history->id, pcmk__node_name(history->node));
4277 expired = false;
4278 }
4279
4280 } else if (is_last_failure
4281 && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4282 /* Clear any expired last failure when reconnect interval is set,
4283 * even if there is no fail count.
4284 */
4285 clear_reason = "reconnect interval is set";
4286 }
4287 }
4288
4289 if (!expired && is_last_failure
4290 && should_clear_for_param_change(history->xml, history->task,
4291 history->rsc, history->node)) {
4292 clear_reason = "resource parameters have changed";
4293 }
4294
4295 if (clear_reason != NULL) {
4296 pcmk_action_t *clear_op = NULL;
4297
4298 // Schedule clearing of the fail count
4299 clear_op = pe__clear_failcount(history->rsc, history->node,
4300 clear_reason, scheduler);
4301
4302 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4303 && (history->rsc->priv->remote_reconnect_ms > 0)) {
4304 /* If we're clearing a remote connection due to a reconnect
4305 * interval, we want to wait until any scheduled fencing
4306 * completes.
4307 *
4308 * We could limit this to remote_node->details->unclean, but at
4309 * this point, that's always true (it won't be reliable until
4310 * after unpack_node_history() is done).
4311 */
4312 pcmk__info("Clearing %s failure will wait until any scheduled "
4313 "fencing of %s completes",
4314 history->task, history->rsc->id);
4315 order_after_remote_fencing(clear_op, history->rsc, scheduler);
4316 }
4317 }
4318
4319 if (expired && (history->interval_ms == 0)
4320 && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4321 switch (history->exit_status) {
4322 case PCMK_OCF_OK:
4323 case PCMK_OCF_NOT_RUNNING:
4324 case PCMK_OCF_RUNNING_PROMOTED:
4325 case PCMK_OCF_DEGRADED:
4326 case PCMK_OCF_DEGRADED_PROMOTED:
4327 // Don't expire probes that return these values
4328 pcmk__rsc_trace(history->rsc,
4329 "Resource history entry %s on %s is not "
4330 "expired: Probe result",
4331 history->id, pcmk__node_name(history->node));
4332 expired = false;
4333 break;
4334 }
4335 }
4336
4337 return expired;
4338 }
4339
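/* Worked example (hypothetical values): with PCMK_META_FAILURE_TIMEOUT
 * yielding expiration_sec=600 (10 minutes), a failure whose
 * PCMK_XA_LAST_RC_CHANGE is 12:00:00 is considered expired from 12:10:00
 * onward, unless should_ignore_failure_timeout() applies. While the fail
 * count is still unexpired, a cluster recheck is requested for one second
 * after the last failure's own expiry (12:10:01 here) so the failure is
 * cleared promptly.
 */
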
4340 int
4341 pe__target_rc_from_xml(const xmlNode *xml_op)
4342 {
4343 int target_rc = 0;
4344 const char *key = pcmk__xe_get(xml_op, PCMK__XA_TRANSITION_KEY);
4345
4346 if (key == NULL) {
4347 return -1;
4348 }
4349 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4350 return target_rc;
4351 }
4352
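/* For example (informal): a PCMK__XA_TRANSITION_KEY value has the form
 * <action>:<transition>:<expected-rc>:<uuid>, so for a key like
 * "5:12:0:01234567-89ab-cdef-0123-456789abcdef" this function returns 0
 * (PCMK_OCF_OK); if the attribute is missing entirely, it returns -1.
 */
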
4353 /*!
4354 * \internal
4355 * \brief Update a resource's state for an action result
4356 *
4357 * \param[in,out] history Parsed action history entry
4358 * \param[in] exit_status Exit status to base new state on
4359 * \param[in] last_failure Resource's last_failure entry, if known
4360 * \param[in,out] on_fail Resource's current failure handling
4361 */
4362 static void
4363 update_resource_state(struct action_history *history, int exit_status,
4364 const xmlNode *last_failure,
4365 enum pcmk__on_fail *on_fail)
4366 {
4367 bool clear_past_failure = false;
4368
4369 if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4370 || (!pcmk__is_bundled(history->rsc)
4371 && pcmk_xe_mask_probe_failure(history->xml))) {
4372 history->rsc->priv->orig_role = pcmk_role_stopped;
4373
4374 } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4375 clear_past_failure = true;
4376
4377 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4378 pcmk__str_none)) {
4379 if ((last_failure != NULL)
4380 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4381 pcmk__str_none)) {
4382 clear_past_failure = true;
4383 }
4384 if (history->rsc->priv->orig_role < pcmk_role_started) {
4385 set_active(history->rsc);
4386 }
4387
4388 } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4389 history->rsc->priv->orig_role = pcmk_role_started;
4390 clear_past_failure = true;
4391
4392 } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4393 history->rsc->priv->orig_role = pcmk_role_stopped;
4394 clear_past_failure = true;
4395
4396 } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4397 pcmk__str_none)) {
4398 history->rsc->priv->orig_role = pcmk_role_promoted;
4399 clear_past_failure = true;
4400
4401 } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4402 pcmk__str_none)) {
4403 if (*on_fail == pcmk__on_fail_demote) {
4404 /* Demote clears an error only if
4405 * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4406 */
4407 clear_past_failure = true;
4408 }
4409 history->rsc->priv->orig_role = pcmk_role_unpromoted;
4410
4411 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4412 pcmk__str_none)) {
4413 history->rsc->priv->orig_role = pcmk_role_started;
4414 clear_past_failure = true;
4415
4416 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4417 pcmk__str_none)) {
4418 unpack_migrate_to_success(history);
4419
4420 } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4421 pcmk__rsc_trace(history->rsc, "%s active on %s",
4422 history->rsc->id, pcmk__node_name(history->node));
4423 set_active(history->rsc);
4424 }
4425
4426 if (!clear_past_failure) {
4427 return;
4428 }
4429
4430 switch (*on_fail) {
4431 case pcmk__on_fail_stop:
4432 case pcmk__on_fail_ban:
4433 case pcmk__on_fail_standby_node:
4434 case pcmk__on_fail_fence_node:
4435 pcmk__rsc_trace(history->rsc,
4436 "%s (%s) is not cleared by a completed %s",
4437 history->rsc->id, pcmk__on_fail_text(*on_fail),
4438 history->task);
4439 break;
4440
4441 case pcmk__on_fail_block:
4442 case pcmk__on_fail_ignore:
4443 case pcmk__on_fail_demote:
4444 case pcmk__on_fail_restart:
4445 case pcmk__on_fail_restart_container:
4446 *on_fail = pcmk__on_fail_ignore;
4447 pe__set_next_role(history->rsc, pcmk_role_unknown,
4448 "clear past failures");
4449 break;
4450
4451 case pcmk__on_fail_reset_remote:
4452 if (history->rsc->priv->remote_reconnect_ms == 0U) {
4453 /* With no reconnect interval, the connection is allowed to
4454 * start again after the remote node is fenced and
4455 * completely stopped. (With a reconnect interval, we wait
4456 * for the failure to be cleared entirely before attempting
4457 * to reconnect.)
4458 */
4459 *on_fail = pcmk__on_fail_ignore;
4460 pe__set_next_role(history->rsc, pcmk_role_unknown,
4461 "clear past failures and reset remote");
4462 }
4463 break;
4464 }
4465 }
4466
4467 /*!
4468 * \internal
4469 * \brief Check whether a given history entry matters for resource state
4470 *
4471 * \param[in] history Parsed action history entry
4472 *
4473 * \return true if action can affect resource state, otherwise false
4474 */
4475 static inline bool
4476 can_affect_state(struct action_history *history)
4477 {
4478 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4479 PCMK_ACTION_START, PCMK_ACTION_STOP,
4480 PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4481 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4482 "asyncmon", NULL);
4483 }
4484
4485 /*!
4486 * \internal
4487 * \brief Unpack execution/exit status and exit reason from a history entry
4488 *
4489 * \param[in,out] history Action history entry to unpack
4490 *
4491 * \return Standard Pacemaker return code
4492 */
4493 static int
4494 unpack_action_result(struct action_history *history)
4495 {
4496 if ((pcmk__xe_get_int(history->xml, PCMK__XA_OP_STATUS,
4497 &(history->execution_status)) != pcmk_rc_ok)
4498 || (history->execution_status < PCMK_EXEC_PENDING)
4499 || (history->execution_status > PCMK_EXEC_MAX)
4500 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4501 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4502 "with invalid " PCMK__XA_OP_STATUS " '%s'",
4503 history->id, history->rsc->id,
4504 pcmk__node_name(history->node),
4505 pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_OP_STATUS),
4506 ""));
4507 return pcmk_rc_unpack_error;
4508 }
4509 if ((pcmk__xe_get_int(history->xml, PCMK__XA_RC_CODE,
4510 &(history->exit_status)) != pcmk_rc_ok)
4511 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4512 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4513 "with invalid " PCMK__XA_RC_CODE " '%s'",
4514 history->id, history->rsc->id,
4515 pcmk__node_name(history->node),
4516 pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_RC_CODE),
4517 ""));
4518 return pcmk_rc_unpack_error;
4519 }
4520 history->exit_reason = pcmk__xe_get(history->xml, PCMK_XA_EXIT_REASON);
4521 return pcmk_rc_ok;
4522 }
4523
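/* Illustrative history entry (hypothetical values): unpack_action_result()
 * reads the executor's verdict from attributes along the lines of
 *
 *   <lrm_rsc_op id="rsc1_start_0" op-status="0" rc-code="0"
 *               exit-reason="" ... />
 *
 * where op-status (PCMK__XA_OP_STATUS) must parse as a known execution
 * status other than "cancelled", and rc-code (PCMK__XA_RC_CODE) as a
 * plausible exit status; anything else causes the entire entry to be
 * ignored as a configuration error.
 */
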
4524 /*!
4525 * \internal
4526 * \brief Process an action history entry whose result expired
4527 *
4528 * \param[in,out] history Parsed action history entry
4529 * \param[in] orig_exit_status Action exit status before remapping
4530 *
4531 * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4532 * entry needs no further processing)
4533 */
4534 static int
4535 process_expired_result(struct action_history *history, int orig_exit_status)
4536 {
4537 if (!pcmk__is_bundled(history->rsc)
4538 && pcmk_xe_mask_probe_failure(history->xml)
4539 && (orig_exit_status != history->expected_exit_status)) {
4540
4541 if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4542 history->rsc->priv->orig_role = pcmk_role_unknown;
4543 }
4544 pcmk__trace("Ignoring resource history entry %s for probe of %s on %s: "
4545 "Masked failure expired",
4546 history->id, history->rsc->id,
4547 pcmk__node_name(history->node));
4548 return pcmk_rc_ok;
4549 }
4550
4551 if (history->exit_status == history->expected_exit_status) {
4552 return pcmk_rc_undetermined; // Only failures expire
4553 }
4554
4555 if (history->interval_ms == 0) {
4556 pcmk__notice("Ignoring resource history entry %s for %s of %s on %s: "
4557 "Expired failure",
4558 history->id, history->task, history->rsc->id,
4559 pcmk__node_name(history->node));
4560 return pcmk_rc_ok;
4561 }
4562
4563 if (history->node->details->online && !history->node->details->unclean) {
4564 /* Reschedule the recurring action. schedule_cancel() won't work at
4565 * this stage, so as a hacky workaround, forcibly change the restart
4566 * digest so pcmk__check_action_config() does what we want later.
4567 *
4568 * @TODO We should skip this if there is a newer successful monitor.
4569 * Also, this causes rescheduling only if the history entry
4570 * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4571 * scheduler regression test doesn't, but that may not be a
4572 * realistic scenario in production).
4573 */
4574 pcmk__notice("Rescheduling %s-interval %s of %s on %s after failure "
4575 "expired",
4576 pcmk__readable_interval(history->interval_ms),
4577 history->task, history->rsc->id,
4578 pcmk__node_name(history->node));
4579 pcmk__xe_set(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4580 "calculated-failure-timeout");
4581 return pcmk_rc_ok;
4582 }
4583
4584 return pcmk_rc_undetermined;
4585 }
4586
4587 /*!
4588 * \internal
4589 * \brief Process a masked probe failure
4590 *
4591 * \param[in,out] history Parsed action history entry
4592 * \param[in] orig_exit_status Action exit status before remapping
4593 * \param[in] last_failure Resource's last_failure entry, if known
4594 * \param[in,out] on_fail Resource's current failure handling
4595 */
4596 static void
4597 mask_probe_failure(struct action_history *history, int orig_exit_status,
4598 const xmlNode *last_failure,
4599 enum pcmk__on_fail *on_fail)
4600 {
4601 pcmk_resource_t *ban_rsc = history->rsc;
4602
4603 if (!pcmk__is_set(history->rsc->flags, pcmk__rsc_unique)) {
4604 ban_rsc = uber_parent(history->rsc);
4605 }
4606
4607 pcmk__notice("Treating probe result '%s' for %s on %s as 'not running'",
4608 crm_exit_str(orig_exit_status), history->rsc->id,
4609 pcmk__node_name(history->node));
4610 update_resource_state(history, history->expected_exit_status, last_failure,
4611 on_fail);
4612 pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4613
4614 record_failed_op(history);
4615 resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4616 "masked-probe-failure", ban_rsc->priv->scheduler);
4617 }
4618
4619 /*!
 * \internal
 * \brief Check whether a given failure is for a given pending action
4621 *
4622 * \param[in] history Parsed history entry for pending action
4623 * \param[in] last_failure Resource's last_failure entry, if known
4624 *
4625 * \return true if \p last_failure is failure of pending action in \p history,
4626 * otherwise false
4627 * \note Both \p history and \p last_failure must come from the same
4628 * \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4629 * the same.
4630 */
4631 static bool
4632 failure_is_newer(const struct action_history *history,
4633 const xmlNode *last_failure)
4634 {
4635 guint failure_interval_ms = 0U;
4636 long long failure_change = 0LL;
4637 long long this_change = 0LL;
4638
4639 if (last_failure == NULL) {
4640 return false; // Resource has no last_failure entry
4641 }
4642
4643 if (!pcmk__str_eq(history->task,
4644 pcmk__xe_get(last_failure, PCMK_XA_OPERATION),
4645 pcmk__str_none)) {
4646 return false; // last_failure is for different action
4647 }
4648
4649 if ((pcmk__xe_get_guint(last_failure, PCMK_META_INTERVAL,
4650 &failure_interval_ms) != pcmk_rc_ok)
4651 || (history->interval_ms != failure_interval_ms)) {
4652 return false; // last_failure is for action with different interval
4653 }
4654
4655 if ((pcmk__scan_ll(pcmk__xe_get(history->xml, PCMK_XA_LAST_RC_CHANGE),
4656 &this_change, 0LL) != pcmk_rc_ok)
4657 || (pcmk__scan_ll(pcmk__xe_get(last_failure, PCMK_XA_LAST_RC_CHANGE),
4658 &failure_change, 0LL) != pcmk_rc_ok)
4659 || (failure_change < this_change)) {
4660 return false; // Failure is not known to be newer
4661 }
4662
4663 return true;
4664 }
4665
4666 /*!
4667 * \internal
4668 * \brief Update a resource's role etc. for a pending action
4669 *
4670 * \param[in,out] history Parsed history entry for pending action
4671 * \param[in] last_failure Resource's last_failure entry, if known
4672 */
4673 static void
4674 process_pending_action(struct action_history *history,
4675 const xmlNode *last_failure)
4676 {
4677 /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4678 * and there might be a RSC_monitor_INTERVAL entry with the last successful
4679 * or pending result.
4680 *
4681 * If last_failure contains the failure of the pending recurring monitor
4682 * we're processing here, and is newer, the action is no longer pending.
4683 * (Pending results have call ID -1, which sorts last, so the last failure
4684 * if any should be known.)
4685 */
4686 if (failure_is_newer(history, last_failure)) {
4687 return;
4688 }
4689
4690 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4691 pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4692 set_active(history->rsc);
4693
4694 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4695 history->rsc->priv->orig_role = pcmk_role_promoted;
4696
4697 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4698 && history->node->details->unclean) {
        /* A migrate_to action is pending on an unclean source, so force a stop
4700 * on the target.
4701 */
4702 const char *migrate_target = NULL;
4703 pcmk_node_t *target = NULL;
4704
4705 migrate_target = pcmk__xe_get(history->xml, PCMK__META_MIGRATE_TARGET);
4706 target = pcmk_find_node(history->rsc->priv->scheduler,
4707 migrate_target);
4708 if (target != NULL) {
4709 stop_action(history->rsc, target, FALSE);
4710 }
4711 }
4712
4713 if (history->rsc->priv->pending_action != NULL) {
4714 /* There should never be multiple pending actions, but as a failsafe,
4715 * just remember the first one processed for display purposes.
4716 */
4717 return;
4718 }
4719
4720 if (pcmk_is_probe(history->task, history->interval_ms)) {
4721 /* Pending probes are currently never displayed, even if pending
4722 * operations are requested. If we ever want to change that,
4723 * enable the below and the corresponding part of
4724 * native.c:native_pending_action().
4725 */
4726 #if 0
        history->rsc->priv->pending_action = strdup("probe");
        history->rsc->priv->pending_node = history->node;
4729 #endif
4730 } else {
4731 history->rsc->priv->pending_action = strdup(history->task);
4732 history->rsc->priv->pending_node = history->node;
4733 }
4734 }
4735
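/*!
 * \internal
 * \brief Unpack one resource action history entry and update resource state
 *
 * \param[in,out] rsc           Resource that the history entry is for
 * \param[in,out] node          Node that the history entry is for
 * \param[in,out] xml_op        History entry XML to unpack
 * \param[in,out] last_failure  Where to track the resource's last failure
 *                              entry seen so far (may be updated)
 * \param[in,out] on_fail       Where to track failure handling so far
 */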
4736 static void
4737 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4738 xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4739 {
4740 int old_rc = 0;
4741 bool expired = false;
4742 pcmk_resource_t *parent = rsc;
4743 enum rsc_role_e fail_role = pcmk_role_unknown;
4744 enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4745
4746 struct action_history history = {
4747 .rsc = rsc,
4748 .node = node,
4749 .xml = xml_op,
4750 .execution_status = PCMK_EXEC_UNKNOWN,
4751 };
4752
4753 CRM_CHECK(rsc && node && xml_op, return);
4754
4755 history.id = pcmk__xe_id(xml_op);
4756 if (history.id == NULL) {
4757 pcmk__config_err("Ignoring resource history entry for %s on %s "
4758 "without ID", rsc->id, pcmk__node_name(node));
4759 return;
4760 }
4761
4762 // Task and interval
4763 history.task = pcmk__xe_get(xml_op, PCMK_XA_OPERATION);
4764 if (history.task == NULL) {
4765 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4766 "without " PCMK_XA_OPERATION,
4767 history.id, rsc->id, pcmk__node_name(node));
4768 return;
4769 }
4770 pcmk__xe_get_guint(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4771 if (!can_affect_state(&history)) {
4772 pcmk__rsc_trace(rsc,
4773 "Ignoring resource history entry %s for %s on %s "
4774 "with irrelevant action '%s'",
4775 history.id, rsc->id, pcmk__node_name(node),
4776 history.task);
4777 return;
4778 }
4779
4780 if (unpack_action_result(&history) != pcmk_rc_ok) {
4781 return; // Error already logged
4782 }
4783
4784 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4785 history.key = pcmk__xe_history_key(xml_op);
4786 pcmk__xe_get_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4787
4788 pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4789 history.id, history.task, history.call_id,
4790 pcmk__node_name(node),
4791 pcmk_exec_status_str(history.execution_status),
4792 crm_exit_str(history.exit_status));
4793
4794 if (node->details->unclean) {
4795 pcmk__rsc_trace(rsc,
4796 "%s is running on %s, which is unclean (further action "
4797 "depends on value of stop's on-fail attribute)",
4798 rsc->id, pcmk__node_name(node));
4799 }
4800
4801 expired = check_operation_expiry(&history);
4802 old_rc = history.exit_status;
4803
4804 remap_operation(&history, on_fail, expired);
4805
4806 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4807 goto done;
4808 }
4809
4810 if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4811 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4812 goto done;
4813 }
4814
4815 if (!pcmk__is_set(rsc->flags, pcmk__rsc_unique)) {
4816 parent = uber_parent(rsc);
4817 }
4818
4819 switch (history.execution_status) {
4820 case PCMK_EXEC_PENDING:
4821 process_pending_action(&history, *last_failure);
4822 goto done;
4823
4824 case PCMK_EXEC_DONE:
4825 update_resource_state(&history, history.exit_status, *last_failure,
4826 on_fail);
4827 goto done;
4828
4829 case PCMK_EXEC_NOT_INSTALLED:
4830 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4831 if (failure_strategy == pcmk__on_fail_ignore) {
4832 pcmk__warn("Cannot ignore failed %s of %s on %s: Resource "
4833 "agent doesn't exist "
4834 QB_XS " status=%d rc=%d id=%s",
4835 history.task, rsc->id, pcmk__node_name(node),
4836 history.execution_status, history.exit_status,
4837 history.id);
4838 /* Also for printing it as "FAILED" by marking it as
4839 * pcmk__rsc_failed later
4840 */
                *on_fail = pcmk__on_fail_ban;
            }
            resource_location(parent, node, -PCMK_SCORE_INFINITY,
                              "hard-error", rsc->priv->scheduler);
            unpack_rsc_op_failure(&history, failure_strategy, fail_role,
                                  last_failure, on_fail);
            goto done;

        case PCMK_EXEC_NOT_CONNECTED:
            if (pcmk__is_pacemaker_remote_node(node)
                && pcmk__is_set(node->priv->remote->flags,
                                pcmk__rsc_managed)) {
                /* We should never get into a situation where a managed remote
                 * connection resource is considered OK but a resource action
                 * behind the connection gets a "not connected" status. But as a
                 * fail-safe in case a bug or unusual circumstances do lead to
                 * that, ensure the remote connection is considered failed.
                 */
                pcmk__set_rsc_flags(node->priv->remote,
                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            }
            break; // Not done, do error handling

        case PCMK_EXEC_ERROR:
        case PCMK_EXEC_ERROR_HARD:
        case PCMK_EXEC_ERROR_FATAL:
        case PCMK_EXEC_TIMEOUT:
        case PCMK_EXEC_NOT_SUPPORTED:
        case PCMK_EXEC_INVALID:
            break; // Not done, do error handling

        default: // No other value should be possible at this point
            break;
    }

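    // The action failed, so check how the failure should be handled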
    unpack_failure_handling(&history, &failure_strategy, &fail_role);
    if ((failure_strategy == pcmk__on_fail_ignore)
        || ((failure_strategy == pcmk__on_fail_restart_container)
            && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {

        char *last_change_s = last_change_str(xml_op);

        pcmk__warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
                   QB_XS " %s",
                   history.task, crm_exit_str(history.exit_status),
                   (pcmk__str_empty(history.exit_reason)? "" : ": "),
                   pcmk__s(history.exit_reason, ""), rsc->id,
                   pcmk__node_name(node), last_change_s, history.id);
        free(last_change_s);

        update_resource_state(&history, history.expected_exit_status,
                              *last_failure, on_fail);
        pcmk__xe_set(xml_op, PCMK_XA_UNAME, node->priv->name);
        pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);

        record_failed_op(&history);

        if ((failure_strategy == pcmk__on_fail_restart_container)
            && (*on_fail <= pcmk__on_fail_restart)) {
            *on_fail = failure_strategy;
        }

    } else {
        unpack_rsc_op_failure(&history, failure_strategy, fail_role,
                              last_failure, on_fail);

        if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
            uint8_t log_level = LOG_ERR;

            if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
                log_level = LOG_NOTICE;
            }
            do_crm_log(log_level,
                       "Preventing %s from restarting on %s because "
                       "of hard failure (%s%s%s) " QB_XS " %s",
                       parent->id, pcmk__node_name(node),
                       crm_exit_str(history.exit_status),
                       (pcmk__str_empty(history.exit_reason)? "" : ": "),
                       pcmk__s(history.exit_reason, ""), history.id);
            resource_location(parent, node, -PCMK_SCORE_INFINITY,
                              "hard-error", rsc->priv->scheduler);

        } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
            pcmk__sched_err(rsc->priv->scheduler,
                            "Preventing %s from restarting anywhere because "
                            "of fatal failure (%s%s%s) " QB_XS " %s",
                            parent->id, crm_exit_str(history.exit_status),
                            (pcmk__str_empty(history.exit_reason)? "" : ": "),
                            pcmk__s(history.exit_reason, ""), history.id);
            resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
                              "fatal-error", rsc->priv->scheduler);
        }
    }

done:
    pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
                    rsc->id, pcmk__node_name(node), history.id,
                    pcmk_role_text(rsc->priv->orig_role),
                    pcmk_role_text(rsc->priv->next_role));
}

/*!
 * \internal
 * \brief Insert a node attribute with value into a \c GHashTable
 *
 * This is suitable for use with \c g_hash_table_foreach_steal().
 *
 * \param[in,out] key        Key to insert (either freed or owned by
 *                           \p user_data upon return)
 * \param[in]     value      Value to insert (owned by \p user_data upon
 *                           return)
 * \param[in]     user_data  \c GHashTable to insert into
 *
 * \return \c TRUE (so that \c g_hash_table_foreach_steal() removes the entry
 *         from the source table)
 */
static gboolean
insert_attr(gpointer key, gpointer value, gpointer user_data)
{
    GHashTable *table = user_data;

    g_hash_table_insert(table, key, value);
    return TRUE;
}

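/*!
 * \internal
 * \brief Add built-in and configured attributes to a node's attribute table
 *
 * Insert the node's name, ID, DC status, and cluster name as built-in
 * attributes, then unpack any instance attributes and utilization attributes
 * from the node's XML configuration, defaulting the site name to the cluster
 * name if not otherwise set.
 *
 * \param[in]     xml_obj    Node's XML configuration element
 * \param[in,out] node       Node to add attributes to
 * \param[in]     overwrite  Whether unpacked attributes should replace any
 *                           existing values
 * \param[in,out] scheduler  Scheduler data
 */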
static void
add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
               pcmk_scheduler_t *scheduler)
{
    const char *cluster_name = NULL;
    const char *dc_id = pcmk__xe_get(scheduler->input, PCMK_XA_DC_UUID);
    const pcmk_rule_input_t rule_input = {
        .now = scheduler->priv->now,
    };

    pcmk__insert_dup(node->priv->attrs,
                     CRM_ATTR_UNAME, node->priv->name);

    pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);

    if ((scheduler->dc_node == NULL)
        && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {

        scheduler->dc_node = node;
        pcmk__insert_dup(node->priv->attrs,
                         CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);

    } else if (!pcmk__same_node(node, scheduler->dc_node)) {
        pcmk__insert_dup(node->priv->attrs,
                         CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
    }

    cluster_name = g_hash_table_lookup(scheduler->priv->options,
                                       PCMK_OPT_CLUSTER_NAME);
    if (cluster_name) {
        pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
                         cluster_name);
    }

    if (overwrite) {
        /* @TODO Try to reorder some unpacking so that we don't need the
         * overwrite argument or to unpack into a temporary table
         */
        GHashTable *unpacked = pcmk__strkey_table(free, free);

        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
                                   &rule_input, unpacked, NULL, scheduler);
        g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
        g_hash_table_destroy(unpacked);

    } else {
        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
                                   &rule_input, node->priv->attrs, NULL,
                                   scheduler);
    }

    pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_input,
                               node->priv->utilization, NULL, scheduler);

    if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
                        pcmk__rsc_node_current) == NULL) {
        const char *site_name = pcmk__node_attr(node, "site-name", NULL,
                                                pcmk__rsc_node_current);

        if (site_name) {
            pcmk__insert_dup(node->priv->attrs,
                             CRM_ATTR_SITE_NAME, site_name);

        } else if (cluster_name) {
            /* Default to cluster-name if unset */
            pcmk__insert_dup(node->priv->attrs,
                             CRM_ATTR_SITE_NAME, cluster_name);
        }
    }
}

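/*!
 * \internal
 * \brief Extract a resource's history entries from its CIB status XML
 *
 * \param[in]     node           Name of node that history is for
 * \param[in]     rsc            ID of resource that history is for
 * \param[in,out] rsc_entry      Resource's \c PCMK__XE_LRM_RESOURCE XML
 * \param[in]     active_filter  If \c TRUE, return only entries from the
 *                               resource's most recent start onward (and
 *                               nothing if it was stopped after that)
 *
 * \return List of matching history entries (which the caller should free with
 *         \c g_list_free(), without freeing the XML)
 */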
static GList *
extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry,
                   gboolean active_filter)
{
    int counter = -1;
    int stop_index = -1;
    int start_index = -1;

    xmlNode *rsc_op = NULL;

    GList *gIter = NULL;
    GList *op_list = NULL;
    GList *sorted_op_list = NULL;

    /* extract operations */
    for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
                                       NULL);
         rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {

        pcmk__xe_set(rsc_op, PCMK_XA_RESOURCE, rsc);
        pcmk__xe_set(rsc_op, PCMK_XA_UNAME, node);
        op_list = g_list_prepend(op_list, rsc_op);
    }

    if (op_list == NULL) {
        /* if there are no operations, there is nothing to do */
        return NULL;
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);

    /* without an active filter, return all operations */
    if (active_filter == FALSE) {
        return sorted_op_list;
    }

    op_list = NULL;

    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        counter++;

        if (start_index < stop_index) {
            /* The most recent stop follows the most recent start, so the
             * resource is inactive and none of its history is relevant
             */
            pcmk__trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
            break;

        } else if (counter < start_index) {
            // Ignore entries that precede the most recent start
            pcmk__trace("Skipping %s: old", pcmk__xe_id(rsc_op));
            continue;
        }
        op_list = g_list_append(op_list, rsc_op);
    }

    g_list_free(sorted_op_list);
    return op_list;
}

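/*!
 * \brief Find resource history entries in the CIB status section
 *
 * \param[in]     rsc            If not \c NULL, include only this resource's
 *                               history
 * \param[in]     node           If not \c NULL, include only this node's
 *                               history
 * \param[in]     active_filter  If \c TRUE, include only entries relevant to
 *                               each resource's current activity
 * \param[in,out] scheduler      Scheduler data
 *
 * \return List of matching history entries (which the caller should free with
 *         \c g_list_free(), without freeing the XML)
 *
 * A minimal usage sketch (assuming \p scheduler has already been populated
 * with CIB input):
 * \code
 * GList *ops = find_operations(NULL, NULL, TRUE, scheduler);
 *
 * for (GList *iter = ops; iter != NULL; iter = iter->next) {
 *     xmlNode *op = iter->data;
 *
 *     crm_debug("Found history entry %s", pcmk__xe_id(op));
 * }
 * g_list_free(ops);
 * \endcode
 */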
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
                pcmk_scheduler_t *scheduler)
{
    GList *output = NULL;
    GList *intermediate = NULL;

    xmlNode *tmp = NULL;
    xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
                                           NULL, NULL);

    pcmk_node_t *this_node = NULL;

    xmlNode *node_state = NULL;

    CRM_CHECK(status != NULL, return NULL);

    for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
                                           NULL);
         node_state != NULL;
         node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {

        const char *uname = pcmk__xe_get(node_state, PCMK_XA_UNAME);

        if ((node != NULL) && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
            continue;
        }

        this_node = pcmk_find_node(scheduler, uname);
        if (this_node == NULL) {
            CRM_LOG_ASSERT(this_node != NULL);
            continue;

        } else if (pcmk__is_pacemaker_remote_node(this_node)) {
            determine_remote_online_status(scheduler, this_node);

        } else {
            determine_online_status(node_state, this_node, scheduler);
        }

        if (this_node->details->online
            || pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {

            /* Offline nodes run no resources if fencing is disabled. If
             * fencing is enabled, we need to ensure that resource start events
             * happen after the fencing event.
             */
            xmlNode *lrm_rsc = NULL;

            tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL);
            tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
                                       NULL);

            for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
                                                NULL, NULL);
                 lrm_rsc != NULL;
                 lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {

                const char *rsc_id = pcmk__xe_get(lrm_rsc, PCMK_XA_ID);

                if ((rsc != NULL)
                    && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
                    continue;
                }

                intermediate = extract_operations(uname, rsc_id, lrm_rsc,
                                                  active_filter);
                output = g_list_concat(output, intermediate);
            }
        }
    }

    return output;
}