1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <time.h>
16
17 #include <glib.h>
18 #include <libxml/tree.h> // xmlNode
19 #include <libxml/xpath.h> // xmlXPathObject, etc.
20
21 #include <crm/crm.h>
22 #include <crm/services.h>
23 #include <crm/common/xml.h>
24
25 #include <crm/common/util.h>
26 #include <crm/pengine/internal.h>
27 #include <pe_status_private.h>
28
29 // A (parsed) resource action history entry
30 struct action_history {
31 pcmk_resource_t *rsc; // Resource that history is for
32 pcmk_node_t *node; // Node that history is for
33 xmlNode *xml; // History entry XML
34
35 // Parsed from entry XML
36 const char *id; // XML ID of history entry
37 const char *key; // Operation key of action
38 const char *task; // Action name
39 const char *exit_reason; // Exit reason given for result
40 guint interval_ms; // Action interval
41 int call_id; // Call ID of action
42 int expected_exit_status; // Expected exit status of action
43 int exit_status; // Actual exit status of action
44 int execution_status; // Execution status of action
45 };
46
47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
48 * using pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
49 * flag is stringified more readably in log messages.
50 */
51 #define set_config_flag(scheduler, option, flag) do { \
52 GHashTable *config_hash = (scheduler)->priv->options; \
53 const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
54 \
55 if (scf_value != NULL) { \
56 if (pcmk__is_true(scf_value)) { \
57 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
58 LOG_TRACE, "Scheduler", \
59 crm_system_name, (scheduler)->flags, \
60 (flag), #flag); \
61 } else { \
62 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
63 LOG_TRACE, "Scheduler", \
64 crm_system_name, (scheduler)->flags, \
65 (flag), #flag); \
66 } \
67 } \
68 } while(0)
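/* Example (mirroring actual usage in unpack_config() below):
 *
 *     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 *                     pcmk__sched_in_maintenance);
 *
 * sets pcmk__sched_in_maintenance in scheduler->flags if the option is true,
 * clears it if the option is false, and leaves the flag unchanged if the
 * option is unset.
 */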
69
70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71 xmlNode *xml_op, xmlNode **last_failure,
72 enum pcmk__on_fail *failed);
73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74 pcmk_node_t *this_node);
75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76 bool overwrite, pcmk_scheduler_t *scheduler);
77 static void determine_online_status(const xmlNode *node_state,
78 pcmk_node_t *this_node,
79 pcmk_scheduler_t *scheduler);
80
81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
82 pcmk_scheduler_t *scheduler);
83
84
85 /*!
86 * \internal
87 * \brief Check whether a node is a dangling guest node
88 *
89 * \param[in] node Node to check
90 *
91 * \return true if \p node had a Pacemaker Remote connection resource with a
92 * launcher that was removed from the CIB, otherwise false.
93 */
94 static bool
95 is_dangling_guest_node(pcmk_node_t *node)
96 {
97 return pcmk__is_pacemaker_remote_node(node)
98 && (node->priv->remote != NULL)
99 && (node->priv->remote->priv->launcher == NULL)
100 && pcmk__is_set(node->priv->remote->flags,
101 pcmk__rsc_removed_launched);
102 }
103
104 /*!
105 * \brief Schedule a fence action for a node
106 *
107 * \param[in,out] scheduler Scheduler data
108 * \param[in,out] node Node to fence
109 * \param[in] reason Text description of why fencing is needed
110 * \param[in] priority_delay Whether to consider
111 * \c PCMK_OPT_PRIORITY_FENCING_DELAY
112 */
113 void
114 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
115 const char *reason, bool priority_delay)
116 {
117 CRM_CHECK(node, return);
118
119 if (pcmk__is_guest_or_bundle_node(node)) {
120 // Fence a guest or bundle node by marking its launcher as failed
121 pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
122
123 if (!pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
124 if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
125 pcmk__notice("Not fencing guest node %s (otherwise would "
126 "because %s): its guest resource %s is unmanaged",
127 pcmk__node_name(node), reason, rsc->id);
128 } else {
129 pcmk__sched_warn(scheduler,
130 "Guest node %s will be fenced "
131 "(by recovering its guest resource %s): %s",
132 pcmk__node_name(node), rsc->id, reason);
133
134 /* We don't mark the node as unclean because that would prevent the
135 * node from running resources. We want to allow it to run resources
136 * in this transition if the recovery succeeds.
137 */
138 pcmk__set_node_flags(node, pcmk__node_remote_reset);
139 pcmk__set_rsc_flags(rsc,
140 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
141 }
142 }
143
144 } else if (is_dangling_guest_node(node)) {
145 pcmk__info("Cleaning up dangling connection for guest node %s: fencing "
146 "was already done because %s, and guest resource no longer "
147 "exists",
148 pcmk__node_name(node), reason);
149 pcmk__set_rsc_flags(node->priv->remote,
150 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
151
152 } else if (pcmk__is_remote_node(node)) {
153 pcmk_resource_t *rsc = node->priv->remote;
154
155 if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
156 pcmk__notice("Not fencing remote node %s (otherwise would because "
157 "%s): connection is unmanaged",
158 pcmk__node_name(node), reason);
159 } else if (!pcmk__is_set(node->priv->flags, pcmk__node_remote_reset)) {
160 pcmk__set_node_flags(node, pcmk__node_remote_reset);
161 pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
162 pcmk__node_name(node),
163 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
164 reason);
165 }
166 node->details->unclean = TRUE;
167 // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
168 pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
169
170 } else if (node->details->unclean) {
171         const char *fenced_s = "is also unclean";
172
173 if (pe_can_fence(scheduler, node)) {
174 fenced_s = "would also be fenced";
175 }
176 pcmk__trace("Cluster node %s %s because %s",
177 pcmk__node_name(node), fenced_s, reason);
178
179 } else {
180 pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
181 pcmk__node_name(node),
182 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
183 reason);
184 node->details->unclean = TRUE;
185 pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
186 }
187 }
188
189 // @TODO xpaths can't handle templates, rules, or id-refs
190
191 // nvpair with provides or requires set to unfencing
192 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
193 "[(@" PCMK_XA_NAME "='" PCMK_FENCING_PROVIDES "'" \
194 "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
195 "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
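/* For example, this matches elements such as:
 *   <nvpair name="requires" value="unfencing"/>
 */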
196
197 // unfencing in rsc_defaults or any resource
198 #define XPATH_ENABLE_UNFENCING \
199 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
200 "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
201 "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
202 "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
203
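/*!
 * \internal
 * \brief Set a scheduler flag if an XPath search of the CIB finds any match
 *
 * \param[in]     flag       Scheduler flag to set
 * \param[in]     xpath      XPath expression to evaluate against the input CIB
 * \param[in,out] scheduler  Scheduler data
 */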
204 static void
205 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
206 {
207 xmlXPathObject *result = NULL;
208
209 if (!pcmk__is_set(scheduler->flags, flag)) {
210 result = pcmk__xpath_search(scheduler->input->doc, xpath);
211 if (pcmk__xpath_num_results(result) > 0) {
212 pcmk__set_scheduler_flags(scheduler, flag);
213 }
214 xmlXPathFreeObject(result);
215 }
216 }
217
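/*!
 * \internal
 * \brief Unpack cluster options from the CIB configuration
 *
 * \param[in,out] config     XML element containing cluster property sets
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE (errors in individual options are only logged)
 */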
218 gboolean
219 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
220 {
221 const char *value = NULL;
222 GHashTable *config_hash = pcmk__strkey_table(free, free);
223
224 const pcmk_rule_input_t rule_input = {
225 .now = scheduler->priv->now,
226 };
227
228 scheduler->priv->options = config_hash;
229
230 pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET,
231 &rule_input, config_hash,
232 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, scheduler);
233
234 pcmk__validate_cluster_options(config_hash);
235
236 set_config_flag(scheduler, PCMK__OPT_ENABLE_STARTUP_PROBES,
237 pcmk__sched_probe_resources);
238 if (!pcmk__is_set(scheduler->flags, pcmk__sched_probe_resources)) {
239 pcmk__warn_once(pcmk__wo_enable_startup_probes,
240 "Support for the " PCMK__OPT_ENABLE_STARTUP_PROBES " "
241 "cluster property is deprecated and will be removed "
242 "(and behave as true) in a future release. Use a "
243 "location constraint with "
244 PCMK_XA_RESOURCE_DISCOVERY "=" PCMK_VALUE_NEVER " "
245 "instead to disable probes where desired.");
246 }
247
248 value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
249 if (pcmk__is_true(value)) {
250 pcmk__info("Watchdog-based self-fencing will be performed via SBD if "
251 "fencing is required and " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT
252 " is nonzero");
253 pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
254 }
255
256 /* Set certain flags via xpath here, so they can be used before the relevant
257 * configuration sections are unpacked.
258 */
259 set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
260 scheduler);
261
262 value = pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_TIMEOUT);
263 pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));
264
265 pcmk__debug("Default fencing action timeout: %s",
266 pcmk__readable_interval(scheduler->priv->fence_timeout_ms));
267
268 set_config_flag(scheduler, PCMK_OPT_FENCING_ENABLED,
269 pcmk__sched_fencing_enabled);
270 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
271 pcmk__debug("Fencing of failed nodes is enabled");
272 } else {
273 pcmk__debug("Fencing of failed nodes is disabled");
274 }
275
276 scheduler->priv->fence_action =
277 pcmk__cluster_option(config_hash, PCMK_OPT_FENCING_ACTION);
278 pcmk__trace("Fencing will %s nodes", scheduler->priv->fence_action);
279
280 set_config_flag(scheduler, PCMK__OPT_CONCURRENT_FENCING,
281 pcmk__sched_concurrent_fencing);
282 if (pcmk__is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
283 pcmk__debug("Concurrent fencing is enabled");
284
285 } else {
286 pcmk__warn_once(pcmk__wo_concurrent_fencing,
287 "Support for the " PCMK__OPT_CONCURRENT_FENCING " "
288 "cluster property is deprecated and will be removed "
289 "(and behave as true) in a future release.");
290 }
291
292 value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
293 if (value) {
294 guint *delay_ms = &(scheduler->priv->priority_fencing_ms);
295
296 pcmk_parse_interval_spec(value, delay_ms);
297 pcmk__trace("Priority fencing delay is %s",
298 pcmk__readable_interval(*delay_ms));
299 }
300
301 set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
302 pcmk__sched_stop_all);
303 pcmk__debug("Stop all active resources: %s",
304 pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
305
306 set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
307 pcmk__sched_symmetric_cluster);
308 if (pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
309 pcmk__debug("Cluster is symmetric - resources can run anywhere by "
310 "default");
311 }
312
313 value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
314
315 if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
316 scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
317
318 } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
319 scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
320
321 } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
322 scheduler->no_quorum_policy = pcmk_no_quorum_demote;
323
324 } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
325 PCMK_VALUE_FENCE_LEGACY, NULL)) {
326 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
327 int do_panic = 0;
328
329 pcmk__xe_get_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
330 &do_panic);
331 if (do_panic
332 || pcmk__is_set(scheduler->flags, pcmk__sched_quorate)) {
333 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
334 } else {
335 pcmk__notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to "
336 "'" PCMK_VALUE_STOP "': cluster has never had "
337 "quorum");
338 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
339 }
340 } else {
341 pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
342 " to 'stop' because fencing is disabled");
343 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
344 }
345
346 } else {
347 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
348 }
349
350 switch (scheduler->no_quorum_policy) {
351 case pcmk_no_quorum_freeze:
352 pcmk__debug("On loss of quorum: Freeze resources that require "
353 "quorum");
354 break;
355 case pcmk_no_quorum_stop:
356 pcmk__debug("On loss of quorum: Stop resources that require "
357 "quorum");
358 break;
359 case pcmk_no_quorum_demote:
360 pcmk__debug("On loss of quorum: Demote promotable resources and "
361 "stop other resources");
362 break;
363 case pcmk_no_quorum_fence:
364 pcmk__notice("On loss of quorum: Fence all remaining nodes");
365 break;
366 case pcmk_no_quorum_ignore:
367 pcmk__notice("On loss of quorum: Ignore");
368 break;
369 }
370
371 set_config_flag(scheduler, PCMK__OPT_STOP_REMOVED_RESOURCES,
372 pcmk__sched_stop_removed_resources);
373 if (pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
374 pcmk__trace("Removed resources are stopped");
375 } else {
376 pcmk__warn_once(pcmk__wo_stop_removed_resources,
377 "Support for the " PCMK__OPT_STOP_REMOVED_RESOURCES " "
378 "cluster property is deprecated and will be removed "
379 "(and behave as true) in a future release.");
380 }
381
382 set_config_flag(scheduler, PCMK__OPT_CANCEL_REMOVED_ACTIONS,
383 pcmk__sched_cancel_removed_actions);
384 if (pcmk__is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
385 pcmk__trace("Removed resource actions are stopped");
386 } else {
387 pcmk__warn_once(pcmk__wo_cancel_removed_actions,
388 "Support for the " PCMK__OPT_CANCEL_REMOVED_ACTIONS " "
389 "cluster property is deprecated and will be removed "
390 "(and behave as true) in a future release.");
391 }
392
393 set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
394 pcmk__sched_in_maintenance);
395 pcmk__trace("Maintenance mode: %s",
396 pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
397
398 set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
399 pcmk__sched_start_failure_fatal);
400 if (pcmk__is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
401 pcmk__trace("Start failures are always fatal");
402 } else {
403 pcmk__trace("Start failures are handled by failcount");
404 }
405
406 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
407 set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
408 pcmk__sched_startup_fencing);
409 }
410 if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
411 pcmk__trace("Unseen nodes will be fenced");
412 } else {
413 pcmk__warn_once(pcmk__wo_blind,
414 "Blind faith: not fencing unseen nodes");
415 }
416
417 pe__unpack_node_health_scores(scheduler);
418
419 scheduler->priv->placement_strategy =
420 pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
421 pcmk__trace("Placement strategy: %s", scheduler->priv->placement_strategy);
422
423 set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
424 pcmk__sched_shutdown_lock);
425 if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
426 value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
427 pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
428 pcmk__trace("Resources will be locked to nodes that were cleanly "
429 "shut down (locks expire after %s)",
430 pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
431 } else {
432 pcmk__trace("Resources will not be locked to nodes that were cleanly "
433 "shut down");
434 }
435
436 value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
437 pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
438 if (scheduler->priv->node_pending_ms == 0U) {
439 pcmk__trace("Do not fence pending nodes");
440 } else {
441 pcmk__trace("Fence pending nodes after %s",
442 pcmk__readable_interval(scheduler->priv->node_pending_ms));
443 }
444
445 set_config_flag(scheduler, PCMK_OPT_FENCE_REMOTE_WITHOUT_QUORUM,
446 pcmk__sched_fence_remote_no_quorum);
447 if (pcmk__is_set(scheduler->flags, pcmk__sched_fence_remote_no_quorum)) {
448 pcmk__trace("Pacemaker Remote nodes may be fenced without quorum");
449
450 } else {
451 pcmk__trace("Pacemaker Remote nodes require quorum to be fenced");
452 }
453
454 return TRUE;
455 }
456
457 /*!
458 * \internal
459 * \brief Create a new node object in scheduler data
460 *
461 * \param[in] id ID of new node
462 * \param[in] uname Name of new node
463 * \param[in] type Type of new node
464 * \param[in] score Score of new node
465 * \param[in,out] scheduler Scheduler data
466 *
467 * \return Newly created node object
468 * \note The returned object is part of the scheduler data and should not be
469 * freed separately.
470 */
471 pcmk_node_t *
472 pe_create_node(const char *id, const char *uname, const char *type,
473 int score, pcmk_scheduler_t *scheduler)
474 {
475 enum pcmk__node_variant variant = pcmk__node_variant_cluster;
476 pcmk_node_t *new_node = NULL;
477
478 if (pcmk_find_node(scheduler, uname) != NULL) {
479 pcmk__config_warn("More than one node entry has name '%s'", uname);
480 }
481
482 if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
483 pcmk__str_null_matches|pcmk__str_casei)) {
484 variant = pcmk__node_variant_cluster;
485
486 } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
487 variant = pcmk__node_variant_remote;
488
489 } else {
490 pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
491 pcmk__s(uname, "without name"), type);
492 return NULL;
493 }
494
495 new_node = calloc(1, sizeof(pcmk_node_t));
496 if (new_node == NULL) {
497 pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
498 uname);
499 return NULL;
500 }
501
502 new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment));
503 new_node->details = calloc(1, sizeof(struct pcmk__node_details));
504 new_node->priv = calloc(1, sizeof(pcmk__node_private_t));
505 if ((new_node->assign == NULL) || (new_node->details == NULL)
506 || (new_node->priv == NULL)) {
507 free(new_node->assign);
508 free(new_node->details);
509 free(new_node->priv);
510 free(new_node);
511 pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
512 uname);
513 return NULL;
514 }
515
516 pcmk__trace("Creating node for entry %s/%s", uname, id);
517 new_node->assign->score = score;
518 new_node->priv->id = id;
519 new_node->priv->name = uname;
520 new_node->priv->flags = pcmk__node_probes_allowed;
521 new_node->details->online = FALSE;
522 new_node->details->shutdown = FALSE;
523 new_node->details->running_rsc = NULL;
524 new_node->priv->scheduler = scheduler;
525 new_node->priv->variant = variant;
526 new_node->priv->attrs = pcmk__strkey_table(free, free);
527 new_node->priv->utilization = pcmk__strkey_table(free, free);
528 new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);
529
530 if (pcmk__is_pacemaker_remote_node(new_node)) {
531 pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
532 pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
533 } else {
534 pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
535 }
536
537 scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
538 pe__cmp_node_name);
539 return new_node;
540 }
541
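/*!
 * \internal
 * \brief Expand a guest node's meta-attributes into a connection primitive
 *
 * If a resource is configured with the \c PCMK_META_REMOTE_NODE
 * meta-attribute, create an implicit \c ocf:pacemaker:remote primitive for
 * the guest node's connection (unless a resource with that name already
 * exists).
 *
 * \param[in,out] xml_obj  XML element of resource to check
 * \param[in,out] parent   XML element to add any new connection primitive to
 * \param[in,out] data     Scheduler data
 *
 * \return Name of guest node if a connection primitive was created, else NULL
 */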
542 static const char *
543 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
544 {
545 xmlNode *attr_set = NULL;
546 xmlNode *attr = NULL;
547
548 const char *container_id = pcmk__xe_id(xml_obj);
549 const char *remote_name = NULL;
550 const char *remote_server = NULL;
551 const char *remote_port = NULL;
552 const char *connect_timeout = "60s";
553     const char *remote_allow_migrate = NULL;
554 const char *is_managed = NULL;
555
556 // @TODO This doesn't handle rules or id-ref
557 for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
558 NULL, NULL);
559 attr_set != NULL;
560 attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {
561
562 for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
563 attr != NULL; attr = pcmk__xe_next(attr, NULL)) {
564
565 const char *value = pcmk__xe_get(attr, PCMK_XA_VALUE);
566 const char *name = pcmk__xe_get(attr, PCMK_XA_NAME);
567
568 if (name == NULL) { // Sanity
569 continue;
570 }
571
572 if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
573 remote_name = value;
574
575 } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
576 remote_server = value;
577
578 } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
579 remote_port = value;
580
581 } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
582 connect_timeout = value;
583
584 } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
585 remote_allow_migrate = value;
586
587 } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
588 is_managed = value;
589 }
590 }
591 }
592
593 if (remote_name == NULL) {
594 return NULL;
595 }
596
597 if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
598 return NULL;
599 }
600
601 pe_create_remote_xml(parent, remote_name, container_id,
602 remote_allow_migrate, is_managed,
603 connect_timeout, remote_server, remote_port);
604 return remote_name;
605 }
606
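/*!
 * \internal
 * \brief Apply startup fencing policy to a new node object
 *
 * Mark a newly created node as unclean until its status has been seen, unless
 * startup fencing is disabled or the node is a remote node without a
 * connection resource.
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in,out] new_node   Node to update
 */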
607 static void
608 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
609 {
610 if ((new_node->priv->variant == pcmk__node_variant_remote)
611 && (new_node->priv->remote == NULL)) {
612 /* Ignore fencing for remote nodes that don't have a connection resource
613 * associated with them. This happens when remote node entries get left
614 * in the nodes section after the connection resource is removed.
615 */
616 return;
617 }
618
619 if (pcmk__is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
620 // All nodes are unclean until we've seen their status entry
621 new_node->details->unclean = TRUE;
622
623 } else {
624 // Blind faith ...
625 new_node->details->unclean = FALSE;
626 }
627 }
628
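/*!
 * \internal
 * \brief Create node objects from the CIB configuration's nodes section
 *
 * \param[in,out] xml_nodes  XML element containing node entries
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE if all entries were unpacked successfully, otherwise FALSE
 */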
629 gboolean
630 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
631 {
632 xmlNode *xml_obj = NULL;
633 pcmk_node_t *new_node = NULL;
634 const char *id = NULL;
635 const char *uname = NULL;
636 const char *type = NULL;
637
638 for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
639 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {
640
641 int score = 0;
642 int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
643
644 new_node = NULL;
645
646 id = pcmk__xe_get(xml_obj, PCMK_XA_ID);
647 uname = pcmk__xe_get(xml_obj, PCMK_XA_UNAME);
648 type = pcmk__xe_get(xml_obj, PCMK_XA_TYPE);
649 pcmk__trace("Processing node %s/%s", uname, id);
650
651 if (id == NULL) {
652 pcmk__config_err("Ignoring <" PCMK_XE_NODE
653 "> entry in configuration without id");
654 continue;
655 }
656 if (rc != pcmk_rc_ok) {
657 // Not possible with schema validation enabled
658 pcmk__config_warn("Using 0 as score for node %s "
659 "because '%s' is not a valid score: %s",
660 pcmk__s(uname, "without name"),
661 pcmk__xe_get(xml_obj, PCMK_XA_SCORE),
662 pcmk_rc_str(rc));
663 }
664 new_node = pe_create_node(id, uname, type, score, scheduler);
665
666 if (new_node == NULL) {
667 return FALSE;
668 }
669
670 handle_startup_fencing(scheduler, new_node);
671
672 add_node_attrs(xml_obj, new_node, FALSE, scheduler);
673
674 pcmk__trace("Done with node %s", pcmk__xe_get(xml_obj, PCMK_XA_UNAME));
675 }
676
677 return TRUE;
678 }
679
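/*!
 * \internal
 * \brief Link a resource to the launcher named by its meta-attributes
 *
 * If the resource is collective, recurse into its children; otherwise, look
 * up the resource named by \c PCMK__META_CONTAINER (if any) and cross-link
 * the launcher and the launched resource.
 *
 * \param[in,out] rsc        Resource to check
 * \param[in,out] scheduler  Scheduler data
 */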
680 static void
681 unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
682 {
683 const char *launcher_id = NULL;
684
685 if (rsc->priv->children != NULL) {
686 g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
687 scheduler);
688 return;
689 }
690
691 launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
692 if ((launcher_id != NULL)
693 && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
694 pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
695 launcher_id);
696
697 if (launcher != NULL) {
698 rsc->priv->launcher = launcher;
699 launcher->priv->launched =
700 g_list_append(launcher->priv->launched, rsc);
701 pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
702 rsc->id, launcher_id);
703 } else {
704 pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
705 rsc->id, launcher_id);
706 }
707 }
708 }
709
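/*!
 * \internal
 * \brief Create node objects for Pacemaker Remote and guest nodes
 *
 * Scan the resource configuration for \c ocf:pacemaker:remote primitives and
 * for primitives (directly or inside groups) with guest node meta-attributes,
 * creating a node object for each remote or guest node found.
 *
 * \param[in,out] xml_resources  Top of resource configuration XML
 * \param[in,out] scheduler      Scheduler data
 *
 * \return TRUE
 */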
710 gboolean
711 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
712 {
713 xmlNode *xml_obj = NULL;
714
715 /* Create remote nodes and guest nodes from the resource configuration
716 * before unpacking resources.
717 */
718 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
719 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
720
721 const char *new_node_id = NULL;
722
723 /* Check for remote nodes, which are defined by ocf:pacemaker:remote
724 * primitives.
725 */
726 if (xml_contains_remote_node(xml_obj)) {
727 new_node_id = pcmk__xe_id(xml_obj);
728 /* The pcmk_find_node() check ensures we don't iterate over an
729 * expanded node that has already been added to the node list
730 */
731 if (new_node_id
732 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
733 pcmk__trace("Found remote node %s defined by resource %s",
734 new_node_id, pcmk__xe_id(xml_obj));
735 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
736 0, scheduler);
737 }
738 continue;
739 }
740
741 /* Check for guest nodes, which are defined by special meta-attributes
742 * of a primitive of any type (for example, VirtualDomain or Xen).
743 */
744 if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
745 /* This will add an ocf:pacemaker:remote primitive to the
746 * configuration for the guest node's connection, to be unpacked
747 * later.
748 */
749 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
750 scheduler);
751 if (new_node_id
752 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
753 pcmk__trace("Found guest node %s in resource %s",
754 new_node_id, pcmk__xe_id(xml_obj));
755 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
756 0, scheduler);
757 }
758 continue;
759 }
760
761 /* Check for guest nodes inside a group. Clones are currently not
762 * supported as guest nodes.
763 */
764 if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
765 xmlNode *xml_obj2 = NULL;
766 for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
767 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {
768
769 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
770 scheduler);
771
772 if (new_node_id
773 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
774 pcmk__trace("Found guest node %s in resource %s inside "
775 "group %s",
776 new_node_id, pcmk__xe_id(xml_obj2),
777 pcmk__xe_id(xml_obj));
778 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
779 0, scheduler);
780 }
781 }
782 }
783 }
784 return TRUE;
785 }
786
787 /* Call this after all the nodes and resources have been
788 * unpacked, but before the status section is read.
789 *
790 * A remote node's online status is reflected by the state
791 * of the remote node's connection resource. We need to link
792 * the remote node to this connection resource so we can have
793 * easy access to the connection resource during the scheduler calculations.
794 */
795 static void
796 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
797 {
798 pcmk_node_t *remote_node = NULL;
799
800 if (!pcmk__is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
801 return;
802 }
803
804 if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
805 /* remote_nodes and remote_resources are not linked in quick location calculations */
806 return;
807 }
808
809 remote_node = pcmk_find_node(scheduler, new_rsc->id);
810 CRM_CHECK(remote_node != NULL, return);
811
812 pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
813 new_rsc->id, pcmk__node_name(remote_node));
814 remote_node->priv->remote = new_rsc;
815
816 if (new_rsc->priv->launcher == NULL) {
817 /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
818 * the same as is done for cluster nodes.
819 */
820 handle_startup_fencing(scheduler, remote_node);
821
822 } else {
823 /* pe_create_node() marks the new node as "remote" or "cluster"; now
824 * that we know the node is a guest node, update it correctly.
825 */
826 pcmk__insert_dup(remote_node->priv->attrs,
827 CRM_ATTR_KIND, "container");
828 }
829 }
830
831 /*!
832 * \internal
833 * \brief Parse configuration XML for resource information
834 *
835 * \param[in] xml_resources Top of resource configuration XML
836 * \param[in,out] scheduler Scheduler data
837 *
838 * \return TRUE
839 *
840 * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
841 * be used when pe__unpack_resource() calls resource_location()
842 */
843 gboolean
844 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
845 {
846 xmlNode *xml_obj = NULL;
847 GList *gIter = NULL;
848
849 scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);
850
851 for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
852 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
853
854 pcmk_resource_t *new_rsc = NULL;
855 const char *id = pcmk__xe_id(xml_obj);
856
857 if (pcmk__str_empty(id)) {
858 pcmk__config_err("Ignoring <%s> resource without ID",
859 xml_obj->name);
860 continue;
861 }
862
863 if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
864 if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
865 NULL, NULL) == FALSE) {
866                 /* Record the template's ID so we know it exists, even if nothing references it */
867 pcmk__insert_dup(scheduler->priv->templates, id, NULL);
868 }
869 continue;
870 }
871
872 pcmk__trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
873
874 if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
875 scheduler) != pcmk_rc_ok) {
876
877 pcmk__config_err("Ignoring <%s> resource '%s' because "
878 "configuration is invalid", xml_obj->name, id);
879 continue;
880 }
881
882 scheduler->priv->resources = g_list_append(scheduler->priv->resources,
883 new_rsc);
884 pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
885 }
886
887 for (gIter = scheduler->priv->resources;
888 gIter != NULL; gIter = gIter->next) {
889
890 pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
891
892 unpack_launcher(rsc, scheduler);
893 link_rsc2remotenode(scheduler, rsc);
894 }
895
896 scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
897 pe__cmp_rsc_priority);
898 if (pcmk__is_set(scheduler->flags, pcmk__sched_location_only)) {
899 /* Ignore */
900
901 } else if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
902 && !pcmk__is_set(scheduler->flags, pcmk__sched_have_fencing)) {
903
904 /* pcs's CI tests look for this specific error message. Confer with the
905 * pcs team before changing it. If the dependency still exists, bump the
906 * CRM_FEATURE_SET and inform the pcs maintainers.
907 *
908 * Also, ResyncCIB.errors_to_ignore() looks for this specific error
909 * message as well.
910 */
911 pcmk__config_err("Resource start-up disabled since no fencing "
912 "resources have been defined. Either configure some "
913 "or disable fencing with the "
914 PCMK_OPT_FENCING_ENABLED " option. NOTE: Clusters "
915 "with shared data need fencing to ensure data "
916 "integrity.");
917 }
918
919 return TRUE;
920 }
921
922 /*!
923 * \internal
924 * \brief Validate the levels in a fencing topology
925 *
926 * \param[in] xml \c PCMK_XE_FENCING_TOPOLOGY element
927 */
928 void
929 pcmk__validate_fencing_topology(const xmlNode *xml)
930 {
931 if (xml == NULL) {
932 return;
933 }
934
935 CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);
936
937 for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
938 NULL, NULL);
939 level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {
940
941 const char *id = pcmk__xe_id(level);
942 int index = 0;
943
944 if (pcmk__str_empty(id)) {
945 pcmk__config_err("Ignoring fencing level without ID");
946 continue;
947 }
948
949 if (pcmk__xe_get_int(level, PCMK_XA_INDEX, &index) != pcmk_rc_ok) {
950 pcmk__config_err("Ignoring fencing level %s with invalid index",
951 id);
952 continue;
953 }
954
955 if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
956 pcmk__config_err("Ignoring fencing level %s with out-of-range "
957 "index %d",
958 id, index);
959 }
960 }
961 }
962
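/*!
 * \internal
 * \brief Unpack configured tags and the object IDs they reference
 *
 * \param[in,out] xml_tags   XML element containing tag entries
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE
 */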
963 gboolean
964 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
965 {
966 xmlNode *xml_tag = NULL;
967
968 scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);
969
970 for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
971 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {
972
973 xmlNode *xml_obj_ref = NULL;
974 const char *tag_id = pcmk__xe_id(xml_tag);
975
976 if (tag_id == NULL) {
977 pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
978 (const char *) xml_tag->name);
979 continue;
980 }
981
982 for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
983 NULL, NULL);
984 xml_obj_ref != NULL;
985 xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {
986
987 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
988
989 if (obj_ref == NULL) {
990 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
991 xml_obj_ref->name, tag_id);
992 continue;
993 }
994
995 pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
996 }
997 }
998
999 return TRUE;
1000 }
1001
1002 /*!
1003 * \internal
1004 * \brief Unpack a ticket state entry
1005 *
1006 * \param[in] xml_ticket XML ticket state to unpack
1007 * \param[in,out] userdata Scheduler data
1008 *
1009 * \return pcmk_rc_ok (to always continue unpacking further entries)
1010 */
1011 static int
1012 unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
1013 {
1014 pcmk_scheduler_t *scheduler = userdata;
1015
1016 const char *ticket_id = NULL;
1017 const char *granted = NULL;
1018 const char *last_granted = NULL;
1019 const char *standby = NULL;
1020 xmlAttrPtr xIter = NULL;
1021
1022 pcmk__ticket_t *ticket = NULL;
1023
1024 ticket_id = pcmk__xe_id(xml_ticket);
1025 if (pcmk__str_empty(ticket_id)) {
1026 return pcmk_rc_ok;
1027 }
1028
1029 pcmk__trace("Processing ticket state for %s", ticket_id);
1030
1031 ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
1032 ticket_id);
1033 if (ticket == NULL) {
1034 ticket = ticket_new(ticket_id, scheduler);
1035 if (ticket == NULL) {
1036 return pcmk_rc_ok;
1037 }
1038 }
1039
1040 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1041 const char *prop_name = (const char *)xIter->name;
1042 const char *prop_value = pcmk__xml_attr_value(xIter);
1043
1044 if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1045 continue;
1046 }
1047 pcmk__insert_dup(ticket->state, prop_name, prop_value);
1048 }
1049
1050 granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1051 if (pcmk__is_true(granted)) {
1052 pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
1053 pcmk__info("We have ticket '%s'", ticket->id);
1054 } else {
1055 pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
1056 pcmk__info("We do not have ticket '%s'", ticket->id);
1057 }
1058
1059 last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1060 if (last_granted) {
1061 long long last_granted_ll = 0LL;
1062 int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1063
1064 if (rc != pcmk_rc_ok) {
1065 pcmk__warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1066 " value '%s' in state for ticket %s: %s",
1067 last_granted_ll, last_granted, ticket->id,
1068 pcmk_rc_str(rc));
1069 }
1070 ticket->last_granted = (time_t) last_granted_ll;
1071 }
1072
1073 standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1074 if (pcmk__is_true(standby)) {
1075 pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
1076 if (pcmk__is_set(ticket->flags, pcmk__ticket_granted)) {
1077 pcmk__info("Granted ticket '%s' is in standby-mode", ticket->id);
1078 }
1079 } else {
1080 pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
1081 }
1082
1083 pcmk__trace("Done with ticket state for %s", ticket_id);
1084
1085 return pcmk_rc_ok;
1086 }
1087
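/*!
 * \internal
 * \brief Unpack node attributes and related state of a Pacemaker Remote node
 *
 * \param[in,out] this_node  Pacemaker Remote node to update
 * \param[in]     state      Node's \c PCMK__XE_NODE_STATE entry
 * \param[in,out] scheduler  Scheduler data
 */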
1088 static void
1089 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1090 pcmk_scheduler_t *scheduler)
1091 {
1092 const char *discovery = NULL;
1093 const xmlNode *attrs = NULL;
1094 pcmk_resource_t *rsc = NULL;
1095 int maint = 0;
1096
1097 if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1098 return;
1099 }
1100
1101 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1102 return;
1103 }
1104 pcmk__trace("Processing Pacemaker Remote node %s",
1105 pcmk__node_name(this_node));
1106
1107 pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_IN_MAINTENANCE),
1108 &maint, 0);
1109 if (maint) {
1110 pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
1111 } else {
1112 pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
1113 }
1114
1115 rsc = this_node->priv->remote;
1116 if (!pcmk__is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
1117 this_node->details->unclean = FALSE;
1118 pcmk__set_node_flags(this_node, pcmk__node_seen);
1119 }
1120 attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1121 NULL);
1122 add_node_attrs(attrs, this_node, TRUE, scheduler);
1123
1124 if (pe__shutdown_requested(this_node)) {
1125 pcmk__info("%s is shutting down", pcmk__node_name(this_node));
1126 this_node->details->shutdown = TRUE;
1127 }
1128
1129 if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1130 pcmk__rsc_node_current))) {
1131 pcmk__info("%s is in standby mode", pcmk__node_name(this_node));
1132 pcmk__set_node_flags(this_node, pcmk__node_standby);
1133 }
1134
1135 if (pcmk__is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE,
1136 NULL, pcmk__rsc_node_current))
1137 || ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_managed))) {
1138 pcmk__info("%s is in maintenance mode", pcmk__node_name(this_node));
1139 this_node->details->maintenance = TRUE;
1140 }
1141
1142 discovery = pcmk__node_attr(this_node,
1143 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1144 NULL, pcmk__rsc_node_current);
1145 if ((discovery != NULL) && !pcmk__is_true(discovery)) {
1146 pcmk__warn_once(pcmk__wo_rdisc_enabled,
1147 "Support for the "
1148 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1149 " node attribute is deprecated and will be removed"
1150 " (and behave as 'true') in a future release.");
1151
1152 if (pcmk__is_remote_node(this_node)
1153 && !pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1154 pcmk__config_warn("Ignoring "
1155 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1156 " attribute on Pacemaker Remote node %s"
1157 " because fencing is disabled",
1158 pcmk__node_name(this_node));
1159 } else {
1160 /* This is either a remote node with fencing enabled, or a guest
1161 * node. We don't care whether fencing is enabled when fencing guest
1162 * nodes, because they are "fenced" by recovering their containing
1163 * resource.
1164 */
1165 pcmk__info("%s has resource discovery disabled",
1166 pcmk__node_name(this_node));
1167 pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
1168 }
1169 }
1170 }
1171
1172 /*!
1173 * \internal
1174 * \brief Unpack a cluster node's transient attributes
1175 *
1176 * \param[in] state CIB node state XML
1177 * \param[in,out] node Cluster node whose attributes are being unpacked
1178 * \param[in,out] scheduler Scheduler data
1179 */
1180 static void
1181 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1182 pcmk_scheduler_t *scheduler)
1183 {
1184 const char *discovery = NULL;
1185 const xmlNode *attrs = pcmk__xe_first_child(state,
1186 PCMK__XE_TRANSIENT_ATTRIBUTES,
1187 NULL, NULL);
1188
1189 add_node_attrs(attrs, node, TRUE, scheduler);
1190
1191 if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1192 pcmk__rsc_node_current))) {
1193 pcmk__info("%s is in standby mode", pcmk__node_name(node));
1194 pcmk__set_node_flags(node, pcmk__node_standby);
1195 }
1196
1197 if (pcmk__is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1198 pcmk__rsc_node_current))) {
1199 pcmk__info("%s is in maintenance mode", pcmk__node_name(node));
1200 node->details->maintenance = TRUE;
1201 }
1202
1203 discovery = pcmk__node_attr(node,
1204 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1205 NULL, pcmk__rsc_node_current);
1206 if ((discovery != NULL) && !pcmk__is_true(discovery)) {
1207 pcmk__config_warn("Ignoring "
1208 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1209 " attribute for %s because disabling resource"
1210 " discovery is not allowed for cluster nodes",
1211 pcmk__node_name(node));
1212 }
1213 }
1214
1215 /*!
1216 * \internal
1217 * \brief Unpack a node state entry (first pass)
1218 *
1219 * Unpack one node state entry from status. This unpacks information from the
1220  * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
1221 * the resource history inside it. Multiple passes through the status are needed
1222 * to fully unpack everything.
1223 *
1224 * \param[in] state CIB node state XML
1225 * \param[in,out] scheduler Scheduler data
1226 */
1227 static void
1228 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1229 {
1230 const char *id = NULL;
1231 const char *uname = NULL;
1232 pcmk_node_t *this_node = NULL;
1233
1234 id = pcmk__xe_get(state, PCMK_XA_ID);
1235 if (id == NULL) {
1236 pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1237 PCMK_XA_ID);
1238 pcmk__log_xml_info(state, "missing-id");
1239 return;
1240 }
1241
1242 uname = pcmk__xe_get(state, PCMK_XA_UNAME);
1243 if (uname == NULL) {
1244 /* If a joining peer makes the cluster acquire the quorum from Corosync
1245 * but has not joined the controller CPG membership yet, it's possible
1246 * that the created PCMK__XE_NODE_STATE entry doesn't have a
1247 * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
1248 * join CPG.
1249 */
1250 pcmk__trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1251 "without " PCMK_XA_UNAME,
1252 id);
1253 }
1254
1255 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1256 if (this_node == NULL) {
1257 pcmk__notice("Ignoring recorded state for removed node with name %s "
1258 "and " PCMK_XA_ID " %s",
1259 pcmk__s(uname, "unknown"), id);
1260 return;
1261 }
1262
1263 if (pcmk__is_pacemaker_remote_node(this_node)) {
1264 int remote_fenced = 0;
1265
1266 /* We can't determine the online status of Pacemaker Remote nodes until
1267 * after all resource history has been unpacked. In this first pass, we
1268 * do need to mark whether the node has been fenced, as this plays a
1269 * role during unpacking cluster node resource state.
1270 */
1271 pcmk__scan_min_int(pcmk__xe_get(state, PCMK__XA_NODE_FENCED),
1272 &remote_fenced, 0);
1273 if (remote_fenced) {
1274 pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
1275 } else {
1276 pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
1277 }
1278 return;
1279 }
1280
1281 unpack_transient_attributes(state, this_node, scheduler);
1282
1283 /* Provisionally mark this cluster node as clean. We have at least seen it
1284 * in the current cluster's lifetime.
1285 */
1286 this_node->details->unclean = FALSE;
1287 pcmk__set_node_flags(this_node, pcmk__node_seen);
1288
1289 pcmk__trace("Determining online status of cluster node %s (id %s)",
1290 pcmk__node_name(this_node), id);
1291 determine_online_status(state, this_node, scheduler);
1292
1293 if (!pcmk__is_set(scheduler->flags, pcmk__sched_quorate)
1294 && this_node->details->online
1295 && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1296 /* Everything else should flow from this automatically
1297 * (at least until the scheduler becomes able to migrate off
1298 * healthy resources)
1299 */
1300 pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1301 FALSE);
1302 }
1303 }
1304
1305 /*!
1306 * \internal
1307 * \brief Unpack nodes' resource history as much as possible
1308 *
1309 * Unpack as many nodes' resource history as possible in one pass through the
1310 * status. We need to process Pacemaker Remote nodes' connections/containers
1311 * before unpacking their history; the connection/container history will be
1312 * in another node's history, so it might take multiple passes to unpack
1313 * everything.
1314 *
1315 * \param[in] status CIB XML status section
1316 * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
1317 * \param[in,out] scheduler Scheduler data
1318 *
1319 * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1320 * or EAGAIN if more unpacking remains to be done)
1321 */
1322 static int
1323 unpack_node_history(const xmlNode *status, bool fence,
1324 pcmk_scheduler_t *scheduler)
1325 {
1326 int rc = pcmk_rc_ok;
1327
1328 // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1329 for (const xmlNode *state = pcmk__xe_first_child(status,
1330 PCMK__XE_NODE_STATE, NULL,
1331 NULL);
1332 state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {
1333
1334 const char *id = pcmk__xe_id(state);
1335 const char *uname = pcmk__xe_get(state, PCMK_XA_UNAME);
1336 pcmk_node_t *this_node = NULL;
1337
1338 if ((id == NULL) || (uname == NULL)) {
1339 // Warning already logged in first pass through status section
1340 pcmk__trace("Not unpacking resource history from malformed "
1341 PCMK__XE_NODE_STATE " without id and/or uname");
1342 continue;
1343 }
1344
1345 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1346 if (this_node == NULL) {
1347 // Warning already logged in first pass through status section
1348 pcmk__trace("Not unpacking resource history for node %s because "
1349 "no longer in configuration",
1350 id);
1351 continue;
1352 }
1353
1354 if (pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1355 pcmk__trace("Not unpacking resource history for node %s because "
1356 "already unpacked",
1357 id);
1358 continue;
1359 }
1360
1361 if (fence) {
1362 // We're processing all remaining nodes
1363
1364 } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1365 /* We can unpack a guest node's history only after we've unpacked
1366 * other resource history to the point that we know that the node's
1367 * connection and containing resource are both up.
1368 */
1369 const pcmk_resource_t *remote = this_node->priv->remote;
1370 const pcmk_resource_t *launcher = remote->priv->launcher;
1371
1372 if ((remote->priv->orig_role != pcmk_role_started)
1373 || (launcher->priv->orig_role != pcmk_role_started)) {
1374 pcmk__trace("Not unpacking resource history for guest node %s "
1375 "because launcher and connection are not known to "
1376 "be up",
1377 id);
1378 continue;
1379 }
1380
1381 } else if (pcmk__is_remote_node(this_node)) {
1382 /* We can unpack a remote node's history only after we've unpacked
1383 * other resource history to the point that we know that the node's
1384 * connection is up, with the exception of when shutdown locks are
1385 * in use.
1386 */
1387 pcmk_resource_t *rsc = this_node->priv->remote;
1388
1389 if ((rsc == NULL)
1390 || (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1391 && (rsc->priv->orig_role != pcmk_role_started))) {
1392 pcmk__trace("Not unpacking resource history for remote node %s "
1393 "because connection is not known to be up",
1394 id);
1395 continue;
1396 }
1397
1398 /* If fencing and shutdown locks are disabled and we're not processing
1399 * unseen nodes, then we don't want to unpack offline nodes until online
1400 * nodes have been unpacked. This allows us to number active clone
1401 * instances first.
1402 */
1403 } else if (!pcmk__any_flags_set(scheduler->flags,
1404 pcmk__sched_fencing_enabled
1405 |pcmk__sched_shutdown_lock)
1406 && !this_node->details->online) {
1407 pcmk__trace("Not unpacking resource history for offline "
1408 "cluster node %s",
1409 id);
1410 continue;
1411 }
1412
1413 if (pcmk__is_pacemaker_remote_node(this_node)) {
1414 determine_remote_online_status(scheduler, this_node);
1415 unpack_handle_remote_attrs(this_node, state, scheduler);
1416 }
1417
1418 pcmk__trace("Unpacking resource history for %snode %s",
1419 (fence? "unseen " : ""), id);
1420
1421 pcmk__set_node_flags(this_node, pcmk__node_unpacked);
1422 unpack_node_lrm(this_node, state, scheduler);
1423
1424 rc = EAGAIN; // Other node histories might depend on this one
1425 }
1426 return rc;
1427 }
1428
1429 /* Unpack the CIB status section: ticket state, node states, and nodes'
1430  * resource histories. Resource history may require multiple passes, because a
1431  * Pacemaker Remote node's connection history is in another node's history. */
1432 gboolean
1433 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1434 {
1435 xmlNode *state = NULL;
1436
1437 pcmk__trace("Beginning unpack");
1438
1439 if (scheduler->priv->ticket_constraints == NULL) {
1440 scheduler->priv->ticket_constraints =
1441 pcmk__strkey_table(free, destroy_ticket);
1442 }
1443
1444 for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1445 state = pcmk__xe_next(state, NULL)) {
1446
1447 if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1448 pcmk__xe_foreach_child(state, PCMK__XE_TICKET_STATE,
1449 unpack_ticket_state, scheduler);
1450
1451 } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1452 unpack_node_state(state, scheduler);
1453 }
1454 }
1455
1456 while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1457 pcmk__trace("Another pass through node resource histories is needed");
1458 }
1459
1460 // Now catch any nodes we didn't see
1461 unpack_node_history(status,
1462 pcmk__is_set(scheduler->flags,
1463 pcmk__sched_fencing_enabled),
1464 scheduler);
1465
1466 /* Now that we know where resources are, we can schedule stops of containers
1467 * with failed bundle connections
1468 */
1469 if (scheduler->priv->stop_needed != NULL) {
1470 for (GList *item = scheduler->priv->stop_needed;
1471 item != NULL; item = item->next) {
1472
1473 pcmk_resource_t *container = item->data;
1474 pcmk_node_t *node = pcmk__current_node(container);
1475
1476 if (node) {
1477 stop_action(container, node, FALSE);
1478 }
1479 }
1480
1481 g_clear_pointer(&scheduler->priv->stop_needed, g_list_free);
1482 }
1483
1484 /* Now that we know status of all Pacemaker Remote connections and nodes,
1485 * we can stop connections for node shutdowns, and check the online status
1486 * of remote/guest nodes that didn't have any node history to unpack.
1487 */
1488 for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1489 pcmk_node_t *this_node = gIter->data;
1490
1491 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1492 continue;
1493 }
1494 if (this_node->details->shutdown
1495 && (this_node->priv->remote != NULL)) {
1496 pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
1497 "remote shutdown");
1498 }
1499 if (!pcmk__is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1500 determine_remote_online_status(scheduler, this_node);
1501 }
1502 }
1503
1504 return TRUE;
1505 }
1506
1507 /*!
1508 * \internal
1509 * \brief Unpack node's time when it became a member at the cluster layer
1510 *
1511 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
1512 * \param[in,out] scheduler Scheduler data
1513 *
1514 * \return Epoch time when node became a cluster member
1515 * (or scheduler effective time for legacy entries) if a member,
1516 * 0 if not a member, or -1 if no valid information available
1517 */
1518 static long long
1519 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1520 {
1521 const char *member_time = pcmk__xe_get(node_state, PCMK__XA_IN_CCM);
1522 bool is_member = false;
1523
1524 if (member_time == NULL) {
1525 return -1LL;
1526 }
1527
1528 if (pcmk__parse_bool(member_time, &is_member) != pcmk_rc_ok) {
1529 long long when_member = 0LL;
1530
1531 if ((pcmk__scan_ll(member_time, &when_member,
1532 0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1533 pcmk__warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in "
1534 PCMK__XE_NODE_STATE " entry",
1535 member_time);
1536 return -1LL;
1537 }
1538 return when_member;
1539 }
1540
1541 /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1542 * recorded as a boolean for a DC < 2.1.7, or the node is pending shutdown
1543 * and has left the CPG, in which case it was set to 1 to avoid fencing for
1544 * PCMK_OPT_NODE_PENDING_TIMEOUT.
1545 *
1546 * We return the effective time for in_ccm=1 because what's important to
1547 * avoid fencing is that effective time minus this value is less than the
1548 * pending node timeout.
1549 */
1550 return is_member? (long long) pcmk__scheduler_epoch_time(scheduler) : 0LL;
1551 }
1552
1553 /*!
1554 * \internal
1555 * \brief Unpack node's time when it became online in process group
1556 *
1557 * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry
1558 *
1559 * \return Epoch time when node became online in process group (or 0 if not
1560 * online, or 1 for legacy online entries)
1561 */
1562 static long long
1563 unpack_node_online(const xmlNode *node_state)
1564 {
1565 const char *peer_time = pcmk__xe_get(node_state, PCMK_XA_CRMD);
1566
1567 // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1568 if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1569 pcmk__str_casei|pcmk__str_null_matches)) {
1570 return 0LL;
1571
1572 } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1573 return 1LL;
1574
1575 } else {
1576 long long when_online = 0LL;
1577
1578 if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1579 || (when_online < 0)) {
1580 pcmk__warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1581 PCMK__XE_NODE_STATE " entry, assuming offline",
1582 peer_time);
1583 return 0LL;
1584 }
1585 return when_online;
1586 }
1587 }

/*!
 * \internal
 * \brief Unpack node attribute for user-requested fencing
 *
 * \param[in] node        Node to check
 * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry in CIB status
 *
 * \return \c true if fencing has been requested for \p node, otherwise
 *         \c false
 */
static bool
unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
{
    bool value_b = false;
    long long value_ll = 0LL;
    int rc = pcmk_rc_ok;
    const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
                                          NULL, pcmk__rsc_node_current);

    // Value may be boolean or an epoch time
    if ((value_s != NULL)
        && (pcmk__parse_bool(value_s, &value_b) == pcmk_rc_ok)) {
        return value_b;
    }

    rc = pcmk__scan_ll(value_s, &value_ll, 0LL);
    if (rc == pcmk_rc_ok) {
        return (value_ll > 0);
    }
    pcmk__warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
               " node attribute for %s: %s",
               value_s, pcmk__node_name(node), pcmk_rc_str(rc));
    return false;
}
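
/* For reference, an administrator could request fencing this way with a node
 * attribute update such as (hypothetical node name; "terminate" is the
 * attribute this function checks):
 *
 *   crm_attribute --node node1 --name terminate --update true
 */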

static gboolean
determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
                                   const xmlNode *node_state,
                                   pcmk_node_t *this_node)
{
    gboolean online = FALSE;
    const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
    const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
    long long when_member = unpack_node_member(node_state, scheduler);
    long long when_online = unpack_node_online(node_state);

    if (when_member <= 0) {
        pcmk__trace("Node %s is %sdown", pcmk__node_name(this_node),
                    ((when_member < 0)? "presumed " : ""));

    } else if (when_online > 0) {
        if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
            online = TRUE;
        } else {
            pcmk__debug("Node %s is not ready to run resources: %s",
                        pcmk__node_name(this_node), join);
        }

    } else if (!pcmk__is_set(this_node->priv->flags,
                             pcmk__node_expected_up)) {
        pcmk__trace("Node %s controller is down: "
                    "member@%lld online@%lld join=%s expected=%s",
                    pcmk__node_name(this_node), when_member, when_online,
                    pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));

    } else {
        /* mark it unclean */
        pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
        pcmk__info("Node %s member@%lld online@%lld join=%s expected=%s",
                   pcmk__node_name(this_node), when_member, when_online,
                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
    }
    return online;
}

/*!
 * \internal
 * \brief Check whether a node has taken too long to join controller group
 *
 * \param[in,out] scheduler    Scheduler data
 * \param[in]     node         Node to check
 * \param[in]     when_member  Epoch time when node became a cluster member
 * \param[in]     when_online  Epoch time when node joined controller group
 *
 * \return true if node has been pending (on the way up) longer than
 *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
 * \note This will also update the cluster's recheck time if appropriate.
 */
static inline bool
pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                 long long when_member, long long when_online)
{
    if ((scheduler->priv->node_pending_ms > 0U)
        && (when_member > 0) && (when_online <= 0)) {
        // There is a timeout on pending nodes, and node is pending

        time_t timeout = when_member
                         + pcmk__timeout_ms2s(scheduler->priv->node_pending_ms);

        if (pcmk__scheduler_epoch_time(node->priv->scheduler) >= timeout) {
            return true; // Node has timed out
        }

        // Node is pending, but still has time
        pcmk__update_recheck_time(timeout, scheduler, "pending node timeout");
    }
    return false;
}
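
/* Worked example (hypothetical values): with PCMK_OPT_NODE_PENDING_TIMEOUT
 * set to 120s (node_pending_ms == 120000) and when_member == 1000, the node
 * times out once the scheduler's effective time reaches 1120; before that,
 * the recheck time is set to 1120 so the timeout is re-evaluated then.
 */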

static bool
determine_online_status_fencing(pcmk_scheduler_t *scheduler,
                                const xmlNode *node_state,
                                pcmk_node_t *this_node)
{
    bool termination_requested = unpack_node_terminate(this_node, node_state);
    const char *join = pcmk__xe_get(node_state, PCMK__XA_JOIN);
    const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);
    long long when_member = unpack_node_member(node_state, scheduler);
    long long when_online = unpack_node_online(node_state);

    /*
      - PCMK__XA_JOIN ::= member|down|pending|banned
      - PCMK_XA_EXPECTED ::= member|down

      @COMPAT with entries recorded for DCs < 2.1.7
      - PCMK__XA_IN_CCM ::= true|false
      - PCMK_XA_CRMD ::= online|offline

      Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
      - PCMK__XA_IN_CCM ::= <timestamp>|0
      Epoch time when the node became a cluster member. A value of 0 means the
      node is not a cluster member.

      - PCMK_XA_CRMD ::= <timestamp>|0
      Epoch time when the peer became online in the CPG. A value of 0 means
      the peer is offline in the CPG.
    */
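
    /* For illustration only, a hypothetical node_state entry recorded by a
     * DC >= 2.1.7 might look like:
     *
     *   <node_state id="1" uname="node1" in_ccm="1698765432"
     *               crmd="1698765440" join="member" expected="member"/>
     *
     * while a DC < 2.1.7 would have recorded in_ccm="true" crmd="online".
     */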

    pcmk__trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
                pcmk__node_name(this_node), when_member, when_online,
                pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
                (termination_requested? " (termination requested)" : ""));

    if (this_node->details->shutdown) {
        pcmk__debug("%s is shutting down", pcmk__node_name(this_node));

        /* Slightly different criteria since we can't shut down a dead peer */
        return (when_online > 0);
    }

    if (when_member < 0) {
        pe_fence_node(scheduler, this_node,
                      "peer has not been seen by the cluster", FALSE);
        return false;
    }

    if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
        pe_fence_node(scheduler, this_node,
                      "peer failed Pacemaker membership criteria", FALSE);

    } else if (termination_requested) {
        if ((when_member <= 0) && (when_online <= 0)
            && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
            pcmk__info("%s was fenced as requested",
                       pcmk__node_name(this_node));
            return false;
        }
        pe_fence_node(scheduler, this_node, "fencing was requested", false);

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
                            pcmk__str_null_matches)) {

        if (pending_too_long(scheduler, this_node, when_member, when_online)) {
            pe_fence_node(scheduler, this_node,
                          "peer pending timed out on joining the process group",
                          FALSE);

        } else if ((when_member > 0) || (when_online > 0)) {
1766 pcmk__info("- %s is not ready to run resources",
1767 pcmk__node_name(this_node));
            pcmk__set_node_flags(this_node, pcmk__node_standby);
            this_node->details->pending = TRUE;

        } else {
            pcmk__trace("%s is down or still coming up",
                        pcmk__node_name(this_node));
        }

    } else if (when_member <= 0) {
        // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
        pe_fence_node(scheduler, this_node,
                      "peer is no longer part of the cluster", TRUE);

    } else if (when_online <= 0) {
        pe_fence_node(scheduler, this_node,
                      "peer process is no longer available", FALSE);

        /* Everything is running at this point, now check join state */

    } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
        pcmk__info("%s is active", pcmk__node_name(this_node));

    } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
                                CRMD_JOINSTATE_DOWN, NULL)) {
        pcmk__info("%s is not ready to run resources",
                   pcmk__node_name(this_node));
        pcmk__set_node_flags(this_node, pcmk__node_standby);
        this_node->details->pending = TRUE;

    } else {
        pe_fence_node(scheduler, this_node, "peer was in an unknown state",
                      FALSE);
    }

    return (when_member > 0);
}

static void
determine_remote_online_status(pcmk_scheduler_t *scheduler,
                               pcmk_node_t *this_node)
{
    pcmk_resource_t *rsc = this_node->priv->remote;
    pcmk_resource_t *launcher = NULL;
    pcmk_node_t *host = NULL;
    const char *node_type = "Remote";

    if (rsc == NULL) {
        /* This is a leftover node state entry for a former Pacemaker Remote
         * node whose connection resource was removed. Consider it offline.
         */
        pcmk__trace("Pacemaker Remote node %s is considered OFFLINE because "
                    "its connection resource has been removed from the CIB",
                    this_node->priv->id);
        this_node->details->online = FALSE;
        return;
    }

    launcher = rsc->priv->launcher;
    if (launcher != NULL) {
        node_type = "Guest";
        if (pcmk__list_of_1(rsc->priv->active_nodes)) {
            host = rsc->priv->active_nodes->data;
        }
    }

    /* If the resource is currently started, mark it online. */
    if (rsc->priv->orig_role == pcmk_role_started) {
        this_node->details->online = TRUE;
    }

    /* consider this node shutting down if transitioning start->stop */
    if ((rsc->priv->orig_role == pcmk_role_started)
        && (rsc->priv->next_role == pcmk_role_stopped)) {

        pcmk__trace("%s node %s shutting down because connection resource is "
                    "stopping",
                    node_type, this_node->priv->id);
        this_node->details->shutdown = TRUE;
    }

    /* Now check all the failure conditions. */
    if ((launcher != NULL) && pcmk__is_set(launcher->flags, pcmk__rsc_failed)) {
        pcmk__trace("Guest node %s UNCLEAN because guest resource failed",
                    this_node->priv->id);
        this_node->details->online = FALSE;
        pcmk__set_node_flags(this_node, pcmk__node_remote_reset);

    } else if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
        pcmk__trace("%s node %s OFFLINE because connection resource failed",
                    node_type, this_node->priv->id);
        this_node->details->online = FALSE;

    } else if ((rsc->priv->orig_role == pcmk_role_stopped)
               || ((launcher != NULL)
                   && (launcher->priv->orig_role == pcmk_role_stopped))) {

        pcmk__trace("%s node %s OFFLINE because its resource is stopped",
                    node_type, this_node->priv->id);
        this_node->details->online = FALSE;
        pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);

    } else if (host && (host->details->online == FALSE)
               && host->details->unclean) {
        pcmk__trace("Guest node %s UNCLEAN because host is unclean",
                    this_node->priv->id);
        this_node->details->online = FALSE;
        pcmk__set_node_flags(this_node, pcmk__node_remote_reset);

    } else {
        pcmk__trace("%s node %s is %s",
                    node_type, this_node->priv->id,
                    (this_node->details->online? "ONLINE" : "OFFLINE"));
    }
}

static void
determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
                        pcmk_scheduler_t *scheduler)
{
    gboolean online = FALSE;
    const char *exp_state = pcmk__xe_get(node_state, PCMK_XA_EXPECTED);

    CRM_CHECK(this_node != NULL, return);

    this_node->details->shutdown = FALSE;

    if (pe__shutdown_requested(this_node)) {
        this_node->details->shutdown = TRUE;

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER,
                            pcmk__str_casei)) {
        pcmk__set_node_flags(this_node, pcmk__node_expected_up);
    }

    if (!pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
        online = determine_online_status_no_fencing(scheduler, node_state,
                                                    this_node);

    } else {
        online = determine_online_status_fencing(scheduler, node_state,
                                                 this_node);
    }

    if (online) {
        this_node->details->online = TRUE;

    } else {
        /* remove node from contention */
        this_node->assign->score = -PCMK_SCORE_INFINITY;
    }

    if (online && this_node->details->shutdown) {
        /* don't run resources here */
        this_node->assign->score = -PCMK_SCORE_INFINITY;
    }

    if (this_node->details->unclean) {
        pcmk__sched_warn(scheduler, "%s is unclean",
                         pcmk__node_name(this_node));

    } else if (!this_node->details->online) {
        pcmk__trace("%s is offline", pcmk__node_name(this_node));

    } else if (this_node->details->shutdown) {
        pcmk__info("%s is shutting down", pcmk__node_name(this_node));

    } else if (this_node->details->pending) {
        pcmk__info("%s is pending", pcmk__node_name(this_node));

    } else if (pcmk__is_set(this_node->priv->flags, pcmk__node_standby)) {
        pcmk__info("%s is in standby", pcmk__node_name(this_node));

    } else if (this_node->details->maintenance) {
        pcmk__info("%s is in maintenance", pcmk__node_name(this_node));

    } else {
        pcmk__info("%s is online", pcmk__node_name(this_node));
    }
}

/*!
 * \internal
 * \brief Find the end of a resource's name, excluding any clone suffix
 *
 * \param[in] id  Resource ID to check
 *
 * \return Pointer to last character of resource's base name
 */
const char *
pe_base_name_end(const char *id)
{
    if (!pcmk__str_empty(id)) {
        const char *end = id + strlen(id) - 1;

        for (const char *s = end; s > id; --s) {
            switch (*s) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    break;
                case ':':
                    return (s == end)? s : (s - 1);
                default:
                    return end;
            }
        }
        return end;
    }
    return NULL;
}
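
/* Behavior sketch (hypothetical IDs):
 *   pe_base_name_end("myrsc")     -> pointer to the final 'c'
 *   pe_base_name_end("myrsc:0")   -> pointer to the final 'c' (":0" excluded)
 *   pe_base_name_end("myrsc:0:1") -> pointer to the '0' (only the last
 *                                    numeric suffix is excluded)
 *   pe_base_name_end("myrsc123")  -> pointer to the final '3' (trailing
 *                                    digits without ':' are part of the name)
 */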

/*!
 * \internal
 * \brief Get a resource name excluding any clone suffix
 *
 * \param[in] last_rsc_id  Resource ID to check
 *
 * \return Pointer to newly allocated string with resource's base name
 * \note It is the caller's responsibility to free() the result.
 *       This asserts on error, so callers can assume result is not NULL.
 */
char *
clone_strip(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    char *basename = NULL;

    pcmk__assert(end != NULL);
    basename = strndup(last_rsc_id, end - last_rsc_id + 1);
    pcmk__assert(basename != NULL);
    return basename;
}
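
/* Usage sketch (hypothetical ID; caller must free() the result):
 *
 *   char *base = clone_strip("galera:2");   // yields "galera"
 *   free(base);
 */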

/*!
 * \internal
 * \brief Get the name of the first instance of a cloned resource
 *
 * \param[in] last_rsc_id  Resource ID to check
 *
 * \return Pointer to newly allocated string with resource's base name plus :0
 * \note It is the caller's responsibility to free() the result.
 *       This asserts on error, so callers can assume result is not NULL.
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    // Assert before doing pointer arithmetic with a possibly NULL end pointer
    pcmk__assert(end != NULL);
    base_name_len = end - last_rsc_id + 1;
    zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
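
/* Usage sketch (hypothetical ID; caller must free() the result):
 *
 *   char *first = clone_zero("galera:2");   // yields "galera:0"
 *   free(first);
 */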

static pcmk_resource_t *
create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
                     pcmk_scheduler_t *scheduler)
{
    pcmk_resource_t *rsc = NULL;
    xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);

    pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
    pcmk__xe_set(xml_rsc, PCMK_XA_ID, rsc_id);
    pcmk__log_xml_debug(xml_rsc, "Removed resource");

    if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
        return NULL;
    }

    if (xml_contains_remote_node(xml_rsc)) {
        pcmk_node_t *node;

        pcmk__debug("Detected removed remote node %s", rsc_id);
        node = pcmk_find_node(scheduler, rsc_id);
        if (node == NULL) {
            node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
                                  scheduler);
        }
        link_rsc2remotenode(scheduler, rsc);

        if (node) {
            pcmk__trace("Setting node %s as shutting down due to removed "
                        "connection resource", rsc_id);
            node->details->shutdown = TRUE;
        }
    }

    if (pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER)) {
        // This removed resource needs to be mapped to a launcher
        pcmk__trace("Launched resource %s was removed from the configuration",
                    rsc_id);
        pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
    }
    pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
    scheduler->priv->resources = g_list_append(scheduler->priv->resources,
                                               rsc);
    return rsc;
}

/*!
 * \internal
 * \brief Create "removed" instance for anonymous clone resource history
 *
 * \param[in,out] parent     Clone resource that instance will be added to
 * \param[in]     rsc_id     Instance's resource ID
 * \param[in]     node       Where instance is active (for logging only)
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Newly created "removed" instance of \p parent
 */
static pcmk_resource_t *
create_anonymous_removed_instance(pcmk_resource_t *parent, const char *rsc_id,
                                  const pcmk_node_t *node,
                                  pcmk_scheduler_t *scheduler)
{
    pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
    pcmk_resource_t *instance = NULL;

    // find_rsc() because we might be a cloned group
    instance = top->priv->fns->find_rsc(top, rsc_id, NULL,
                                        pcmk_rsc_match_clone_only);

    pcmk__rsc_debug(parent, "Created \"removed\" instance %s for %s: %s on %s",
                    top->id, parent->id, rsc_id, pcmk__node_name(node));
    return instance;
}

/*!
 * \internal
 * \brief Check a node for an instance of an anonymous clone
 *
 * Return a child instance of the specified anonymous clone, in order of
 * preference: (1) the instance running on the specified node, if any;
 * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
 * instances); (3) a newly created "removed" instance (that is,
 * \c PCMK_META_CLONE_MAX instances are already active).
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in]     node       Node on which to check for instance
 * \param[in,out] parent     Clone to check
 * \param[in]     rsc_id     Name of cloned resource in history (no instance)
 */
static pcmk_resource_t *
find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                     pcmk_resource_t *parent, const char *rsc_id)
{
    GList *rIter = NULL;
    pcmk_resource_t *rsc = NULL;
    pcmk_resource_t *inactive_instance = NULL;
    gboolean skip_inactive = FALSE;

    pcmk__assert(pcmk__is_anonymous_clone(parent));

    // Check for active (or partially active, for cloned groups) instance
    pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
                    rsc_id, pcmk__node_name(node), parent->id);

    for (rIter = parent->priv->children;
         (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {

        GList *locations = NULL;
        pcmk_resource_t *child = rIter->data;

        /* Check whether this instance is already known to be active or pending
         * anywhere, at this stage of unpacking. Because this function is
         * called for a resource before the resource's individual operation
         * history entries are unpacked, locations will generally not contain
         * the desired node.
         *
         * However, there are three exceptions:
         * (1) when child is a cloned group and we have already unpacked the
         *     history of another member of the group on the same node;
         * (2) when we've already unpacked the history of another numbered
         *     instance on the same node (which can happen if
         *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
         * (3) when we re-run calculations on the same scheduler data as part
         *     of a simulation.
         */
        child->priv->fns->location(child, &locations,
                                   pcmk__rsc_node_current
                                   |pcmk__rsc_node_pending);
        if (locations) {
            /* We should never associate the same numbered anonymous clone
             * instance with multiple nodes, and clone instances can't migrate,
             * so there must be only one location, regardless of history.
             */
            CRM_LOG_ASSERT(locations->next == NULL);

            if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
                /* This child instance is active on the requested node, so
                 * check for a corresponding configured resource. We use
                 * find_rsc() instead of child because child may be a cloned
                 * group, and we need the particular member corresponding to
                 * rsc_id.
                 *
                 * If the history entry represents a removed instance, rsc
                 * will be NULL.
                 */
                rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
                                                  pcmk_rsc_match_clone_only);
                if (rsc) {
                    /* If there are multiple instance history entries for an
                     * anonymous clone in a single node's history (which can
                     * happen if PCMK_META_GLOBALLY_UNIQUE is switched from
                     * true to false), we want to consider the instances beyond
                     * the first as removed, even if there are inactive
                     * instance numbers available.
                     */
                    if (rsc->priv->active_nodes != NULL) {
                        pcmk__notice("Active (now-)anonymous clone %s has "
                                     "multiple \"removed\" instance histories "
                                     "on %s",
                                     parent->id, pcmk__node_name(node));
                        skip_inactive = TRUE;
                        rsc = NULL;
                    } else {
                        pcmk__rsc_trace(parent, "Resource %s, active",
                                        rsc->id);
                    }
                }
            }
            g_list_free(locations);

        } else {
            pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
            if (!skip_inactive && !inactive_instance
                && !pcmk__is_set(child->flags, pcmk__rsc_blocked)) {
                // Remember one inactive instance in case we don't find active
                inactive_instance =
                    parent->priv->fns->find_rsc(child, rsc_id, NULL,
                                                pcmk_rsc_match_clone_only);

                /* ... but don't use it if it was already associated with a
                 * pending action on another node
                 */
                if (inactive_instance != NULL) {
                    const pcmk_node_t *pending_node = NULL;

                    pending_node = inactive_instance->priv->pending_node;
                    if ((pending_node != NULL)
                        && !pcmk__same_node(pending_node, node)) {
                        inactive_instance = NULL;
                    }
                }
            }
        }
    }

    if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
        pcmk__rsc_trace(parent, "Resource %s, empty slot",
                        inactive_instance->id);
        rsc = inactive_instance;
    }

    /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
     * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node,
     * we don't want to consume a valid instance number for unclean nodes.
     * Such instances may appear to be active according to the history, but
     * should be considered inactive, so we can start an instance elsewhere.
     * Treat such instances as removed.
     *
     * An exception is instances running on guest nodes -- since guest node
     * "fencing" is actually just a resource stop, requires shouldn't apply.
     *
     * @TODO Ideally, we'd use an inactive instance number if it is not needed
     * for any clean instances. However, we don't know that at this point.
     */
    if ((rsc != NULL) && !pcmk__is_set(rsc->flags, pcmk__rsc_needs_fencing)
        && (!node->details->online || node->details->unclean)
        && !pcmk__is_guest_or_bundle_node(node)
        && !pe__is_universal_clone(parent, scheduler)) {

        rsc = NULL;
    }

    if (rsc == NULL) {
        rsc = create_anonymous_removed_instance(parent, rsc_id, node,
                                                scheduler);
        pcmk__rsc_trace(parent, "Resource %s, removed", rsc->id);
    }
    return rsc;
}

static pcmk_resource_t *
unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                     const char *rsc_id)
{
    pcmk_resource_t *rsc = NULL;
    pcmk_resource_t *parent = NULL;

    pcmk__trace("looking for %s", rsc_id);
    rsc = pe_find_resource(scheduler->priv->resources, rsc_id);

    if (rsc == NULL) {
        /* If we didn't find the resource by its name in the operation history,
         * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
         * we create a single :0 "removed" instance to match against here.
         */
        char *clone0_id = clone_zero(rsc_id);
        pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
                                                   clone0_id);

        if ((clone0 != NULL)
            && !pcmk__is_set(clone0->flags, pcmk__rsc_unique)) {

            rsc = clone0;
            parent = uber_parent(clone0);
            pcmk__trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
        } else {
            pcmk__trace("%s is not known as %s either (removed)", rsc_id,
                        clone0_id);
        }
        free(clone0_id);

    } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
        pcmk__trace("Resource history for %s is considered removed "
                    "because it is no longer primitive", rsc_id);
        return NULL;

    } else {
        parent = uber_parent(rsc);
    }

    if (pcmk__is_anonymous_clone(parent)) {

        if (pcmk__is_bundled(parent)) {
            rsc = pe__find_bundle_replica(parent->priv->parent, node);
        } else {
            char *base = clone_strip(rsc_id);

            rsc = find_anonymous_clone(scheduler, node, parent, base);
            free(base);
            pcmk__assert(rsc != NULL);
        }
    }

    if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
        && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {

        const bool removed = pcmk__is_set(rsc->flags, pcmk__rsc_removed);

        pcmk__str_update(&(rsc->priv->history_id), rsc_id);
        pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
                        rsc_id, pcmk__node_name(node), rsc->id,
                        (removed? " (removed)" : ""));
    }
    return rsc;
}

static pcmk_resource_t *
process_removed_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
                         pcmk_scheduler_t *scheduler)
{
    pcmk_resource_t *rsc = NULL;
    const char *rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);

    pcmk__debug("Detected removed resource %s on %s", rsc_id,
                pcmk__node_name(node));
    rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
    if (rsc == NULL) {
        return NULL;
    }

    if (!pcmk__is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
        pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);

    } else {
        CRM_CHECK(rsc != NULL, return NULL);
        pcmk__rsc_trace(rsc, "Added \"removed\" resource %s", rsc->id);
        resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
                          "__removed_do_not_run__", scheduler);
    }
    return rsc;
}

static void
process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
                  enum pcmk__on_fail on_fail)
{
    pcmk_node_t *tmpnode = NULL;
    char *reason = NULL;
    enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
    pcmk_scheduler_t *scheduler = NULL;
    bool known_active = false;

    pcmk__assert(rsc != NULL);
    scheduler = rsc->priv->scheduler;
    known_active = (rsc->priv->orig_role > pcmk_role_stopped);
    pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                    rsc->id, pcmk_role_text(rsc->priv->orig_role),
                    pcmk__node_name(node), pcmk__on_fail_text(on_fail));

    /* process current state */
    if (rsc->priv->orig_role != pcmk_role_unknown) {
        pcmk_resource_t *iter = rsc;

        while (iter) {
            if (g_hash_table_lookup(iter->priv->probed_nodes,
                                    node->priv->id) == NULL) {
                pcmk_node_t *n = pe__copy_node(node);

                pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
                                rsc->id,
                                pcmk__s(rsc->priv->history_id, "the same"),
                                pcmk__node_name(n));
                g_hash_table_insert(iter->priv->probed_nodes,
                                    (gpointer) n->priv->id, n);
            }
            if (pcmk__is_set(iter->flags, pcmk__rsc_unique)) {
                break;
            }
            iter = iter->priv->parent;
        }
    }

    /* If a managed resource is believed to be running, but node is down ... */
    if (known_active && !node->details->online && !node->details->maintenance
        && pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {

        gboolean should_fence = FALSE;

        /* If this is a guest node, fence it (regardless of whether fencing is
         * enabled, because guest node fencing is done by recovery of the
         * container resource rather than by the fencer). Mark the resource
         * we're processing as failed. When the guest comes back up, its
         * operation history in the CIB will be cleared, freeing the affected
         * resource to run again once we are sure we know its state.
         */
        if (pcmk__is_guest_or_bundle_node(node)) {
            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            should_fence = TRUE;

        } else if (pcmk__is_set(scheduler->flags,
                                pcmk__sched_fencing_enabled)) {
            if (pcmk__is_remote_node(node)
                && (node->priv->remote != NULL)
                && !pcmk__is_set(node->priv->remote->flags,
                                 pcmk__rsc_failed)) {

                /* Setting unseen means that fencing of the remote node will
                 * occur only if the connection resource is not going to start
                 * somewhere. This allows connection resources on a failed
                 * cluster node to move to another node without requiring the
                 * remote nodes to be fenced as well.
                 */
                pcmk__clear_node_flags(node, pcmk__node_seen);
                reason = pcmk__assert_asprintf("%s is active there (fencing "
                                               "will be revoked if remote "
                                               "connection can be "
                                               "re-established elsewhere)",
                                               rsc->id);
            }
            should_fence = TRUE;
        }

        if (should_fence) {
            if (reason == NULL) {
                reason = pcmk__assert_asprintf("%s is thought to be active "
                                               "there",
                                               rsc->id);
            }
            pe_fence_node(scheduler, node, reason, FALSE);
        }
        free(reason);
    }

    /* In order to calculate priority_fencing_delay correctly, save the
     * failure information and pass it to native_add_running().
     */
    save_on_fail = on_fail;

    if (node->details->unclean) {
        /* No extra processing needed
         * Also allows resources to be started again after a node is shot
         */
        on_fail = pcmk__on_fail_ignore;
    }

    switch (on_fail) {
        case pcmk__on_fail_ignore:
            /* nothing to do */
            break;

        case pcmk__on_fail_demote:
            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
            demote_action(rsc, node, FALSE);
            break;

        case pcmk__on_fail_fence_node:
            /* treat it as if it is still running
             * but also mark the node as unclean
             */
            reason = pcmk__assert_asprintf("%s failed there", rsc->id);
            pe_fence_node(scheduler, node, reason, FALSE);
            free(reason);
            break;

        case pcmk__on_fail_standby_node:
            pcmk__set_node_flags(node,
                                 pcmk__node_standby|pcmk__node_fail_standby);
            break;

        case pcmk__on_fail_block:
            /* is_managed == FALSE will prevent any
             * actions being sent for the resource
             */
            pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
            pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
            break;

        case pcmk__on_fail_ban:
            /* make sure it comes up somewhere else
             * or not at all
             */
            resource_location(rsc, node, -PCMK_SCORE_INFINITY,
                              "__action_migration_auto__", scheduler);
            break;

        case pcmk__on_fail_stop:
            pe__set_next_role(rsc, pcmk_role_stopped,
                              PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
            break;

        case pcmk__on_fail_restart:
            if (known_active) {
                pcmk__set_rsc_flags(rsc,
                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
                stop_action(rsc, node, FALSE);
            }
            break;

        case pcmk__on_fail_restart_container:
            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
                /* A bundle's remote connection can run on a different node
                 * than the bundle's container. We don't necessarily know where
                 * the container is running yet, so remember it and add a stop
                 * action for it later.
                 */
                scheduler->priv->stop_needed =
                    g_list_prepend(scheduler->priv->stop_needed,
                                   rsc->priv->launcher);
            } else if (rsc->priv->launcher != NULL) {
                stop_action(rsc->priv->launcher, node, FALSE);
            } else if (known_active) {
                stop_action(rsc, node, FALSE);
            }
            break;

        case pcmk__on_fail_reset_remote:
            pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
                tmpnode = NULL;
                if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
                    tmpnode = pcmk_find_node(scheduler, rsc->id);
                }
                if (pcmk__is_remote_node(tmpnode)
                    && !pcmk__is_set(tmpnode->priv->flags,
                                     pcmk__node_remote_fenced)) {
                    /* The remote connection resource failed in a way that
                     * should result in fencing the remote node.
                     */
                    pe_fence_node(scheduler, tmpnode,
                                  "remote connection is unrecoverable", FALSE);
                }
            }

            /* Require the stop action regardless of whether fencing is
             * occurring
             */
            if (known_active) {
                stop_action(rsc, node, FALSE);
            }

            /* If reconnect delay is in use, prevent the connection from
             * exiting the "STOPPED" role until the failure is cleared by the
             * delay timeout
             */
            if (rsc->priv->remote_reconnect_ms > 0U) {
                pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
            }
            break;
    }

    /* Ensure a remote connection failure forces an unclean Pacemaker Remote
     * node to be fenced. By marking the node as seen, the failure will result
     * in a fencing operation regardless of whether we will attempt to
     * reconnect in this transition.
     */
    if (pcmk__all_flags_set(rsc->flags,
                            pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
        tmpnode = pcmk_find_node(scheduler, rsc->id);
        if (tmpnode && tmpnode->details->unclean) {
            pcmk__set_node_flags(tmpnode, pcmk__node_seen);
        }
    }

    if (known_active) {
        if (pcmk__is_set(rsc->flags, pcmk__rsc_removed)) {
            if (pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
                pcmk__notice("Removed resource %s is active on %s and will be "
                             "stopped when possible",
                             rsc->id, pcmk__node_name(node));

            } else {
                pcmk__notice("Removed resource %s must be stopped manually on "
                             "%s because " PCMK__OPT_STOP_REMOVED_RESOURCES
                             " is set to false",
                             rsc->id, pcmk__node_name(node));
            }
        }

        native_add_running(rsc, node, scheduler,
                           (save_on_fail != pcmk__on_fail_ignore));
        switch (on_fail) {
            case pcmk__on_fail_ignore:
                break;
            case pcmk__on_fail_demote:
            case pcmk__on_fail_block:
                pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
                break;
            default:
                pcmk__set_rsc_flags(rsc,
                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
                break;
        }

    } else if ((rsc->priv->history_id != NULL)
               && (strchr(rsc->priv->history_id, ':') != NULL)) {
        /* @COMPAT This is for older (<1.1.8) status sections that included
         * instance numbers, otherwise stopped instances are considered
         * removed.
         *
         * @TODO We should be able to drop this, but some old regression tests
         * will need to be updated. Double-check that this is not still needed
         * for unique clones (which may have been later converted to
         * anonymous).
         */
        pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
                        rsc->priv->history_id, rsc->id);
        g_clear_pointer(&rsc->priv->history_id, free);

    } else {
        GList *possible_matches = pe__resource_actions(rsc, node,
                                                       PCMK_ACTION_STOP,
                                                       FALSE);
        GList *gIter = possible_matches;

        for (; gIter != NULL; gIter = gIter->next) {
            pcmk_action_t *stop = (pcmk_action_t *) gIter->data;

            pcmk__set_action_flags(stop, pcmk__action_optional);
        }

        g_list_free(possible_matches);
    }

    /* A successful stop after migrate_to on the migration source doesn't make
     * the partially migrated resource stopped on the migration target.
     */
    if ((rsc->priv->orig_role == pcmk_role_stopped)
        && (rsc->priv->active_nodes != NULL)
        && (rsc->priv->partial_migration_target != NULL)
        && pcmk__same_node(rsc->priv->partial_migration_source, node)) {

        rsc->priv->orig_role = pcmk_role_started;
    }
}

/* create active recurring operations as optional */
static void
process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
                  int start_index, int stop_index,
                  GList *sorted_op_list, pcmk_scheduler_t *scheduler)
{
    int counter = -1;
    const char *task = NULL;
    const char *status = NULL;
    GList *gIter = sorted_op_list;

    pcmk__assert(rsc != NULL);
    pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
                    rsc->id, start_index, stop_index);

    for (; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        guint interval_ms = 0;
        char *key = NULL;
        const char *id = pcmk__xe_id(rsc_op);

        counter++;

        if (node->details->online == FALSE) {
            pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
                            rsc->id, pcmk__node_name(node));
            break;

        /* Need to check if there's a monitor for role="Stopped" */
        } else if (start_index < stop_index && counter <= stop_index) {
            pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
                            id, pcmk__node_name(node));
            continue;

        } else if (counter < start_index) {
            pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
                            id, pcmk__node_name(node), counter);
            continue;
        }

        pcmk__xe_get_guint(rsc_op, PCMK_META_INTERVAL, &interval_ms);
        if (interval_ms == 0) {
            pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
                            id, pcmk__node_name(node));
            continue;
        }

        status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);
        if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
            pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
                            id, pcmk__node_name(node));
            continue;
        }
        task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
        /* create the action */
        key = pcmk__op_key(rsc->id, task, interval_ms);
        pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
        custom_action(rsc, key, task, node, TRUE, scheduler);
    }
}

void
calculate_active_ops(const GList *sorted_op_list, int *start_index,
                     int *stop_index)
{
    int counter = -1;
    int implied_monitor_start = -1;
    int implied_clone_start = -1;
    const char *task = NULL;
    const char *status = NULL;

    *stop_index = -1;
    *start_index = -1;

    for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
        const xmlNode *rsc_op = (const xmlNode *) iter->data;

        counter++;

        task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION);
        status = pcmk__xe_get(rsc_op, PCMK__XA_OP_STATUS);

        if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
            && pcmk__str_eq(status, "0", pcmk__str_casei)) {
            *stop_index = counter;

        } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
                                        PCMK_ACTION_MIGRATE_FROM, NULL)) {
            *start_index = counter;

        } else if ((implied_monitor_start <= *stop_index)
                   && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
                                   pcmk__str_casei)) {
            const char *rc = pcmk__xe_get(rsc_op, PCMK__XA_RC_CODE);

            if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
                implied_monitor_start = counter;
            }
        } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
                                        PCMK_ACTION_DEMOTE, NULL)) {
            implied_clone_start = counter;
        }
    }

    if (*start_index == -1) {
        if (implied_clone_start != -1) {
            *start_index = implied_clone_start;
        } else if (implied_monitor_start != -1) {
            *start_index = implied_monitor_start;
        }
    }
}
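
/* Worked example (hypothetical sorted history): given
 *   [0] start, [1] monitor (rc-code 0), [2] stop (op-status 0)
 * this yields *start_index == 0 and *stop_index == 2, so process_recurring()
 * skips the recurring monitor because start_index < stop_index (the resource
 * was stopped after it last started). A monitor with rc-code 0 or 8
 * (running/promoted) can also imply a start when no explicit one remains.
 */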

// If resource history entry has shutdown lock, remember lock node and time
static void
unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
                     const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
    time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
    time_t sched_time = 0;
    guint shutdown_lock_ms = scheduler->priv->shutdown_lock_ms;

    pcmk__xe_get_time(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time);
    if (lock_time == 0) {
        return;
    }

    sched_time = pcmk__scheduler_epoch_time(scheduler);
    if ((shutdown_lock_ms > 0U)
        && (sched_time > (lock_time + pcmk__timeout_ms2s(shutdown_lock_ms)))) {

        pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
                       rsc->id, pcmk__node_name(node));
        pe__clear_resource_history(rsc, node);

    } else {
        rsc->priv->lock_node = node;
        rsc->priv->lock_time = lock_time;
    }
}
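
/* Worked example (hypothetical values): with the shutdown lock limit set to
 * 10 minutes (shutdown_lock_ms == 600000) and a lock recorded at epoch time
 * 1000, the lock is remembered until effective time 1600; after that, the
 * resource's history on the node is cleared and the lock is dropped.
 */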

/*!
 * \internal
 * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
 *
 * \param[in,out] node          Node whose status is being unpacked
 * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
 * \param[in,out] scheduler     Scheduler data
 *
 * \return Resource corresponding to the entry, or NULL if no operation
 *         history
 */
static pcmk_resource_t *
unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
                    pcmk_scheduler_t *scheduler)
{
    GList *gIter = NULL;
    int stop_index = -1;
    int start_index = -1;
    enum rsc_role_e req_role = pcmk_role_unknown;

    const char *rsc_id = pcmk__xe_id(lrm_resource);

    pcmk_resource_t *rsc = NULL;
    GList *op_list = NULL;
    GList *sorted_op_list = NULL;

    xmlNode *rsc_op = NULL;
    xmlNode *last_failure = NULL;

    enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
    enum rsc_role_e saved_role = pcmk_role_unknown;

    if (rsc_id == NULL) {
        pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
                         " entry: No " PCMK_XA_ID);
        pcmk__log_xml_info(lrm_resource, "missing-id");
        return NULL;
    }
    pcmk__trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id,
                pcmk__node_name(node));

    /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
     * them
     */
    for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
                                       NULL);
         rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {

        op_list = g_list_prepend(op_list, rsc_op);
    }

    if (!pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
        if (op_list == NULL) {
            // If there are no operations, there is nothing to do
            return NULL;
        }
    }

    /* find the resource */
    rsc = unpack_find_resource(scheduler, node, rsc_id);
    if (rsc == NULL) {
        if (op_list == NULL) {
            // If there are no operations, there is nothing to do
            return NULL;
        } else {
            rsc = process_removed_resource(lrm_resource, node, scheduler);
        }
    }
    pcmk__assert(rsc != NULL);

    // Check whether the resource is "shutdown-locked" to this node
    if (pcmk__is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
        unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
    }

    /* process operations */
    saved_role = rsc->priv->orig_role;
    rsc->priv->orig_role = pcmk_role_unknown;
    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
    }

    /* create active recurring operations as optional */
    calculate_active_ops(sorted_op_list, &start_index, &stop_index);
    process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
                      scheduler);

    /* no need to free the contents */
    g_list_free(sorted_op_list);

    process_rsc_state(rsc, node, on_fail);

    if (get_target_role(rsc, &req_role)) {
        if ((rsc->priv->next_role == pcmk_role_unknown)
            || (req_role < rsc->priv->next_role)) {

            pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);

        } else if (req_role > rsc->priv->next_role) {
            pcmk__rsc_info(rsc,
                           "%s: Not overwriting calculated next role %s"
                           " with requested next role %s",
                           rsc->id, pcmk_role_text(rsc->priv->next_role),
                           pcmk_role_text(req_role));
        }
    }

    if (saved_role > rsc->priv->orig_role) {
        rsc->priv->orig_role = saved_role;
    }

    return rsc;
}
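
/* For illustration only, a hypothetical entry this function unpacks might
 * look like:
 *
 *   <lrm_resource id="myrsc" class="ocf" provider="pacemaker" type="Dummy">
 *     <lrm_rsc_op id="myrsc_last_0" operation="start" call-id="3"
 *                 rc-code="0" op-status="0" .../>
 *   </lrm_resource>
 */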

static void
handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
                                  pcmk_scheduler_t *scheduler)
{
    for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
                                                         PCMK__XE_LRM_RESOURCE,
                                                         NULL, NULL);
         rsc_entry != NULL;
         rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {

        pcmk_resource_t *rsc;
        pcmk_resource_t *launcher = NULL;
        const char *rsc_id;
        const char *launcher_id = NULL;

        launcher_id = pcmk__xe_get(rsc_entry, PCMK__META_CONTAINER);
        rsc_id = pcmk__xe_get(rsc_entry, PCMK_XA_ID);
        if ((launcher_id == NULL) || (rsc_id == NULL)) {
            continue;
        }

        launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
        if (launcher == NULL) {
            continue;
        }

        rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
        if ((rsc == NULL) || (rsc->priv->launcher != NULL)
            || !pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
            continue;
        }

        pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
                        rsc->id, launcher_id);
        rsc->priv->launcher = launcher;
        launcher->priv->launched = g_list_append(launcher->priv->launched,
                                                 rsc);
    }
}

/*!
 * \internal
 * \brief Unpack one node's lrm status section
 *
 * \param[in,out] node       Node whose status is being unpacked
 * \param[in]     xml        CIB node state XML
 * \param[in,out] scheduler  Scheduler data
 */
static void
unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
                pcmk_scheduler_t *scheduler)
{
    bool found_removed_launched_resource = false;

    // Drill down to PCMK__XE_LRM_RESOURCES section
    xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
    if (xml == NULL) {
        return;
    }
    xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
    if (xml == NULL) {
        return;
    }

    // Unpack each PCMK__XE_LRM_RESOURCE entry
    for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
                                                         PCMK__XE_LRM_RESOURCE,
                                                         NULL, NULL);
         rsc_entry != NULL;
         rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {

        pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);

        if ((rsc != NULL)
            && pcmk__is_set(rsc->flags, pcmk__rsc_removed_launched)) {
            found_removed_launched_resource = true;
        }
    }

    /* Now that all resource state has been unpacked for this node, map any
     * removed launched resources to their launchers.
     */
    if (found_removed_launched_resource) {
        handle_removed_launched_resources(xml, scheduler);
    }
}

static void
set_active(pcmk_resource_t *rsc)
{
    const pcmk_resource_t *top = pe__const_top_resource(rsc, false);

    if ((top != NULL) && pcmk__is_set(top->flags, pcmk__rsc_promotable)) {
        rsc->priv->orig_role = pcmk_role_unpromoted;
    } else {
        rsc->priv->orig_role = pcmk_role_started;
    }
}

static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
    pcmk_node_t *node = value;
    int *score = user_data;

    node->assign->score = *score;
}

#define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
                         "/" PCMK__XE_NODE_STATE
#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
                               "/" PCMK__XE_LRM_RESOURCES \
                               "/" PCMK__XE_LRM_RESOURCE
#define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP

static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node,
            const char *source, int target_rc, pcmk_scheduler_t *scheduler)
{
    GString *xpath = NULL;
    xmlNode *xml = NULL;

    CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
              return NULL);

    xpath = g_string_sized_new(256);
    pcmk__g_strcat(xpath,
                   XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
                   SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
                   SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
                   NULL);

    /* Need to check against transition_magic too? */
    if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
        pcmk__g_strcat(xpath,
                       " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
                       NULL);

    } else if ((source != NULL)
               && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
        pcmk__g_strcat(xpath,
                       " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
                       NULL);
    } else {
        g_string_append_c(xpath, ']');
    }

    xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
    g_string_free(xpath, TRUE);

    if (xml && target_rc >= 0) {
        int rc = PCMK_OCF_UNKNOWN_ERROR;
        int status = PCMK_EXEC_ERROR;

        pcmk__xe_get_int(xml, PCMK__XA_RC_CODE, &rc);
        pcmk__xe_get_int(xml, PCMK__XA_OP_STATUS, &status);
        if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
            return NULL;
        }
    }
    return xml;
}
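
/* For example, find_lrm_op("myrsc", PCMK_ACTION_MONITOR, "node1", NULL, 7,
 * scheduler) (hypothetical resource and node names) searches with an XPath
 * equivalent to:
 *
 *   /cib/status/node_state[@uname='node1']
 *       /lrm/lrm_resources/lrm_resource[@id='myrsc']
 *           /lrm_rsc_op[@operation='monitor']
 *
 * and, because target_rc is 7 (not running), returns the match only if it
 * completed (PCMK_EXEC_DONE) with that rc-code.
 */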

static xmlNode *
find_lrm_resource(const char *rsc_id, const char *node_name,
                  pcmk_scheduler_t *scheduler)
{
    GString *xpath = NULL;
    xmlNode *xml = NULL;

    CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);

    xpath = g_string_sized_new(256);
    pcmk__g_strcat(xpath,
                   XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
                   SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
                   NULL);

    xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);

    g_string_free(xpath, TRUE);
    return xml;
}

/*!
 * \internal
 * \brief Check whether a resource has no completed action history on a node
 *
 * \param[in,out] rsc        Resource to check
 * \param[in]     node_name  Node to check
 *
 * \return true if \p rsc is unknown on \p node_name, otherwise false
 */
static bool
unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
{
    bool result = false;
    xmlXPathObject *search;
    char *xpath = NULL;

    xpath = pcmk__assert_asprintf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
                                  SUB_XPATH_LRM_RESOURCE
                                  "[@" PCMK_XA_ID "='%s']"
                                  SUB_XPATH_LRM_RSC_OP
                                  "[@" PCMK__XA_RC_CODE "!='%d']",
                                  node_name, rsc->id, PCMK_OCF_UNKNOWN);

    search = pcmk__xpath_search(rsc->priv->scheduler->input->doc, xpath);
    result = (pcmk__xpath_num_results(search) == 0);
    xmlXPathFreeObject(search);
    free(xpath);
    return result;
}
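
/* Design note: the XPath counts only entries whose rc-code differs from
 * PCMK_OCF_UNKNOWN because (as this sketch assumes) pending operations are
 * recorded with that rc-code; a resource whose only history on the node is
 * pending is therefore still "unknown" there.
 */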

/*!
 * \internal
 * \brief Check whether a probe/monitor indicating the resource was not
 *        running on a node happened after some event
 *
 * \param[in]     rsc_id     Resource being checked
 * \param[in]     node_name  Node being checked
 * \param[in]     xml_op     Event that monitor is being compared to
 * \param[in,out] scheduler  Scheduler data
 *
 * \return true if such a monitor happened after the event, false otherwise
 */
static bool
monitor_not_running_after(const char *rsc_id, const char *node_name,
                          const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
{
    /* Any probe/monitor operation on the node indicating it was not running
     * there
     */
    xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
                                   NULL, PCMK_OCF_NOT_RUNNING, scheduler);

    return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
}

/*!
 * \internal
 * \brief Check whether any non-monitor operation on a node happened after
 *        some event
 *
 * \param[in]     rsc_id     Resource being checked
 * \param[in]     node_name  Node being checked
 * \param[in]     xml_op     Event that non-monitor is being compared to
 * \param[in,out] scheduler  Scheduler data
 *
 * \return true if such an operation happened after the event, false otherwise
 */
static bool
non_monitor_after(const char *rsc_id, const char *node_name,
                  const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
{
    xmlNode *lrm_resource = NULL;

    lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
    if (lrm_resource == NULL) {
        return false;
    }

    for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
                                            NULL, NULL);
         op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {

        const char *task = NULL;

        if (op == xml_op) {
            continue;
        }

        task = pcmk__xe_get(op, PCMK_XA_OPERATION);

        if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
                             PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
                             NULL)
            && (pe__is_newer_op(op, xml_op) > 0)) {
            return true;
        }
    }

    return false;
}

/*!
 * \internal
 * \brief Check whether the resource has newer state on a node after a
 *        migration attempt
 *
 * \param[in]     rsc_id        Resource being checked
 * \param[in]     node_name     Node being checked
 * \param[in]     migrate_to    Any migrate_to event that is being compared to
 * \param[in]     migrate_from  Any migrate_from event that is being compared
 *                              to
 * \param[in,out] scheduler     Scheduler data
 *
 * \return true if such an operation happened after the event, false otherwise
 */
static bool
newer_state_after_migrate(const char *rsc_id, const char *node_name,
                          const xmlNode *migrate_to,
                          const xmlNode *migrate_from,
                          pcmk_scheduler_t *scheduler)
{
    const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
    const char *source = pcmk__xe_get(xml_op, PCMK__META_MIGRATE_SOURCE);

    /* Prefer comparing to the migrate event on the same node, if one exists,
     * since call IDs are more reliable.
     */
    if ((xml_op != migrate_to) && (migrate_to != NULL)
        && pcmk__str_eq(node_name, source, pcmk__str_casei)) {

        xml_op = migrate_to;
    }

    /* If there's any newer non-monitor operation on the node, or any newer
     * probe/monitor operation on the node indicating it was not running
     * there, the migration events potentially no longer matter for the node.
     */
    return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
           || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
}

/*!
 * \internal
 * \brief Parse migration source and target node names from history entry
 *
 * \param[in]  entry        Resource history entry for a migration action
 * \param[in]  source_node  If not NULL, source must match this node
 * \param[in]  target_node  If not NULL, target must match this node
 * \param[out] source_name  Where to store migration source node name
 * \param[out] target_name  Where to store migration target node name
 *
 * \return Standard Pacemaker return code
 */
static int
get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
                         const pcmk_node_t *target_node,
                         const char **source_name, const char **target_name)
{
    *source_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_SOURCE);
    *target_name = pcmk__xe_get(entry, PCMK__META_MIGRATE_TARGET);
    if ((*source_name == NULL) || (*target_name == NULL)) {
        pcmk__config_err("Ignoring resource history entry %s without "
                         PCMK__META_MIGRATE_SOURCE " and "
                         PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
        return pcmk_rc_unpack_error;
    }

    if ((source_node != NULL)
        && !pcmk__str_eq(*source_name, source_node->priv->name,
                         pcmk__str_casei|pcmk__str_null_matches)) {
        pcmk__config_err("Ignoring resource history entry %s because "
                         PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
                         pcmk__xe_id(entry), *source_name,
                         pcmk__node_name(source_node));
        return pcmk_rc_unpack_error;
    }

    if ((target_node != NULL)
        && !pcmk__str_eq(*target_name, target_node->priv->name,
                         pcmk__str_casei|pcmk__str_null_matches)) {
        pcmk__config_err("Ignoring resource history entry %s because "
                         PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
                         pcmk__xe_id(entry), *target_name,
                         pcmk__node_name(target_node));
        return pcmk_rc_unpack_error;
    }

    return pcmk_rc_ok;
}
3265
3266 /*
3267 * \internal
3268 * \brief Add a migration source to a resource's list of dangling migrations
3269 *
3270 * If the migrate_to and migrate_from actions in a live migration both
3271 * succeeded, but there is no stop on the source, the migration is considered
3272 * "dangling." Add the source to the resource's dangling migration list, which
3273 * will be used to schedule a stop on the source without affecting the target.
3274 *
3275 * \param[in,out] rsc Resource involved in migration
3276 * \param[in] node Migration source
3277 */
3278 static void
3279 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3280 {
3281 pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3282 rsc->id, pcmk__node_name(node));
3283 rsc->priv->orig_role = pcmk_role_stopped;
3284 rsc->priv->dangling_migration_sources =
3285 g_list_prepend(rsc->priv->dangling_migration_sources,
3286 (gpointer) node);
3287 }
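
/* Hedged illustration: if rsc1's history shows a successful migrate_to on
 * node1 and a successful migrate_from on node2, but no subsequent stop on
 * node1, then node1 is a dangling migration source. Recording it here lets
 * the scheduler stop rsc1 on node1 later without disturbing it on node2.
 */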
3288
3289 /*!
3290 * \internal
3291 * \brief Update resource role etc. after a successful migrate_to action
3292 *
3293 * \param[in,out] history Parsed action result history
3294 */
3295 static void
3296 unpack_migrate_to_success(struct action_history *history)
3297 {
3298 /* A complete migration sequence is:
3299 * 1. migrate_to on source node (which succeeded if we get to this function)
3300 * 2. migrate_from on target node
3301 * 3. stop on source node
3302 *
3303 * If no migrate_from has happened, the migration is considered to be
3304 * "partial". If the migrate_from succeeded but no stop has happened, the
3305 * migration is considered to be "dangling".
3306 *
3307 * If a successful migrate_to and stop have happened on the source node, we
3308 * still need to check for a partial migration, due to scenarios (easier to
3309 * produce with batch-limit=1) like:
3310 *
3311 * - A resource is migrating from node1 to node2, and a migrate_to is
3312 * initiated for it on node1.
3313 *
3314 * - node2 goes into standby mode while the migrate_to is pending, which
3315 * aborts the transition.
3316 *
3317 * - Upon completion of the migrate_to, a new transition schedules a stop
3318 * on both nodes and a start on node1.
3319 *
3320 * - If the new transition is aborted for any reason while the resource is
3321 * stopping on node1, the transition after that stop completes will see
3322 * the migrate_to and stop on the source, but it's still a partial
3323 * migration, and the resource must be stopped on node2 because it is
3324 * potentially active there due to the migrate_to.
3325 *
3326 * We also need to take into account that either node's history may be
3327 * cleared at any point in the migration process.
3328 */
3329 int from_rc = PCMK_OCF_OK;
3330 int from_status = PCMK_EXEC_PENDING;
3331 pcmk_node_t *target_node = NULL;
3332 xmlNode *migrate_from = NULL;
3333 const char *source = NULL;
3334 const char *target = NULL;
3335 bool source_newer_op = false;
3336 bool target_newer_state = false;
3337 bool active_on_target = false;
3338 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3339
3340 // Get source and target node names from XML
3341 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3342 &target) != pcmk_rc_ok) {
3343 return;
3344 }
3345
3346 // Check for newer state on the source
3347 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3348 scheduler);
3349
3350 // Check for a migrate_from action from this source on the target
3351 migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3352 target, source, -1, scheduler);
3353 if (migrate_from != NULL) {
3354 if (source_newer_op) {
3355 /* There's a newer non-monitor operation on the source and a
3356 * migrate_from on the target, so this migrate_to is irrelevant to
3357 * the resource's state.
3358 */
3359 return;
3360 }
3361 pcmk__xe_get_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3362 pcmk__xe_get_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3363 }
3364
3365 /* If the resource has newer state on both the source and target after the
3366 * migration events, this migrate_to is irrelevant to the resource's state.
3367 */
3368 target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3369 history->xml, migrate_from,
3370 scheduler);
3371 if (source_newer_op && target_newer_state) {
3372 return;
3373 }
3374
3375 /* Check for dangling migration (migrate_from succeeded but stop not done).
3376 * We know there's no stop because we already returned if the target has a
3377 * migrate_from and the source has any newer non-monitor operation.
3378 */
3379 if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3380 add_dangling_migration(history->rsc, history->node);
3381 return;
3382 }
3383
3384 /* Without newer state, this migrate_to implies the resource is active.
3385 * (Clones are not allowed to migrate, so role can't be promoted.)
3386 */
3387 history->rsc->priv->orig_role = pcmk_role_started;
3388
3389 target_node = pcmk_find_node(scheduler, target);
3390 active_on_target = !target_newer_state && (target_node != NULL)
3391 && target_node->details->online;
3392
3393 if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3394 if (active_on_target) {
3395 native_add_running(history->rsc, target_node, scheduler, TRUE);
3396 } else {
3397 // Mark resource as failed, require recovery, and prevent migration
3398 pcmk__set_rsc_flags(history->rsc,
3399 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3400 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3401 }
3402 return;
3403 }
3404
3405 // The migrate_from is pending, complete but erased, or to be scheduled
3406
3407 /* If there is no history at all for the resource on an online target, then
3408 * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3409 * have the probe result, it will be reflected in target_newer_state.
3410 */
3411 if ((target_node != NULL) && target_node->details->online
3412 && unknown_on_node(history->rsc, target)) {
3413 return;
3414 }
3415
3416 if (active_on_target) {
3417 pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3418
3419 native_add_running(history->rsc, target_node, scheduler, FALSE);
3420 if ((source_node != NULL) && source_node->details->online) {
3421 /* This is a partial migration: the migrate_to completed
3422 * successfully on the source, but the migrate_from has not
3423 * completed. Remember the source and target; if the newly
3424 * chosen target remains the same when we schedule actions
3425 * later, we may continue with the migration.
3426 */
3427 history->rsc->priv->partial_migration_target = target_node;
3428 history->rsc->priv->partial_migration_source = source_node;
3429 }
3430
3431 } else if (!source_newer_op) {
3432 // Mark resource as failed, require recovery, and prevent migration
3433 pcmk__set_rsc_flags(history->rsc,
3434 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3435 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3436 }
3437 }
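
/* Hedged illustration of the partial-migration branch above: if rsc1's
 * migrate_to succeeded on node1 but node2 has no migrate_from recorded, and
 * both nodes are online with no newer state anywhere, rsc1 is considered
 * active on node2 too, and partial_migration_source/_target are remembered
 * so the migration may be continued when actions are scheduled later.
 */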
3438
3439 /*!
3440 * \internal
3441 * \brief Update resource role etc. after a failed migrate_to action
3442 *
3443 * \param[in,out] history Parsed action result history
3444 */
3445 static void
3446 unpack_migrate_to_failure(struct action_history *history)
3447 {
3448 xmlNode *target_migrate_from = NULL;
3449 const char *source = NULL;
3450 const char *target = NULL;
3451 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3452
3453 // Get source and target node names from XML
3454 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3455 &target) != pcmk_rc_ok) {
3456 return;
3457 }
3458
3459 /* If a migration failed, we have to assume the resource is active. Clones
3460 * are not allowed to migrate, so role can't be promoted.
3461 */
3462 history->rsc->priv->orig_role = pcmk_role_started;
3463
3464 // Check for migrate_from on the target
3465 target_migrate_from = find_lrm_op(history->rsc->id,
3466 PCMK_ACTION_MIGRATE_FROM, target, source,
3467 PCMK_OCF_OK, scheduler);
3468
    if (/* If the resource state is unknown on the target, it will likely be
         * probed there. Don't assume it's running there yet; we'll get back
         * here anyway if the probe detects it running.
         */
3474 !unknown_on_node(history->rsc, target)
3475 /* If the resource has newer state on the target after the migration
3476 * events, this migrate_to no longer matters for the target.
3477 */
3478 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3479 target_migrate_from, scheduler)) {
        /* The resource has no newer state on the target, so assume it's still
         * active there (if the node is up).
         */
3484 pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3485
3486 if (target_node && target_node->details->online) {
3487 native_add_running(history->rsc, target_node, scheduler, FALSE);
3488 }
3489
3490 } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3491 scheduler)) {
3492 /* We know the resource has newer state on the target, but this
3493 * migrate_to still matters for the source as long as there's no newer
3494 * non-monitor operation there.
3495 */
3496
3497 // Mark node as having dangling migration so we can force a stop later
3498 history->rsc->priv->dangling_migration_sources =
3499 g_list_prepend(history->rsc->priv->dangling_migration_sources,
3500 (gpointer) history->node);
3501 }
3502 }
3503
3504 /*!
3505 * \internal
3506 * \brief Update resource role etc. after a failed migrate_from action
3507 *
3508 * \param[in,out] history Parsed action result history
3509 */
3510 static void
3511 unpack_migrate_from_failure(struct action_history *history)
3512 {
3513 xmlNode *source_migrate_to = NULL;
3514 const char *source = NULL;
3515 const char *target = NULL;
3516 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3517
3518 // Get source and target node names from XML
3519 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3520 &target) != pcmk_rc_ok) {
3521 return;
3522 }
3523
3524 /* If a migration failed, we have to assume the resource is active. Clones
3525 * are not allowed to migrate, so role can't be promoted.
3526 */
3527 history->rsc->priv->orig_role = pcmk_role_started;
3528
3529 // Check for a migrate_to on the source
3530 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3531 source, target, PCMK_OCF_OK, scheduler);
3532
    if (/* If the resource state is unknown on the source, it will likely be
         * probed there. Don't assume it's running there yet; we'll get back
         * here anyway if the probe detects it running.
         */
3538 !unknown_on_node(history->rsc, source)
3539 /* If the resource has newer state on the source after the migration
3540 * events, this migrate_from no longer matters for the source.
3541 */
3542 && !newer_state_after_migrate(history->rsc->id, source,
3543 source_migrate_to, history->xml,
3544 scheduler)) {
3545 /* The resource has no newer state on the source, so assume it's still
3546 * active there (if it is up).
3547 */
3548 pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3549
3550 if (source_node && source_node->details->online) {
3551 native_add_running(history->rsc, source_node, scheduler, TRUE);
3552 }
3553 }
3554 }
3555
3556 /*!
3557 * \internal
 * \brief Add an action to the cluster's list of failed actions
3559 *
3560 * \param[in,out] history Parsed action result history
3561 */
3562 static void
3563 record_failed_op(struct action_history *history)
3564 {
3565 const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3566
3567 if (!(history->node->details->online)) {
3568 return;
3569 }
3570
3571 for (const xmlNode *xIter = scheduler->priv->failed->children;
3572 xIter != NULL; xIter = xIter->next) {
3573
3574 const char *key = pcmk__xe_history_key(xIter);
3575 const char *uname = pcmk__xe_get(xIter, PCMK_XA_UNAME);
3576
3577 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3578 && pcmk__str_eq(uname, history->node->priv->name,
3579 pcmk__str_casei)) {
3580 pcmk__trace("Skipping duplicate entry %s on %s", history->key,
3581 pcmk__node_name(history->node));
3582 return;
3583 }
3584 }
3585
3586 pcmk__trace("Adding entry for %s on %s to failed action list",
3587 history->key, pcmk__node_name(history->node));
3588 pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3589 pcmk__xe_set(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3590 pcmk__xml_copy(scheduler->priv->failed, history->xml);
3591 }
3592
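/*!
 * \internal
 * \brief Render a history entry's last-change time as a displayable string
 *
 * As a hedged example, if PCMK_XA_LAST_RC_CHANGE renders as something like
 * "Wed Dec 31 12:00:00 2024" (format shown for illustration only), the day
 * of week is skipped, yielding "Dec 31 12:00:00 2024". If the attribute is
 * missing or unparsable, "unknown_time" is returned.
 *
 * \param[in] xml_op  History entry XML
 *
 * \return Newly allocated string with displayable timestamp (never NULL)
 * \note The caller is responsible for freeing the return value.
 */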
3593 static char *
3594 last_change_str(const xmlNode *xml_op)
3595 {
3596 time_t when;
3597 char *result = NULL;
3598
3599 if (pcmk__xe_get_time(xml_op, PCMK_XA_LAST_RC_CHANGE,
3600 &when) == pcmk_rc_ok) {
3601 char *when_s = pcmk__epoch2str(&when, 0);
3602 const char *p = strchr(when_s, ' ');
3603
3604 // Skip day of week to make message shorter
3605 if ((p != NULL) && (*(++p) != '\0')) {
3606 result = pcmk__str_copy(p);
3607 }
3608 free(when_s);
3609 }
3610
3611 if (result == NULL) {
3612 result = pcmk__str_copy("unknown_time");
3613 }
3614
3615 return result;
3616 }
3617
3618 /*!
3619 * \internal
3620 * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3621 *
3622 * \param[in,out] rsc Resource to ban
3623 */
3624 static void
3625 ban_from_all_nodes(pcmk_resource_t *rsc)
3626 {
3627 int score = -PCMK_SCORE_INFINITY;
3628 const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3629
3630 if (rsc->priv->parent != NULL) {
3631 pcmk_resource_t *parent = uber_parent(rsc);
3632
3633 if (pcmk__is_anonymous_clone(parent)) {
3634 /* For anonymous clones, if an operation with
3635 * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3636 * entire clone must stop.
3637 */
3638 rsc = parent;
3639 }
3640 }
3641
3642 // Ban the resource from all nodes
3643 pcmk__notice("%s will not be started under current conditions", rsc->id);
3644 g_clear_pointer(&rsc->priv->allowed_nodes, g_hash_table_destroy);
3645 rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
3646 g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3647 }
3648
3649 /*!
3650 * \internal
3651 * \brief Get configured failure handling and role after failure for an action
3652 *
3653 * \param[in,out] history Unpacked action history entry
3654 * \param[out] on_fail Where to set configured failure handling
 * \param[out] fail_role Where to set role after failure
3656 */
3657 static void
3658 unpack_failure_handling(struct action_history *history,
3659 enum pcmk__on_fail *on_fail,
3660 enum rsc_role_e *fail_role)
3661 {
3662 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3663 history->interval_ms, true);
3664
3665 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3666 history->task,
3667 history->interval_ms, config);
3668
3669 const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3670
3671 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3672 history->interval_ms, on_fail_str);
3673 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3674 meta);
3675 g_hash_table_destroy(meta);
3676 }
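
/* A minimal configuration sketch (hypothetical resource and values) of the
 * metadata this function consults:
 *
 *   <op id="rsc1-monitor-10s" name="monitor" interval="10s"
 *       on-fail="restart"/>
 *
 * For a failed 10-second monitor of rsc1, *on_fail would be set to
 * pcmk__on_fail_restart, with *fail_role derived from that handling.
 */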
3677
3678 /*!
3679 * \internal
3680 * \brief Update resource role, failure handling, etc., after a failed action
3681 *
3682 * \param[in,out] history Parsed action result history
3683 * \param[in] config_on_fail Action failure handling from configuration
3684 * \param[in] fail_role Resource's role after failure of this action
3685 * \param[out] last_failure This will be set to the history XML
3686 * \param[in,out] on_fail Actual handling of action result
3687 */
3688 static void
3689 unpack_rsc_op_failure(struct action_history *history,
3690 enum pcmk__on_fail config_on_fail,
3691 enum rsc_role_e fail_role, xmlNode **last_failure,
3692 enum pcmk__on_fail *on_fail)
3693 {
3694 bool is_probe = false;
3695 char *last_change_s = NULL;
3696 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3697
3698 *last_failure = history->xml;
3699
3700 is_probe = pcmk_xe_is_probe(history->xml);
3701 last_change_s = last_change_str(history->xml);
3702
3703 if (!pcmk__is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
3704 && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3705 pcmk__trace("Unexpected result (%s%s%s) was recorded for "
3706 "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3707 crm_exit_str(history->exit_status),
3708 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3709 pcmk__s(history->exit_reason, ""),
3710 (is_probe? "probe" : history->task), history->rsc->id,
3711 pcmk__node_name(history->node), last_change_s,
3712 history->exit_status, history->id);
3713 } else {
3714 pcmk__sched_warn(scheduler,
3715 "Unexpected result (%s%s%s) was recorded for %s of "
3716 "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3717 crm_exit_str(history->exit_status),
3718 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3719 pcmk__s(history->exit_reason, ""),
3720 (is_probe? "probe" : history->task), history->rsc->id,
3721 pcmk__node_name(history->node), last_change_s,
3722 history->exit_status, history->id);
3723
3724 if (is_probe && (history->exit_status != PCMK_OCF_OK)
3725 && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3726 && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3727
3728 /* A failed (not just unexpected) probe result could mean the user
3729 * didn't know resources will be probed even where they can't run.
3730 */
3731 pcmk__notice("If it is not possible for %s to run on %s, see the "
3732 PCMK_XA_RESOURCE_DISCOVERY " option for location "
3733 "constraints",
3734 history->rsc->id, pcmk__node_name(history->node));
3735 }
3736
3737 record_failed_op(history);
3738 }
3739
3740 free(last_change_s);
3741
3742 if (*on_fail < config_on_fail) {
3743 pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3744 pcmk__on_fail_text(*on_fail),
3745 pcmk__on_fail_text(config_on_fail), history->key);
3746 *on_fail = config_on_fail;
3747 }
3748
3749 if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3750 resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3751 "__stop_fail__", scheduler);
3752
3753 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3754 unpack_migrate_to_failure(history);
3755
3756 } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3757 unpack_migrate_from_failure(history);
3758
3759 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3760 history->rsc->priv->orig_role = pcmk_role_promoted;
3761
3762 } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3763 if (config_on_fail == pcmk__on_fail_block) {
3764 history->rsc->priv->orig_role = pcmk_role_promoted;
3765 pe__set_next_role(history->rsc, pcmk_role_stopped,
3766 "demote with " PCMK_META_ON_FAIL "=block");
3767
3768 } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3769 history->rsc->priv->orig_role = pcmk_role_stopped;
3770
3771 } else {
3772 /* Staying in the promoted role would put the scheduler and
3773 * controller into a loop. Setting the role to unpromoted is not
3774 * dangerous because the resource will be stopped as part of
3775 * recovery, and any promotion will be ordered after that stop.
3776 */
3777 history->rsc->priv->orig_role = pcmk_role_unpromoted;
3778 }
3779 }
3780
3781 if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3782 /* leave stopped */
3783 pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3784 history->rsc->priv->orig_role = pcmk_role_stopped;
3785
3786 } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3787 pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3788 set_active(history->rsc);
3789 }
3790
3791 pcmk__rsc_trace(history->rsc,
3792 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3793 history->rsc->id,
3794 pcmk_role_text(history->rsc->priv->orig_role),
3795 pcmk__btoa(history->node->details->unclean),
3796 pcmk__on_fail_text(config_on_fail),
3797 pcmk_role_text(fail_role));
3798
3799 if ((fail_role != pcmk_role_started)
3800 && (history->rsc->priv->next_role < fail_role)) {
3801 pe__set_next_role(history->rsc, fail_role, "failure");
3802 }
3803
3804 if (fail_role == pcmk_role_stopped) {
3805 ban_from_all_nodes(history->rsc);
3806 }
3807 }
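
/* Sketch of the demote handling above (hypothetical configuration): with
 * PCMK_META_ON_FAIL=block configured for the demote action, a failed demote
 * keeps the resource in the promoted role but pins its next role to stopped;
 * with the default handling, the role drops to unpromoted so that recovery
 * can order a stop before any new promotion.
 */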
3808
3809 /*!
3810 * \internal
3811 * \brief Block a resource with a failed action if it cannot be recovered
3812 *
 * If the failed action is a stop and fencing is not possible, mark the
 * resource as unmanaged and blocked, since recovery cannot be attempted.
3815 *
3816 * \param[in,out] history Parsed action history entry
3817 */
3818 static void
3819 block_if_unrecoverable(struct action_history *history)
3820 {
3821 char *last_change_s = NULL;
3822
3823 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3824 return; // All actions besides stop are always recoverable
3825 }
3826 if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3827 return; // Failed stops are recoverable via fencing
3828 }
3829
3830 last_change_s = last_change_str(history->xml);
3831 pcmk__sched_err(history->node->priv->scheduler,
3832 "No further recovery can be attempted for %s "
3833 "because %s on %s failed (%s%s%s) at %s "
3834 QB_XS " rc=%d id=%s",
3835 history->rsc->id, history->task,
3836 pcmk__node_name(history->node),
3837 crm_exit_str(history->exit_status),
3838 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3839 pcmk__s(history->exit_reason, ""),
3840 last_change_s, history->exit_status, history->id);
3841
3842 free(last_change_s);
3843
3844 pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
3845 pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3846 }
3847
3848 /*!
3849 * \internal
3850 * \brief Update action history's execution status and why
3851 *
3852 * \param[in,out] history Parsed action history entry
3853 * \param[out] why Where to store reason for update
3854 * \param[in] value New value
3855 * \param[in] reason Description of why value was changed
3856 */
3857 static inline void
3858 remap_because(struct action_history *history, const char **why, int value,
3859 const char *reason)
3860 {
3861 if (history->execution_status != value) {
3862 history->execution_status = value;
3863 *why = reason;
3864 }
3865 }
3866
3867 /*!
3868 * \internal
3869 * \brief Remap informational monitor results and operation status
3870 *
 * For monitor results, certain OCF codes provide extended information to the
 * user about services that aren't yet failed but not entirely healthy either.
 * These must be treated as the "normal" result by Pacemaker.
 *
 * For operation status, the action result can be used to determine an
 * appropriate status for the purposes of responding to the action. The status
 * provided by the executor is not directly usable, since the executor does
 * not know what was expected.
3878 *
3879 * \param[in,out] history Parsed action history entry
3880 * \param[in,out] on_fail What should be done about the result
3881 * \param[in] expired Whether result is expired
3882 *
3883 * \note If the result is remapped and the node is not shutting down or failed,
3884 * the operation will be recorded in the scheduler data's list of failed
3885 * operations to highlight it for the user.
3886 *
3887 * \note This may update the resource's current and next role.
3888 */
3889 static void
3890 remap_operation(struct action_history *history,
3891 enum pcmk__on_fail *on_fail, bool expired)
3892 {
3893 /* @TODO It would probably also be a good idea to map an exit status of
3894 * CRM_EX_PROMOTED or CRM_EX_DEGRADED_PROMOTED to CRM_EX_OK for promote
3895 * actions
3896 */
3897
3898 bool is_probe = false;
3899 int orig_exit_status = history->exit_status;
3900 int orig_exec_status = history->execution_status;
3901 const char *why = NULL;
3902 const char *task = history->task;
3903
3904 // Remap degraded results to their successful counterparts
3905 history->exit_status = pcmk__effective_rc(history->exit_status);
3906 if (history->exit_status != orig_exit_status) {
3907 why = "degraded result";
3908 if (!expired && (!history->node->details->shutdown
3909 || history->node->details->online)) {
3910 record_failed_op(history);
3911 }
3912 }
3913
3914 if (!pcmk__is_bundled(history->rsc)
3915 && pcmk_xe_mask_probe_failure(history->xml)
3916 && ((history->execution_status != PCMK_EXEC_DONE)
3917 || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3918 history->execution_status = PCMK_EXEC_DONE;
3919 history->exit_status = PCMK_OCF_NOT_RUNNING;
3920 why = "equivalent probe result";
3921 }
3922
3923 /* If the executor reported an execution status of anything but done or
3924 * error, consider that final. But for done or error, we know better whether
3925 * it should be treated as a failure or not, because we know the expected
3926 * result.
3927 */
3928 switch (history->execution_status) {
3929 case PCMK_EXEC_DONE:
3930 case PCMK_EXEC_ERROR:
3931 break;
3932
3933 // These should be treated as node-fatal
3934 case PCMK_EXEC_NO_FENCE_DEVICE:
3935 case PCMK_EXEC_NO_SECRETS:
3936 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3937 "node-fatal error");
3938 goto remap_done;
3939
3940 default:
3941 goto remap_done;
3942 }
3943
3944 is_probe = pcmk_xe_is_probe(history->xml);
3945 if (is_probe) {
3946 task = "probe";
3947 }
3948
3949 if (history->expected_exit_status < 0) {
3950 /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3951 * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3952 * expected exit status in the transition key, which (along with the
3953 * similar case of a corrupted transition key in the CIB) will be
3954 * reported to this function as -1. Pacemaker 2.0+ does not support
3955 * rolling upgrades from those versions or processing of saved CIB files
3956 * from those versions, so we do not need to care much about this case.
3957 */
3958 remap_because(history, &why, PCMK_EXEC_ERROR,
3959 "obsolete history format");
3960 pcmk__config_warn("Expected result not found for %s on %s "
3961 "(corrupt or obsolete CIB?)",
3962 history->key, pcmk__node_name(history->node));
3963
3964 } else if (history->exit_status == history->expected_exit_status) {
3965 remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3966
3967 } else {
3968 remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3969 pcmk__rsc_debug(history->rsc,
3970 "%s on %s: expected %d (%s), got %d (%s%s%s)",
3971 history->key, pcmk__node_name(history->node),
3972 history->expected_exit_status,
3973 crm_exit_str(history->expected_exit_status),
3974 history->exit_status,
3975 crm_exit_str(history->exit_status),
3976 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3977 pcmk__s(history->exit_reason, ""));
3978 }
3979
3980 switch (history->exit_status) {
3981 case PCMK_OCF_OK:
3982 if (is_probe
3983 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3984 char *last_change_s = last_change_str(history->xml);
3985
3986 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3987 pcmk__rsc_info(history->rsc,
3988 "Probe found %s active on %s at %s",
3989 history->rsc->id, pcmk__node_name(history->node),
3990 last_change_s);
3991 free(last_change_s);
3992 }
3993 break;
3994
3995 case PCMK_OCF_NOT_RUNNING:
3996 if (is_probe
3997 || (history->expected_exit_status == history->exit_status)
3998 || !pcmk__is_set(history->rsc->flags, pcmk__rsc_managed)) {
3999
4000 /* For probes, recurring monitors for the Stopped role, and
4001 * unmanaged resources, "not running" is not considered a
4002 * failure.
4003 */
4004 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4005 history->rsc->priv->orig_role = pcmk_role_stopped;
4006 *on_fail = pcmk__on_fail_ignore;
4007 pe__set_next_role(history->rsc, pcmk_role_unknown,
4008 "not running");
4009 }
4010 break;
4011
4012 case PCMK_OCF_RUNNING_PROMOTED:
4013 if (is_probe
4014 && (history->exit_status != history->expected_exit_status)) {
4015 char *last_change_s = last_change_str(history->xml);
4016
4017 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4018 pcmk__rsc_info(history->rsc,
4019 "Probe found %s active and promoted on %s at %s",
4020 history->rsc->id,
4021 pcmk__node_name(history->node), last_change_s);
4022 free(last_change_s);
4023 }
4024 if (!expired
4025 || (history->exit_status == history->expected_exit_status)) {
4026 history->rsc->priv->orig_role = pcmk_role_promoted;
4027 }
4028 break;
4029
4030 case PCMK_OCF_FAILED_PROMOTED:
4031 if (!expired) {
4032 history->rsc->priv->orig_role = pcmk_role_promoted;
4033 }
4034 remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4035 break;
4036
4037 case PCMK_OCF_NOT_CONFIGURED:
4038 remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4039 break;
4040
4041 case PCMK_OCF_UNIMPLEMENT_FEATURE:
4042 {
4043 guint interval_ms = 0;
4044 pcmk__xe_get_guint(history->xml, PCMK_META_INTERVAL,
4045 &interval_ms);
4046
4047 if (interval_ms == 0) {
4048 if (!expired) {
4049 block_if_unrecoverable(history);
4050 }
4051 remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4052 "exit status");
4053 } else {
4054 remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4055 "exit status");
4056 }
4057 }
4058 break;
4059
4060 case PCMK_OCF_NOT_INSTALLED:
4061 case PCMK_OCF_INVALID_PARAM:
4062 case PCMK_OCF_INSUFFICIENT_PRIV:
4063 if (!expired) {
4064 block_if_unrecoverable(history);
4065 }
4066 remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4067 break;
4068
4069 default:
4070 if (history->execution_status == PCMK_EXEC_DONE) {
4071 char *last_change_s = last_change_str(history->xml);
4072
4073 pcmk__info("Treating unknown exit status %d from %s of %s on "
4074 "%s at %s as failure",
4075 history->exit_status, task, history->rsc->id,
4076 pcmk__node_name(history->node), last_change_s);
4077 remap_because(history, &why, PCMK_EXEC_ERROR,
4078 "unknown exit status");
4079 free(last_change_s);
4080 }
4081 break;
4082 }
4083
4084 remap_done:
4085 if (why != NULL) {
4086 pcmk__rsc_trace(history->rsc,
4087 "Remapped %s result from [%s: %s] to [%s: %s] "
4088 "because of %s",
4089 history->key, pcmk_exec_status_str(orig_exec_status),
4090 crm_exit_str(orig_exit_status),
4091 pcmk_exec_status_str(history->execution_status),
4092 crm_exit_str(history->exit_status), why);
4093 }
4094 }
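
/* Two hedged examples of the remapping above (scenarios illustrative): a
 * probe that exits PCMK_OCF_NOT_RUNNING when that was the expected result is
 * remapped to PCMK_EXEC_DONE ("expected result"), while an action reported
 * as done or error that exits PCMK_OCF_NOT_CONFIGURED is remapped to
 * PCMK_EXEC_ERROR_FATAL ("exit status"), which later prevents restarts
 * anywhere.
 */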
4095
// Return true if a start or monitor last failure has changed parameters
4097 static bool
4098 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4099 pcmk_resource_t *rsc, pcmk_node_t *node)
4100 {
4101 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4102 if (pe__bundle_needs_remote_name(rsc)) {
4103 /* We haven't allocated resources yet, so we can't reliably
4104 * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4105 * When that's needed, defer the check until later.
4106 */
4107 pcmk__add_param_check(xml_op, rsc, node, pcmk__check_last_failure);
4108
4109 } else {
4110 pcmk__op_digest_t *digest_data = NULL;
4111
4112 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4113 rsc->priv->scheduler);
4114 switch (digest_data->rc) {
4115 case pcmk__digest_unknown:
4116 pcmk__trace("Resource %s history entry %s on %s"
4117 " has no digest to compare",
4118 rsc->id, pcmk__xe_history_key(xml_op),
4119 node->priv->id);
4120 break;
4121 case pcmk__digest_match:
4122 break;
4123 default:
4124 return TRUE;
4125 }
4126 }
4127 }
4128 return FALSE;
4129 }
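
/* Sketch (hypothetical scenario): if rsc1's definition gained a new instance
 * attribute since the operation recorded in xml_op, rsc_action_digest_cmp()
 * would return a result other than pcmk__digest_match or
 * pcmk__digest_unknown, so the last failure would be cleared and the new
 * parameters given a fresh attempt.
 */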
4130
4131 // Order action after fencing of remote node, given connection rsc
4132 static void
4133 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4134 pcmk_scheduler_t *scheduler)
4135 {
4136 pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4137
4138 if (remote_node) {
4139 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4140 FALSE, scheduler);
4141
4142 order_actions(fence, action, pcmk__ar_first_implies_then);
4143 }
4144 }
4145
4146 static bool
4147 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4148 guint interval_ms, bool is_last_failure)
4149 {
4150 /* Clearing failures of recurring monitors has special concerns. The
4151 * executor reports only changes in the monitor result, so if the
4152 * monitor is still active and still getting the same failure result,
4153 * that will go undetected after the failure is cleared.
4154 *
4155 * Also, the operation history will have the time when the recurring
4156 * monitor result changed to the given code, not the time when the
4157 * result last happened.
4158 *
4159 * @TODO We probably should clear such failures only when the failure
4160 * timeout has passed since the last occurrence of the failed result.
4161 * However we don't record that information. We could maybe approximate
4162 * that by clearing only if there is a more recent successful monitor or
4163 * stop result, but we don't even have that information at this point
4164 * since we are still unpacking the resource's operation history.
4165 *
4166 * This is especially important for remote connection resources with a
4167 * reconnect interval, so in that case, we skip clearing failures
4168 * if the remote node hasn't been fenced.
4169 */
4170 if ((rsc->priv->remote_reconnect_ms > 0U)
4171 && pcmk__is_set(rsc->priv->scheduler->flags,
4172 pcmk__sched_fencing_enabled)
4173 && (interval_ms != 0)
4174 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4175
4176 pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4177 rsc->id);
4178
4179 if (remote_node && !pcmk__is_set(remote_node->priv->flags,
4180 pcmk__node_remote_fenced)) {
4181 if (is_last_failure) {
4182 pcmk__info("Waiting to clear monitor failure for remote node %s"
4183 " until fencing has occurred",
4184 rsc->id);
4185 }
4186 return TRUE;
4187 }
4188 }
4189 return FALSE;
4190 }
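
/* Illustration (hypothetical setup): a remote connection resource with a
 * reconnect interval and a failed recurring monitor keeps that failure until
 * the remote node is fenced. Because the executor reports only changes in
 * recurring monitor results, clearing sooner could leave a still-failing
 * monitor undetected.
 */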
4191
4192 /*!
4193 * \internal
4194 * \brief Check operation age and schedule failure clearing when appropriate
4195 *
4196 * This function has two distinct purposes. The first is to check whether an
4197 * operation history entry is expired (i.e. the resource has a failure timeout,
4198 * the entry is older than the timeout, and the resource either has no fail
 * count or its fail count is entirely older than the timeout). The second is
 * to schedule fail count clearing when appropriate: when the operation is
 * expired and the resource's fail count has entirely expired; when the
 * operation is an expired last_failure for a remote connection resource with
 * a reconnect interval; or when the operation is a last_failure for a start
 * or monitor and the resource's parameters have changed since the operation.
4205 *
4206 * \param[in,out] history Parsed action result history
4207 *
4208 * \return true if operation history entry is expired, otherwise false
4209 */
4210 static bool
4211 check_operation_expiry(struct action_history *history)
4212 {
4213 bool expired = false;
4214 bool is_last_failure = (history->id != NULL)
4215 && g_str_has_suffix(history->id, "_last_failure_0");
4216 time_t last_run = 0;
4217 int unexpired_fail_count = 0;
4218 const char *clear_reason = NULL;
4219 const guint expiration_sec =
4220 pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4221 pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4222
4223 if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4224 pcmk__rsc_trace(history->rsc,
4225 "Resource history entry %s on %s is not expired: "
4226 "Not Installed does not expire",
4227 history->id, pcmk__node_name(history->node));
4228 return false; // "Not installed" must always be cleared manually
4229 }
4230
4231 if ((expiration_sec > 0)
4232 && (pcmk__xe_get_time(history->xml, PCMK_XA_LAST_RC_CHANGE,
4233 &last_run) == pcmk_rc_ok)) {
4234
4235 /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4236 * timestamp
4237 */
4238
4239 time_t now = pcmk__scheduler_epoch_time(scheduler);
4240 time_t last_failure = 0;
4241
4242 // Is this particular operation history older than the failure timeout?
4243 if ((now >= (last_run + expiration_sec))
4244 && !should_ignore_failure_timeout(history->rsc, history->task,
4245 history->interval_ms,
4246 is_last_failure)) {
4247 expired = true;
4248 }
4249
4250 // Does the resource as a whole have an unexpired fail count?
4251 unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4252 &last_failure,
4253 pcmk__fc_effective,
4254 history->xml);
4255
4256 // Update scheduler recheck time according to *last* failure
4257 pcmk__trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4258 "expiration=%s last-failure@%lld",
4259 history->id, (long long) last_run, (expired? "" : "not "),
4260 (long long) now, unexpired_fail_count,
4261 pcmk__readable_interval(expiration_sec * 1000),
4262 (long long) last_failure);
4263 last_failure += expiration_sec + 1;
4264 if (unexpired_fail_count && (now < last_failure)) {
4265 pcmk__update_recheck_time(last_failure, scheduler,
4266 "fail count expiration");
4267 }
4268 }
4269
4270 if (expired) {
4271 if (pe_get_failcount(history->node, history->rsc, NULL,
4272 pcmk__fc_default, history->xml)) {
4273 // There is a fail count ignoring timeout
4274
4275 if (unexpired_fail_count == 0) {
4276 // There is no fail count considering timeout
4277 clear_reason = "it expired";
4278
4279 } else {
4280 /* This operation is old, but there is an unexpired fail count.
4281 * In a properly functioning cluster, this should only be
4282 * possible if this operation is not a failure (otherwise the
4283 * fail count should be expired too), so this is really just a
4284 * failsafe.
4285 */
4286 pcmk__rsc_trace(history->rsc,
4287 "Resource history entry %s on %s is not "
4288 "expired: Unexpired fail count",
4289 history->id, pcmk__node_name(history->node));
4290 expired = false;
4291 }
4292
4293 } else if (is_last_failure
4294 && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4295 /* Clear any expired last failure when reconnect interval is set,
4296 * even if there is no fail count.
4297 */
4298 clear_reason = "reconnect interval is set";
4299 }
4300 }
4301
4302 if (!expired && is_last_failure
4303 && should_clear_for_param_change(history->xml, history->task,
4304 history->rsc, history->node)) {
4305 clear_reason = "resource parameters have changed";
4306 }
4307
4308 if (clear_reason != NULL) {
4309 pcmk_action_t *clear_op = NULL;
4310
4311 // Schedule clearing of the fail count
4312 clear_op = pe__clear_failcount(history->rsc, history->node,
4313 clear_reason, scheduler);
4314
4315 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4316 && (history->rsc->priv->remote_reconnect_ms > 0)) {
4317 /* If we're clearing a remote connection due to a reconnect
4318 * interval, we want to wait until any scheduled fencing
4319 * completes.
4320 *
4321 * We could limit this to remote_node->details->unclean, but at
4322 * this point, that's always true (it won't be reliable until
4323 * after unpack_node_history() is done).
4324 */
4325 pcmk__info("Clearing %s failure will wait until any scheduled "
4326 "fencing of %s completes",
4327 history->task, history->rsc->id);
4328 order_after_remote_fencing(clear_op, history->rsc, scheduler);
4329 }
4330 }
4331
4332 if (expired && (history->interval_ms == 0)
4333 && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4334 switch (history->exit_status) {
4335 case PCMK_OCF_OK:
4336 case PCMK_OCF_NOT_RUNNING:
4337 case PCMK_OCF_RUNNING_PROMOTED:
4338 case PCMK_OCF_DEGRADED:
4339 case PCMK_OCF_DEGRADED_PROMOTED:
4340 // Don't expire probes that return these values
4341 pcmk__rsc_trace(history->rsc,
4342 "Resource history entry %s on %s is not "
4343 "expired: Probe result",
4344 history->id, pcmk__node_name(history->node));
4345 expired = false;
4346 break;
4347 }
4348 }
4349
4350 return expired;
4351 }
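
/* A worked example with hypothetical numbers: with a failure timeout of
 * 10 minutes (expiration_sec=600), last_run=1000, and now=1700, the entry is
 * expired because 1700 >= 1000 + 600. If the newest failure was at
 * last_failure=1500 and the fail count has not yet expired, the recheck time
 * becomes 2101 (1500 + 600 + 1), so the cluster re-evaluates shortly after
 * the fail count fully expires.
 */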
4352
4353 int
4354 pe__target_rc_from_xml(const xmlNode *xml_op)
4355 {
4356 int target_rc = 0;
4357 const char *key = pcmk__xe_get(xml_op, PCMK__XA_TRANSITION_KEY);
4358
4359 if (key == NULL) {
4360 return -1;
4361 }
4362 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4363 return target_rc;
4364 }
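
/* A hedged example: a PCMK__XA_TRANSITION_KEY value of the form
 * "3:12:0:0123abcd-..." (values hypothetical) would decode to a target rc of
 * 0 (PCMK_OCF_OK), while a history entry with no transition key at all
 * yields -1, which remap_operation() treats as an obsolete history format.
 */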
4365
4366 /*!
4367 * \internal
4368 * \brief Update a resource's state for an action result
4369 *
4370 * \param[in,out] history Parsed action history entry
4371 * \param[in] exit_status Exit status to base new state on
4372 * \param[in] last_failure Resource's last_failure entry, if known
4373 * \param[in,out] on_fail Resource's current failure handling
4374 */
4375 static void
4376 update_resource_state(struct action_history *history, int exit_status,
4377 const xmlNode *last_failure,
4378 enum pcmk__on_fail *on_fail)
4379 {
4380 bool clear_past_failure = false;
4381
4382 if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4383 || (!pcmk__is_bundled(history->rsc)
4384 && pcmk_xe_mask_probe_failure(history->xml))) {
4385 history->rsc->priv->orig_role = pcmk_role_stopped;
4386
4387 } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4388 clear_past_failure = true;
4389
4390 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4391 pcmk__str_none)) {
4392 if ((last_failure != NULL)
4393 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4394 pcmk__str_none)) {
4395 clear_past_failure = true;
4396 }
4397 if (history->rsc->priv->orig_role < pcmk_role_started) {
4398 set_active(history->rsc);
4399 }
4400
4401 } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4402 history->rsc->priv->orig_role = pcmk_role_started;
4403 clear_past_failure = true;
4404
4405 } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4406 history->rsc->priv->orig_role = pcmk_role_stopped;
4407 clear_past_failure = true;
4408
4409 } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4410 pcmk__str_none)) {
4411 history->rsc->priv->orig_role = pcmk_role_promoted;
4412 clear_past_failure = true;
4413
4414 } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4415 pcmk__str_none)) {
4416 if (*on_fail == pcmk__on_fail_demote) {
4417 /* Demote clears an error only if
4418 * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4419 */
4420 clear_past_failure = true;
4421 }
4422 history->rsc->priv->orig_role = pcmk_role_unpromoted;
4423
4424 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4425 pcmk__str_none)) {
4426 history->rsc->priv->orig_role = pcmk_role_started;
4427 clear_past_failure = true;
4428
4429 } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4430 pcmk__str_none)) {
4431 unpack_migrate_to_success(history);
4432
4433 } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4434 pcmk__rsc_trace(history->rsc, "%s active on %s",
4435 history->rsc->id, pcmk__node_name(history->node));
4436 set_active(history->rsc);
4437 }
4438
4439 if (!clear_past_failure) {
4440 return;
4441 }
4442
4443 switch (*on_fail) {
4444 case pcmk__on_fail_stop:
4445 case pcmk__on_fail_ban:
4446 case pcmk__on_fail_standby_node:
4447 case pcmk__on_fail_fence_node:
4448 pcmk__rsc_trace(history->rsc,
4449 "%s (%s) is not cleared by a completed %s",
4450 history->rsc->id, pcmk__on_fail_text(*on_fail),
4451 history->task);
4452 break;
4453
4454 case pcmk__on_fail_block:
4455 case pcmk__on_fail_ignore:
4456 case pcmk__on_fail_demote:
4457 case pcmk__on_fail_restart:
4458 case pcmk__on_fail_restart_container:
4459 *on_fail = pcmk__on_fail_ignore;
4460 pe__set_next_role(history->rsc, pcmk_role_unknown,
4461 "clear past failures");
4462 break;
4463
4464 case pcmk__on_fail_reset_remote:
4465 if (history->rsc->priv->remote_reconnect_ms == 0U) {
4466 /* With no reconnect interval, the connection is allowed to
4467 * start again after the remote node is fenced and
4468 * completely stopped. (With a reconnect interval, we wait
4469 * for the failure to be cleared entirely before attempting
4470 * to reconnect.)
4471 */
4472 *on_fail = pcmk__on_fail_ignore;
4473 pe__set_next_role(history->rsc, pcmk_role_unknown,
4474 "clear past failures and reset remote");
4475 }
4476 break;
4477 }
4478 }
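
/* Illustration (hypothetical history): after a failed monitor recorded in
 * rsc1_last_failure_0, a later successful stop reaches the switch above with
 * clear_past_failure set. An on-fail handling of restart is then reset to
 * ignore, while fencing-related handling such as pcmk__on_fail_fence_node is
 * deliberately left in place.
 */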
4479
4480 /*!
4481 * \internal
4482 * \brief Check whether a given history entry matters for resource state
4483 *
4484 * \param[in] history Parsed action history entry
4485 *
4486 * \return true if action can affect resource state, otherwise false
4487 */
4488 static inline bool
4489 can_affect_state(struct action_history *history)
4490 {
4491 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4492 PCMK_ACTION_START, PCMK_ACTION_STOP,
4493 PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4494 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4495 "asyncmon", NULL);
4496 }
4497
4498 /*!
4499 * \internal
4500 * \brief Unpack execution/exit status and exit reason from a history entry
4501 *
4502 * \param[in,out] history Action history entry to unpack
4503 *
4504 * \return Standard Pacemaker return code
4505 */
4506 static int
4507 unpack_action_result(struct action_history *history)
4508 {
4509 if ((pcmk__xe_get_int(history->xml, PCMK__XA_OP_STATUS,
4510 &(history->execution_status)) != pcmk_rc_ok)
4511 || (history->execution_status < PCMK_EXEC_PENDING)
4512 || (history->execution_status > PCMK_EXEC_MAX)
4513 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4514 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4515 "with invalid " PCMK__XA_OP_STATUS " '%s'",
4516 history->id, history->rsc->id,
4517 pcmk__node_name(history->node),
4518 pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_OP_STATUS),
4519 ""));
4520 return pcmk_rc_unpack_error;
4521 }
4522 if ((pcmk__xe_get_int(history->xml, PCMK__XA_RC_CODE,
4523 &(history->exit_status)) != pcmk_rc_ok)
4524 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4525 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4526 "with invalid " PCMK__XA_RC_CODE " '%s'",
4527 history->id, history->rsc->id,
4528 pcmk__node_name(history->node),
4529 pcmk__s(pcmk__xe_get(history->xml, PCMK__XA_RC_CODE),
4530 ""));
4531 return pcmk_rc_unpack_error;
4532 }
4533 history->exit_reason = pcmk__xe_get(history->xml, PCMK_XA_EXIT_REASON);
4534 return pcmk_rc_ok;
4535 }
4536
4537 /*!
4538 * \internal
4539 * \brief Process an action history entry whose result expired
4540 *
4541 * \param[in,out] history Parsed action history entry
4542 * \param[in] orig_exit_status Action exit status before remapping
4543 *
4544 * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4545 * entry needs no further processing)
4546 */
4547 static int
4548 process_expired_result(struct action_history *history, int orig_exit_status)
4549 {
4550 if (!pcmk__is_bundled(history->rsc)
4551 && pcmk_xe_mask_probe_failure(history->xml)
4552 && (orig_exit_status != history->expected_exit_status)) {
4553
4554 if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4555 history->rsc->priv->orig_role = pcmk_role_unknown;
4556 }
4557 pcmk__trace("Ignoring resource history entry %s for probe of %s on %s: "
4558 "Masked failure expired",
4559 history->id, history->rsc->id,
4560 pcmk__node_name(history->node));
4561 return pcmk_rc_ok;
4562 }
4563
4564 if (history->exit_status == history->expected_exit_status) {
4565 return pcmk_rc_undetermined; // Only failures expire
4566 }
4567
4568 if (history->interval_ms == 0) {
4569 pcmk__notice("Ignoring resource history entry %s for %s of %s on %s: "
4570 "Expired failure",
4571 history->id, history->task, history->rsc->id,
4572 pcmk__node_name(history->node));
4573 return pcmk_rc_ok;
4574 }
4575
4576 if (history->node->details->online && !history->node->details->unclean) {
4577 /* Reschedule the recurring action. schedule_cancel() won't work at
4578 * this stage, so as a hacky workaround, forcibly change the restart
4579 * digest so pcmk__check_action_config() does what we want later.
4580 *
4581 * @TODO We should skip this if there is a newer successful monitor.
4582 * Also, this causes rescheduling only if the history entry
4583 * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4584 * scheduler regression test doesn't, but that may not be a
4585 * realistic scenario in production).
4586 */
4587 pcmk__notice("Rescheduling %s-interval %s of %s on %s after failure "
4588 "expired",
4589 pcmk__readable_interval(history->interval_ms),
4590 history->task, history->rsc->id,
4591 pcmk__node_name(history->node));
4592 pcmk__xe_set(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4593 "calculated-failure-timeout");
4594 return pcmk_rc_ok;
4595 }
4596
4597 return pcmk_rc_undetermined;
4598 }
4599
4600 /*!
4601 * \internal
4602 * \brief Process a masked probe failure
4603 *
4604 * \param[in,out] history Parsed action history entry
4605 * \param[in] orig_exit_status Action exit status before remapping
4606 * \param[in] last_failure Resource's last_failure entry, if known
4607 * \param[in,out] on_fail Resource's current failure handling
4608 */
4609 static void
4610 mask_probe_failure(struct action_history *history, int orig_exit_status,
4611 const xmlNode *last_failure,
4612 enum pcmk__on_fail *on_fail)
4613 {
4614 pcmk_resource_t *ban_rsc = history->rsc;
4615
4616 if (!pcmk__is_set(history->rsc->flags, pcmk__rsc_unique)) {
4617 ban_rsc = uber_parent(history->rsc);
4618 }
4619
4620 pcmk__notice("Treating probe result '%s' for %s on %s as 'not running'",
4621 crm_exit_str(orig_exit_status), history->rsc->id,
4622 pcmk__node_name(history->node));
4623 update_resource_state(history, history->expected_exit_status, last_failure,
4624 on_fail);
4625 pcmk__xe_set(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4626
4627 record_failed_op(history);
4628 resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4629 "masked-probe-failure", ban_rsc->priv->scheduler);
4630 }
4631
4632 /*!
 * \internal
 * \brief Check whether a given failure is for a given pending action
4634 *
4635 * \param[in] history Parsed history entry for pending action
4636 * \param[in] last_failure Resource's last_failure entry, if known
4637 *
4638 * \return true if \p last_failure is failure of pending action in \p history,
4639 * otherwise false
4640 * \note Both \p history and \p last_failure must come from the same
4641 * \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4642 * the same.
4643 */
4644 static bool
4645 failure_is_newer(const struct action_history *history,
4646 const xmlNode *last_failure)
4647 {
4648 guint failure_interval_ms = 0U;
4649 long long failure_change = 0LL;
4650 long long this_change = 0LL;
4651
4652 if (last_failure == NULL) {
4653 return false; // Resource has no last_failure entry
4654 }
4655
4656 if (!pcmk__str_eq(history->task,
4657 pcmk__xe_get(last_failure, PCMK_XA_OPERATION),
4658 pcmk__str_none)) {
4659 return false; // last_failure is for different action
4660 }
4661
4662 if ((pcmk__xe_get_guint(last_failure, PCMK_META_INTERVAL,
4663 &failure_interval_ms) != pcmk_rc_ok)
4664 || (history->interval_ms != failure_interval_ms)) {
4665 return false; // last_failure is for action with different interval
4666 }
4667
4668 if ((pcmk__scan_ll(pcmk__xe_get(history->xml, PCMK_XA_LAST_RC_CHANGE),
4669 &this_change, 0LL) != pcmk_rc_ok)
4670 || (pcmk__scan_ll(pcmk__xe_get(last_failure, PCMK_XA_LAST_RC_CHANGE),
4671 &failure_change, 0LL) != pcmk_rc_ok)
4672 || (failure_change < this_change)) {
4673 return false; // Failure is not known to be newer
4674 }
4675
4676 return true;
4677 }
4678
4679 /*!
4680 * \internal
4681 * \brief Update a resource's role etc. for a pending action
4682 *
4683 * \param[in,out] history Parsed history entry for pending action
4684 * \param[in] last_failure Resource's last_failure entry, if known
4685 */
4686 static void
4687 process_pending_action(struct action_history *history,
4688 const xmlNode *last_failure)
4689 {
4690 /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4691 * and there might be a RSC_monitor_INTERVAL entry with the last successful
4692 * or pending result.
4693 *
4694 * If last_failure contains the failure of the pending recurring monitor
4695 * we're processing here, and is newer, the action is no longer pending.
4696 * (Pending results have call ID -1, which sorts last, so the last failure
4697 * if any should be known.)
4698 */
4699 if (failure_is_newer(history, last_failure)) {
4700 return;
4701 }
4702
4703 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4704 pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4705 set_active(history->rsc);
4706
4707 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4708 history->rsc->priv->orig_role = pcmk_role_promoted;
4709
4710 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4711 && history->node->details->unclean) {
        /* A migrate_to action is pending on an unclean source, so force a stop
4713 * on the target.
4714 */
4715 const char *migrate_target = NULL;
4716 pcmk_node_t *target = NULL;
4717
4718 migrate_target = pcmk__xe_get(history->xml, PCMK__META_MIGRATE_TARGET);
4719 target = pcmk_find_node(history->rsc->priv->scheduler,
4720 migrate_target);
4721 if (target != NULL) {
4722 stop_action(history->rsc, target, FALSE);
4723 }
4724 }
4725
4726 if (history->rsc->priv->pending_action != NULL) {
4727 /* There should never be multiple pending actions, but as a failsafe,
4728 * just remember the first one processed for display purposes.
4729 */
4730 return;
4731 }
4732
4733 if (pcmk_is_probe(history->task, history->interval_ms)) {
4734 /* Pending probes are currently never displayed, even if pending
4735 * operations are requested. If we ever want to change that,
4736 * enable the below and the corresponding part of
4737 * native.c:native_pending_action().
4738 */
4739 #if 0
        history->rsc->priv->pending_action = strdup("probe");
        history->rsc->priv->pending_node = history->node;
4742 #endif
4743 } else {
4744 history->rsc->priv->pending_action = strdup(history->task);
4745 history->rsc->priv->pending_node = history->node;
4746 }
4747 }
4748
4749 static void
4750 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4751 xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4752 {
4753 int old_rc = 0;
4754 bool expired = false;
4755 pcmk_resource_t *parent = rsc;
4756 enum rsc_role_e fail_role = pcmk_role_unknown;
4757 enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4758
4759 struct action_history history = {
4760 .rsc = rsc,
4761 .node = node,
4762 .xml = xml_op,
4763 .execution_status = PCMK_EXEC_UNKNOWN,
4764 };
4765
4766 CRM_CHECK(rsc && node && xml_op, return);
4767
4768 history.id = pcmk__xe_id(xml_op);
4769 if (history.id == NULL) {
4770 pcmk__config_err("Ignoring resource history entry for %s on %s "
4771 "without ID", rsc->id, pcmk__node_name(node));
4772 return;
4773 }
4774
4775 // Task and interval
4776 history.task = pcmk__xe_get(xml_op, PCMK_XA_OPERATION);
4777 if (history.task == NULL) {
4778 pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4779 "without " PCMK_XA_OPERATION,
4780 history.id, rsc->id, pcmk__node_name(node));
4781 return;
4782 }
4783 pcmk__xe_get_guint(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4784 if (!can_affect_state(&history)) {
4785 pcmk__rsc_trace(rsc,
4786 "Ignoring resource history entry %s for %s on %s "
4787 "with irrelevant action '%s'",
4788 history.id, rsc->id, pcmk__node_name(node),
4789 history.task);
4790 return;
4791 }
4792
4793 if (unpack_action_result(&history) != pcmk_rc_ok) {
4794 return; // Error already logged
4795 }
4796
4797 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4798 history.key = pcmk__xe_history_key(xml_op);
4799 pcmk__xe_get_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4800
4801 pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4802 history.id, history.task, history.call_id,
4803 pcmk__node_name(node),
4804 pcmk_exec_status_str(history.execution_status),
4805 crm_exit_str(history.exit_status));
4806
4807 if (node->details->unclean) {
4808 pcmk__rsc_trace(rsc,
4809 "%s is running on %s, which is unclean (further action "
4810 "depends on value of stop's on-fail attribute)",
4811 rsc->id, pcmk__node_name(node));
4812 }
4813
4814 expired = check_operation_expiry(&history);
4815 old_rc = history.exit_status;
4816
4817 remap_operation(&history, on_fail, expired);
4818
4819 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4820 goto done;
4821 }
4822
4823 if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4824 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4825 goto done;
4826 }
4827
4828 if (!pcmk__is_set(rsc->flags, pcmk__rsc_unique)) {
4829 parent = uber_parent(rsc);
4830 }
4831
4832 switch (history.execution_status) {
4833 case PCMK_EXEC_PENDING:
4834 process_pending_action(&history, *last_failure);
4835 goto done;
4836
4837 case PCMK_EXEC_DONE:
4838 update_resource_state(&history, history.exit_status, *last_failure,
4839 on_fail);
4840 goto done;
4841
4842 case PCMK_EXEC_NOT_INSTALLED:
4843 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4844 if (failure_strategy == pcmk__on_fail_ignore) {
4845 pcmk__warn("Cannot ignore failed %s of %s on %s: Resource "
4846 "agent doesn't exist "
4847 QB_XS " status=%d rc=%d id=%s",
4848 history.task, rsc->id, pcmk__node_name(node),
4849 history.execution_status, history.exit_status,
4850 history.id);
                /* Also mark it as pcmk__rsc_failed later so that it is
                 * displayed as "FAILED"
                 */
                *on_fail = pcmk__on_fail_ban;
            }
            resource_location(parent, node, -PCMK_SCORE_INFINITY,
                              "hard-error", rsc->priv->scheduler);
            unpack_rsc_op_failure(&history, failure_strategy, fail_role,
                                  last_failure, on_fail);
            goto done;

        case PCMK_EXEC_NOT_CONNECTED:
            if (pcmk__is_pacemaker_remote_node(node)
                && pcmk__is_set(node->priv->remote->flags,
                                pcmk__rsc_managed)) {
                /* We should never get into a situation where a managed remote
                 * connection resource is considered OK but a resource action
                 * behind the connection gets a "not connected" status. But as a
                 * fail-safe in case a bug or unusual circumstances do lead to
                 * that, ensure the remote connection is considered failed.
                 */
                pcmk__set_rsc_flags(node->priv->remote,
                                    pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
            }
            break; // Not done, do error handling

        case PCMK_EXEC_ERROR:
        case PCMK_EXEC_ERROR_HARD:
        case PCMK_EXEC_ERROR_FATAL:
        case PCMK_EXEC_TIMEOUT:
        case PCMK_EXEC_NOT_SUPPORTED:
        case PCMK_EXEC_INVALID:
            break; // Not done, do error handling

        default: // No other value should be possible at this point
            break;
    }

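    /* Everything that reaches this point is a failure. Check whether the
     * configured failure handling allows it to be ignored.
     */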
    unpack_failure_handling(&history, &failure_strategy, &fail_role);
    if ((failure_strategy == pcmk__on_fail_ignore)
        || ((failure_strategy == pcmk__on_fail_restart_container)
            && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {

        char *last_change_s = last_change_str(xml_op);

        pcmk__warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
                   QB_XS " %s",
                   history.task, crm_exit_str(history.exit_status),
                   (pcmk__str_empty(history.exit_reason)? "" : ": "),
                   pcmk__s(history.exit_reason, ""), rsc->id,
                   pcmk__node_name(node), last_change_s, history.id);
        free(last_change_s);

        update_resource_state(&history, history.expected_exit_status,
                              *last_failure, on_fail);
        pcmk__xe_set(xml_op, PCMK_XA_UNAME, node->priv->name);
        pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);

        record_failed_op(&history);

        if ((failure_strategy == pcmk__on_fail_restart_container)
            && (*on_fail <= pcmk__on_fail_restart)) {
            *on_fail = failure_strategy;
        }

    } else {
        unpack_rsc_op_failure(&history, failure_strategy, fail_role,
                              last_failure, on_fail);

        if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
            uint8_t log_level = LOG_ERR;

            if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
                log_level = LOG_NOTICE;
            }
            do_crm_log(log_level,
                       "Preventing %s from restarting on %s because "
                       "of hard failure (%s%s%s) " QB_XS " %s",
                       parent->id, pcmk__node_name(node),
                       crm_exit_str(history.exit_status),
                       (pcmk__str_empty(history.exit_reason)? "" : ": "),
                       pcmk__s(history.exit_reason, ""), history.id);
            resource_location(parent, node, -PCMK_SCORE_INFINITY,
                              "hard-error", rsc->priv->scheduler);

        } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
            pcmk__sched_err(rsc->priv->scheduler,
                            "Preventing %s from restarting anywhere because "
                            "of fatal failure (%s%s%s) " QB_XS " %s",
                            parent->id, crm_exit_str(history.exit_status),
                            (pcmk__str_empty(history.exit_reason)? "" : ": "),
                            pcmk__s(history.exit_reason, ""), history.id);
            resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
                              "fatal-error", rsc->priv->scheduler);
        }
    }

done:
    pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
                    rsc->id, pcmk__node_name(node), history.id,
                    pcmk_role_text(rsc->priv->orig_role),
                    pcmk_role_text(rsc->priv->next_role));
}

/*!
 * \internal
 * \brief Insert a node attribute with value into a \c GHashTable
 *
 * \param[in,out] key        Key to insert (either freed or owned by
 *                           \p user_data upon return)
 * \param[in]     value      Value to insert (owned by \p user_data upon
 *                           return)
 * \param[in]     user_data  \c GHashTable to insert into
 *
 * \return \c TRUE (so that the entry is removed from the source table when
 *         used with \c g_hash_table_foreach_steal())
 */
static gboolean
insert_attr(gpointer key, gpointer value, gpointer user_data)
{
    GHashTable *table = user_data;

    g_hash_table_insert(table, key, value);
    return TRUE;
}

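/*!
 * \internal
 * \brief Unpack a node's attributes and utilization from its configuration
 *
 * This adds the built-in node attributes (node name and ID, whether the node
 * is the DC, the cluster name, and the site name) along with any instance
 * attributes and utilization in the node's XML configuration.
 *
 * \param[in]     xml_obj    Node's XML configuration element
 * \param[in,out] node       Node to add attributes to
 * \param[in]     overwrite  Whether unpacked instance attributes should
 *                           replace any existing values
 * \param[in,out] scheduler  Scheduler data
 */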
static void
add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
               pcmk_scheduler_t *scheduler)
{
    const char *cluster_name = NULL;
    const char *dc_id = pcmk__xe_get(scheduler->input, PCMK_XA_DC_UUID);
    const pcmk_rule_input_t rule_input = {
        .now = scheduler->priv->now,
    };

    pcmk__insert_dup(node->priv->attrs, CRM_ATTR_UNAME, node->priv->name);

    pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);

    if ((scheduler->dc_node == NULL)
        && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {

        scheduler->dc_node = node;
        pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);

    } else if (!pcmk__same_node(node, scheduler->dc_node)) {
        pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
    }

    cluster_name = g_hash_table_lookup(scheduler->priv->options,
                                       PCMK_OPT_CLUSTER_NAME);
    if (cluster_name != NULL) {
        pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
                         cluster_name);
    }

    if (overwrite) {
        /* @TODO Try to reorder some unpacking so that we don't need the
         * overwrite argument or to unpack into a temporary table
         */
        GHashTable *unpacked = pcmk__strkey_table(free, free);

        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
                                   &rule_input, unpacked, NULL, scheduler);
        g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
        g_hash_table_destroy(unpacked);

    } else {
        pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
                                   &rule_input, node->priv->attrs, NULL,
                                   scheduler);
    }

    pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_input,
                               node->priv->utilization, NULL, scheduler);

    if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
                        pcmk__rsc_node_current) == NULL) {
        const char *site_name = pcmk__node_attr(node, "site-name", NULL,
                                                pcmk__rsc_node_current);

        if (site_name != NULL) {
            pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME,
                             site_name);

        } else if (cluster_name != NULL) {
            /* Default to cluster-name if unset */
            pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME,
                             cluster_name);
        }
    }
}

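/*!
 * \internal
 * \brief Extract a resource's action history entries from node state XML
 *
 * \param[in]     node           Name of node that history is for
 * \param[in]     rsc            ID of resource that history is for
 * \param[in,out] rsc_entry      Resource's \c PCMK__XE_LRM_RESOURCE XML
 * \param[in]     active_filter  If \c TRUE, return only entries from the
 *                               resource's most recent start onward (none if
 *                               it has since been stopped)
 *
 * \return List of the resource's \c PCMK__XE_LRM_RSC_OP entries, sorted by
 *         call ID
 */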
static GList *
extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry,
                   gboolean active_filter)
{
    int counter = -1;
    int stop_index = -1;
    int start_index = -1;

    xmlNode *rsc_op = NULL;

    GList *gIter = NULL;
    GList *op_list = NULL;
    GList *sorted_op_list = NULL;

    /* Extract operations, tagging each with its resource ID and node name */
    for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
                                       NULL);
         rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {

        pcmk__xe_set(rsc_op, PCMK_XA_RESOURCE, rsc);
        pcmk__xe_set(rsc_op, PCMK_XA_UNAME, node);
        op_list = g_list_prepend(op_list, rsc_op);
    }

    if (op_list == NULL) {
        /* If there are no operations, there is nothing to do */
        return NULL;
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);

    /* Return all operations if not filtering for active ones */
    if (active_filter == FALSE) {
        return sorted_op_list;
    }

    op_list = NULL;

    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        counter++;

        if (start_index < stop_index) {
            pcmk__trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
            break;

        } else if (counter < start_index) {
            pcmk__trace("Skipping %s: old", pcmk__xe_id(rsc_op));
            continue;
        }
        op_list = g_list_append(op_list, rsc_op);
    }

    g_list_free(sorted_op_list);
    return op_list;
}

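/*!
 * \brief Find resource action history entries in scheduler input XML
 *
 * As a side effect, this determines the online status of each node whose
 * history is scanned.
 *
 * \param[in]     rsc            If not NULL, include only history for the
 *                               resource with this ID
 * \param[in]     node           If not NULL, include only history for the
 *                               node with this name
 * \param[in]     active_filter  If \c TRUE, include only active operations
 * \param[in,out] scheduler      Scheduler data
 *
 * \return List of matching \c PCMK__XE_LRM_RSC_OP history entries
 */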
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
                pcmk_scheduler_t *scheduler)
{
    GList *output = NULL;
    GList *intermediate = NULL;

    xmlNode *tmp = NULL;
    xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
                                           NULL, NULL);

    pcmk_node_t *this_node = NULL;

    xmlNode *node_state = NULL;

    CRM_CHECK(status != NULL, return NULL);

    for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
                                           NULL);
         node_state != NULL;
         node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {

        const char *uname = pcmk__xe_get(node_state, PCMK_XA_UNAME);

        if ((node != NULL) && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
            continue;
        }

        this_node = pcmk_find_node(scheduler, uname);
        if (this_node == NULL) {
            CRM_LOG_ASSERT(this_node != NULL);
            continue;

        } else if (pcmk__is_pacemaker_remote_node(this_node)) {
            determine_remote_online_status(scheduler, this_node);

        } else {
            determine_online_status(node_state, this_node, scheduler);
        }

        if (this_node->details->online
            || pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {

            /* Offline nodes run no resources if fencing is disabled. If
             * fencing is enabled, we need to ensure that resource start events
             * happen after the fencing event.
             */
            xmlNode *lrm_rsc = NULL;

            tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL);
            tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
                                       NULL);

            for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
                                                NULL, NULL);
                 lrm_rsc != NULL;
                 lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {

                const char *rsc_id = pcmk__xe_get(lrm_rsc, PCMK_XA_ID);

                if ((rsc != NULL)
                    && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
                    continue;
                }

                intermediate = extract_operations(uname, rsc_id, lrm_rsc,
                                                  active_filter);
                output = g_list_concat(output, intermediate);
            }
        }
    }

    return output;
}