1 /*
2 * Copyright 2004-2023 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <glib.h>
13
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18
19 /*!
20 * \internal
21 * \brief Add the expected result to a newly created probe
22 *
23 * \param[in,out] probe Probe action to add expected result to
24 * \param[in] rsc Resource that probe is for
25 * \param[in] node Node that probe will run on
26 */
27 static void
28 add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
29 const pcmk_node_t *node)
30 {
31 // Check whether resource is currently active on node
32 pcmk_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
33
34 // The expected result is what we think the resource's current state is
35 if (running == NULL) {
36 pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
37
38 } else if (rsc->role == pcmk_role_promoted) {
39 pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
40 }
41 }
42
43 /*!
44 * \internal
45 * \brief Create any needed robes on a node for a list of resources
46 *
47 * \param[in,out] rscs List of resources to create probes for
48 * \param[in,out] node Node to create probes on
49 *
50 * \return true if any probe was created, otherwise false
51 */
52 bool
53 pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
54 {
55 bool any_created = false;
56
57 for (GList *iter = rscs; iter != NULL; iter = iter->next) {
58 pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
59
60 if (rsc->cmds->create_probe(rsc, node)) {
61 any_created = true;
62 }
63 }
64 return any_created;
65 }
66
67 /*!
68 * \internal
69 * \brief Order one resource's start after another's start-up probe
70 *
71 * \param[in,out] rsc1 Resource that might get start-up probe
72 * \param[in] rsc2 Resource that might be started
73 */
74 static void
75 probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
76 {
77 if ((rsc1->allocated_to != NULL)
78 && (g_hash_table_lookup(rsc1->known_on,
79 rsc1->allocated_to->details->id) == NULL)) {
80
81 pcmk__new_ordering(rsc1,
82 pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0),
83 NULL,
84 rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0),
85 NULL,
86 pcmk__ar_ordered, rsc1->cluster);
87 }
88 }
89
90 /*!
91 * \internal
92 * \brief Check whether a guest resource will stop
93 *
94 * \param[in] node Guest node to check
95 *
96 * \return true if guest resource will likely stop, otherwise false
97 */
98 static bool
99 guest_resource_will_stop(const pcmk_node_t *node)
100 {
101 const pcmk_resource_t *guest_rsc = node->details->remote_rsc->container;
102
103 /* Ideally, we'd check whether the guest has a required stop, but that
104 * information doesn't exist yet, so approximate it ...
105 */
106 return node->details->remote_requires_reset
107 || node->details->unclean
108 || pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
109 || (guest_rsc->next_role == pcmk_role_stopped)
110
111 // Guest is moving
112 || ((guest_rsc->role > pcmk_role_stopped)
113 && (guest_rsc->allocated_to != NULL)
114 && (pe_find_node(guest_rsc->running_on,
115 guest_rsc->allocated_to->details->uname) == NULL));
116 }
117
118 /*!
119 * \internal
120 * \brief Create a probe action for a resource on a node
121 *
122 * \param[in,out] rsc Resource to create probe for
123 * \param[in,out] node Node to create probe on
124 *
125 * \return Newly created probe action
126 */
127 static pcmk_action_t *
128 probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
129 {
130 pcmk_action_t *probe = NULL;
131 char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
132
133 crm_debug("Scheduling probe of %s %s on %s",
134 role2text(rsc->role), rsc->id, pe__node_name(node));
135
136 probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
137 rsc->cluster);
138 pe__clear_action_flags(probe, pcmk_action_optional);
139
140 pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
141 add_expected_result(probe, rsc, node);
142 return probe;
143 }
144
145 /*!
146 * \internal
147 * \brief Create probes for a resource on a node, if needed
148 *
149 * \brief Schedule any probes needed for a resource on a node
150 *
151 * \param[in,out] rsc Resource to create probe for
152 * \param[in,out] node Node to create probe on
153 *
154 * \return true if any probe was created, otherwise false
155 */
156 bool
157 pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
158 {
159 uint32_t flags = pcmk__ar_ordered;
160 pcmk_action_t *probe = NULL;
161 pcmk_node_t *allowed = NULL;
162 pcmk_resource_t *top = uber_parent(rsc);
163 const char *reason = NULL;
164
165 CRM_ASSERT((rsc != NULL) && (node != NULL));
166
167 if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_probe_resources)) {
168 reason = "start-up probes are disabled";
169 goto no_probe;
170 }
171
172 if (pe__is_guest_or_remote_node(node)) {
173 const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
174
175 if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
176 reason = "Pacemaker Remote nodes cannot run stonith agents";
177 goto no_probe;
178
179 } else if (pe__is_guest_node(node)
180 && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
181 reason = "guest nodes cannot run resources containing guest nodes";
182 goto no_probe;
183
184 } else if (rsc->is_remote_node) {
185 reason = "Pacemaker Remote nodes cannot host remote connections";
186 goto no_probe;
187 }
188 }
189
190 // If this is a collective resource, probes are created for its children
191 if (rsc->children != NULL) {
192 return pcmk__probe_resource_list(rsc->children, node);
193 }
194
195 if ((rsc->container != NULL) && !rsc->is_remote_node) {
196 reason = "resource is inside a container";
197 goto no_probe;
198
199 } else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
200 reason = "resource is orphaned";
201 goto no_probe;
202
203 } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
204 reason = "resource state is already known";
205 goto no_probe;
206 }
207
208 allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
209
210 if (rsc->exclusive_discover || top->exclusive_discover) {
211 // Exclusive discovery is enabled ...
212
213 if (allowed == NULL) {
214 // ... but this node is not allowed to run the resource
215 reason = "resource has exclusive discovery but is not allowed "
216 "on node";
217 goto no_probe;
218
219 } else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
220 // ... but no constraint marks this node for discovery of resource
221 reason = "resource has exclusive discovery but is not enabled "
222 "on node";
223 goto no_probe;
224 }
225 }
226
227 if (allowed == NULL) {
228 allowed = node;
229 }
230 if (allowed->rsc_discover_mode == pcmk_probe_never) {
231 reason = "node has discovery disabled";
232 goto no_probe;
233 }
234
235 if (pe__is_guest_node(node)) {
236 pcmk_resource_t *guest = node->details->remote_rsc->container;
237
238 if (guest->role == pcmk_role_stopped) {
239 // The guest is stopped, so we know no resource is active there
240 reason = "node's guest is stopped";
241 probe_then_start(guest, top);
242 goto no_probe;
243
244 } else if (guest_resource_will_stop(node)) {
245 reason = "node's guest will stop";
246
247 // Order resource start after guest stop (in case it's restarting)
248 pcmk__new_ordering(guest,
249 pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
250 NULL, top,
251 pcmk__op_key(top->id, PCMK_ACTION_START, 0),
252 NULL, pcmk__ar_ordered, rsc->cluster);
253 goto no_probe;
254 }
255 }
256
257 // We've eliminated all cases where a probe is not needed, so now it is
258 probe = probe_action(rsc, node);
259
260 /* Below, we will order the probe relative to start or reload. If this is a
261 * clone instance, the start or reload is for the entire clone rather than
262 * just the instance. Otherwise, the start or reload is for the resource
263 * itself.
264 */
265 if (!pe_rsc_is_clone(top)) {
266 top = rsc;
267 }
268
269 /* Prevent a start if the resource can't be probed, but don't cause the
270 * resource or entire clone to stop if already active.
271 */
272 if (!pcmk_is_set(probe->flags, pcmk_action_runnable)
273 && (top->running_on == NULL)) {
274 pe__set_order_flags(flags, pcmk__ar_unrunnable_first_blocks);
275 }
276
277 // Start or reload after probing the resource
278 pcmk__new_ordering(rsc, NULL, probe,
279 top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
280 flags, rsc->cluster);
281 pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
282 pcmk__ar_ordered, rsc->cluster);
283
284 return true;
285
286 no_probe:
287 pe_rsc_trace(rsc,
288 "Skipping probe for %s on %s because %s",
289 rsc->id, node->details->id, reason);
290 return false;
291 }
292
293 /*!
294 * \internal
295 * \brief Check whether a probe should be ordered before another action
296 *
297 * \param[in] probe Probe action to check
298 * \param[in] then Other action to check
299 *
300 * \return true if \p probe should be ordered before \p then, otherwise false
301 */
302 static bool
303 probe_needed_before_action(const pcmk_action_t *probe,
304 const pcmk_action_t *then)
305 {
306 // Probes on a node are performed after unfencing it, not before
307 if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
308 && pe__same_node(probe->node, then->node)) {
309 const char *op = g_hash_table_lookup(then->meta, "stonith_action");
310
311 if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
312 return false;
313 }
314 }
315
316 // Probes should be done on a node before shutting it down
317 if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
318 && (probe->node != NULL) && (then->node != NULL)
319 && !pe__same_node(probe->node, then->node)) {
320 return false;
321 }
322
323 // Otherwise probes should always be done before any other action
324 return true;
325 }
326
327 /*!
328 * \internal
329 * \brief Add implicit "probe then X" orderings for "stop then X" orderings
330 *
331 * If the state of a resource is not known yet, a probe will be scheduled,
332 * expecting a "not running" result. If the probe fails, a stop will not be
333 * scheduled until the next transition. Thus, if there are ordering constraints
334 * like "stop this resource then do something else that's not for the same
335 * resource", add implicit "probe this resource then do something" equivalents
336 * so the relation is upheld until we know whether a stop is needed.
337 *
338 * \param[in,out] scheduler Scheduler data
339 */
340 static void
341 add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
342 {
343 for (GList *iter = scheduler->ordering_constraints; iter != NULL;
344 iter = iter->next) {
345
346 pe__ordering_t *order = iter->data;
347 uint32_t order_flags = pcmk__ar_ordered;
348 GList *probes = NULL;
349 GList *then_actions = NULL;
350 pcmk_action_t *first = NULL;
351 pcmk_action_t *then = NULL;
352
353 // Skip disabled orderings
354 if (order->flags == pcmk__ar_none) {
355 continue;
356 }
357
358 // Skip non-resource orderings, and orderings for the same resource
359 if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
360 continue;
361 }
362
363 // Skip invalid orderings (shouldn't be possible)
364 first = order->lh_action;
365 then = order->rh_action;
366 if (((first == NULL) && (order->lh_action_task == NULL))
367 || ((then == NULL) && (order->rh_action_task == NULL))) {
368 continue;
369 }
370
371 // Skip orderings for first actions other than stop
372 if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
373 pcmk__str_none)) {
374 continue;
375 } else if ((first == NULL)
376 && !pcmk__ends_with(order->lh_action_task,
377 "_" PCMK_ACTION_STOP "_0")) {
378 continue;
379 }
380
381 /* Do not imply a probe ordering for a resource inside of a stopping
382 * container. Otherwise, it might introduce a transition loop, since a
383 * probe could be scheduled after the container starts again.
384 */
385 if ((order->rh_rsc != NULL)
386 && (order->lh_rsc->container == order->rh_rsc)) {
387
388 if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
389 pcmk__str_none)) {
390 continue;
391 } else if ((then == NULL)
392 && pcmk__ends_with(order->rh_action_task,
393 "_" PCMK_ACTION_STOP "_0")) {
394 continue;
395 }
396 }
397
398 // Preserve certain order options for future filtering
399 if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) {
400 pe__set_order_flags(order_flags, pcmk__ar_if_first_unmigratable);
401 }
402 if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) {
403 pe__set_order_flags(order_flags, pcmk__ar_if_on_same_node);
404 }
405
406 // Preserve certain order types for future filtering
407 if ((order->flags == pcmk__ar_if_required_on_same_node)
408 || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
409 order_flags = order->flags;
410 }
411
412 // List all scheduled probes for the first resource
413 probes = pe__resource_actions(order->lh_rsc, NULL, PCMK_ACTION_MONITOR,
414 FALSE);
415 if (probes == NULL) { // There aren't any
416 continue;
417 }
418
419 // List all relevant "then" actions
420 if (then != NULL) {
421 then_actions = g_list_prepend(NULL, then);
422
423 } else if (order->rh_rsc != NULL) {
424 then_actions = find_actions(order->rh_rsc->actions,
425 order->rh_action_task, NULL);
426 if (then_actions == NULL) { // There aren't any
427 g_list_free(probes);
428 continue;
429 }
430 }
431
432 crm_trace("Implying 'probe then' orderings for '%s then %s' "
433 "(id=%d, type=%.6x)",
434 ((first == NULL)? order->lh_action_task : first->uuid),
435 ((then == NULL)? order->rh_action_task : then->uuid),
436 order->id, order->flags);
437
438 for (GList *probe_iter = probes; probe_iter != NULL;
439 probe_iter = probe_iter->next) {
440
441 pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
442
443 for (GList *then_iter = then_actions; then_iter != NULL;
444 then_iter = then_iter->next) {
445
446 pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
447
448 if (probe_needed_before_action(probe, then)) {
449 order_actions(probe, then, order_flags);
450 }
451 }
452 }
453
454 g_list_free(then_actions);
455 g_list_free(probes);
456 }
457 }
458
459 /*!
460 * \internal
461 * \brief Add necessary orderings between probe and starts of clone instances
462 *
463 * , in additon to the ordering with the parent resource added upon creating
464 * the probe.
465 *
466 * \param[in,out] probe Probe as 'first' action in an ordering
467 * \param[in,out] after 'then' action wrapper in the ordering
468 */
469 static void
470 add_start_orderings_for_probe(pcmk_action_t *probe,
471 pcmk__related_action_t *after)
472 {
473 uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks;
474
475 /* Although the ordering between the probe of the clone instance and the
476 * start of its parent has been added in pcmk__probe_rsc_on_node(), we
477 * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
478 * as long as any of the clone instances are running to prevent them from
479 * being unexpectedly stopped.
480 *
481 * On the other hand, we still need to prevent any inactive instances from
482 * starting unless the probe is runnable so that we don't risk starting too
483 * many instances before we know the state on all nodes.
484 */
485 if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
486 || pcmk_is_set(probe->flags, pcmk_action_runnable)
487 // The order type is already enforced for its parent.
488 || pcmk_is_set(after->type, pcmk__ar_unrunnable_first_blocks)
489 || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
490 || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
491 pcmk__str_none)) {
492 return;
493 }
494
495 crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
496 "then instances of %s@%s'",
497 probe->uuid, pe__node_name(probe->node),
498 after->action->uuid, pe__node_name(after->action->node));
499
500 for (GList *then_iter = after->action->actions_after; then_iter != NULL;
501 then_iter = then_iter->next) {
502
503 pcmk__related_action_t *then = then_iter->data;
504
505 if (then->action->rsc->running_on
506 || (pe__const_top_resource(then->action->rsc, false)
507 != after->action->rsc)
508 || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
509 pcmk__str_none)) {
510 continue;
511 }
512
513 crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
514 "then %s@%s' (type=%#.6x)",
515 probe->uuid, pe__node_name(probe->node),
516 then->action->uuid, pe__node_name(then->action->node), flags);
517
518 /* Prevent the instance from starting if the instance can't, but don't
519 * cause any other intances to stop if already active.
520 */
521 order_actions(probe, then->action, flags);
522 }
523
524 return;
525 }
526
527 /*!
528 * \internal
529 * \brief Order probes before restarts and re-promotes
530 *
531 * If a given ordering is a "probe then start" or "probe then promote" ordering,
532 * add an implicit "probe then stop/demote" ordering in case the action is part
533 * of a restart/re-promote, and do the same recursively for all actions ordered
534 * after the "then" action.
535 *
536 * \param[in,out] probe Probe as 'first' action in an ordering
537 * \param[in,out] after 'then' action in the ordering
538 */
539 static void
540 add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
541 {
542 GList *iter = NULL;
543 bool interleave = false;
544 pcmk_resource_t *compatible_rsc = NULL;
545
546 // Validate that this is a resource probe followed by some action
547 if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
548 || (probe->rsc->variant != pcmk_rsc_variant_primitive)
549 || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
550 return;
551 }
552
553 // Avoid running into any possible loop
554 if (pcmk_is_set(after->flags, pcmk_action_detect_loop)) {
555 return;
556 }
557 pe__set_action_flags(after, pcmk_action_detect_loop);
558
559 crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
560 probe->uuid, pe__node_name(probe->node),
561 after->uuid, pe__node_name(after->node));
562
563 /* Add restart orderings if "then" is for a different primitive.
564 * Orderings for collective resources will be added later.
565 */
566 if ((after->rsc != NULL)
567 && (after->rsc->variant == pcmk_rsc_variant_primitive)
568 && (probe->rsc != after->rsc)) {
569
570 GList *then_actions = NULL;
571
572 if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
573 then_actions = pe__resource_actions(after->rsc, NULL,
574 PCMK_ACTION_STOP, FALSE);
575
576 } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
577 pcmk__str_none)) {
578 then_actions = pe__resource_actions(after->rsc, NULL,
579 PCMK_ACTION_DEMOTE, FALSE);
580 }
581
582 for (iter = then_actions; iter != NULL; iter = iter->next) {
583 pcmk_action_t *then = (pcmk_action_t *) iter->data;
584
585 // Skip pseudo-actions (for example, those implied by fencing)
586 if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
587 order_actions(probe, then, pcmk__ar_ordered);
588 }
589 }
590 g_list_free(then_actions);
591 }
592
593 /* Detect whether "then" is an interleaved clone action. For these, we want
594 * to add orderings only for the relevant instance.
595 */
596 if ((after->rsc != NULL)
597 && (after->rsc->variant > pcmk_rsc_variant_group)) {
598 const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
599 XML_RSC_ATTR_INTERLEAVE);
600
601 interleave = crm_is_true(interleave_s);
602 if (interleave) {
603 compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
604 after->rsc,
605 pcmk_role_unknown,
606 false);
607 }
608 }
609
610 /* Now recursively do the same for all actions ordered after "then". This
611 * also handles collective resources since the collective action will be
612 * ordered before its individual instances' actions.
613 */
614 for (iter = after->actions_after; iter != NULL; iter = iter->next) {
615 pcmk__related_action_t *after_wrapper = iter->data;
616
617 /* pcmk__ar_first_implies_then is the reason why a required A.start
618 * implies/enforces B.start to be required too, which is the cause of
619 * B.restart/re-promote.
620 *
621 * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
622 * only used for unfencing case, which tends to introduce transition
623 * loops...
624 */
625 if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
626 /* The order type between a group/clone and its child such as
627 * B.start-> B_child.start is:
628 * pcmk__ar_then_implies_first_graphed
629 * |pcmk__ar_unrunnable_first_blocks
630 *
631 * Proceed through the ordering chain and build dependencies with
632 * its children.
633 */
634 if ((after->rsc == NULL)
635 || (after->rsc->variant < pcmk_rsc_variant_group)
636 || (probe->rsc->parent == after->rsc)
637 || (after_wrapper->action->rsc == NULL)
638 || (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
639 || (after->rsc != after_wrapper->action->rsc->parent)) {
640 continue;
641 }
642
643 /* Proceed to the children of a group or a non-interleaved clone.
644 * For an interleaved clone, proceed only to the relevant child.
645 */
646 if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
647 && ((compatible_rsc == NULL)
648 || (compatible_rsc != after_wrapper->action->rsc))) {
649 continue;
650 }
651 }
652
653 crm_trace("Recursively adding probe restart orderings for "
654 "'%s@%s then %s@%s' (type=%#.6x)",
655 after->uuid, pe__node_name(after->node),
656 after_wrapper->action->uuid,
657 pe__node_name(after_wrapper->action->node),
658 after_wrapper->type);
659
660 add_restart_orderings_for_probe(probe, after_wrapper->action);
661 }
662 }
663
664 /*!
665 * \internal
666 * \brief Clear the tracking flag on all scheduled actions
667 *
668 * \param[in,out] scheduler Scheduler data
669 */
670 static void
671 clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
672 {
673 for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
674 pcmk_action_t *action = iter->data;
675
676 pe__clear_action_flags(action, pcmk_action_detect_loop);
677 }
678 }
679
680 /*!
681 * \internal
682 * \brief Add start and restart orderings for probes scheduled for a resource
683 *
684 * \param[in,out] data Resource whose probes should be ordered
685 * \param[in] user_data Unused
686 */
687 static void
688 add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
689 {
690 pcmk_resource_t *rsc = data;
691 GList *probes = NULL;
692
693 // For collective resources, order each instance recursively
694 if (rsc->variant != pcmk_rsc_variant_primitive) {
695 g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
696 NULL);
697 return;
698 }
699
700 // Find all probes for given resource
701 probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
702
703 // Add probe restart orderings for each probe found
704 for (GList *iter = probes; iter != NULL; iter = iter->next) {
705 pcmk_action_t *probe = (pcmk_action_t *) iter->data;
706
707 for (GList *then_iter = probe->actions_after; then_iter != NULL;
708 then_iter = then_iter->next) {
709
710 pcmk__related_action_t *then = then_iter->data;
711
712 add_start_orderings_for_probe(probe, then);
713 add_restart_orderings_for_probe(probe, then->action);
714 clear_actions_tracking_flag(rsc->cluster);
715 }
716 }
717
718 g_list_free(probes);
719 }
720
721 /*!
722 * \internal
723 * \brief Add "A then probe B" orderings for "A then B" orderings
724 *
725 * \param[in,out] scheduler Scheduler data
726 *
727 * \note This function is currently disabled (see next comment).
728 */
729 static void
730 order_then_probes(pcmk_scheduler_t *scheduler)
731 {
732 #if 0
733 /* Given an ordering "A then B", we would prefer to wait for A to be started
734 * before probing B.
735 *
736 * For example, if A is a filesystem which B can't even run without, it
737 * would be helpful if the author of B's agent could assume that A is
738 * running before B.monitor will be called.
739 *
740 * However, we can't _only_ probe after A is running, otherwise we wouldn't
741 * detect the state of B if A could not be started. We can't even do an
742 * opportunistic version of this, because B may be moving:
743 *
744 * A.stop -> A.start -> B.probe -> B.stop -> B.start
745 *
746 * and if we add B.stop -> A.stop here, we get a loop:
747 *
748 * A.stop -> A.start -> B.probe -> B.stop -> A.stop
749 *
750 * We could kill the "B.probe -> B.stop" dependency, but that could mean
751 * stopping B "too" soon, because B.start must wait for the probe, and
752 * we don't want to stop B if we can't start it.
753 *
754 * We could add the ordering only if A is an anonymous clone with
755 * clone-max == node-max (since we'll never be moving it). However, we could
756 * still be stopping one instance at the same time as starting another.
757 *
758 * The complexity of checking for allowed conditions combined with the ever
759 * narrowing use case suggests that this code should remain disabled until
760 * someone gets smarter.
761 */
762 for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
763 pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
764
765 pcmk_action_t *start = NULL;
766 GList *actions = NULL;
767 GList *probes = NULL;
768
769 actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
770
771 if (actions) {
772 start = actions->data;
773 g_list_free(actions);
774 }
775
776 if (start == NULL) {
777 crm_err("No start action for %s", rsc->id);
778 continue;
779 }
780
781 probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
782
783 for (actions = start->actions_before; actions != NULL;
784 actions = actions->next) {
785
786 pcmk__related_action_t *before = actions->data;
787
788 pcmk_action_t *first = before->action;
789 pcmk_resource_t *first_rsc = first->rsc;
790
791 if (first->required_runnable_before) {
792 for (GList *clone_actions = first->actions_before;
793 clone_actions != NULL;
794 clone_actions = clone_actions->next) {
795
796 before = clone_actions->data;
797
798 crm_trace("Testing '%s then %s' for %s",
799 first->uuid, before->action->uuid, start->uuid);
800
801 CRM_ASSERT(before->action->rsc != NULL);
802 first_rsc = before->action->rsc;
803 break;
804 }
805
806 } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
807 pcmk__str_none)) {
808 crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
809 }
810
811 if (first_rsc == NULL) {
812 continue;
813
814 } else if (pe__const_top_resource(first_rsc, false)
815 == pe__const_top_resource(start->rsc, false)) {
816 crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
817 continue;
818
819 } else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc,
820 false))) {
821 crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
822 continue;
823 }
824
825 crm_err("Applying %s before %s %d", first->uuid, start->uuid,
826 pe__const_top_resource(first_rsc, false)->variant);
827
828 for (GList *probe_iter = probes; probe_iter != NULL;
829 probe_iter = probe_iter->next) {
830
831 pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
832
833 crm_err("Ordering %s before %s", first->uuid, probe->uuid);
834 order_actions(first, probe, pcmk__ar_ordered);
835 }
836 }
837 }
838 #endif
839 }
840
841 void
842 pcmk__order_probes(pcmk_scheduler_t *scheduler)
843 {
844 // Add orderings for "probe then X"
845 g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
846 NULL);
847 add_probe_orderings_for_stops(scheduler);
848
849 order_then_probes(scheduler);
850 }
851
852 /*!
853 * \internal
854 * \brief Schedule any probes needed
855 *
856 * \param[in,out] scheduler Scheduler data
857 *
858 * \note This may also schedule fencing of failed remote nodes.
859 */
860 void
861 pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
862 {
863 // Schedule probes on each node in the cluster as needed
864 for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
865 pcmk_node_t *node = (pcmk_node_t *) iter->data;
866 const char *probed = NULL;
867
868 if (!node->details->online) { // Don't probe offline nodes
869 if (pcmk__is_failed_remote_node(node)) {
870 pe_fence_node(scheduler, node,
871 "the connection is unrecoverable", FALSE);
872 }
873 continue;
874
875 } else if (node->details->unclean) { // ... or nodes that need fencing
876 continue;
877
878 } else if (!node->details->rsc_discovery_enabled) {
879 // The user requested that probes not be done on this node
880 continue;
881 }
882
883 /* This is no longer needed for live clusters, since the probe_complete
884 * node attribute will never be in the CIB. However this is still useful
885 * for processing old saved CIBs (< 1.1.14), including the
886 * reprobe-target_rc regression test.
887 */
888 probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
889 if (probed != NULL && crm_is_true(probed) == FALSE) {
890 pcmk_action_t *probe_op = NULL;
891
892 probe_op = custom_action(NULL,
893 crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
894 node->details->uname),
895 CRM_OP_REPROBE, node, FALSE, scheduler);
896 add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT,
897 XML_BOOLEAN_TRUE);
898 continue;
899 }
900
901 // Probe each resource in the cluster on this node, as needed
902 pcmk__probe_resource_list(scheduler->resources, node);
903 }
904 }
905