1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdint.h> // uint8_t, uint32_t
14
15 #include <crm/common/xml.h>
16 #include <pacemaker-internal.h>
17
18 #include "libpacemaker_private.h"
19
20 static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
21 bool optional);
22 static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
23 bool optional);
24 static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
25 bool optional);
26 static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
27 bool optional);
28 static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node,
29 bool optional);
30
31 #define RSC_ROLE_MAX (pcmk_role_promoted + 1)
32
33 static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
34 /* This array lists the immediate next role when transitioning from one role
35 * to a target role. For example, when going from Stopped to Promoted, the
36 * next role is Unpromoted, because the resource must be started before it
37 * can be promoted. The current state then becomes Started, which is fed
38 * into this array again, giving a next role of Promoted.
39 *
40 * Current role Immediate next role Final target role
41 * ------------ ------------------- -----------------
42 */
43 /* Unknown */ { pcmk_role_unknown, /* Unknown */
44 pcmk_role_stopped, /* Stopped */
45 pcmk_role_stopped, /* Started */
46 pcmk_role_stopped, /* Unpromoted */
47 pcmk_role_stopped, /* Promoted */
48 },
49 /* Stopped */ { pcmk_role_stopped, /* Unknown */
50 pcmk_role_stopped, /* Stopped */
51 pcmk_role_started, /* Started */
52 pcmk_role_unpromoted, /* Unpromoted */
53 pcmk_role_unpromoted, /* Promoted */
54 },
55 /* Started */ { pcmk_role_stopped, /* Unknown */
56 pcmk_role_stopped, /* Stopped */
57 pcmk_role_started, /* Started */
58 pcmk_role_unpromoted, /* Unpromoted */
59 pcmk_role_promoted, /* Promoted */
60 },
61 /* Unpromoted */ { pcmk_role_stopped, /* Unknown */
62 pcmk_role_stopped, /* Stopped */
63 pcmk_role_stopped, /* Started */
64 pcmk_role_unpromoted, /* Unpromoted */
65 pcmk_role_promoted, /* Promoted */
66 },
67 /* Promoted */ { pcmk_role_stopped, /* Unknown */
68 pcmk_role_unpromoted, /* Stopped */
69 pcmk_role_unpromoted, /* Started */
70 pcmk_role_unpromoted, /* Unpromoted */
71 pcmk_role_promoted, /* Promoted */
72 },
73 };
74
75 /*!
76 * \internal
77 * \brief Function to schedule actions needed for a role change
78 *
79 * \param[in,out] rsc Resource whose role is changing
80 * \param[in,out] node Node where resource will be in its next role
81 * \param[in] optional Whether scheduled actions should be optional
82 */
83 typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node,
84 bool optional);
85
86 static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
87 /* This array lists the function needed to transition directly from one role
88 * to another. NULL indicates that nothing is needed.
89 *
90 * Current role Transition function Next role
91 * ------------ ------------------- ----------
92 */
93 /* Unknown */ { assert_role_error, /* Unknown */
94 stop_resource, /* Stopped */
95 assert_role_error, /* Started */
96 assert_role_error, /* Unpromoted */
97 assert_role_error, /* Promoted */
98 },
99 /* Stopped */ { assert_role_error, /* Unknown */
100 NULL, /* Stopped */
101 start_resource, /* Started */
102 start_resource, /* Unpromoted */
103 assert_role_error, /* Promoted */
104 },
105 /* Started */ { assert_role_error, /* Unknown */
106 stop_resource, /* Stopped */
107 NULL, /* Started */
108 NULL, /* Unpromoted */
109 promote_resource, /* Promoted */
110 },
111 /* Unpromoted */ { assert_role_error, /* Unknown */
112 stop_resource, /* Stopped */
113 stop_resource, /* Started */
114 NULL, /* Unpromoted */
115 promote_resource, /* Promoted */
116 },
117 /* Promoted */ { assert_role_error, /* Unknown */
118 demote_resource, /* Stopped */
119 demote_resource, /* Started */
120 demote_resource, /* Unpromoted */
121 NULL, /* Promoted */
122 },
123 };
124
125 /*!
126 * \internal
127 * \brief Get a list of a resource's allowed nodes sorted by node score
128 *
129 * \param[in] rsc Resource to check
130 *
131 * \return List of allowed nodes sorted by node score
132 */
133 static GList *
134 sorted_allowed_nodes(const pcmk_resource_t *rsc)
135 {
136 if (rsc->priv->allowed_nodes != NULL) {
137 GList *nodes = g_hash_table_get_values(rsc->priv->allowed_nodes);
138
139 if (nodes != NULL) {
140 return pcmk__sort_nodes(nodes, pcmk__current_node(rsc));
141 }
142 }
143 return NULL;
144 }
145
146 /*!
147 * \internal
148 * \brief Assign a resource to its best allowed node, if possible
149 *
150 * \param[in,out] rsc Resource to choose a node for
151 * \param[in] prefer If not \c NULL, prefer this node when all else
152 * equal
153 * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
154 * node, set next role to stopped and update
155 * existing actions
156 *
157 * \return true if \p rsc could be assigned to a node, otherwise false
158 *
159 * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
160 * completely undo the assignment. A successful assignment can be either
161 * undone or left alone as final. A failed assignment has the same effect
162 * as calling pcmk__unassign_resource(); there are no side effects on
163 * roles or actions.
164 */
165 static bool
166 assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
167 bool stop_if_fail)
168 {
169 GList *nodes = NULL;
170 pcmk_node_t *chosen = NULL;
171 pcmk_node_t *best = NULL;
172 const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
173
174 if (prefer == NULL) {
175 prefer = most_free_node;
176 }
177
178 if (!pcmk__is_set(rsc->flags, pcmk__rsc_unassigned)) {
179 // We've already finished assignment of resources to nodes
180 return rsc->priv->assigned_node != NULL;
181 }
182
183 // Sort allowed nodes by score
184 nodes = sorted_allowed_nodes(rsc);
185 if (nodes != NULL) {
186 best = (pcmk_node_t *) nodes->data; // First node has best score
187 }
188
189 if ((prefer != NULL) && (nodes != NULL)) {
190 // Get the allowed node version of prefer
191 chosen = g_hash_table_lookup(rsc->priv->allowed_nodes,
192 prefer->priv->id);
193
194 if (chosen == NULL) {
195 pcmk__rsc_trace(rsc, "Preferred node %s for %s was unknown",
196 pcmk__node_name(prefer), rsc->id);
197
198 /* Favor the preferred node as long as its score is at least as good as
199 * the best allowed node's.
200 *
201 * An alternative would be to favor the preferred node even if the best
202 * node is better, when the best node's score is less than INFINITY.
203 */
204 } else if (chosen->assign->score < best->assign->score) {
205 pcmk__rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
206 pcmk__node_name(chosen), rsc->id);
207 chosen = NULL;
208
209 } else if (!pcmk__node_available(chosen, true, false)) {
210 pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable",
211 pcmk__node_name(chosen), rsc->id);
212 chosen = NULL;
213
214 } else {
215 pcmk__rsc_trace(rsc,
216 "Chose preferred node %s for %s "
217 "(ignoring %d candidates)",
218 pcmk__node_name(chosen), rsc->id,
219 g_list_length(nodes));
220 }
221 }
222
223 if ((chosen == NULL) && (best != NULL)) {
224 /* Either there is no preferred node, or the preferred node is not
225 * suitable, but another node is allowed to run the resource.
226 */
227
228 chosen = best;
229
230 if (!pcmk__is_unique_clone(rsc->priv->parent)
231 && (chosen->assign->score > 0) // Zero not acceptable
232 && pcmk__node_available(chosen, false, false)) {
233 /* If the resource is already running on a node, prefer that node if
234 * it is just as good as the chosen node.
235 *
236 * We don't do this for unique clone instances, because
237 * pcmk__assign_instances() has already assigned instances to their
238 * running nodes when appropriate, and if we get here, we don't want
239 * remaining unassigned instances to prefer a node that's already
240 * running another instance.
241 */
242 pcmk_node_t *running = pcmk__current_node(rsc);
243
244 if (running == NULL) {
245 // Nothing to do
246
247 } else if (!pcmk__node_available(running, true, false)) {
248 pcmk__rsc_trace(rsc,
249 "Current node for %s (%s) can't run resources",
250 rsc->id, pcmk__node_name(running));
251
252 } else {
253 int nodes_with_best_score = 1;
254
255 for (GList *iter = nodes->next; iter; iter = iter->next) {
256 pcmk_node_t *allowed = (pcmk_node_t *) iter->data;
257
258 if (allowed->assign->score != chosen->assign->score) {
259 // The nodes are sorted by score, so no more are equal
260 break;
261 }
262 if (pcmk__same_node(allowed, running)) {
263 // Scores are equal, so prefer the current node
264 chosen = allowed;
265 }
266 nodes_with_best_score++;
267 }
268
269 if (nodes_with_best_score > 1) {
270 uint8_t log_level = LOG_INFO;
271
272 if (chosen->assign->score >= PCMK_SCORE_INFINITY) {
273 log_level = LOG_WARNING;
274 }
275 do_crm_log(log_level,
276 "Chose %s for %s from %d nodes with score %s",
277 pcmk__node_name(chosen), rsc->id,
278 nodes_with_best_score,
279 pcmk_readable_score(chosen->assign->score));
280 }
281 }
282 }
283
284 pcmk__rsc_trace(rsc, "Chose %s for %s from %d candidates",
285 pcmk__node_name(chosen), rsc->id, g_list_length(nodes));
286 }
287
288 pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
289 g_list_free(nodes);
290 return rsc->priv->assigned_node != NULL;
291 }
292
293 /*!
294 * \internal
295 * \brief Apply a "this with" colocation to a node's allowed node scores
296 *
297 * \param[in,out] colocation Colocation to apply
298 * \param[in,out] rsc Resource being assigned
299 */
300 static void
301 apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc)
302 {
303 GHashTable *archive = NULL;
304 pcmk_resource_t *other = colocation->primary;
305
306 // In certain cases, we will need to revert the node scores
|
(1) Event path: |
Condition "colocation->dependent_role >= pcmk_role_promoted", taking true branch. |
307 if ((colocation->dependent_role >= pcmk_role_promoted)
308 || ((colocation->score < 0)
309 && (colocation->score > -PCMK_SCORE_INFINITY))) {
310 archive = pcmk__copy_node_table(rsc->priv->allowed_nodes);
311 }
312
|
(2) Event path: |
Condition "pcmk__is_set(other->flags, pcmk__rsc_unassigned)", taking true branch. |
313 if (pcmk__is_set(other->flags, pcmk__rsc_unassigned)) {
|
(3) Event path: |
Switch case default. |
|
(4) Event path: |
Condition "trace_tag_cs == NULL", taking true branch. |
|
(5) Event path: |
Condition "crm_is_callsite_active(trace_tag_cs, _level, converted_tag)", taking false branch. |
314 pcmk__rsc_trace(rsc,
315 "%s: Assigning colocation %s primary %s first"
316 "(score=%d role=%s)",
317 rsc->id, colocation->id, other->id,
318 colocation->score,
319 pcmk_role_text(colocation->dependent_role));
320 other->priv->cmds->assign(other, NULL, true);
321 }
322
323 // Apply the colocation score to this resource's allowed node scores
324 rsc->priv->cmds->apply_coloc_score(rsc, other, colocation, true);
|
(6) Event path: |
Condition "archive != NULL", taking false branch. |
325 if ((archive != NULL)
326 && !pcmk__any_node_available(rsc->priv->allowed_nodes)) {
327 pcmk__rsc_info(rsc,
328 "%s: Reverting scores from colocation with %s "
329 "because no nodes allowed",
330 rsc->id, other->id);
331 g_hash_table_destroy(rsc->priv->allowed_nodes);
332 rsc->priv->allowed_nodes = archive;
333 archive = NULL;
334 }
335
|
CID (unavailable; MK=c3917fc22d2b0a513869079b689fe137) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(7) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(8) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
336 g_clear_pointer(&archive, g_hash_table_destroy);
337 }
338
339 /*!
340 * \internal
341 * \brief Update a Pacemaker Remote node once its connection has been assigned
342 *
343 * \param[in] connection Connection resource that has been assigned
344 */
345 static void
346 remote_connection_assigned(const pcmk_resource_t *connection)
347 {
348 pcmk_node_t *remote_node = pcmk_find_node(connection->priv->scheduler,
349 connection->id);
350
351 CRM_CHECK(remote_node != NULL, return);
352
353 if ((connection->priv->assigned_node != NULL)
354 && (connection->priv->next_role != pcmk_role_stopped)) {
355
356 pcmk__trace("Pacemaker Remote node %s will be online",
357 remote_node->priv->id);
358 remote_node->details->online = TRUE;
359 if (!pcmk__is_set(remote_node->priv->flags, pcmk__node_seen)) {
360 // Avoid unnecessary fence, since we will attempt connection
361 remote_node->details->unclean = FALSE;
362 }
363
364 } else {
365 pcmk__trace("Pacemaker Remote node %s will be shut down (%sassigned "
366 "connection's next role is %s)",
367 remote_node->priv->id,
368 ((connection->priv->assigned_node == NULL)? "un" : ""),
369 pcmk_role_text(connection->priv->next_role));
370 remote_node->details->shutdown = TRUE;
371 }
372 }
373
374 /*!
375 * \internal
376 * \brief Assign a primitive resource to a node
377 *
378 * \param[in,out] rsc Resource to assign to a node
379 * \param[in] prefer Node to prefer, if all else is equal
380 * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a
381 * node, set next role to stopped and update
382 * existing actions
383 *
384 * \return Node that \p rsc is assigned to, if assigned entirely to one node
385 *
386 * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
387 * completely undo the assignment. A successful assignment can be either
388 * undone or left alone as final. A failed assignment has the same effect
389 * as calling pcmk__unassign_resource(); there are no side effects on
390 * roles or actions.
391 */
392 pcmk_node_t *
393 pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
394 bool stop_if_fail)
395 {
396 GList *this_with_colocations = NULL;
397 GList *with_this_colocations = NULL;
398 GList *iter = NULL;
399 pcmk_resource_t *parent = NULL;
400 pcmk__colocation_t *colocation = NULL;
401 pcmk_scheduler_t *scheduler = NULL;
402
403 pcmk__assert(pcmk__is_primitive(rsc));
404 scheduler = rsc->priv->scheduler;
405 parent = rsc->priv->parent;
406
407 // Never assign a child without parent being assigned first
408 if ((parent != NULL) && !pcmk__is_set(parent->flags, pcmk__rsc_assigning)) {
409 pcmk__rsc_debug(rsc, "%s: Assigning parent %s first",
410 rsc->id, parent->id);
411 parent->priv->cmds->assign(parent, prefer, stop_if_fail);
412 }
413
414 if (!pcmk__is_set(rsc->flags, pcmk__rsc_unassigned)) {
415 // Assignment has already been done
416 const char *node_name = "no node";
417
418 if (rsc->priv->assigned_node != NULL) {
419 node_name = pcmk__node_name(rsc->priv->assigned_node);
420 }
421 pcmk__rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
422 return rsc->priv->assigned_node;
423 }
424
425 // Ensure we detect assignment loops
426 if (pcmk__is_set(rsc->flags, pcmk__rsc_assigning)) {
427 pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
428 return NULL;
429 }
430 pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning);
431
432 pe__show_node_scores(true, rsc, "Pre-assignment",
433 rsc->priv->allowed_nodes, scheduler);
434
435 this_with_colocations = pcmk__this_with_colocations(rsc);
436 with_this_colocations = pcmk__with_this_colocations(rsc);
437
438 // Apply mandatory colocations first, to satisfy as many as possible
439 for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
440 colocation = iter->data;
441
442 if ((colocation->score <= -PCMK_SCORE_INFINITY)
443 || (colocation->score >= PCMK_SCORE_INFINITY)) {
444 apply_this_with(colocation, rsc);
445 }
446 }
447 for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
448 colocation = iter->data;
449
450 if ((colocation->score <= -PCMK_SCORE_INFINITY)
451 || (colocation->score >= PCMK_SCORE_INFINITY)) {
452 pcmk__add_dependent_scores(colocation, rsc);
453 }
454 }
455
456 pe__show_node_scores(true, rsc, "Mandatory-colocations",
457 rsc->priv->allowed_nodes, scheduler);
458
459 // Then apply optional colocations
460 for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
461 colocation = iter->data;
462
463 if ((colocation->score > -PCMK_SCORE_INFINITY)
464 && (colocation->score < PCMK_SCORE_INFINITY)) {
465 apply_this_with(colocation, rsc);
466 }
467 }
468 for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
469 colocation = iter->data;
470
471 if ((colocation->score > -PCMK_SCORE_INFINITY)
472 && (colocation->score < PCMK_SCORE_INFINITY)) {
473 pcmk__add_dependent_scores(colocation, rsc);
474 }
475 }
476
477 g_list_free(this_with_colocations);
478 g_list_free(with_this_colocations);
479
480 if (rsc->priv->next_role == pcmk_role_stopped) {
481 pcmk__rsc_trace(rsc,
482 "Banning %s from all nodes because it will be stopped",
483 rsc->id);
484 resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
485 PCMK_META_TARGET_ROLE, scheduler);
486
487 } else if ((rsc->priv->next_role > rsc->priv->orig_role)
488 && !pcmk__is_set(scheduler->flags, pcmk__sched_quorate)
489 && (scheduler->no_quorum_policy == pcmk_no_quorum_freeze)) {
490 pcmk__notice("Resource %s cannot be elevated from %s to %s due to "
491 PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE,
492 rsc->id, pcmk_role_text(rsc->priv->orig_role),
493 pcmk_role_text(rsc->priv->next_role));
494 pe__set_next_role(rsc, rsc->priv->orig_role,
495 PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE);
496 }
497
498 pe__show_node_scores(!pcmk__is_set(scheduler->flags,
499 pcmk__sched_output_scores),
500 rsc, __func__, rsc->priv->allowed_nodes, scheduler);
501
502 // Unmanage resource if fencing is enabled but no device is configured
503 if (pcmk__is_set(scheduler->flags, pcmk__sched_fencing_enabled)
504 && !pcmk__is_set(scheduler->flags, pcmk__sched_have_fencing)) {
505 pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
506 }
507
508 if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
509 // Unmanaged resources stay on their current node
510 const char *reason = NULL;
511 pcmk_node_t *assign_to = NULL;
512
513 pe__set_next_role(rsc, rsc->priv->orig_role, "unmanaged");
514 assign_to = pcmk__current_node(rsc);
515 if (assign_to == NULL) {
516 reason = "inactive";
517 } else if (rsc->priv->orig_role == pcmk_role_promoted) {
518 reason = "promoted";
519 } else if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
520 reason = "failed";
521 } else {
522 reason = "active";
523 }
524 pcmk__rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
525 (assign_to? assign_to->priv->name : "no node"),
526 reason);
527 pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
528
529 } else if (pcmk__is_set(scheduler->flags, pcmk__sched_stop_all)) {
530 // Must stop at some point, but be consistent with stop_if_fail
531 if (stop_if_fail) {
532 pcmk__rsc_debug(rsc,
533 "Forcing %s to stop: " PCMK_OPT_STOP_ALL_RESOURCES,
534 rsc->id);
535 }
536 pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
537
538 } else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
539 // Assignment failed
540 if (!pcmk__is_set(rsc->flags, pcmk__rsc_removed)) {
541 pcmk__rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
542 } else if ((rsc->priv->active_nodes != NULL) && stop_if_fail) {
543 pcmk__rsc_info(rsc, "Stopping removed resource %s", rsc->id);
544 }
545 }
546
547 pcmk__clear_rsc_flags(rsc, pcmk__rsc_assigning);
548
549 if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
550 remote_connection_assigned(rsc);
551 }
552
553 return rsc->priv->assigned_node;
554 }
555
556 /*!
557 * \internal
558 * \brief Schedule actions to bring resource down and back to current role
559 *
560 * \param[in,out] rsc Resource to restart
561 * \param[in,out] current Node that resource should be brought down on
562 * \param[in] need_stop Whether the resource must be stopped
563 * \param[in] need_promote Whether the resource must be promoted
564 *
565 * \return Role that resource would have after scheduled actions are taken
566 */
567 static void
568 schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current,
569 bool need_stop, bool need_promote)
570 {
571 enum rsc_role_e role = rsc->priv->orig_role;
572 enum rsc_role_e next_role;
573 rsc_transition_fn fn = NULL;
574
575 pcmk__set_rsc_flags(rsc, pcmk__rsc_restarting);
576
577 // Bring resource down to a stop on its current node
578 while (role != pcmk_role_stopped) {
579 next_role = rsc_state_matrix[role][pcmk_role_stopped];
580 pcmk__rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
581 (need_stop? "required" : "optional"), rsc->id,
582 pcmk_role_text(role), pcmk_role_text(next_role));
583 fn = rsc_action_matrix[role][next_role];
584 if (fn == NULL) {
585 break;
586 }
587 fn(rsc, current, !need_stop);
588 role = next_role;
589 }
590
591 // Bring resource up to its next role on its next node
592 while ((rsc->priv->orig_role <= rsc->priv->next_role)
593 && (role != rsc->priv->orig_role)
594 && !pcmk__is_set(rsc->flags, pcmk__rsc_blocked)) {
595 bool required = need_stop;
596
597 next_role = rsc_state_matrix[role][rsc->priv->orig_role];
598 if ((next_role == pcmk_role_promoted) && need_promote) {
599 required = true;
600 }
601 pcmk__rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
602 (required? "required" : "optional"), rsc->id,
603 pcmk_role_text(role), pcmk_role_text(next_role));
604 fn = rsc_action_matrix[role][next_role];
605 if (fn == NULL) {
606 break;
607 }
608 fn(rsc, rsc->priv->assigned_node, !required);
609 role = next_role;
610 }
611
612 pcmk__clear_rsc_flags(rsc, pcmk__rsc_restarting);
613 }
614
615 /*!
616 * \internal
617 * \brief If a resource's next role is not explicitly specified, set a default
618 *
619 * \param[in,out] rsc Resource to set next role for
620 *
621 * \return "explicit" if next role was explicitly set, otherwise "implicit"
622 */
623 static const char *
624 set_default_next_role(pcmk_resource_t *rsc)
625 {
626 if (rsc->priv->next_role != pcmk_role_unknown) {
627 return "explicit";
628 }
629
630 if (rsc->priv->assigned_node == NULL) {
631 pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
632 } else {
633 pe__set_next_role(rsc, pcmk_role_started, "assignment");
634 }
635 return "implicit";
636 }
637
638 /*!
639 * \internal
640 * \brief Create an action to represent an already pending start
641 *
642 * \param[in,out] rsc Resource to create start action for
643 */
644 static void
645 create_pending_start(pcmk_resource_t *rsc)
646 {
647 pcmk_action_t *start = NULL;
648
649 pcmk__rsc_trace(rsc,
650 "Creating action for %s to represent already pending start",
651 rsc->id);
652 start = start_action(rsc, rsc->priv->assigned_node, TRUE);
653 pcmk__set_action_flags(start, pcmk__action_always_in_graph);
654 }
655
656 /*!
657 * \internal
658 * \brief Schedule actions needed to take a resource to its next role
659 *
660 * \param[in,out] rsc Resource to schedule actions for
661 */
662 static void
663 schedule_role_transition_actions(pcmk_resource_t *rsc)
664 {
665 enum rsc_role_e role = rsc->priv->orig_role;
666
667 while (role != rsc->priv->next_role) {
668 enum rsc_role_e next_role =
669 rsc_state_matrix[role][rsc->priv->next_role];
670 rsc_transition_fn fn = NULL;
671
672 pcmk__rsc_trace(rsc,
673 "Creating action to take %s from %s to %s "
674 "(ending at %s)",
675 rsc->id, pcmk_role_text(role),
676 pcmk_role_text(next_role),
677 pcmk_role_text(rsc->priv->next_role));
678 fn = rsc_action_matrix[role][next_role];
679 if (fn == NULL) {
680 break;
681 }
682 fn(rsc, rsc->priv->assigned_node, false);
683 role = next_role;
684 }
685 }
686
687 /*!
688 * \internal
689 * \brief Create all actions needed for a given primitive resource
690 *
691 * \param[in,out] rsc Primitive resource to create actions for
692 */
693 void
694 pcmk__primitive_create_actions(pcmk_resource_t *rsc)
695 {
696 bool need_stop = false;
697 bool need_promote = false;
698 bool is_moving = false;
699 bool allow_migrate = false;
700 bool multiply_active = false;
701
702 pcmk_node_t *current = NULL;
703 pcmk_node_t *migration_target = NULL;
704 unsigned int num_all_active = 0;
705 unsigned int num_clean_active = 0;
706 const char *next_role_source = NULL;
707
708 pcmk__assert(pcmk__is_primitive(rsc));
709
710 next_role_source = set_default_next_role(rsc);
711 pcmk__rsc_trace(rsc,
712 "Creating all actions for %s transition from %s to %s "
713 "(%s) on %s",
714 rsc->id, pcmk_role_text(rsc->priv->orig_role),
715 pcmk_role_text(rsc->priv->next_role), next_role_source,
716 pcmk__node_name(rsc->priv->assigned_node));
717
718 current = rsc->priv->fns->active_node(rsc, &num_all_active,
719 &num_clean_active);
720
721 g_list_foreach(rsc->priv->dangling_migration_sources,
722 pcmk__abort_dangling_migration, rsc);
723
724 if ((current != NULL) && (rsc->priv->assigned_node != NULL)
725 && !pcmk__same_node(current, rsc->priv->assigned_node)
726 && (rsc->priv->next_role >= pcmk_role_started)) {
727
728 pcmk__rsc_trace(rsc, "Moving %s from %s to %s",
729 rsc->id, pcmk__node_name(current),
730 pcmk__node_name(rsc->priv->assigned_node));
731 is_moving = true;
732 allow_migrate = pcmk__rsc_can_migrate(rsc, current);
733
734 // This is needed even if migrating (though I'm not sure why ...)
735 need_stop = true;
736 }
737
738 // Check whether resource is partially migrated and/or multiply active
739 migration_target = rsc->priv->partial_migration_target;
740 if ((rsc->priv->partial_migration_source != NULL)
741 && (migration_target != NULL) && allow_migrate && (num_all_active == 2)
742 && pcmk__same_node(current, rsc->priv->partial_migration_source)
743 && pcmk__same_node(rsc->priv->assigned_node, migration_target)) {
744 /* A partial migration is in progress, and the migration target remains
745 * the same as when the migration began.
746 */
747 pcmk__rsc_trace(rsc,
748 "Partial migration of %s from %s to %s will continue",
749 rsc->id,
750 pcmk__node_name(rsc->priv->partial_migration_source),
751 pcmk__node_name(migration_target));
752
753 } else if ((rsc->priv->partial_migration_source != NULL)
754 || (migration_target != NULL)) {
755 // A partial migration is in progress but can't be continued
756
757 if (num_all_active > 2) {
758 // The resource is migrating *and* multiply active!
759 pcmk__notice("Forcing recovery of %s because it is migrating "
760 "from %s to %s and possibly active elsewhere",
761 rsc->id,
762 pcmk__node_name(rsc->priv->partial_migration_source),
763 pcmk__node_name(migration_target));
764 } else {
765 // The migration source or target isn't available
766 pcmk__notice("Forcing recovery of %s because it can no longer "
767 "migrate from %s to %s",
768 rsc->id,
769 pcmk__node_name(rsc->priv->partial_migration_source),
770 pcmk__node_name(migration_target));
771 }
772 need_stop = true;
773 rsc->priv->partial_migration_source = NULL;
774 rsc->priv->partial_migration_target = NULL;
775 allow_migrate = false;
776
777 } else if (pcmk__is_set(rsc->flags, pcmk__rsc_needs_fencing)) {
778 multiply_active = (num_all_active > 1);
779 } else {
780 /* If a resource has PCMK_META_REQUIRES set to PCMK_VALUE_NOTHING or
781 * PCMK_VALUE_QUORUM, don't consider it active on unclean nodes (similar
782 * to how all resources behave when PCMK_OPT_FENCING_ENABLED is false).
783 * We can start such resources elsewhere before fencing completes, and
784 * if we considered the resource active on the failed node, we would
785 * attempt recovery for being active on multiple nodes.
786 */
787 multiply_active = (num_clean_active > 1);
788 }
789
790 if (multiply_active) {
791 const char *class = pcmk__xe_get(rsc->priv->xml, PCMK_XA_CLASS);
792
793 // Resource was (possibly) incorrectly multiply active
794 pcmk__sched_err(rsc->priv->scheduler,
795 "%s resource %s might be active on %u nodes (%s)",
796 pcmk__s(class, "Untyped"), rsc->id, num_all_active,
797 pcmk__multiply_active_text(rsc));
798 pcmk__notice("For more information, see \"What are multiply active "
799 "resources?\" at "
800 "https://projects.clusterlabs.org/w/clusterlabs/faq/");
801
802 switch (rsc->priv->multiply_active_policy) {
803 case pcmk__multiply_active_restart:
804 need_stop = true;
805 break;
806 case pcmk__multiply_active_unexpected:
807 need_stop = true; // stop_resource() will skip expected node
808 pcmk__set_rsc_flags(rsc, pcmk__rsc_stop_unexpected);
809 break;
810 default:
811 break;
812 }
813
814 } else {
815 pcmk__clear_rsc_flags(rsc, pcmk__rsc_stop_unexpected);
816 }
817
818 if (pcmk__is_set(rsc->flags, pcmk__rsc_start_pending)) {
819 create_pending_start(rsc);
820 }
821
822 if (is_moving) {
823 // Remaining tests are only for resources staying where they are
824
825 } else if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
826 if (pcmk__is_set(rsc->flags, pcmk__rsc_stop_if_failed)) {
827 need_stop = true;
828 pcmk__rsc_trace(rsc, "Recovering %s", rsc->id);
829 } else {
830 pcmk__rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
831 if (rsc->priv->next_role == pcmk_role_promoted) {
832 need_promote = true;
833 }
834 }
835
836 } else if (pcmk__is_set(rsc->flags, pcmk__rsc_blocked)) {
837 pcmk__rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
838 need_stop = true;
839
840 } else if ((rsc->priv->orig_role > pcmk_role_started)
841 && (current != NULL)
842 && (rsc->priv->assigned_node != NULL)) {
843 pcmk_action_t *start = NULL;
844
845 pcmk__rsc_trace(rsc, "Creating start action for promoted resource %s",
846 rsc->id);
847 start = start_action(rsc, rsc->priv->assigned_node, TRUE);
848 if (!pcmk__is_set(start->flags, pcmk__action_optional)) {
849 // Recovery of a promoted resource
850 pcmk__rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
851 need_stop = true;
852 }
853 }
854
855 // Create any actions needed to bring resource down and back up to same role
856 schedule_restart_actions(rsc, current, need_stop, need_promote);
857
858 // Create any actions needed to take resource from this role to the next
859 schedule_role_transition_actions(rsc);
860
861 pcmk__create_recurring_actions(rsc);
862
863 if (allow_migrate) {
864 pcmk__create_migration_actions(rsc, current);
865 }
866 }
867
868 /*!
869 * \internal
870 * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
871 *
872 * \param[in] rsc Resource to check
873 */
874 static void
875 rsc_avoids_remote_nodes(const pcmk_resource_t *rsc)
876 {
877 GHashTableIter iter;
878 pcmk_node_t *node = NULL;
879
880 g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
881 while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
882 if (node->priv->remote != NULL) {
883 node->assign->score = -PCMK_SCORE_INFINITY;
884 }
885 }
886 }
887
888 /*!
889 * \internal
890 * \brief Return allowed nodes as (possibly sorted) list
891 *
892 * Convert a resource's hash table of allowed nodes to a list. If printing to
893 * stdout, sort the list, to keep action ID numbers consistent for regression
894 * test output (while avoiding the performance hit on a live cluster).
895 *
896 * \param[in] rsc Resource to check for allowed nodes
897 *
898 * \return List of resource's allowed nodes
899 * \note Callers should take care not to rely on the list being sorted.
900 */
901 static GList *
902 allowed_nodes_as_list(const pcmk_resource_t *rsc)
903 {
904 GList *allowed_nodes = NULL;
905
906 if (rsc->priv->allowed_nodes != NULL) {
907 allowed_nodes = g_hash_table_get_values(rsc->priv->allowed_nodes);
908 }
909
910 if (!pcmk__is_daemon) {
911 allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
912 }
913
914 return allowed_nodes;
915 }
916
/*!
 * \internal
 * \brief Create implicit constraints needed for a primitive resource
 *
 * This creates the standard orderings (stop before start, demote before stop,
 * start before promote, probe before history clean-up), plus unfencing,
 * utilization, and launcher-related constraints where applicable.
 *
 * \param[in,out] rsc  Primitive resource to create implicit constraints for
 *
 * \note Does nothing for unmanaged resources.
 */
void
pcmk__primitive_internal_constraints(pcmk_resource_t *rsc)
{
    GList *allowed_nodes = NULL;
    bool check_unfencing = false;
    bool check_utilization = false;
    pcmk_scheduler_t *scheduler = NULL;

    pcmk__assert(pcmk__is_primitive(rsc));
    scheduler = rsc->priv->scheduler;

    if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
        pcmk__rsc_trace(rsc,
                        "Skipping implicit constraints for unmanaged resource "
                        "%s", rsc->id);
        return;
    }

    // Whether resource requires unfencing
    check_unfencing = !pcmk__is_set(rsc->flags, pcmk__rsc_fence_device)
                      && pcmk__is_set(scheduler->flags,
                                      pcmk__sched_enable_unfencing)
                      && pcmk__is_set(rsc->flags, pcmk__rsc_needs_unfencing);

    // Whether a non-default placement strategy is used
    check_utilization = (g_hash_table_size(rsc->priv->utilization) > 0)
                         && !pcmk__str_eq(scheduler->priv->placement_strategy,
                                          PCMK_VALUE_DEFAULT, pcmk__str_casei);

    // Order stops before starts (i.e. restart)
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
                       rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
                       pcmk__ar_ordered
                       |pcmk__ar_first_implies_then
                       |pcmk__ar_intermediate_stop, scheduler);

    // Promotable ordering: demote before stop, start before promote
    if (pcmk__is_set(pe__const_top_resource(rsc, false)->flags,
                     pcmk__rsc_promotable)
        || (rsc->priv->orig_role > pcmk_role_unpromoted)) {

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
                           NULL,
                           rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
                           NULL,
                           pcmk__ar_promoted_then_implies_first, scheduler);

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
                           NULL,
                           rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
                           NULL,
                           pcmk__ar_unrunnable_first_blocks, scheduler);
    }

    // Don't clear resource history if probing on same node
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
                       NULL, rsc,
                       pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
                       NULL,
                       pcmk__ar_if_on_same_node|pcmk__ar_then_cancels_first,
                       scheduler);

    /* Certain checks need allowed nodes. The list borrows node pointers from
     * the allowed-nodes table; only the list itself is freed at the end.
     */
    if (check_unfencing || check_utilization
        || (rsc->priv->launcher != NULL)) {

        allowed_nodes = allowed_nodes_as_list(rsc);
    }

    if (check_unfencing) {
        g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
    }

    if (check_utilization) {
        pcmk__create_utilization_constraints(rsc, allowed_nodes);
    }

    if (rsc->priv->launcher != NULL) {
        pcmk_resource_t *remote_rsc = NULL;

        if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
            // rsc is the implicit remote connection for a guest or bundle node

            /* Guest resources are not allowed to run on Pacemaker Remote nodes,
             * to avoid nesting remotes. However, bundles are allowed.
             */
            if (!pcmk__is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed)) {
                rsc_avoids_remote_nodes(rsc->priv->launcher);
            }

            /* If someone cleans up a guest or bundle node's launcher, we will
             * likely schedule a (re-)probe of the launcher and recovery of the
             * connection. Order the connection stop after the launcher probe,
             * so that if we detect the launcher running, we will trigger a new
             * transition and avoid the unnecessary recovery.
             */
            pcmk__order_resource_actions(rsc->priv->launcher,
                                         PCMK_ACTION_MONITOR,
                                         rsc, PCMK_ACTION_STOP,
                                         pcmk__ar_ordered);

        /* A user can specify that a resource must start on a Pacemaker Remote
         * node by explicitly configuring it with the PCMK__META_CONTAINER
         * meta-attribute. This is of questionable merit, since location
         * constraints can accomplish the same thing. But we support it, so here
         * we check whether a resource (that is not itself a remote connection)
         * has PCMK__META_CONTAINER set to a remote node or guest node resource.
         */
        } else if (pcmk__is_set(rsc->priv->launcher->flags,
                                pcmk__rsc_is_remote_connection)) {
            remote_rsc = rsc->priv->launcher;
        } else {
            remote_rsc =
                pe__resource_contains_guest_node(scheduler,
                                                 rsc->priv->launcher);
        }

        if (remote_rsc != NULL) {
            /* Force the resource on the Pacemaker Remote node instead of
             * colocating the resource with the launcher.
             */
            for (GList *item = allowed_nodes; item; item = item->next) {
                pcmk_node_t *node = item->data;

                if (node->priv->remote != remote_rsc) {
                    node->assign->score = -PCMK_SCORE_INFINITY;
                }
            }

        } else {
            /* This resource is either launched by a resource that does NOT
             * represent a Pacemaker Remote node, or a Pacemaker Remote
             * connection resource for a guest node or bundle.
             */
            int score;

            pcmk__trace("Order and colocate %s relative to its launcher %s",
                        rsc->id, rsc->priv->launcher->id);

            // Launcher must start before the launched resource starts
            pcmk__new_ordering(rsc->priv->launcher,
                               pcmk__op_key(rsc->priv->launcher->id,
                                            PCMK_ACTION_START, 0),
                               NULL, rsc,
                               pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
                               NULL,
                               pcmk__ar_first_implies_then
                               |pcmk__ar_unrunnable_first_blocks, scheduler);

            // Launched resource must stop before the launcher stops
            pcmk__new_ordering(rsc,
                               pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
                               NULL,
                               rsc->priv->launcher,
                               pcmk__op_key(rsc->priv->launcher->id,
                                            PCMK_ACTION_STOP, 0),
                               NULL, pcmk__ar_then_implies_first, scheduler);

            if (pcmk__is_set(rsc->flags, pcmk__rsc_remote_nesting_allowed)
                /* @TODO: && non-bundle Pacemaker Remote nodes exist */) {
                score = 10000;    /* Highly preferred but not essential */
            } else {
                score = PCMK_SCORE_INFINITY; // Force to run on same host
            }
            pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
                                 rsc->priv->launcher, NULL, NULL,
                                 pcmk__coloc_influence);
        }
    }

    if (pcmk__is_set(rsc->flags, pcmk__rsc_is_remote_connection)
        || pcmk__is_set(rsc->flags, pcmk__rsc_fence_device)) {
        /* Remote connections and fencing devices are not allowed to run on
         * Pacemaker Remote nodes
         */
        rsc_avoids_remote_nodes(rsc);
    }
    g_list_free(allowed_nodes);
}
1100
1101 /*!
1102 * \internal
1103 * \brief Apply a colocation's score to node scores or resource priority
1104 *
1105 * Given a colocation constraint, apply its score to the dependent's
1106 * allowed node scores (if we are still placing resources) or priority (if
1107 * we are choosing promotable clone instance roles).
1108 *
1109 * \param[in,out] dependent Dependent resource in colocation
1110 * \param[in] primary Primary resource in colocation
1111 * \param[in] colocation Colocation constraint to apply
1112 * \param[in] for_dependent true if called on behalf of dependent
1113 *
1114 * \return The score added to the dependent's priority
1115 */
1116 int
1117 pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent,
1118 const pcmk_resource_t *primary,
1119 const pcmk__colocation_t *colocation,
1120 bool for_dependent)
1121 {
1122 enum pcmk__coloc_affects filter_results;
1123
1124 pcmk__assert((dependent != NULL) && (primary != NULL)
1125 && (colocation != NULL));
1126
1127 if (for_dependent) {
1128 // Always process on behalf of primary resource
1129 return primary->priv->cmds->apply_coloc_score(dependent, primary,
1130 colocation, false);
1131 }
1132
1133 filter_results = pcmk__colocation_affects(dependent, primary, colocation,
1134 false);
1135 pcmk__rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
1136 ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
1137 dependent->id, primary->id, colocation->id,
1138 colocation->score,
1139 filter_results);
1140
1141 switch (filter_results) {
1142 case pcmk__coloc_affects_role:
1143 return pcmk__apply_coloc_to_priority(dependent, primary,
1144 colocation);
1145
1146 case pcmk__coloc_affects_location:
1147 pcmk__apply_coloc_to_scores(dependent, primary, colocation);
1148 return 0;
1149
1150 default: // pcmk__coloc_affects_nothing
1151 return 0;
1152 }
1153 }
1154
1155 /* Primitive implementation of
1156 * pcmk__assignment_methods_t:with_this_colocations()
1157 */
1158 void
1159 pcmk__with_primitive_colocations(const pcmk_resource_t *rsc,
1160 const pcmk_resource_t *orig_rsc, GList **list)
1161 {
1162 const pcmk_resource_t *parent = NULL;
1163
1164 pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
1165 parent = rsc->priv->parent;
1166
1167 if (rsc == orig_rsc) {
1168 /* For the resource itself, add all of its own colocations and relevant
1169 * colocations from its parent (if any).
1170 */
1171 pcmk__add_with_this_list(list, rsc->priv->with_this_colocations,
1172 orig_rsc);
1173 if (parent != NULL) {
1174 parent->priv->cmds->with_this_colocations(parent, orig_rsc, list);
1175 }
1176 } else {
1177 // For an ancestor, add only explicitly configured constraints
1178 for (GList *iter = rsc->priv->with_this_colocations;
1179 iter != NULL; iter = iter->next) {
1180 pcmk__colocation_t *colocation = iter->data;
1181
1182 if (pcmk__is_set(colocation->flags, pcmk__coloc_explicit)) {
1183 pcmk__add_with_this(list, colocation, orig_rsc);
1184 }
1185 }
1186 }
1187 }
1188
1189 /* Primitive implementation of
1190 * pcmk__assignment_methods_t:this_with_colocations()
1191 */
1192 void
1193 pcmk__primitive_with_colocations(const pcmk_resource_t *rsc,
1194 const pcmk_resource_t *orig_rsc, GList **list)
1195 {
1196 const pcmk_resource_t *parent = NULL;
1197
1198 pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
1199 parent = rsc->priv->parent;
1200
1201 if (rsc == orig_rsc) {
1202 /* For the resource itself, add all of its own colocations and relevant
1203 * colocations from its parent (if any).
1204 */
1205 pcmk__add_this_with_list(list, rsc->priv->this_with_colocations,
1206 orig_rsc);
1207 if (parent != NULL) {
1208 parent->priv->cmds->this_with_colocations(parent, orig_rsc, list);
1209 }
1210 } else {
1211 // For an ancestor, add only explicitly configured constraints
1212 for (GList *iter = rsc->priv->this_with_colocations;
1213 iter != NULL; iter = iter->next) {
1214 pcmk__colocation_t *colocation = iter->data;
1215
1216 if (pcmk__is_set(colocation->flags, pcmk__coloc_explicit)) {
1217 pcmk__add_this_with(list, colocation, orig_rsc);
1218 }
1219 }
1220 }
1221 }
1222
1223 /*!
1224 * \internal
1225 * \brief Return action flags for a given primitive resource action
1226 *
1227 * \param[in,out] action Action to get flags for
1228 * \param[in] node If not NULL, limit effects to this node (ignored)
1229 *
1230 * \return Flags appropriate to \p action on \p node
1231 */
1232 uint32_t
1233 pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
1234 {
1235 pcmk__assert(action != NULL);
1236 return (uint32_t) action->flags;
1237 }
1238
1239 /*!
1240 * \internal
1241 * \brief Check whether a node is a multiply active resource's expected node
1242 *
1243 * \param[in] rsc Resource to check
1244 * \param[in] node Node to check
1245 *
1246 * \return \c true if \p rsc is multiply active with
1247 * \c PCMK_META_MULTIPLE_ACTIVE set to \c PCMK_VALUE_STOP_UNEXPECTED,
1248 * and \p node is the node where it will remain active
1249 * \note This assumes that the resource's next role cannot be changed to stopped
1250 * after this is called, which should be reasonable if status has already
1251 * been unpacked and resources have been assigned to nodes.
1252 */
1253 static bool
1254 is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
1255 {
1256 return pcmk__all_flags_set(rsc->flags,
1257 pcmk__rsc_stop_unexpected|pcmk__rsc_restarting)
1258 && (rsc->priv->next_role > pcmk_role_stopped)
1259 && pcmk__same_node(rsc->priv->assigned_node, node);
1260 }
1261
/*!
 * \internal
 * \brief Schedule actions needed to stop a resource wherever it is active
 *
 * \param[in,out] rsc       Resource being stopped
 * \param[in]     node      Node where resource is being stopped (ignored)
 * \param[in]     optional  Whether actions should be optional
 */
static void
stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
    // Schedule one stop for each node where the resource is currently active
    for (GList *iter = rsc->priv->active_nodes;
         iter != NULL; iter = iter->next) {

        pcmk_node_t *current = (pcmk_node_t *) iter->data;
        pcmk_action_t *stop = NULL;

        if (is_expected_node(rsc, current)) {
            /* We are scheduling restart actions for a multiply active resource
             * with PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_STOP_UNEXPECTED, and
             * this is where it should not be stopped.
             */
            pcmk__rsc_trace(rsc,
                            "Skipping stop of multiply active resource %s "
                            "on expected node %s",
                            rsc->id, pcmk__node_name(current));
            continue;
        }

        if (rsc->priv->partial_migration_target != NULL) {
            // Continue migration if node originally was and remains target
            if (pcmk__same_node(current, rsc->priv->partial_migration_target)
                && pcmk__same_node(current, rsc->priv->assigned_node)) {
                pcmk__rsc_trace(rsc,
                                "Skipping stop of %s on %s "
                                "because partial migration there will continue",
                                rsc->id, pcmk__node_name(current));
                continue;
            } else {
                pcmk__rsc_trace(rsc,
                                "Forcing stop of %s on %s "
                                "because migration target changed",
                                rsc->id, pcmk__node_name(current));
                optional = false;
            }
        }

        pcmk__rsc_trace(rsc, "Scheduling stop of %s on %s",
                        rsc->id, pcmk__node_name(current));
        stop = stop_action(rsc, current, optional);

        if (rsc->priv->assigned_node == NULL) {
            // No node was assigned, so record why the resource must stop
            pe_action_set_reason(stop, "node availability", true);
        } else if (pcmk__all_flags_set(rsc->flags,
                                       pcmk__rsc_restarting
                                       |pcmk__rsc_stop_unexpected)) {
            /* We are stopping a multiply active resource on a node that is
             * not its expected node, and we are still scheduling restart
             * actions, so the stop is for being multiply active.
             */
            pe_action_set_reason(stop, "being multiply active", true);
        }

        if (!pcmk__is_set(rsc->flags, pcmk__rsc_managed)) {
            // Stops for unmanaged resources must not actually be executed
            pcmk__clear_action_flags(stop, pcmk__action_runnable);
        }

        if (pcmk__is_set(rsc->flags, pcmk__rsc_needs_unfencing)) {
            // Order this stop before any unfencing of the node
            pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
                                                 NULL, false,
                                                 rsc->priv->scheduler);

            order_actions(stop, unfence, pcmk__ar_then_implies_first);
            if (!pcmk__node_unfenced(current)) {
                pcmk__sched_err(rsc->priv->scheduler,
                                "Stopping %s until %s can be unfenced",
                                rsc->id, pcmk__node_name(current));
            }
        }
    }
}
1343
1344 /*!
1345 * \internal
1346 * \brief Schedule actions needed to start a resource on a node
1347 *
1348 * \param[in,out] rsc Resource being started
1349 * \param[in,out] node Node where resource should be started
1350 * \param[in] optional Whether actions should be optional
1351 */
1352 static void
1353 start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1354 {
1355 pcmk_action_t *start = NULL;
1356
1357 pcmk__assert(node != NULL);
1358
1359 pcmk__rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
1360 (optional? "optional" : "required"), rsc->id,
1361 pcmk__node_name(node), node->assign->score);
1362 start = start_action(rsc, node, TRUE);
1363
1364 pcmk__order_vs_unfencing(rsc, node, start, pcmk__ar_first_implies_then);
1365
1366 if (pcmk__is_set(start->flags, pcmk__action_runnable) && !optional) {
1367 pcmk__clear_action_flags(start, pcmk__action_optional);
1368 }
1369
1370 if (is_expected_node(rsc, node)) {
1371 /* This could be a problem if the start becomes necessary for other
1372 * reasons later.
1373 */
1374 pcmk__rsc_trace(rsc,
1375 "Start of multiply active resouce %s "
1376 "on expected node %s will be a pseudo-action",
1377 rsc->id, pcmk__node_name(node));
1378 pcmk__set_action_flags(start, pcmk__action_pseudo);
1379 }
1380 }
1381
1382 /*!
1383 * \internal
1384 * \brief Schedule actions needed to promote a resource on a node
1385 *
1386 * \param[in,out] rsc Resource being promoted
1387 * \param[in] node Node where resource should be promoted
1388 * \param[in] optional Whether actions should be optional
1389 */
1390 static void
1391 promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1392 {
1393 GList *iter = NULL;
1394 GList *action_list = NULL;
1395 bool runnable = true;
1396
1397 pcmk__assert(node != NULL);
1398
1399 // Any start must be runnable for promotion to be runnable
1400 action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
1401 for (iter = action_list; iter != NULL; iter = iter->next) {
1402 pcmk_action_t *start = (pcmk_action_t *) iter->data;
1403
1404 if (!pcmk__is_set(start->flags, pcmk__action_runnable)) {
1405 runnable = false;
1406 }
1407 }
1408 g_list_free(action_list);
1409
1410 if (runnable) {
1411 pcmk_action_t *promote = promote_action(rsc, node, optional);
1412
1413 pcmk__rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
1414 (optional? "optional" : "required"), rsc->id,
1415 pcmk__node_name(node));
1416
1417 if (is_expected_node(rsc, node)) {
1418 /* This could be a problem if the promote becomes necessary for
1419 * other reasons later.
1420 */
1421 pcmk__rsc_trace(rsc,
1422 "Promotion of multiply active resouce %s "
1423 "on expected node %s will be a pseudo-action",
1424 rsc->id, pcmk__node_name(node));
1425 pcmk__set_action_flags(promote, pcmk__action_pseudo);
1426 }
1427 } else {
1428 pcmk__rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
1429 rsc->id, pcmk__node_name(node));
1430 action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
1431 true);
1432 for (iter = action_list; iter != NULL; iter = iter->next) {
1433 pcmk_action_t *promote = (pcmk_action_t *) iter->data;
1434
1435 pcmk__clear_action_flags(promote, pcmk__action_runnable);
1436 }
1437 g_list_free(action_list);
1438 }
1439 }
1440
1441 /*!
1442 * \internal
1443 * \brief Schedule actions needed to demote a resource wherever it is active
1444 *
1445 * \param[in,out] rsc Resource being demoted
1446 * \param[in] node Node where resource should be demoted (ignored)
1447 * \param[in] optional Whether actions should be optional
1448 */
1449 static void
1450 demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1451 {
1452 /* Since this will only be called for a primitive (possibly as an instance
1453 * of a collective resource), the resource is multiply active if it is
1454 * running on more than one node, so we want to demote on all of them as
1455 * part of recovery, regardless of which one is the desired node.
1456 */
1457 for (GList *iter = rsc->priv->active_nodes;
1458 iter != NULL; iter = iter->next) {
1459
1460 pcmk_node_t *current = (pcmk_node_t *) iter->data;
1461
1462 if (is_expected_node(rsc, current)) {
1463 pcmk__rsc_trace(rsc,
1464 "Skipping demote of multiply active resource %s "
1465 "on expected node %s",
1466 rsc->id, pcmk__node_name(current));
1467 } else {
1468 pcmk__rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
1469 (optional? "optional" : "required"), rsc->id,
1470 pcmk__node_name(current));
1471 demote_action(rsc, current, optional);
1472 }
1473 }
1474 }
1475
/*!
 * \internal
 * \brief Abort on an impossible role transition
 *
 * Used as the scheduling function for role-transition table entries that
 * should never be reachable; reaching it indicates a bug.
 *
 * \param[in,out] rsc       Resource involved in the transition (ignored)
 * \param[in]     node      Node involved in the transition (ignored)
 * \param[in]     optional  Whether actions should be optional (ignored)
 */
static void
assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
{
    pcmk__assert(false);
}
1481
1482 /*!
1483 * \internal
1484 * \brief Schedule cleanup of a resource
1485 *
1486 * \param[in,out] rsc Resource to clean up
1487 * \param[in] node Node to clean up on
1488 * \param[in] optional Whether clean-up should be optional
1489 */
1490 void
1491 pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node,
1492 bool optional)
1493 {
1494 /* If the cleanup is required, its orderings are optional, because they're
1495 * relevant only if both actions are required. Conversely, if the cleanup is
1496 * optional, the orderings make the then action required if the first action
1497 * becomes required.
1498 */
1499 uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
1500
1501 CRM_CHECK((rsc != NULL) && (node != NULL), return);
1502
1503 if (pcmk__is_set(rsc->flags, pcmk__rsc_failed)) {
1504 pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
1505 rsc->id, pcmk__node_name(node));
1506 return;
1507 }
1508
1509 if (node->details->unclean || !node->details->online) {
1510 pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
1511 rsc->id, pcmk__node_name(node));
1512 return;
1513 }
1514
1515 pcmk__notice("Scheduling clean-up of %s on %s", rsc->id,
1516 pcmk__node_name(node));
1517 delete_action(rsc, node, optional);
1518
1519 // stop -> clean-up -> start
1520 pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
1521 rsc, PCMK_ACTION_DELETE, flag);
1522 pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
1523 rsc, PCMK_ACTION_START, flag);
1524 }
1525
1526 /*!
1527 * \internal
1528 * \brief Add primitive meta-attributes relevant to graph actions to XML
1529 *
1530 * \param[in] rsc Primitive resource whose meta-attributes should be added
1531 * \param[in,out] xml Transition graph action attributes XML to add to
1532 */
1533 void
1534 pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
1535 {
1536 char *name = NULL;
1537 char *value = NULL;
1538 const pcmk_resource_t *parent = NULL;
1539
1540 pcmk__assert(pcmk__is_primitive(rsc) && (xml != NULL));
1541
1542 /* Clone instance numbers get set internally as meta-attributes, and are
1543 * needed in the transition graph (for example, to tell unique clone
1544 * instances apart).
1545 */
1546 value = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CLONE);
1547 if (value != NULL) {
1548 name = crm_meta_name(PCMK__META_CLONE);
1549 pcmk__xe_set(xml, name, value);
1550 free(name);
1551 }
1552
1553 // Not sure if this one is really needed ...
1554 value = g_hash_table_lookup(rsc->priv->meta, PCMK_META_REMOTE_NODE);
1555 if (value != NULL) {
1556 name = crm_meta_name(PCMK_META_REMOTE_NODE);
1557 pcmk__xe_set(xml, name, value);
1558 free(name);
1559 }
1560
1561 /* The PCMK__META_CONTAINER meta-attribute can be set on the primitive
1562 * itself or one of its ancestors, so check them all and keep the highest.
1563 */
1564 for (parent = rsc; parent != NULL; parent = parent->priv->parent) {
1565 if (parent->priv->launcher != NULL) {
1566 pcmk__xe_set(xml, CRM_META "_" PCMK__META_CONTAINER,
1567 parent->priv->launcher->id);
1568 }
1569 }
1570
1571 /* Bundle replica children will get their external-ip set internally as a
1572 * meta-attribute. The graph action needs it, but under a different naming
1573 * convention than other meta-attributes.
1574 */
1575 value = g_hash_table_lookup(rsc->priv->meta, "external-ip");
1576 if (value != NULL) {
1577 pcmk__xe_set(xml, "pcmk_external_ip", value);
1578 }
1579 }
1580
1581 // Primitive implementation of pcmk__assignment_methods_t:add_utilization()
1582 void
1583 pcmk__primitive_add_utilization(const pcmk_resource_t *rsc,
1584 const pcmk_resource_t *orig_rsc,
1585 GList *all_rscs, GHashTable *utilization)
1586 {
1587 pcmk__assert(pcmk__is_primitive(rsc) && (orig_rsc != NULL)
1588 && (utilization != NULL));
1589
1590 if (!pcmk__is_set(rsc->flags, pcmk__rsc_unassigned)) {
1591 return;
1592 }
1593
1594 pcmk__rsc_trace(orig_rsc,
1595 "%s: Adding primitive %s as colocated utilization",
1596 orig_rsc->id, rsc->id);
1597 pcmk__release_node_capacity(utilization, rsc);
1598 }
1599
1600 /*!
1601 * \internal
1602 * \brief Get epoch time of node's shutdown attribute (or now if none)
1603 *
1604 * \param[in,out] node Node to check
1605 *
1606 * \return Epoch time corresponding to shutdown attribute if set or now if not
1607 */
1608 static time_t
1609 shutdown_time(pcmk_node_t *node)
1610 {
1611 const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL,
1612 pcmk__rsc_node_current);
1613 time_t result = 0;
1614
1615 if (shutdown != NULL) {
1616 long long result_ll;
1617 int rc = pcmk__scan_ll(shutdown, &result_ll, 0LL);
1618
1619 if (rc == pcmk_rc_ok) {
1620 result = (time_t) result_ll;
1621 } else {
1622 pcmk__warn("Ignoring invalid value '%s' for %s "
1623 PCMK__NODE_ATTR_SHUTDOWN " attribute: %s",
1624 shutdown, pcmk__node_name(node), pcmk_rc_str(rc));
1625 }
1626 }
1627 if (result == 0) {
1628 result = pcmk__scheduler_epoch_time(node->priv->scheduler);
1629 }
1630 return result;
1631 }
1632
1633 /*!
1634 * \internal
1635 * \brief Ban a resource from a node if it's not locked to the node
1636 *
1637 * \param[in] data Node to check
1638 * \param[in,out] user_data Resource to check
1639 */
1640 static void
1641 ban_if_not_locked(gpointer data, gpointer user_data)
1642 {
1643 const pcmk_node_t *node = (const pcmk_node_t *) data;
1644 pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
1645
1646 if (!pcmk__same_node(node, rsc->priv->lock_node)) {
1647 resource_location(rsc, node, -PCMK_SCORE_INFINITY,
1648 PCMK_OPT_SHUTDOWN_LOCK, rsc->priv->scheduler);
1649 }
1650 }
1651
/* Primitive implementation of pcmk__assignment_methods_t:shutdown_lock()
 *
 * Either validate an existing shutdown lock (obtained from resource history),
 * or lock the resource to its single active node if that node is cleanly
 * shutting down, then ban the resource from all other nodes.
 */
void
pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc)
{
    pcmk_scheduler_t *scheduler = NULL;

    pcmk__assert(pcmk__is_primitive(rsc));
    scheduler = rsc->priv->scheduler;

    // Fence devices and remote connections can't be locked
    if (pcmk__any_flags_set(rsc->flags,
                            pcmk__rsc_fence_device
                            |pcmk__rsc_is_remote_connection)) {
        return;
    }

    if (rsc->priv->lock_node != NULL) {
        // The lock was obtained from resource history

        if (rsc->priv->active_nodes != NULL) {
            /* The resource was started elsewhere even though it is now
             * considered locked. This shouldn't be possible, but as a
             * failsafe, we don't want to disturb the resource now.
             */
            pcmk__rsc_info(rsc,
                           "Cancelling shutdown lock "
                           "because %s is already active", rsc->id);
            pe__clear_resource_history(rsc, rsc->priv->lock_node);
            rsc->priv->lock_node = NULL;
            rsc->priv->lock_time = 0;
        }

    // Only a resource active on exactly one node can be locked
    } else if (pcmk__list_of_1(rsc->priv->active_nodes)) {
        pcmk_node_t *node = rsc->priv->active_nodes->data;

        if (node->details->shutdown) {
            if (node->details->unclean) {
                pcmk__rsc_debug(rsc,
                                "Not locking %s to unclean %s for shutdown",
                                rsc->id, pcmk__node_name(node));
            } else {
                // Node is cleanly shutting down: lock the resource to it
                rsc->priv->lock_node = node;
                rsc->priv->lock_time = shutdown_time(node);
            }
        }
    }

    if (rsc->priv->lock_node == NULL) {
        // No lock needed
        return;
    }

    if (scheduler->priv->shutdown_lock_ms > 0U) {
        time_t lock_expiration = rsc->priv->lock_time
            + pcmk__timeout_ms2s(scheduler->priv->shutdown_lock_ms);

        pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
                       rsc->id, pcmk__node_name(rsc->priv->lock_node),
                       (long long) lock_expiration);
        /* Recheck one second past expiration (presumably so the lock has
         * definitely expired when the recheck runs — TODO confirm)
         */
        pcmk__update_recheck_time(++lock_expiration, scheduler,
                                  "shutdown lock expiration");
    } else {
        // Lock never expires (shutdown-lock-limit not configured)
        pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown",
                       rsc->id, pcmk__node_name(rsc->priv->lock_node));
    }

    // If resource is locked to one node, ban it from all other nodes
    g_list_foreach(scheduler->nodes, ban_if_not_locked, rsc);
}
1722