1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h> // PRIu32
13 #include <stdbool.h> // bool, true, false
14 #include <stdio.h> // NULL
15 #include <stdlib.h> // free(), etc.
16
17 #include <glib.h> // gboolean, etc.
18 #include <libxml/tree.h> // xmlNode
19
20 #include <crm/crm.h>
21
22 #include <crm/common/xml.h>
23 #include <crm/cluster.h>
24
25 #include <pacemaker-controld.h>
26
/* Name of the node whose join request supplied the CIB generation currently
 * stored in max_generation_xml (set together with it in
 * do_dc_join_filter_offer(), released by free_max_generation())
 */
static char *max_generation_from = NULL;

/* Private copy of the best CIB generation element accepted so far from a join
 * request; NULL until the first acceptable request has been processed
 */
static xmlNode *max_generation_xml = NULL;
29
30 /*!
31 * \internal
32 * \brief Nodes from which a CIB sync has failed since the peer joined
33 *
34 * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
35 * the name of a client node from which a CIB \p sync_from() call has failed in
36 * \p do_dc_join_finalize() since the client joined the cluster as a peer.
37 * \p join_id is the ID of the join round in which the \p sync_from() failed,
38 * and is intended for use in nack log messages.
39 */
40 static GHashTable *failed_sync_nodes = NULL;
41
/* Forward declarations for functions that are referenced in this file before
 * their definitions
 */

// Per-peer callback passed to g_hash_table_foreach() over the peer cache
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);

// CIB callback registered for the sync_from() request in do_dc_join_finalize()
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);

// Evaluate join progress for the given FSA state; \p source is used in logs
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
45
46 /* Numeric counter used to identify join rounds (an unsigned int would be
47 * appropriate, except we get and set it in XML as int)
48 */
49 static int current_join_id = 0;
50
51 /*!
52 * \internal
53 * \brief Get log-friendly string equivalent of a controller group join phase
54 *
55 * \param[in] phase Join phase
56 *
57 * \return Log-friendly string equivalent of \p phase
58 */
59 static const char *
60 join_phase_text(enum controld_join_phase phase)
61 {
62 switch (phase) {
63 case controld_join_nack:
64 return "nack";
65 case controld_join_none:
66 return "none";
67 case controld_join_welcomed:
68 return "welcomed";
69 case controld_join_integrated:
70 return "integrated";
71 case controld_join_finalized:
72 return "finalized";
73 case controld_join_confirmed:
74 return "confirmed";
75 default:
76 return "invalid";
77 }
78 }
79
80 /*!
81 * \internal
82 * \brief Destroy the hash table containing failed sync nodes
83 */
84 void
85 controld_destroy_failed_sync_table(void)
86 {
87 g_clear_pointer(&failed_sync_nodes, g_hash_table_destroy);
88 }
89
90 /*!
91 * \internal
92 * \brief Remove a node from the failed sync nodes table if present
93 *
94 * \param[in] node_name Node name to remove
95 */
96 void
97 controld_remove_failed_sync_node(const char *node_name)
98 {
99 if (failed_sync_nodes != NULL) {
100 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
101 }
102 }
103
104 /*!
105 * \internal
106 * \brief Add to a hash table a node whose CIB failed to sync
107 *
108 * \param[in] node_name Name of node whose CIB failed to sync
109 * \param[in] join_id Join round when the failure occurred
110 */
111 static void
112 record_failed_sync_node(const char *node_name, gint join_id)
113 {
114 if (failed_sync_nodes == NULL) {
115 failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
116 }
117
118 /* If the node is already in the table then we failed to nack it during the
119 * filter offer step
120 */
121 CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
122 GINT_TO_POINTER(join_id)));
123 }
124
125 /*!
126 * \internal
127 * \brief Look up a node name in the failed sync table
128 *
129 * \param[in] node_name Name of node to look up
130 * \param[out] join_id Where to store the join ID of when the sync failed
131 *
132 * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
133 * node name was found, or \p pcmk_rc_node_unknown otherwise.
134 * \note \p *join_id is set to -1 if the node is not found.
135 */
136 static int
137 lookup_failed_sync_node(const char *node_name, gint *join_id)
138 {
139 *join_id = -1;
140
141 if (failed_sync_nodes != NULL) {
142 gpointer result = g_hash_table_lookup(failed_sync_nodes,
143 (gchar *) node_name);
144 if (result != NULL) {
145 *join_id = GPOINTER_TO_INT(result);
146 return pcmk_rc_ok;
147 }
148 }
149 return pcmk_rc_node_unknown;
150 }
151
152 void
153 crm_update_peer_join(const char *source, pcmk__node_status_t *node,
154 enum controld_join_phase phase)
155 {
156 enum controld_join_phase last = controld_get_join_phase(node);
157
158 CRM_CHECK(node != NULL, return);
159
160 /* Remote nodes do not participate in joins */
161 if (pcmk__is_set(node->flags, pcmk__node_status_remote)) {
162 return;
163 }
164
165 if (phase == last) {
166 pcmk__trace("Node %s join-%d phase is still %s "
167 QB_XS " nodeid=%" PRIu32 " source=%s",
168 node->name, current_join_id, join_phase_text(last),
169 node->cluster_layer_id, source);
170 return;
171 }
172
173 if ((phase <= controld_join_none) || (phase == (last + 1))) {
174 struct controld_node_status_data *data = NULL;
175
176 if (node->user_data == NULL) {
177 node->user_data =
178 pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
179 }
180 data = node->user_data;
181 data->join_phase = phase;
182
183 pcmk__trace("Node %s join-%d phase is now %s (was %s) "
184 QB_XS " nodeid=%" PRIu32 " source=%s",
185 node->name, current_join_id, join_phase_text(phase),
186 join_phase_text(last), node->cluster_layer_id,
187 source);
188 return;
189 }
190
191 pcmk__warn("Rejecting join-%d phase update for node %s because can't go "
192 "from %s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
193 current_join_id, node->name, join_phase_text(last),
194 join_phase_text(phase), node->cluster_layer_id, source);
195 }
196
197 static void
198 set_join_phase_none(gpointer key, gpointer value, gpointer user_data)
199 {
200 crm_update_peer_join(__func__, (pcmk__node_status_t *) value,
201 controld_join_none);
202 }
203
204 /*!
205 * \internal
206 * \brief Create a join message from the DC
207 *
208 * \param[in] join_op Join operation name
209 * \param[in] host_to Recipient of message
210 */
211 static xmlNode *
212 create_dc_message(const char *join_op, const char *host_to)
213 {
214 xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
215 CRM_SYSTEM_CRMD, join_op, NULL);
216
217 /* Identify which election this is a part of */
218 pcmk__xe_set_int(msg, PCMK__XA_JOIN_ID, current_join_id);
219
220 /* Add a field specifying whether the DC is shutting down. This keeps the
221 * joining node from fencing the old DC if it becomes the new DC.
222 */
223 pcmk__xe_set_bool(msg, PCMK__XA_DC_LEAVING,
224 pcmk__is_set(controld_globals.fsa_input_register,
225 R_SHUTDOWN));
226 return msg;
227 }
228
229 static void
230 join_make_offer(gpointer key, gpointer value, gpointer user_data)
231 {
232 /* @TODO We don't use user_data except to distinguish one particular call
233 * from others. Make this clearer.
234 */
235 xmlNode *offer = NULL;
236 pcmk__node_status_t *member = (pcmk__node_status_t *) value;
237
238 pcmk__assert(member != NULL);
239 if (!pcmk__cluster_is_node_active(member)) {
240 pcmk__info("Not making join-%d offer to inactive node %s",
241 current_join_id, pcmk__s(member->name, "with unknown name"));
242 if ((member->expected == NULL)
243 && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
244 /* You would think this unsafe, but in fact this plus an
245 * active resource is what causes it to be fenced.
246 *
247 * Yes, this does mean that any node that dies at the same
248 * time as the old DC and is not running resource (still)
249 * won't be fenced.
250 *
251 * I'm not happy about this either.
252 */
253 pcmk__update_peer_expected(member, CRMD_JOINSTATE_DOWN);
254 }
255 return;
256 }
257
258 if (member->name == NULL) {
259 pcmk__info("Not making join-%d offer to node uuid %s with unknown name",
260 current_join_id, member->xml_id);
261 return;
262 }
263
264 if (controld_globals.membership_id != controld_globals.peer_seq) {
265 controld_globals.membership_id = controld_globals.peer_seq;
266 pcmk__info("Making join-%d offers based on membership event %llu",
267 current_join_id, controld_globals.peer_seq);
268 }
269
270 if (user_data != NULL) {
271 enum controld_join_phase phase = controld_get_join_phase(member);
272
273 if (phase > controld_join_none) {
274 pcmk__info("Not making join-%d offer to already known node %s (%s)",
275 current_join_id, member->name, join_phase_text(phase));
276 return;
277 }
278 }
279
280 crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
281 controld_join_none);
282
283 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
284
285 // Advertise our feature set so the joining node can bail if not compatible
286 pcmk__xe_set(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
287
288 pcmk__info("Sending join-%d offer to %s", current_join_id, member->name);
289 pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
290 pcmk__xml_free(offer);
291
292 crm_update_peer_join(__func__, member, controld_join_welcomed);
293 }
294
295 // A_DC_JOIN_OFFER_ALL
296 void
297 do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
298 enum crmd_fsa_state cur_state,
299 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
300 {
301 int count = 0;
302
303 if ((cause == C_HA_MESSAGE) && (current_input == I_NODE_JOIN)) {
304 pcmk__info("A new node joined the cluster");
305 }
306
307 current_join_id++;
308 if (current_join_id <= 0) {
309 current_join_id = 1;
310 }
311 pcmk__debug("Starting new join round join-%d", current_join_id);
312
313 g_hash_table_foreach(pcmk__peer_cache, set_join_phase_none, NULL);
314 free_max_generation();
315 controld_clear_fsa_input_flags(R_HAVE_CIB);
316 update_dc(NULL);
317
318 /* For each node, either send a welcome message and update join phase to
319 * welcomed, or set expected state to down if inactive and lost.
320 */
321 g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
322
323 count = crmd_join_phase_count(controld_join_welcomed);
324 pcmk__info("Waiting on join-%d requests from %d outstanding node%s",
325 current_join_id, count, pcmk__plural_s(count));
326
327 // Don't waste time by invoking the scheduler yet
328 }
329
330 // A_DC_JOIN_OFFER_ONE
331 void
332 do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
333 enum crmd_fsa_state cur_state,
334 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
335 {
336 pcmk__node_status_t *member = NULL;
337 ha_msg_input_t *welcome = NULL;
338 const char *join_to = NULL;
339 int count = 0;
340
341 pcmk__assert(msg_data != NULL);
342
343 welcome = msg_data->data;
344 if (welcome == NULL) {
345 pcmk__info("Making join-%d offers to any unconfirmed nodes because an "
346 "unknown node joined", current_join_id);
347 g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
348 check_join_state(cur_state, __func__);
349 return;
350 }
351
352 join_to = pcmk__xe_get(welcome->msg, PCMK__XA_SRC);
353 if (join_to == NULL) {
354 pcmk__err("Can't make join-%d offer to unknown node", current_join_id);
355 return;
356 }
357
358 /* It is possible that a node will have been sick or starting up when the
359 * original offer was made. However, either it will re-announce itself in
360 * due course, or we can re-store the original offer on the client.
361 */
362 member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
363 crm_update_peer_join(__func__, member, controld_join_none);
364 join_make_offer(NULL, member, NULL);
365
366 /* If the offer isn't to the local node, make an offer to the local node as
367 * well, to ensure the correct value for max_generation_from.
368 */
369 if (!controld_is_local_node(join_to)) {
370 member = controld_get_local_node_status();
371 join_make_offer(NULL, member, NULL);
372 }
373
374 /* This was a genuine join request; cancel any existing transition and
375 * invoke the scheduler.
376 */
377 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
378 NULL);
379
380 count = crmd_join_phase_count(controld_join_welcomed);
381 pcmk__info("Waiting on join-%d requests from %d outstanding node%s",
382 current_join_id, count, pcmk__plural_s(count));
383
384 // Don't waste time by invoking the scheduler yet
385 }
386
387 static int
388 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
389 {
390 const char *elem_l = pcmk__xe_get(left, field);
391 const char *elem_r = pcmk__xe_get(right, field);
392
393 long long int_elem_l;
394 long long int_elem_r;
395
396 int rc = pcmk_rc_ok;
397
398 rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
399 if (rc != pcmk_rc_ok) { // Shouldn't be possible
400 pcmk__warn("Comparing current CIB %s as -1 because '%s' is not an "
401 "integer",
402 field, elem_l);
403 }
404
405 rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
406 if (rc != pcmk_rc_ok) { // Shouldn't be possible
407 pcmk__warn("Comparing joining node's CIB %s as -1 because '%s' is not "
408 "an integer",
409 field, elem_r);
410 }
411
412 if (int_elem_l < int_elem_r) {
413 return -1;
414
415 } else if (int_elem_l > int_elem_r) {
416 return 1;
417 }
418
419 return 0;
420 }
421
422 // A_DC_JOIN_PROCESS_REQ
423 void
424 do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
425 enum crmd_fsa_state cur_state,
426 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
427 {
428 ha_msg_input_t *join_ack = NULL;
429 const char *join_from = NULL;
430 int join_id = -1;
431 xmlNode *generation = NULL;
432 int cmp = 0;
433 pcmk__node_status_t *join_node = NULL;
434 const char *join_version = NULL;
435 const char *ref = NULL;
436 gint value = 0;
437 bool accept = true;
438 int count = 0;
439
440 pcmk__assert((msg_data != NULL) && (msg_data->data != NULL));
441
442 join_ack = msg_data->data;
443 join_from = pcmk__xe_get(join_ack->msg, PCMK__XA_SRC);
444 if (join_from == NULL) {
445 pcmk__err("Ignoring invalid join request without node name");
446 return;
447 }
448
449 pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
450 if (join_id != current_join_id) {
451 pcmk__debug("Ignoring join-%d request from %s because we are on "
452 "join-%d", join_id, join_from, current_join_id);
453 check_join_state(cur_state, __func__);
454 return;
455 }
456
457 generation = join_ack->xml;
458 if ((max_generation_xml != NULL) && (generation != NULL)) {
459 static const char *attributes[] = {
460 PCMK_XA_ADMIN_EPOCH,
461 PCMK_XA_EPOCH,
462 PCMK_XA_NUM_UPDATES,
463 };
464
465 /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
466 * element from the join client. The "if" guard is for clarity.
467 */
468 if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
469 for (int i = 0; (cmp == 0) && (i < PCMK__NELEM(attributes)); i++) {
470 cmp = compare_int_fields(max_generation_xml, generation,
471 attributes[i]);
472 }
473
474 } else { // Should always be PCMK__XE_GENERATION_TUPLE
475 CRM_LOG_ASSERT(false);
476 }
477 }
478
479 join_node = pcmk__get_node(0, join_from, NULL,
480 pcmk__node_search_cluster_member);
481 join_version = pcmk__xe_get(join_ack->msg, PCMK_XA_CRM_FEATURE_SET);
482
483 // For logging only
484 ref = pcmk__s(pcmk__xe_get(join_ack->msg, PCMK_XA_REFERENCE), "(none)");
485
486 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
487 pcmk__err("Rejecting join-%d request from node %s because we failed to "
488 "sync its CIB in join-%d " QB_XS " ref=%s",
489 join_id, join_from, value, ref);
490 accept = false;
491
492 } else if (!pcmk__cluster_is_node_active(join_node)) {
493 if (match_down_event(join_from) != NULL) {
494 /* The join request was received after the node was fenced or
495 * otherwise shutdown in a way that we're aware of. No need to log
496 * an error in this rare occurrence; we know the client was recently
497 * shut down, and receiving a lingering in-flight request is not
498 * cause for alarm.
499 */
500 pcmk__debug("Rejecting join-%d request from inactive node %s "
501 QB_XS " ref=%s",
502 join_id, join_from, ref);
503 } else {
504 pcmk__err("Rejecting join-%d request from inactive node %s "
505 QB_XS " ref=%s",
506 join_id, join_from, ref);
507 }
508 accept = false;
509
510 } else if (generation == NULL) {
511 pcmk__err("Rejecting invalid join-%d request from node %s missing CIB "
512 "generation " QB_XS " ref=%s",
513 join_id, join_from, ref);
514 accept = false;
515
516 } else if ((join_version == NULL)
517 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
518 pcmk__err("Rejecting join-%d request from node %s because feature set "
519 "%s is incompatible with ours (%s) " QB_XS " ref=%s",
520 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
521 CRM_FEATURE_SET, ref);
522 accept = false;
523
524 } else if (max_generation_xml == NULL) {
525 const char *validation = pcmk__xe_get(generation,
526 PCMK_XA_VALIDATE_WITH);
527
528 if (pcmk__get_schema(validation) == NULL) {
529 pcmk__err("Rejecting join-%d request from %s (with first CIB "
530 "generation) due to %s schema version %s "
531 QB_XS " ref=%s",
532 join_id, join_from,
533 ((validation == NULL)? "missing" : "unknown"),
534 pcmk__s(validation, ""), ref);
535 accept = false;
536
537 } else {
538 pcmk__debug("Accepting join-%d request from %s (with first CIB "
539 "generation) " QB_XS " ref=%s",
540 join_id, join_from, ref);
541 max_generation_xml = pcmk__xml_copy(NULL, generation);
542 pcmk__str_update(&max_generation_from, join_from);
543 }
544
545 } else if ((cmp < 0)
546 || ((cmp == 0) && controld_is_local_node(join_from))) {
547 const char *validation = pcmk__xe_get(generation,
548 PCMK_XA_VALIDATE_WITH);
549
550 if (pcmk__get_schema(validation) == NULL) {
551 pcmk__err("Rejecting join-%d request from %s (with better CIB "
552 "generation than current best from %s) due to %s "
553 "schema version %s " QB_XS " ref=%s",
554 join_id, join_from, max_generation_from,
555 ((validation == NULL)? "missing" : "unknown"),
556 pcmk__s(validation, ""), ref);
557 accept = false;
558
559 } else {
560 pcmk__debug("Accepting join-%d request from %s (with better CIB "
561 "generation than current best from %s) " QB_XS " ref=%s",
562 join_id, join_from, max_generation_from, ref);
563 pcmk__log_xml_debug(max_generation_xml, "Old max generation");
564 pcmk__log_xml_debug(generation, "New max generation");
565
566 pcmk__xml_free(max_generation_xml);
567 max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
568 pcmk__str_update(&max_generation_from, join_from);
569 }
570
571 } else {
572 pcmk__debug("Accepting join-%d request from %s " QB_XS " ref=%s",
573 join_id, join_from, ref);
574 }
575
576 if (accept) {
577 crm_update_peer_join(__func__, join_node, controld_join_integrated);
578 pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_MEMBER);
579
580 } else {
581 crm_update_peer_join(__func__, join_node, controld_join_nack);
582 pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_NACK);
583 }
584
585 count = crmd_join_phase_count(controld_join_integrated);
586 pcmk__debug("%d node%s currently integrated in join-%d", count,
587 pcmk__plural_s(count), join_id);
588
589 if (!check_join_state(cur_state, __func__)) {
590 // Don't waste time by invoking the scheduler yet
591 count = crmd_join_phase_count(controld_join_welcomed);
592 pcmk__debug("Waiting on join-%d requests from %d outstanding node%s",
593 join_id, count, pcmk__plural_s(count));
594 }
595 }
596
597 // A_DC_JOIN_FINALIZE
598 void
599 do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
600 enum crmd_fsa_state cur_state,
601 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
602 {
603 char *sync_from = NULL;
604 int rc = pcmk_ok;
605 int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
606 int count_finalizable = crmd_join_phase_count(controld_join_integrated)
607 + crmd_join_phase_count(controld_join_nack);
608
609 /* This we can do straight away and avoid clients timing us out while we
610 * compute the latest CIB
611 */
612 if (count_welcomed != 0) {
613 pcmk__debug("Waiting on join-%d requests from %d outstanding node%s "
614 "before finalizing join", current_join_id, count_welcomed,
615 pcmk__plural_s(count_welcomed));
616 crmd_join_phase_log(LOG_DEBUG);
617 return;
618 }
619
620 if (count_finalizable == 0) {
621 pcmk__debug("Finalization not needed for join-%d at the current time",
622 current_join_id);
623 crmd_join_phase_log(LOG_DEBUG);
624 check_join_state(controld_globals.fsa_state, __func__);
625 return;
626 }
627
628 controld_clear_fsa_input_flags(R_HAVE_CIB);
629 if ((max_generation_from == NULL)
630 || controld_is_local_node(max_generation_from)) {
631 controld_set_fsa_input_flags(R_HAVE_CIB);
632 }
633
634 if (!controld_globals.transition_graph->complete) {
635 pcmk__warn("Delaying join-%d finalization while transition in progress",
636 current_join_id);
637 crmd_join_phase_log(LOG_DEBUG);
638 controld_fsa_stall(msg_data, action);
639 return;
640 }
641
642 if (pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
643 // Send our CIB out to everyone
644 sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
645 } else {
646 // Ask for the agreed best CIB
647 sync_from = pcmk__str_copy(max_generation_from);
648 }
649
650 pcmk__notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s "
651 "with schema %s and feature set %s from %s)",
652 current_join_id, count_finalizable,
653 pcmk__plural_s(count_finalizable),
654 pcmk__s(pcmk__xe_get(max_generation_xml, PCMK_XA_ADMIN_EPOCH),
655 "0"),
656 pcmk__s(pcmk__xe_get(max_generation_xml, PCMK_XA_EPOCH), "0"),
657 pcmk__s(pcmk__xe_get(max_generation_xml, PCMK_XA_NUM_UPDATES),
658 "0"),
659 pcmk__s(pcmk__xe_get(max_generation_xml,
660 PCMK_XA_VALIDATE_WITH),
661 "(none)"),
662 pcmk__s(pcmk__xe_get(max_generation_xml,
663 PCMK_XA_CRM_FEATURE_SET),
664 "(none)"),
665 sync_from);
666
667 crmd_join_phase_log(LOG_DEBUG);
668
669 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
670 sync_from, NULL, cib_none);
671 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
672 }
673
674 void
675 free_max_generation(void)
676 {
|
CID (unavailable; MK=ceb1d8ab08ab93d6785bf54c6b7f0ff3) (#1 of 2): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(1) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(2) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
677 g_clear_pointer(&max_generation_from, free);
678 g_clear_pointer(&max_generation_xml, pcmk__xml_free);
679 }
680
681 void
682 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
683 {
684 CRM_LOG_ASSERT(-EPERM != rc);
685
686 if (rc != pcmk_ok) {
687 const char *sync_from = (const char *) user_data;
688
689 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
690 "Could not sync CIB from %s in join-%d: %s",
691 sync_from, current_join_id, pcmk_strerror(rc));
692
693 if (rc != -pcmk_err_old_data) {
694 record_failed_sync_node(sync_from, current_join_id);
695 }
696
697 /* restart the whole join process */
698 register_fsa_error(I_ELECTION_DC, NULL);
699
700 } else if (!AM_I_DC) {
701 pcmk__debug("Sync'ed CIB for join-%d but no longer DC",
702 current_join_id);
703
704 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
705 pcmk__debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
706 "(%s)", current_join_id,
707 fsa_state2string(controld_globals.fsa_state));
708
709 } else {
710 controld_set_fsa_input_flags(R_HAVE_CIB);
711
712 /* make sure dc_uuid is re-set to us */
713 if (!check_join_state(controld_globals.fsa_state, __func__)) {
714 int count_finalizable = 0;
715
716 count_finalizable = crmd_join_phase_count(controld_join_integrated)
717 + crmd_join_phase_count(controld_join_nack);
718
719 pcmk__debug("Notifying %d node%s of join-%d results",
720 count_finalizable, pcmk__plural_s(count_finalizable),
721 current_join_id);
722 g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
723 }
724 }
725 }
726
727 static void
728 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
729 xmlNode *output, void *user_data)
730 {
731 const char *node = user_data;
732
733 if (rc != pcmk_ok) {
734 pcmk__crit("join-%d node history update (via CIB call %d) for node %s "
735 "failed: %s",
736 current_join_id, call_id, node, pcmk_strerror(rc));
737 pcmk__log_xml_debug(msg, "failed");
738 register_fsa_error(I_ERROR, NULL);
739 }
740
741 pcmk__debug("join-%d node history update (via CIB call %d) for node %s "
742 "complete", current_join_id, call_id, node);
743 check_join_state(controld_globals.fsa_state, __func__);
744 }
745
746 // A_DC_JOIN_PROCESS_ACK
747 void
748 do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
749 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
750 fsa_data_t *msg_data)
751 {
752 ha_msg_input_t *join_ack = NULL;
753 char *join_from = NULL;
754 const char *op = NULL;
755 int join_id = -1;
756
757 pcmk__node_status_t *peer = NULL;
758 enum controld_join_phase phase = controld_join_none;
759
760 cib_t *cib = controld_globals.cib_conn;
761 int rc = pcmk_ok;
762
763 const bool unlocked_only = pcmk__is_set(controld_globals.flags,
764 controld_shutdown_lock_enabled);
765 char *xpath = NULL;
766 xmlNode *state = NULL;
767
768 pcmk__assert((msg_data != NULL) && (msg_data->data != NULL));
769
770 join_ack = msg_data->data;
771
772 // Sanity checks
773 join_from = pcmk__xe_get_copy(join_ack->msg, PCMK__XA_SRC);
774 if (join_from == NULL) {
775 pcmk__warn("Ignoring message received without node identification");
776 goto done;
777 }
778
779 op = pcmk__xe_get(join_ack->msg, PCMK__XA_CRM_TASK);
780 if (op == NULL) {
781 pcmk__warn("Ignoring message received from %s without task", join_from);
782 goto done;
783 }
784 if (!pcmk__str_eq(op, CRM_OP_JOIN_CONFIRM, pcmk__str_none)) {
785 pcmk__debug("Ignoring '%s' message from %s while waiting for '%s'", op,
786 join_from, CRM_OP_JOIN_CONFIRM);
787 goto done;
788 }
789
790 if (pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID,
791 &join_id) != pcmk_rc_ok) {
792 pcmk__warn("Ignoring join confirmation from %s without valid join ID",
793 join_from);
794 goto done;
795 }
796
797 peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
798 phase = controld_get_join_phase(peer);
799 if (phase != controld_join_finalized) {
800 pcmk__info("Ignoring out-of-sequence join-%d confirmation from %s "
801 "(currently %s not %s)",
802 join_id, join_from, join_phase_text(phase),
803 join_phase_text(controld_join_finalized));
804 goto done;
805 }
806
807 if (join_id != current_join_id) {
808 pcmk__err("Rejecting join-%d confirmation from %s because currently on "
809 "join-%d",
810 join_id, join_from, current_join_id);
811 crm_update_peer_join(__func__, peer, controld_join_nack);
812 goto done;
813 }
814
815 crm_update_peer_join(__func__, peer, controld_join_confirmed);
816
817 /* Update CIB with node's current executor state. A new transition will be
818 * triggered later, when the CIB manager notifies us of the change.
819 *
820 * The delete and modify requests are part of an atomic transaction.
821 */
822 rc = cib->cmds->init_transaction(cib);
823 if (rc != pcmk_ok) {
824 goto done;
825 }
826
827 // Delete relevant parts of node's current executor state from CIB
828 controld_node_history_deletion_strings(join_from, unlocked_only, &xpath,
829 NULL);
830
831 rc = cib->cmds->remove(cib, xpath, NULL,
832 cib_xpath|cib_multiple|cib_transaction);
833 if (rc != pcmk_ok) {
834 goto done;
835 }
836
837 // Update CIB with node's latest known executor state
838 if (controld_is_local_node(join_from)) {
839
840 // Use the latest possible state if processing our own join ack
841 state = controld_query_executor_state();
842
843 if (state != NULL) {
844 pcmk__debug("Updating local node history for join-%d from query "
845 "result", current_join_id);
846
847 } else {
848 pcmk__warn("Updating local node history from join-%d confirmation "
849 "because query failed",
850 current_join_id);
851 }
852
853 } else {
854 pcmk__debug("Updating node history for %s from join-%d confirmation",
855 join_from, current_join_id);
856 }
857
858 rc = cib->cmds->modify(cib, PCMK_XE_STATUS,
859 ((state != NULL)? state : join_ack->xml),
860 cib_can_create|cib_transaction);
861 if (rc != pcmk_ok) {
862 goto done;
863 }
864
865 // Commit the transaction
866 rc = cib->cmds->end_transaction(cib, true, cib_none);
867 fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
868
869 if (rc > 0) {
870 // join_from will be freed after callback
871 join_from = NULL;
872 rc = pcmk_ok;
873 }
874
875 done:
876 if (rc != pcmk_ok) {
877 rc = pcmk_legacy2rc(rc);
878 pcmk__crit("join-%d node history update for node %s failed: %s",
879 current_join_id, join_from, pcmk_rc_str(rc));
880 register_fsa_error(I_ERROR, msg_data);
881 }
882 free(join_from);
883 free(xpath);
884 pcmk__xml_free(state);
885 }
886
887 void
888 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
889 {
890 xmlNode *acknak = NULL;
891 xmlNode *tmp1 = NULL;
892 pcmk__node_status_t *join_node = value;
893 const char *join_to = join_node->name;
894 enum controld_join_phase phase = controld_get_join_phase(join_node);
895 bool integrated = false;
896
897 switch (phase) {
898 case controld_join_integrated:
899 integrated = true;
900 break;
901 case controld_join_nack:
902 break;
903 default:
904 pcmk__trace("Not updating non-integrated and non-nacked node %s "
905 "(%s) for join-%d",
906 join_to, join_phase_text(phase), current_join_id);
907 return;
908 }
909
910 /* Update the <node> element with the node's name and UUID, in case they
911 * weren't known before
912 */
913 pcmk__trace("Updating node name and UUID in CIB for %s", join_to);
914 tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
915 pcmk__xe_set(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
916 pcmk__xe_set(tmp1, PCMK_XA_UNAME, join_to);
917 fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
918 pcmk__xml_free(tmp1);
919
920 join_node = pcmk__get_node(0, join_to, NULL,
921 pcmk__node_search_cluster_member);
922 if (!pcmk__cluster_is_node_active(join_node)) {
923 /*
924 * NACK'ing nodes that the membership layer doesn't know about yet
925 * simply creates more churn
926 *
927 * Better to leave them waiting and let the join restart when
928 * the new membership event comes in
929 *
930 * All other NACKs (due to versions etc) should still be processed
931 */
932 pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_PENDING);
933 return;
934 }
935
936 // Acknowledge or nack node's join request
937 pcmk__debug("%sing join-%d request from %s",
938 (integrated? "Acknowledg" : "Nack"), current_join_id, join_to);
939 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
940 pcmk__xe_set_bool(acknak, CRM_OP_JOIN_ACKNAK, integrated);
941
942 if (integrated) {
943 // No change needed for a nacked node
944 crm_update_peer_join(__func__, join_node, controld_join_finalized);
945 pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_MEMBER);
946
947 /* Iterate through the remote peer cache and add information on which
948 * node hosts each to the ACK message. This keeps new controllers in
949 * sync with what has already happened.
950 */
951 if (pcmk__cluster_num_remote_nodes() > 0) {
952 GHashTableIter iter;
953 pcmk__node_status_t *node = NULL;
954 xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
955
956 g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
957 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
958 xmlNode *remote = NULL;
959
960 if (!node->conn_host) {
961 continue;
962 }
963
964 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
965 pcmk__xe_set_props(remote,
966 PCMK_XA_ID, node->name,
967 PCMK__XA_NODE_STATE, node->state,
968 PCMK__XA_CONNECTION_HOST, node->conn_host,
969 NULL);
970 }
971 }
972 }
973 pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
974 pcmk__xml_free(acknak);
975 }
976
977 gboolean
978 check_join_state(enum crmd_fsa_state cur_state, const char *source)
979 {
980 static unsigned long long highest_seq = 0;
981
982 if (controld_globals.membership_id != controld_globals.peer_seq) {
983 pcmk__debug("join-%d: Membership changed from %llu to %llu "
984 QB_XS " highest=%llu state=%s for=%s",
985 current_join_id, controld_globals.membership_id,
986 controld_globals.peer_seq, highest_seq,
987 fsa_state2string(cur_state), source);
988 if (highest_seq < controld_globals.peer_seq) {
989 /* Don't spam the FSA with duplicates */
990 highest_seq = controld_globals.peer_seq;
991 controld_fsa_prepend(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
992 }
993
994 } else if (cur_state == S_INTEGRATION) {
995 if (crmd_join_phase_count(controld_join_welcomed) == 0) {
996 int count = crmd_join_phase_count(controld_join_integrated);
997
998 pcmk__debug("join-%d: Integration of %d peer%s complete "
999 QB_XS " state=%s for=%s",
1000 current_join_id, count, pcmk__plural_s(count),
1001 fsa_state2string(cur_state), source);
1002 controld_fsa_prepend(C_FSA_INTERNAL, I_INTEGRATED, NULL);
1003 return TRUE;
1004 }
1005
1006 } else if (cur_state == S_FINALIZE_JOIN) {
1007 if (!pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
1008 pcmk__debug("join-%d: Delaying finalization until we have CIB "
1009 QB_XS " state=%s for=%s",
1010 current_join_id, fsa_state2string(cur_state), source);
1011 return TRUE;
1012
1013 } else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
1014 int count = crmd_join_phase_count(controld_join_welcomed);
1015
1016 pcmk__debug("join-%d: Still waiting on %d welcomed node%s "
1017 QB_XS " state=%s for=%s",
1018 current_join_id, count, pcmk__plural_s(count),
1019 fsa_state2string(cur_state), source);
1020 crmd_join_phase_log(LOG_DEBUG);
1021
1022 } else if (crmd_join_phase_count(controld_join_integrated) != 0) {
1023 int count = crmd_join_phase_count(controld_join_integrated);
1024
1025 pcmk__debug("join-%d: Still waiting on %d integrated node%s "
1026 QB_XS " state=%s for=%s",
1027 current_join_id, count, pcmk__plural_s(count),
1028 fsa_state2string(cur_state), source);
1029 crmd_join_phase_log(LOG_DEBUG);
1030
1031 } else if (crmd_join_phase_count(controld_join_finalized) != 0) {
1032 int count = crmd_join_phase_count(controld_join_finalized);
1033
1034 pcmk__debug("join-%d: Still waiting on %d finalized node%s "
1035 QB_XS " state=%s for=%s",
1036 current_join_id, count, pcmk__plural_s(count),
1037 fsa_state2string(cur_state), source);
1038 crmd_join_phase_log(LOG_DEBUG);
1039
1040 } else {
1041 pcmk__debug("join-%d: Complete " QB_XS " state=%s for=%s",
1042 current_join_id, fsa_state2string(cur_state), source);
1043 controld_fsa_append(C_FSA_INTERNAL, I_FINALIZED, NULL);
1044 return TRUE;
1045 }
1046 }
1047
1048 return FALSE;
1049 }
1050
1051 // A_DC_JOIN_FINAL
1052 void
1053 do_dc_join_final(long long action, enum crmd_fsa_cause cause,
1054 enum crmd_fsa_state cur_state,
1055 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
1056 {
1057 pcmk__debug("Ensuring DC, quorum, and node attributes are up to date");
1058 crm_update_quorum(pcmk__cluster_has_quorum(), true);
1059 }
1060
1061 int crmd_join_phase_count(enum controld_join_phase phase)
1062 {
1063 int count = 0;
1064 pcmk__node_status_t *peer;
1065 GHashTableIter iter;
1066
1067 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1068 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1069 if (controld_get_join_phase(peer) == phase) {
1070 count++;
1071 }
1072 }
1073 return count;
1074 }
1075
1076 void crmd_join_phase_log(int level)
1077 {
1078 pcmk__node_status_t *peer;
1079 GHashTableIter iter;
1080
1081 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1082 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1083 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
1084 join_phase_text(controld_get_join_phase(peer)));
1085 }
1086 }
1087