1    	/*
2    	 * Copyright 2004-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <inttypes.h>               // PRIu32
13   	#include <stdbool.h>                // bool, true, false
14   	#include <stdio.h>                  // NULL
15   	#include <stdlib.h>                 // free(), etc.
16   	
17   	#include <glib.h>                   // gboolean, etc.
18   	#include <libxml/tree.h>            // xmlNode
19   	
20   	#include <crm/crm.h>
21   	
22   	#include <crm/common/xml.h>
23   	#include <crm/cluster.h>
24   	
25   	#include <pacemaker-controld.h>
26   	
/* Name of the node whose join request carried the best CIB generation accepted
 * so far (set in do_dc_join_filter_offer(), released by free_max_generation())
 */
static char *max_generation_from = NULL;

/* Copy of the CIB generation XML that was sent with the join request from
 * max_generation_from (released by free_max_generation())
 */
static xmlNode *max_generation_xml = NULL;

/*!
 * \internal
 * \brief Nodes from which a CIB sync has failed since the peer joined
 *
 * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
 * the name of a client node from which a CIB \p sync_from() call has failed in
 * \p do_dc_join_finalize() since the client joined the cluster as a peer.
 * \p join_id is the ID of the join round in which the \p sync_from() failed,
 * and is intended for use in nack log messages.
 */
static GHashTable *failed_sync_nodes = NULL;

// Declarations of non-static functions that are referenced by the code below
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);

/* Numeric counter used to identify join rounds (an unsigned int would be
 * appropriate, except we get and set it in XML as int)
 */
static int current_join_id = 0;
50   	
51   	/*!
52   	 * \internal
53   	 * \brief Get log-friendly string equivalent of a controller group join phase
54   	 *
55   	 * \param[in] phase  Join phase
56   	 *
57   	 * \return Log-friendly string equivalent of \p phase
58   	 */
59   	static const char *
60   	join_phase_text(enum controld_join_phase phase)
61   	{
62   	    switch (phase) {
63   	        case controld_join_nack:
64   	            return "nack";
65   	        case controld_join_none:
66   	            return "none";
67   	        case controld_join_welcomed:
68   	            return "welcomed";
69   	        case controld_join_integrated:
70   	            return "integrated";
71   	        case controld_join_finalized:
72   	            return "finalized";
73   	        case controld_join_confirmed:
74   	            return "confirmed";
75   	        default:
76   	            return "invalid";
77   	    }
78   	}
79   	
80   	/*!
81   	 * \internal
82   	 * \brief Destroy the hash table containing failed sync nodes
83   	 */
84   	void
85   	controld_destroy_failed_sync_table(void)
86   	{
87   	    g_clear_pointer(&failed_sync_nodes, g_hash_table_destroy);
88   	}
89   	
90   	/*!
91   	 * \internal
92   	 * \brief Remove a node from the failed sync nodes table if present
93   	 *
94   	 * \param[in] node_name  Node name to remove
95   	 */
96   	void
97   	controld_remove_failed_sync_node(const char *node_name)
98   	{
99   	    if (failed_sync_nodes != NULL) {
100  	        g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
101  	    }
102  	}
103  	
104  	/*!
105  	 * \internal
106  	 * \brief Add to a hash table a node whose CIB failed to sync
107  	 *
108  	 * \param[in] node_name  Name of node whose CIB failed to sync
109  	 * \param[in] join_id    Join round when the failure occurred
110  	 */
111  	static void
112  	record_failed_sync_node(const char *node_name, gint join_id)
113  	{
114  	    if (failed_sync_nodes == NULL) {
115  	        failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
116  	    }
117  	
118  	    /* If the node is already in the table then we failed to nack it during the
119  	     * filter offer step
120  	     */
121  	    CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
122  	                                       GINT_TO_POINTER(join_id)));
123  	}
124  	
125  	/*!
126  	 * \internal
127  	 * \brief Look up a node name in the failed sync table
128  	 *
129  	 * \param[in]  node_name  Name of node to look up
130  	 * \param[out] join_id    Where to store the join ID of when the sync failed
131  	 *
132  	 * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
133  	 *         node name was found, or \p pcmk_rc_node_unknown otherwise.
134  	 * \note \p *join_id is set to -1 if the node is not found.
135  	 */
136  	static int
137  	lookup_failed_sync_node(const char *node_name, gint *join_id)
138  	{
139  	    *join_id = -1;
140  	
141  	    if (failed_sync_nodes != NULL) {
142  	        gpointer result = g_hash_table_lookup(failed_sync_nodes,
143  	                                              (gchar *) node_name);
144  	        if (result != NULL) {
145  	            *join_id = GPOINTER_TO_INT(result);
146  	            return pcmk_rc_ok;
147  	        }
148  	    }
149  	    return pcmk_rc_node_unknown;
150  	}
151  	
152  	void
153  	crm_update_peer_join(const char *source, pcmk__node_status_t *node,
154  	                     enum controld_join_phase phase)
155  	{
156  	    enum controld_join_phase last = controld_get_join_phase(node);
157  	
158  	    CRM_CHECK(node != NULL, return);
159  	
160  	    /* Remote nodes do not participate in joins */
161  	    if (pcmk__is_set(node->flags, pcmk__node_status_remote)) {
162  	        return;
163  	    }
164  	
165  	    if (phase == last) {
166  	        pcmk__trace("Node %s join-%d phase is still %s "
167  	                    QB_XS " nodeid=%" PRIu32 " source=%s",
168  	                    node->name, current_join_id, join_phase_text(last),
169  	                    node->cluster_layer_id, source);
170  	        return;
171  	    }
172  	
173  	    if ((phase <= controld_join_none) || (phase == (last + 1))) {
174  	        struct controld_node_status_data *data = NULL;
175  	
176  	        if (node->user_data == NULL) {
177  	            node->user_data =
178  	                pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
179  	        }
180  	        data = node->user_data;
181  	        data->join_phase = phase;
182  	
183  	        pcmk__trace("Node %s join-%d phase is now %s (was %s) "
184  	                    QB_XS " nodeid=%" PRIu32 " source=%s",
185  	                    node->name, current_join_id, join_phase_text(phase),
186  	                    join_phase_text(last), node->cluster_layer_id,
187  	                    source);
188  	        return;
189  	    }
190  	
191  	    pcmk__warn("Rejecting join-%d phase update for node %s because can't go "
192  	               "from %s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
193  	               current_join_id, node->name, join_phase_text(last),
194  	               join_phase_text(phase), node->cluster_layer_id, source);
195  	}
196  	
197  	static void
198  	set_join_phase_none(gpointer key, gpointer value, gpointer user_data)
199  	{
200  	    crm_update_peer_join(__func__, (pcmk__node_status_t *) value,
201  	                         controld_join_none);
202  	}
203  	
204  	/*!
205  	 * \internal
206  	 * \brief Create a join message from the DC
207  	 *
208  	 * \param[in] join_op  Join operation name
209  	 * \param[in] host_to  Recipient of message
210  	 */
211  	static xmlNode *
212  	create_dc_message(const char *join_op, const char *host_to)
213  	{
214  	    xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
215  	                                     CRM_SYSTEM_CRMD, join_op, NULL);
216  	
217  	    /* Identify which election this is a part of */
218  	    pcmk__xe_set_int(msg, PCMK__XA_JOIN_ID, current_join_id);
219  	
220  	    /* Add a field specifying whether the DC is shutting down. This keeps the
221  	     * joining node from fencing the old DC if it becomes the new DC.
222  	     */
223  	    pcmk__xe_set_bool(msg, PCMK__XA_DC_LEAVING,
224  	                      pcmk__is_set(controld_globals.fsa_input_register,
225  	                                   R_SHUTDOWN));
226  	    return msg;
227  	}
228  	
229  	static void
230  	join_make_offer(gpointer key, gpointer value, gpointer user_data)
231  	{
232  	    /* @TODO We don't use user_data except to distinguish one particular call
233  	     * from others. Make this clearer.
234  	     */
235  	    xmlNode *offer = NULL;
236  	    pcmk__node_status_t *member = (pcmk__node_status_t *) value;
237  	
238  	    pcmk__assert(member != NULL);
239  	    if (!pcmk__cluster_is_node_active(member)) {
240  	        pcmk__info("Not making join-%d offer to inactive node %s",
241  	                   current_join_id, pcmk__s(member->name, "with unknown name"));
242  	        if ((member->expected == NULL)
243  	            && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
244  	            /* You would think this unsafe, but in fact this plus an
245  	             * active resource is what causes it to be fenced.
246  	             *
247  	             * Yes, this does mean that any node that dies at the same
248  	             * time as the old DC and is not running resource (still)
249  	             * won't be fenced.
250  	             *
251  	             * I'm not happy about this either.
252  	             */
253  	            pcmk__update_peer_expected(member, CRMD_JOINSTATE_DOWN);
254  	        }
255  	        return;
256  	    }
257  	
258  	    if (member->name == NULL) {
259  	        pcmk__info("Not making join-%d offer to node uuid %s with unknown name",
260  	                   current_join_id, member->xml_id);
261  	        return;
262  	    }
263  	
264  	    if (controld_globals.membership_id != controld_globals.peer_seq) {
265  	        controld_globals.membership_id = controld_globals.peer_seq;
266  	        pcmk__info("Making join-%d offers based on membership event %llu",
267  	                   current_join_id, controld_globals.peer_seq);
268  	    }
269  	
270  	    if (user_data != NULL) {
271  	        enum controld_join_phase phase = controld_get_join_phase(member);
272  	
273  	        if (phase > controld_join_none) {
274  	            pcmk__info("Not making join-%d offer to already known node %s (%s)",
275  	                       current_join_id, member->name, join_phase_text(phase));
276  	            return;
277  	        }
278  	    }
279  	
280  	    crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
281  	                         controld_join_none);
282  	
283  	    offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
284  	
285  	    // Advertise our feature set so the joining node can bail if not compatible
286  	    pcmk__xe_set(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
287  	
288  	    pcmk__info("Sending join-%d offer to %s", current_join_id, member->name);
289  	    pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
290  	    pcmk__xml_free(offer);
291  	
292  	    crm_update_peer_join(__func__, member, controld_join_welcomed);
293  	}
294  	
295  	// A_DC_JOIN_OFFER_ALL
296  	void
297  	do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
298  	                     enum crmd_fsa_state cur_state,
299  	                     enum crmd_fsa_input current_input, fsa_data_t *msg_data)
300  	{
301  	    int count = 0;
302  	
303  	    if ((cause == C_HA_MESSAGE) && (current_input == I_NODE_JOIN)) {
304  	        pcmk__info("A new node joined the cluster");
305  	    }
306  	
307  	    current_join_id++;
308  	    if (current_join_id <= 0) {
309  	        current_join_id = 1;
310  	    }
311  	    pcmk__debug("Starting new join round join-%d", current_join_id);
312  	
313  	    g_hash_table_foreach(pcmk__peer_cache, set_join_phase_none, NULL);
314  	    free_max_generation();
315  	    controld_clear_fsa_input_flags(R_HAVE_CIB);
316  	    update_dc(NULL);
317  	
318  	    /* For each node, either send a welcome message and update join phase to
319  	     * welcomed, or set expected state to down if inactive and lost.
320  	     */
321  	    g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
322  	
323  	    count = crmd_join_phase_count(controld_join_welcomed);
324  	    pcmk__info("Waiting on join-%d requests from %d outstanding node%s",
325  	               current_join_id, count, pcmk__plural_s(count));
326  	
327  	    // Don't waste time by invoking the scheduler yet
328  	}
329  	
330  	// A_DC_JOIN_OFFER_ONE
331  	void
332  	do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
333  	                     enum crmd_fsa_state cur_state,
334  	                     enum crmd_fsa_input current_input, fsa_data_t *msg_data)
335  	{
336  	    pcmk__node_status_t *member = NULL;
337  	    ha_msg_input_t *welcome = NULL;
338  	    const char *join_to = NULL;
339  	    int count = 0;
340  	
341  	    pcmk__assert(msg_data != NULL);
342  	
343  	    welcome = msg_data->data;
344  	    if (welcome == NULL) {
345  	        pcmk__info("Making join-%d offers to any unconfirmed nodes because an "
346  	                   "unknown node joined", current_join_id);
347  	        g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
348  	        check_join_state(cur_state, __func__);
349  	        return;
350  	    }
351  	
352  	    join_to = pcmk__xe_get(welcome->msg, PCMK__XA_SRC);
353  	    if (join_to == NULL) {
354  	        pcmk__err("Can't make join-%d offer to unknown node", current_join_id);
355  	        return;
356  	    }
357  	
358  	    /* It is possible that a node will have been sick or starting up when the
359  	     * original offer was made. However, either it will re-announce itself in
360  	     * due course, or we can re-store the original offer on the client.
361  	     */
362  	    member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
363  	    crm_update_peer_join(__func__, member, controld_join_none);
364  	    join_make_offer(NULL, member, NULL);
365  	
366  	    /* If the offer isn't to the local node, make an offer to the local node as
367  	     * well, to ensure the correct value for max_generation_from.
368  	     */
369  	    if (!controld_is_local_node(join_to)) {
370  	        member = controld_get_local_node_status();
371  	        join_make_offer(NULL, member, NULL);
372  	    }
373  	
374  	    /* This was a genuine join request; cancel any existing transition and
375  	     * invoke the scheduler.
376  	     */
377  	    abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
378  	                     NULL);
379  	
380  	    count = crmd_join_phase_count(controld_join_welcomed);
381  	    pcmk__info("Waiting on join-%d requests from %d outstanding node%s",
382  	               current_join_id, count, pcmk__plural_s(count));
383  	
384  	    // Don't waste time by invoking the scheduler yet
385  	}
386  	
387  	static int
388  	compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
389  	{
390  	    const char *elem_l = pcmk__xe_get(left, field);
391  	    const char *elem_r = pcmk__xe_get(right, field);
392  	
393  	    long long int_elem_l;
394  	    long long int_elem_r;
395  	
396  	    int rc = pcmk_rc_ok;
397  	
398  	    rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
399  	    if (rc != pcmk_rc_ok) { // Shouldn't be possible
400  	        pcmk__warn("Comparing current CIB %s as -1 because '%s' is not an "
401  	                   "integer",
402  	                   field, elem_l);
403  	    }
404  	
405  	    rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
406  	    if (rc != pcmk_rc_ok) { // Shouldn't be possible
407  	        pcmk__warn("Comparing joining node's CIB %s as -1 because '%s' is not "
408  	                   "an integer",
409  	                   field, elem_r);
410  	    }
411  	
412  	    if (int_elem_l < int_elem_r) {
413  	        return -1;
414  	
415  	    } else if (int_elem_l > int_elem_r) {
416  	        return 1;
417  	    }
418  	
419  	    return 0;
420  	}
421  	
422  	// A_DC_JOIN_PROCESS_REQ
423  	void
424  	do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
425  	                        enum crmd_fsa_state cur_state,
426  	                        enum crmd_fsa_input current_input, fsa_data_t *msg_data)
427  	{
428  	    ha_msg_input_t *join_ack = NULL;
429  	    const char *join_from = NULL;
430  	    int join_id = -1;
431  	    xmlNode *generation = NULL;
432  	    int cmp = 0;
433  	    pcmk__node_status_t *join_node = NULL;
434  	    const char *join_version = NULL;
435  	    const char *ref = NULL;
436  	    gint value = 0;
437  	    bool accept = true;
438  	    int count = 0;
439  	
440  	    pcmk__assert((msg_data != NULL) && (msg_data->data != NULL));
441  	
442  	    join_ack = msg_data->data;
443  	    join_from = pcmk__xe_get(join_ack->msg, PCMK__XA_SRC);
444  	    if (join_from == NULL) {
445  	        pcmk__err("Ignoring invalid join request without node name");
446  	        return;
447  	    }
448  	
449  	    pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
450  	    if (join_id != current_join_id) {
451  	        pcmk__debug("Ignoring join-%d request from %s because we are on "
452  	                    "join-%d", join_id, join_from, current_join_id);
453  	        check_join_state(cur_state, __func__);
454  	        return;
455  	    }
456  	
457  	    generation = join_ack->xml;
458  	    if ((max_generation_xml != NULL) && (generation != NULL)) {
459  	        static const char *attributes[] = {
460  	            PCMK_XA_ADMIN_EPOCH,
461  	            PCMK_XA_EPOCH,
462  	            PCMK_XA_NUM_UPDATES,
463  	        };
464  	
465  	        /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
466  	         * element from the join client. The "if" guard is for clarity.
467  	         */
468  	        if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
469  	            for (int i = 0; (cmp == 0) && (i < PCMK__NELEM(attributes)); i++) {
470  	                cmp = compare_int_fields(max_generation_xml, generation,
471  	                                         attributes[i]);
472  	            }
473  	
474  	        } else {    // Should always be PCMK__XE_GENERATION_TUPLE
475  	            CRM_LOG_ASSERT(false);
476  	        }
477  	    }
478  	
479  	    join_node = pcmk__get_node(0, join_from, NULL,
480  	                               pcmk__node_search_cluster_member);
481  	    join_version = pcmk__xe_get(join_ack->msg, PCMK_XA_CRM_FEATURE_SET);
482  	
483  	    // For logging only
484  	    ref = pcmk__s(pcmk__xe_get(join_ack->msg, PCMK_XA_REFERENCE), "(none)");
485  	
486  	    if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
487  	        pcmk__err("Rejecting join-%d request from node %s because we failed to "
488  	                  "sync its CIB in join-%d " QB_XS " ref=%s",
489  	                  join_id, join_from, value, ref);
490  	        accept = false;
491  	
492  	    } else if (!pcmk__cluster_is_node_active(join_node)) {
493  	        if (match_down_event(join_from) != NULL) {
494  	            /* The join request was received after the node was fenced or
495  	             * otherwise shutdown in a way that we're aware of. No need to log
496  	             * an error in this rare occurrence; we know the client was recently
497  	             * shut down, and receiving a lingering in-flight request is not
498  	             * cause for alarm.
499  	             */
500  	            pcmk__debug("Rejecting join-%d request from inactive node %s "
501  	                        QB_XS " ref=%s",
502  	                        join_id, join_from, ref);
503  	        } else {
504  	            pcmk__err("Rejecting join-%d request from inactive node %s "
505  	                      QB_XS " ref=%s",
506  	                      join_id, join_from, ref);
507  	        }
508  	        accept = false;
509  	
510  	    } else if (generation == NULL) {
511  	        pcmk__err("Rejecting invalid join-%d request from node %s missing CIB "
512  	                  "generation " QB_XS " ref=%s",
513  	                  join_id, join_from, ref);
514  	        accept = false;
515  	
516  	    } else if ((join_version == NULL)
517  	               || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
518  	        pcmk__err("Rejecting join-%d request from node %s because feature set "
519  	                  "%s is incompatible with ours (%s) " QB_XS " ref=%s",
520  	                  join_id, join_from, (join_version? join_version : "pre-3.1.0"),
521  	                  CRM_FEATURE_SET, ref);
522  	        accept = false;
523  	
524  	    } else if (max_generation_xml == NULL) {
525  	        const char *validation = pcmk__xe_get(generation,
526  	                                              PCMK_XA_VALIDATE_WITH);
527  	
528  	        if (pcmk__get_schema(validation) == NULL) {
529  	            pcmk__err("Rejecting join-%d request from %s (with first CIB "
530  	                      "generation) due to %s schema version %s "
531  	                      QB_XS " ref=%s",
532  	                      join_id, join_from,
533  	                      ((validation == NULL)? "missing" : "unknown"),
534  	                      pcmk__s(validation, ""), ref);
535  	            accept = false;
536  	
537  	        } else {
538  	            pcmk__debug("Accepting join-%d request from %s (with first CIB "
539  	                        "generation) " QB_XS " ref=%s",
540  	                        join_id, join_from, ref);
541  	            max_generation_xml = pcmk__xml_copy(NULL, generation);
542  	            pcmk__str_update(&max_generation_from, join_from);
543  	        }
544  	
545  	    } else if ((cmp < 0)
546  	               || ((cmp == 0) && controld_is_local_node(join_from))) {
547  	        const char *validation = pcmk__xe_get(generation,
548  	                                              PCMK_XA_VALIDATE_WITH);
549  	
550  	        if (pcmk__get_schema(validation) == NULL) {
551  	            pcmk__err("Rejecting join-%d request from %s (with better CIB "
552  	                      "generation than current best from %s) due to %s "
553  	                      "schema version %s " QB_XS " ref=%s",
554  	                      join_id, join_from, max_generation_from,
555  	                      ((validation == NULL)? "missing" : "unknown"),
556  	                      pcmk__s(validation, ""), ref);
557  	            accept = false;
558  	
559  	        } else {
560  	            pcmk__debug("Accepting join-%d request from %s (with better CIB "
561  	                        "generation than current best from %s) " QB_XS " ref=%s",
562  	                        join_id, join_from, max_generation_from, ref);
563  	            pcmk__log_xml_debug(max_generation_xml, "Old max generation");
564  	            pcmk__log_xml_debug(generation, "New max generation");
565  	
566  	            pcmk__xml_free(max_generation_xml);
567  	            max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
568  	            pcmk__str_update(&max_generation_from, join_from);
569  	        }
570  	
571  	    } else {
572  	        pcmk__debug("Accepting join-%d request from %s " QB_XS " ref=%s",
573  	                    join_id, join_from, ref);
574  	    }
575  	
576  	    if (accept) {
577  	        crm_update_peer_join(__func__, join_node, controld_join_integrated);
578  	        pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_MEMBER);
579  	
580  	    } else {
581  	        crm_update_peer_join(__func__, join_node, controld_join_nack);
582  	        pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_NACK);
583  	    }
584  	
585  	    count = crmd_join_phase_count(controld_join_integrated);
586  	    pcmk__debug("%d node%s currently integrated in join-%d", count,
587  	                pcmk__plural_s(count), join_id);
588  	
589  	    if (!check_join_state(cur_state, __func__)) {
590  	        // Don't waste time by invoking the scheduler yet
591  	        count = crmd_join_phase_count(controld_join_welcomed);
592  	        pcmk__debug("Waiting on join-%d requests from %d outstanding node%s",
593  	                    join_id, count, pcmk__plural_s(count));
594  	    }
595  	}
596  	
597  	// A_DC_JOIN_FINALIZE
598  	void
599  	do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
600  	                    enum crmd_fsa_state cur_state,
601  	                    enum crmd_fsa_input current_input, fsa_data_t *msg_data)
602  	{
603  	    char *sync_from = NULL;
604  	    int rc = pcmk_ok;
605  	    int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
606  	    int count_finalizable = crmd_join_phase_count(controld_join_integrated)
607  	                            + crmd_join_phase_count(controld_join_nack);
608  	
609  	    /* This we can do straight away and avoid clients timing us out while we
610  	     * compute the latest CIB
611  	     */
612  	    if (count_welcomed != 0) {
613  	        pcmk__debug("Waiting on join-%d requests from %d outstanding node%s "
614  	                    "before finalizing join", current_join_id, count_welcomed,
615  	                    pcmk__plural_s(count_welcomed));
616  	        crmd_join_phase_log(LOG_DEBUG);
617  	        return;
618  	    }
619  	
620  	    if (count_finalizable == 0) {
621  	        pcmk__debug("Finalization not needed for join-%d at the current time",
622  	                    current_join_id);
623  	        crmd_join_phase_log(LOG_DEBUG);
624  	        check_join_state(controld_globals.fsa_state, __func__);
625  	        return;
626  	    }
627  	
628  	    controld_clear_fsa_input_flags(R_HAVE_CIB);
629  	    if ((max_generation_from == NULL)
630  	        || controld_is_local_node(max_generation_from)) {
631  	        controld_set_fsa_input_flags(R_HAVE_CIB);
632  	    }
633  	
634  	    if (!controld_globals.transition_graph->complete) {
635  	        pcmk__warn("Delaying join-%d finalization while transition in progress",
636  	                   current_join_id);
637  	        crmd_join_phase_log(LOG_DEBUG);
638  	        controld_fsa_stall(msg_data, action);
639  	        return;
640  	    }
641  	
642  	    if (pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
643  	        // Send our CIB out to everyone
644  	        sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
645  	    } else {
646  	        // Ask for the agreed best CIB
647  	        sync_from = pcmk__str_copy(max_generation_from);
648  	    }
649  	
650  	    pcmk__notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s "
651  	                 "with schema %s and feature set %s from %s)",
652  	                 current_join_id, count_finalizable,
653  	                 pcmk__plural_s(count_finalizable),
654  	                 pcmk__s(pcmk__xe_get(max_generation_xml, PCMK_XA_ADMIN_EPOCH),
655  	                         "0"),
656  	                 pcmk__s(pcmk__xe_get(max_generation_xml, PCMK_XA_EPOCH), "0"),
657  	                 pcmk__s(pcmk__xe_get(max_generation_xml, PCMK_XA_NUM_UPDATES),
658  	                         "0"),
659  	                 pcmk__s(pcmk__xe_get(max_generation_xml,
660  	                                      PCMK_XA_VALIDATE_WITH),
661  	                         "(none)"),
662  	                 pcmk__s(pcmk__xe_get(max_generation_xml,
663  	                                      PCMK_XA_CRM_FEATURE_SET),
664  	                         "(none)"),
665  	                 sync_from);
666  	
667  	    crmd_join_phase_log(LOG_DEBUG);
668  	
669  	    rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
670  	                                                    sync_from, NULL, cib_none);
671  	    fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
672  	}
673  	
674  	void
675  	free_max_generation(void)
676  	{
CID (unavailable; MK=ceb1d8ab08ab93d6785bf54c6b7f0ff3) (#1 of 2): Inconsistent C union access (INCONSISTENT_UNION_ACCESS):
(1) Event assign_union_field: The union field "in" of "_pp" is written.
(2) Event inconsistent_union_field_access: In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in".
677  	    g_clear_pointer(&max_generation_from, free);
678  	    g_clear_pointer(&max_generation_xml, pcmk__xml_free);
679  	}
680  	
681  	void
682  	finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
683  	{
684  	    CRM_LOG_ASSERT(-EPERM != rc);
685  	
686  	    if (rc != pcmk_ok) {
687  	        const char *sync_from = (const char *) user_data;
688  	
689  	        do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
690  	                   "Could not sync CIB from %s in join-%d: %s",
691  	                   sync_from, current_join_id, pcmk_strerror(rc));
692  	
693  	        if (rc != -pcmk_err_old_data) {
694  	            record_failed_sync_node(sync_from, current_join_id);
695  	        }
696  	
697  	        /* restart the whole join process */
698  	        register_fsa_error(I_ELECTION_DC, NULL);
699  	
700  	    } else if (!AM_I_DC) {
701  	        pcmk__debug("Sync'ed CIB for join-%d but no longer DC",
702  	                    current_join_id);
703  	
704  	    } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
705  	        pcmk__debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
706  	                    "(%s)", current_join_id,
707  	                    fsa_state2string(controld_globals.fsa_state));
708  	
709  	    } else {
710  	        controld_set_fsa_input_flags(R_HAVE_CIB);
711  	
712  	        /* make sure dc_uuid is re-set to us */
713  	        if (!check_join_state(controld_globals.fsa_state, __func__)) {
714  	            int count_finalizable = 0;
715  	
716  	            count_finalizable = crmd_join_phase_count(controld_join_integrated)
717  	                                + crmd_join_phase_count(controld_join_nack);
718  	
719  	            pcmk__debug("Notifying %d node%s of join-%d results",
720  	                        count_finalizable, pcmk__plural_s(count_finalizable),
721  	                        current_join_id);
722  	            g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
723  	        }
724  	    }
725  	}
726  	
727  	static void
728  	join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
729  	                                xmlNode *output, void *user_data)
730  	{
731  	    const char *node = user_data;
732  	
733  	    if (rc != pcmk_ok) {
734  	        pcmk__crit("join-%d node history update (via CIB call %d) for node %s "
735  	                   "failed: %s",
736  	                   current_join_id, call_id, node, pcmk_strerror(rc));
737  	        pcmk__log_xml_debug(msg, "failed");
738  	        register_fsa_error(I_ERROR, NULL);
739  	    }
740  	
741  	    pcmk__debug("join-%d node history update (via CIB call %d) for node %s "
742  	                "complete", current_join_id, call_id, node);
743  	    check_join_state(controld_globals.fsa_state, __func__);
744  	}
745  	
746  	// A_DC_JOIN_PROCESS_ACK
747  	void
748  	do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
749  	               enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
750  	               fsa_data_t *msg_data)
751  	{
752  	    ha_msg_input_t *join_ack = NULL;
753  	    char *join_from = NULL;
754  	    const char *op = NULL;
755  	    int join_id = -1;
756  	
757  	    pcmk__node_status_t *peer = NULL;
758  	    enum controld_join_phase phase = controld_join_none;
759  	
760  	    cib_t *cib = controld_globals.cib_conn;
761  	    int rc = pcmk_ok;
762  	
763  	    const bool unlocked_only = pcmk__is_set(controld_globals.flags,
764  	                                            controld_shutdown_lock_enabled);
765  	    char *xpath = NULL;
766  	    xmlNode *state = NULL;
767  	
768  	    pcmk__assert((msg_data != NULL) && (msg_data->data != NULL));
769  	
770  	    join_ack = msg_data->data;
771  	
772  	    // Sanity checks
773  	    join_from = pcmk__xe_get_copy(join_ack->msg, PCMK__XA_SRC);
774  	    if (join_from == NULL) {
775  	        pcmk__warn("Ignoring message received without node identification");
776  	        goto done;
777  	    }
778  	
779  	    op = pcmk__xe_get(join_ack->msg, PCMK__XA_CRM_TASK);
780  	    if (op == NULL) {
781  	        pcmk__warn("Ignoring message received from %s without task", join_from);
782  	        goto done;
783  	    }
784  	    if (!pcmk__str_eq(op, CRM_OP_JOIN_CONFIRM, pcmk__str_none)) {
785  	        pcmk__debug("Ignoring '%s' message from %s while waiting for '%s'", op,
786  	                    join_from, CRM_OP_JOIN_CONFIRM);
787  	        goto done;
788  	    }
789  	
790  	    if (pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID,
791  	                         &join_id) != pcmk_rc_ok) {
792  	        pcmk__warn("Ignoring join confirmation from %s without valid join ID",
793  	                   join_from);
794  	        goto done;
795  	    }
796  	
797  	    peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
798  	    phase = controld_get_join_phase(peer);
799  	    if (phase != controld_join_finalized) {
800  	        pcmk__info("Ignoring out-of-sequence join-%d confirmation from %s "
801  	                   "(currently %s not %s)",
802  	                   join_id, join_from, join_phase_text(phase),
803  	                   join_phase_text(controld_join_finalized));
804  	        goto done;
805  	    }
806  	
807  	    if (join_id != current_join_id) {
808  	        pcmk__err("Rejecting join-%d confirmation from %s because currently on "
809  	                  "join-%d",
810  	                  join_id, join_from, current_join_id);
811  	        crm_update_peer_join(__func__, peer, controld_join_nack);
812  	        goto done;
813  	    }
814  	
815  	    crm_update_peer_join(__func__, peer, controld_join_confirmed);
816  	
817  	    /* Update CIB with node's current executor state. A new transition will be
818  	     * triggered later, when the CIB manager notifies us of the change.
819  	     *
820  	     * The delete and modify requests are part of an atomic transaction.
821  	     */
822  	    rc = cib->cmds->init_transaction(cib);
823  	    if (rc != pcmk_ok) {
824  	        goto done;
825  	    }
826  	
827  	    // Delete relevant parts of node's current executor state from CIB
828  	    controld_node_history_deletion_strings(join_from, unlocked_only, &xpath,
829  	                                           NULL);
830  	
831  	    rc = cib->cmds->remove(cib, xpath, NULL,
832  	                           cib_xpath|cib_multiple|cib_transaction);
833  	    if (rc != pcmk_ok) {
834  	        goto done;
835  	    }
836  	
837  	    // Update CIB with node's latest known executor state
838  	    if (controld_is_local_node(join_from)) {
839  	
840  	        // Use the latest possible state if processing our own join ack
841  	        state = controld_query_executor_state();
842  	
843  	        if (state != NULL) {
844  	            pcmk__debug("Updating local node history for join-%d from query "
845  	                        "result", current_join_id);
846  	
847  	        } else {
848  	            pcmk__warn("Updating local node history from join-%d confirmation "
849  	                       "because query failed",
850  	                       current_join_id);
851  	        }
852  	
853  	    } else {
854  	        pcmk__debug("Updating node history for %s from join-%d confirmation",
855  	                    join_from, current_join_id);
856  	    }
857  	
858  	    rc = cib->cmds->modify(cib, PCMK_XE_STATUS,
859  	                           ((state != NULL)? state : join_ack->xml),
860  	                           cib_can_create|cib_transaction);
861  	    if (rc != pcmk_ok) {
862  	        goto done;
863  	    }
864  	
865  	    // Commit the transaction
866  	    rc = cib->cmds->end_transaction(cib, true, cib_none);
867  	    fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
868  	
869  	    if (rc > 0) {
870  	        // join_from will be freed after callback
871  	        join_from = NULL;
872  	        rc = pcmk_ok;
873  	    }
874  	
875  	done:
876  	    if (rc != pcmk_ok) {
877  	        rc = pcmk_legacy2rc(rc);
878  	        pcmk__crit("join-%d node history update for node %s failed: %s",
879  	                   current_join_id, join_from, pcmk_rc_str(rc));
880  	        register_fsa_error(I_ERROR, msg_data);
881  	    }
882  	    free(join_from);
883  	    free(xpath);
884  	    pcmk__xml_free(state);
885  	}
886  	
887  	void
888  	finalize_join_for(gpointer key, gpointer value, gpointer user_data)
889  	{
890  	    xmlNode *acknak = NULL;
891  	    xmlNode *tmp1 = NULL;
892  	    pcmk__node_status_t *join_node = value;
893  	    const char *join_to = join_node->name;
894  	    enum controld_join_phase phase = controld_get_join_phase(join_node);
895  	    bool integrated = false;
896  	
897  	    switch (phase) {
898  	        case controld_join_integrated:
899  	            integrated = true;
900  	            break;
901  	        case controld_join_nack:
902  	            break;
903  	        default:
904  	            pcmk__trace("Not updating non-integrated and non-nacked node %s "
905  	                        "(%s) for join-%d",
906  	                        join_to, join_phase_text(phase), current_join_id);
907  	            return;
908  	    }
909  	
910  	    /* Update the <node> element with the node's name and UUID, in case they
911  	     * weren't known before
912  	     */
913  	    pcmk__trace("Updating node name and UUID in CIB for %s", join_to);
914  	    tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
915  	    pcmk__xe_set(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
916  	    pcmk__xe_set(tmp1, PCMK_XA_UNAME, join_to);
917  	    fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
918  	    pcmk__xml_free(tmp1);
919  	
920  	    join_node = pcmk__get_node(0, join_to, NULL,
921  	                               pcmk__node_search_cluster_member);
922  	    if (!pcmk__cluster_is_node_active(join_node)) {
923  	        /*
924  	         * NACK'ing nodes that the membership layer doesn't know about yet
925  	         * simply creates more churn
926  	         *
927  	         * Better to leave them waiting and let the join restart when
928  	         * the new membership event comes in
929  	         *
930  	         * All other NACKs (due to versions etc) should still be processed
931  	         */
932  	        pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_PENDING);
933  	        return;
934  	    }
935  	
936  	    // Acknowledge or nack node's join request
937  	    pcmk__debug("%sing join-%d request from %s",
938  	                (integrated? "Acknowledg" : "Nack"), current_join_id, join_to);
939  	    acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
940  	    pcmk__xe_set_bool(acknak, CRM_OP_JOIN_ACKNAK, integrated);
941  	
942  	    if (integrated) {
943  	        // No change needed for a nacked node
944  	        crm_update_peer_join(__func__, join_node, controld_join_finalized);
945  	        pcmk__update_peer_expected(join_node, CRMD_JOINSTATE_MEMBER);
946  	
947  	        /* Iterate through the remote peer cache and add information on which
948  	         * node hosts each to the ACK message.  This keeps new controllers in
949  	         * sync with what has already happened.
950  	         */
951  	        if (pcmk__cluster_num_remote_nodes() > 0) {
952  	            GHashTableIter iter;
953  	            pcmk__node_status_t *node = NULL;
954  	            xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
955  	
956  	            g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
957  	            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
958  	                xmlNode *remote = NULL;
959  	
960  	                if (!node->conn_host) {
961  	                    continue;
962  	                }
963  	
964  	                remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
965  	                pcmk__xe_set_props(remote,
966  	                                   PCMK_XA_ID, node->name,
967  	                                   PCMK__XA_NODE_STATE, node->state,
968  	                                   PCMK__XA_CONNECTION_HOST, node->conn_host,
969  	                                   NULL);
970  	            }
971  	        }
972  	    }
973  	    pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
974  	    pcmk__xml_free(acknak);
975  	}
976  	
977  	gboolean
978  	check_join_state(enum crmd_fsa_state cur_state, const char *source)
979  	{
980  	    static unsigned long long highest_seq = 0;
981  	
982  	    if (controld_globals.membership_id != controld_globals.peer_seq) {
983  	        pcmk__debug("join-%d: Membership changed from %llu to %llu "
984  	                    QB_XS " highest=%llu state=%s for=%s",
985  	                    current_join_id, controld_globals.membership_id,
986  	                    controld_globals.peer_seq, highest_seq,
987  	                    fsa_state2string(cur_state), source);
988  	        if (highest_seq < controld_globals.peer_seq) {
989  	            /* Don't spam the FSA with duplicates */
990  	            highest_seq = controld_globals.peer_seq;
991  	            controld_fsa_prepend(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
992  	        }
993  	
994  	    } else if (cur_state == S_INTEGRATION) {
995  	        if (crmd_join_phase_count(controld_join_welcomed) == 0) {
996  	            int count = crmd_join_phase_count(controld_join_integrated);
997  	
998  	            pcmk__debug("join-%d: Integration of %d peer%s complete "
999  	                        QB_XS " state=%s for=%s",
1000 	                        current_join_id, count, pcmk__plural_s(count),
1001 	                        fsa_state2string(cur_state), source);
1002 	            controld_fsa_prepend(C_FSA_INTERNAL, I_INTEGRATED, NULL);
1003 	            return TRUE;
1004 	        }
1005 	
1006 	    } else if (cur_state == S_FINALIZE_JOIN) {
1007 	        if (!pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
1008 	            pcmk__debug("join-%d: Delaying finalization until we have CIB "
1009 	                        QB_XS " state=%s for=%s",
1010 	                        current_join_id, fsa_state2string(cur_state), source);
1011 	            return TRUE;
1012 	
1013 	        } else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
1014 	            int count = crmd_join_phase_count(controld_join_welcomed);
1015 	
1016 	            pcmk__debug("join-%d: Still waiting on %d welcomed node%s "
1017 	                        QB_XS " state=%s for=%s",
1018 	                        current_join_id, count, pcmk__plural_s(count),
1019 	                        fsa_state2string(cur_state), source);
1020 	            crmd_join_phase_log(LOG_DEBUG);
1021 	
1022 	        } else if (crmd_join_phase_count(controld_join_integrated) != 0) {
1023 	            int count = crmd_join_phase_count(controld_join_integrated);
1024 	
1025 	            pcmk__debug("join-%d: Still waiting on %d integrated node%s "
1026 	                        QB_XS " state=%s for=%s",
1027 	                        current_join_id, count, pcmk__plural_s(count),
1028 	                        fsa_state2string(cur_state), source);
1029 	            crmd_join_phase_log(LOG_DEBUG);
1030 	
1031 	        } else if (crmd_join_phase_count(controld_join_finalized) != 0) {
1032 	            int count = crmd_join_phase_count(controld_join_finalized);
1033 	
1034 	            pcmk__debug("join-%d: Still waiting on %d finalized node%s "
1035 	                        QB_XS " state=%s for=%s",
1036 	                        current_join_id, count, pcmk__plural_s(count),
1037 	                        fsa_state2string(cur_state), source);
1038 	            crmd_join_phase_log(LOG_DEBUG);
1039 	
1040 	        } else {
1041 	            pcmk__debug("join-%d: Complete " QB_XS " state=%s for=%s",
1042 	                        current_join_id, fsa_state2string(cur_state), source);
1043 	            controld_fsa_append(C_FSA_INTERNAL, I_FINALIZED, NULL);
1044 	            return TRUE;
1045 	        }
1046 	    }
1047 	
1048 	    return FALSE;
1049 	}
1050 	
1051 	// A_DC_JOIN_FINAL
1052 	void
1053 	do_dc_join_final(long long action, enum crmd_fsa_cause cause,
1054 	                 enum crmd_fsa_state cur_state,
1055 	                 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
1056 	{
1057 	    pcmk__debug("Ensuring DC, quorum, and node attributes are up to date");
1058 	    crm_update_quorum(pcmk__cluster_has_quorum(), true);
1059 	}
1060 	
1061 	int crmd_join_phase_count(enum controld_join_phase phase)
1062 	{
1063 	    int count = 0;
1064 	    pcmk__node_status_t *peer;
1065 	    GHashTableIter iter;
1066 	
1067 	    g_hash_table_iter_init(&iter, pcmk__peer_cache);
1068 	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1069 	        if (controld_get_join_phase(peer) == phase) {
1070 	            count++;
1071 	        }
1072 	    }
1073 	    return count;
1074 	}
1075 	
1076 	void crmd_join_phase_log(int level)
1077 	{
1078 	    pcmk__node_status_t *peer;
1079 	    GHashTableIter iter;
1080 	
1081 	    g_hash_table_iter_init(&iter, pcmk__peer_cache);
1082 	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1083 	        do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
1084 	                   join_phase_text(controld_get_join_phase(peer)));
1085 	    }
1086 	}
1087