1    	/*
2    	 * Copyright 2004-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU Lesser General Public License
7    	 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <inttypes.h>                   // PRIu32
13   	#include <stdbool.h>                    // bool
14   	#include <stdio.h>
15   	#include <string.h>
16   	#include <sys/param.h>
17   	#include <sys/types.h>
18   	#include <unistd.h>
19   	
20   	#include <glib.h>
21   	
22   	#include <crm/common/ipc.h>
23   	#include <crm/cluster/internal.h>
24   	#include <crm/common/xml.h>
25   	#include "crmcluster_private.h"
26   	
27   	/* The peer cache remembers cluster nodes that have been seen. This is managed
28   	 * mostly automatically by libcrmcluster, based on cluster membership events.
29   	 *
30   	 * Because cluster nodes can have conflicting names or UUIDs, the hash table key
31   	 * is a uniquely generated ID.
32   	 *
33   	 * @TODO Move caches to pcmk_cluster_t
34   	 */
35   	GHashTable *pcmk__peer_cache = NULL;
36   	
37   	/* The remote peer cache tracks pacemaker_remote nodes. While the
38   	 * value has the same type as the peer cache's, it is tracked separately for
39   	 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
40   	 * so the name (which is also the UUID) is used as the hash table key; there
41   	 * is no equivalent of membership events, so management is not automatic; and
42   	 * most users of the peer cache need to exclude pacemaker_remote nodes.
43   	 *
44   	 * @TODO That said, using a single cache would be more logical and less
45   	 * error-prone, so it would be a good idea to merge them one day.
46   	 *
47   	 * libcrmcluster provides two avenues for populating the cache:
48   	 * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
49   	 * directly manage it, while refresh_remote_nodes() populates it via the CIB.
50   	 *
51   	 * @TODO Move caches to pcmk_cluster_t
52   	 */
53   	GHashTable *pcmk__remote_peer_cache = NULL;
54   	
55   	/*
56   	 * The CIB cluster node cache tracks cluster nodes that have been seen in
57   	 * the CIB. It is useful mainly when a caller needs to know about a node that
58   	 * may no longer be in the membership, but doesn't want to add the node to the
59   	 * main peer cache tables.
60   	 */
61   	static GHashTable *cluster_node_cib_cache = NULL;
62   	
63   	static bool autoreap = true;
64   	static bool has_quorum = false;
65   	
66   	// Flag setting and clearing for pcmk__node_status_t:flags
67   	
// Set flags_to_set in a peer's flags, logging the change at trace level.
// Wrapped in do/while(0) so the macro acts as a single statement.
#define set_peer_flags(peer, flags_to_set) do {                               \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                           "Peer", (peer)->name,              \
                                           (peer)->flags, (flags_to_set),     \
                                           #flags_to_set);                    \
    } while (0)
74   	
// Clear flags_to_clear from a peer's flags, logging the change at trace level.
// Wrapped in do/while(0) so the macro acts as a single statement.
#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
                                             LOG_TRACE,                       \
                                             "Peer", (peer)->name,            \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)
82   	
83   	static void update_peer_uname(pcmk__node_status_t *node, const char *uname);
84   	static pcmk__node_status_t *find_cib_cluster_node(const char *id,
85   	                                                  const char *uname);
86   	
87   	/*!
88   	 * \internal
89   	 * \brief Check whether the cluster currently has quorum
90   	 *
91   	 * \return \c true if the cluster has quorum, or \c false otherwise
92   	 */
93   	bool
94   	pcmk__cluster_has_quorum(void)
95   	{
96   	    return has_quorum;
97   	}
98   	
99   	/*!
100  	 * \internal
101  	 * \brief Set whether the cluster currently has quorum
102  	 *
103  	 * \param[in] quorate  \c true if the cluster has quorum, or \c false otherwise
104  	 */
105  	void
106  	pcmk__cluster_set_quorum(bool quorate)
107  	{
108  	    has_quorum = quorate;
109  	}
110  	
111  	/*!
112  	 * \internal
113  	 * \brief Get the number of Pacemaker Remote nodes that have been seen
114  	 *
115  	 * \return Number of cached Pacemaker Remote nodes
116  	 */
117  	unsigned int
118  	pcmk__cluster_num_remote_nodes(void)
119  	{
120  	    if (pcmk__remote_peer_cache == NULL) {
121  	        return 0U;
122  	    }
123  	    return g_hash_table_size(pcmk__remote_peer_cache);
124  	}
125  	
126  	/*!
127  	 * \internal
128  	 * \brief Get a remote node cache entry, creating it if necessary
129  	 *
130  	 * \param[in] node_name  Name of remote node
131  	 *
132  	 * \return Cache entry for node on success, or \c NULL (and set \c errno)
133  	 *         otherwise
134  	 *
135  	 * \note When creating a new entry, this will leave the node state undetermined.
136  	 *       The caller should also call \c pcmk__update_peer_state() if the state
137  	 *       is known.
138  	 * \note Because this can add and remove cache entries, callers should not
139  	 *       assume any previously obtained cache entry pointers remain valid.
140  	 */
141  	pcmk__node_status_t *
142  	pcmk__cluster_lookup_remote_node(const char *node_name)
143  	{
144  	    pcmk__node_status_t *node = NULL;
145  	    char *node_name_copy = NULL;
146  	
147  	    if (node_name == NULL) {
148  	        errno = EINVAL;
149  	        return NULL;
150  	    }
151  	
152  	    /* It's theoretically possible that the node was added to the cluster peer
153  	     * cache before it was known to be a Pacemaker Remote node. Remove that
154  	     * entry unless it has an XML ID, which means the name actually is
155  	     * associated with a cluster node. (@TODO return an error in that case?)
156  	     */
157  	    node = pcmk__search_node_caches(0, node_name, NULL,
158  	                                    pcmk__node_search_cluster_member);
159  	    if ((node != NULL)
160  	        && ((node->xml_id == NULL)
161  	            /* This assumes only Pacemaker Remote nodes have their XML ID the
162  	             * same as their node name
163  	             */
164  	            || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) {
165  	
166  	        /* node_name could be a pointer into the cache entry being removed, so
167  	         * reassign it to a copy before the original gets freed
168  	         */
169  	        node_name_copy = strdup(node_name);
170  	        if (node_name_copy == NULL) {
171  	            errno = ENOMEM;
172  	            return NULL;
173  	        }
174  	        node_name = node_name_copy;
175  	        pcmk__cluster_forget_cluster_node(0, node_name);
176  	    }
177  	
178  	    /* Return existing cache entry if one exists */
179  	    node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name);
180  	    if (node) {
181  	        free(node_name_copy);
182  	        return node;
183  	    }
184  	
185  	    /* Allocate a new entry */
186  	    node = calloc(1, sizeof(pcmk__node_status_t));
187  	    if (node == NULL) {
188  	        free(node_name_copy);
189  	        return NULL;
190  	    }
191  	
192  	    /* Populate the essential information */
193  	    set_peer_flags(node, pcmk__node_status_remote);
194  	    node->xml_id = strdup(node_name);
195  	    if (node->xml_id == NULL) {
196  	        free(node);
197  	        errno = ENOMEM;
198  	        free(node_name_copy);
199  	        return NULL;
200  	    }
201  	
202  	    /* Add the new entry to the cache */
203  	    g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node);
204  	    pcmk__trace("added %s to remote cache", node_name);
205  	
206  	    /* Update the entry's uname, ensuring peer status callbacks are called */
207  	    update_peer_uname(node, node_name);
208  	    free(node_name_copy);
209  	    return node;
210  	}
211  	
212  	/*!
213  	 * \internal
214  	 * \brief Remove a node from the Pacemaker Remote node cache
215  	 *
216  	 * \param[in] node_name  Name of node to remove from cache
217  	 *
218  	 * \note The caller must be careful not to use \p node_name after calling this
219  	 *       function if it might be a pointer into the cache entry being removed.
220  	 */
221  	void
222  	pcmk__cluster_forget_remote_node(const char *node_name)
223  	{
224  	    /* Do a lookup first, because node_name could be a pointer within the entry
225  	     * being removed -- we can't log it *after* removing it.
226  	     */
227  	    if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) {
228  	        pcmk__trace("Removing %s from Pacemaker Remote node cache", node_name);
229  	        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
230  	    }
231  	}
232  	
233  	/*!
234  	 * \internal
235  	 * \brief Return node status based on a CIB status entry
236  	 *
237  	 * \param[in] node_state  XML of node state
238  	 *
239  	 * \return \c PCMK_VALUE_MEMBER if \c PCMK__XA_IN_CCM is true in
240  	 *         \c PCMK__XE_NODE_STATE, or \c PCMK__VALUE_LOST otherwise
241  	 */
242  	static const char *
243  	remote_state_from_cib(const xmlNode *node_state)
244  	{
245  	    bool in_ccm = false;
246  	
247  	    if ((pcmk__xe_get_bool(node_state, PCMK__XA_IN_CCM, &in_ccm) == pcmk_rc_ok)
248  	        && in_ccm) {
249  	        return PCMK_VALUE_MEMBER;
250  	    }
251  	    return PCMK__VALUE_LOST;
252  	}
253  	
/* User data passed to remote_cache_refresh_helper() when looping through
 * remote node XPath search results
 */
struct refresh_data {
    const char *field;  /* XML attribute to check for node name */
    gboolean has_state; /* whether to update node state based on XML */
};
259  	
260  	/*!
261  	 * \internal
262  	 * \brief Process one pacemaker_remote node xpath search result
263  	 *
264  	 * \param[in] result     XML search result
265  	 * \param[in] user_data  what to look for in the XML
266  	 */
267  	static void
268  	remote_cache_refresh_helper(xmlNode *result, void *user_data)
269  	{
270  	    const struct refresh_data *data = user_data;
271  	    const char *remote = pcmk__xe_get(result, data->field);
272  	    const char *state = NULL;
273  	    pcmk__node_status_t *node;
274  	
275  	    CRM_CHECK(remote != NULL, return);
276  	
277  	    /* Determine node's state, if the result has it */
278  	    if (data->has_state) {
279  	        state = remote_state_from_cib(result);
280  	    }
281  	
282  	    /* Check whether cache already has entry for node */
283  	    node = g_hash_table_lookup(pcmk__remote_peer_cache, remote);
284  	
285  	    if (node == NULL) {
286  	        /* Node is not in cache, so add a new entry for it */
287  	        node = pcmk__cluster_lookup_remote_node(remote);
288  	        pcmk__assert(node != NULL);
289  	        if (state) {
290  	            pcmk__update_peer_state(__func__, node, state, 0);
291  	        }
292  	
293  	    } else if (pcmk__is_set(node->flags, pcmk__node_status_dirty)) {
294  	        /* Node is in cache and hasn't been updated already, so mark it clean */
295  	        clear_peer_flags(node, pcmk__node_status_dirty);
296  	        if (state) {
297  	            pcmk__update_peer_state(__func__, node, state, 0);
298  	        }
299  	    }
300  	}
301  	
302  	static void
303  	mark_dirty(gpointer key, gpointer value, gpointer user_data)
304  	{
305  	    set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty);
306  	}
307  	
308  	static gboolean
309  	is_dirty(gpointer key, gpointer value, gpointer user_data)
310  	{
311  	    const pcmk__node_status_t *node = value;
312  	
313  	    return pcmk__is_set(node->flags, pcmk__node_status_dirty);
314  	}
315  	
316  	/*!
317  	 * \internal
318  	 * \brief Repopulate the remote node cache based on CIB XML
319  	 *
320  	 * \param[in] cib  CIB XML to parse
321  	 */
322  	static void
323  	refresh_remote_nodes(xmlNode *cib)
324  	{
325  	    struct refresh_data data;
326  	
327  	    pcmk__cluster_init_node_caches();
328  	
329  	    /* First, we mark all existing cache entries as dirty,
330  	     * so that later we can remove any that weren't in the CIB.
331  	     * We don't empty the cache, because we need to detect changes in state.
332  	     */
333  	    g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);
334  	
335  	    /* Look for guest nodes and remote nodes in the status section */
336  	    data.field = PCMK_XA_ID;
337  	    data.has_state = TRUE;
338  	    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_STATUS,
339  	                               remote_cache_refresh_helper, &data);
340  	
341  	    /* Look for guest nodes and remote nodes in the configuration section,
342  	     * because they may have just been added and not have a status entry yet.
343  	     * In that case, the cached node state will be left NULL, so that the
344  	     * peer status callback isn't called until we're sure the node started
345  	     * successfully.
346  	     */
347  	    data.field = PCMK_XA_VALUE;
348  	    data.has_state = FALSE;
349  	    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_GUEST_NODE_CONFIG,
350  	                               remote_cache_refresh_helper, &data);
351  	    data.field = PCMK_XA_ID;
352  	    data.has_state = FALSE;
353  	    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_CONFIG,
354  	                               remote_cache_refresh_helper, &data);
355  	
356  	    /* Remove all old cache entries that weren't seen in the CIB */
357  	    g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
358  	}
359  	
360  	/*!
361  	 * \internal
362  	 * \brief Check whether a node is an active cluster node
363  	 *
364  	 * Remote nodes are never considered active. This guarantees that they can never
365  	 * become DC.
366  	 *
367  	 * \param[in] node  Node to check
368  	 *
369  	 * \return \c true if the node is an active cluster node, or \c false otherwise
370  	 */
371  	bool
372  	pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
373  	{
374  	    const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
375  	
376  	    if ((node == NULL) || pcmk__is_set(node->flags, pcmk__node_status_remote)) {
377  	        return false;
378  	    }
379  	
380  	    switch (cluster_layer) {
381  	        case pcmk_cluster_layer_corosync:
382  	#if SUPPORT_COROSYNC
383  	            return pcmk__corosync_is_peer_active(node);
384  	#else
385  	            break;
386  	#endif  // SUPPORT_COROSYNC
387  	        default:
388  	            break;
389  	    }
390  	
391  	    pcmk__err("Unhandled cluster layer: %s",
392  	              pcmk_cluster_layer_text(cluster_layer));
393  	    return false;
394  	}
395  	
396  	/*!
397  	 * \internal
398  	 * \brief Check if a node's entry should be removed from the cluster node cache
399  	 *
400  	 * A node should be removed from the cache if it's inactive and matches another
401  	 * \c pcmk__node_status_t (the search object). The node is considered a
402  	 * mismatch if any of the following are true:
403  	 * * The search object is \c NULL.
404  	 * * The search object has an ID set and the cached node's ID does not match it.
405  	 * * The search object does not have an ID set, and the cached node's name does
406  	 *   not match the search node's name. (If both names are \c NULL, it's a
407  	 *   match.)
408  	 *
409  	 * Otherwise, the node is considered a match.
410  	 *
411  	 * Note that if the search object has both an ID and a name set, the name is
412  	 * ignored for matching purposes.
413  	 *
414  	 * \param[in] key        Ignored
415  	 * \param[in] value      \c pcmk__node_status_t object from cluster node cache
416  	 * \param[in] user_data  \c pcmk__node_status_t object to match against (search
417  	 *                       object)
418  	 *
419  	 * \return \c TRUE if the node entry should be removed from \c pcmk__peer_cache,
420  	 *         or \c FALSE otherwise
421  	 */
422  	static gboolean
423  	should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
424  	{
425  	    pcmk__node_status_t *node = value;
426  	    pcmk__node_status_t *search = user_data;
427  	
428  	    if (search == NULL) {
429  	        return FALSE;
430  	    }
431  	    if ((search->cluster_layer_id != 0)
432  	        && (node->cluster_layer_id != search->cluster_layer_id)) {
433  	        return FALSE;
434  	    }
435  	    if ((search->cluster_layer_id == 0)
436  	        && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) {
437  	        // @TODO Consider name even if ID is set?
438  	        return FALSE;
439  	    }
440  	    if (pcmk__cluster_is_node_active(value)) {
441  	        return FALSE;
442  	    }
443  	
444  	    pcmk__info("Removing node with name %s and cluster layer ID %" PRIu32
445  	               " from membership cache",
446  	               pcmk__s(node->name, "(unknown)"), node->cluster_layer_id);
447  	    return TRUE;
448  	}
449  	
450  	/*!
451  	 * \internal
452  	 * \brief Remove one or more inactive nodes from the cluster node cache
453  	 *
454  	 * All inactive nodes matching \p id and \p node_name as described in
455  	 * \c should_forget_cluster_node documentation are removed from the cache.
456  	 *
457  	 * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
458  	 * from the cache regardless of ID and name. This differs from clearing the
459  	 * cache, in that entries for active nodes are preserved.
460  	 *
461  	 * \param[in] id         ID of node to remove from cache (0 to ignore)
462  	 * \param[in] node_name  Name of node to remove from cache (ignored if \p id is
463  	 *                       nonzero)
464  	 *
465  	 * \note \p node_name is not modified directly, but it will be freed if it's a
466  	 *       pointer into a cache entry that is removed.
467  	 */
468  	void
469  	pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
470  	{
471  	    pcmk__node_status_t search = { 0, };
472  	    char *criterion = NULL; // For logging
473  	    guint matches = 0;
474  	
475  	    if (pcmk__peer_cache == NULL) {
476  	        pcmk__trace("Membership cache not initialized, ignoring removal "
477  	                    "request");
478  	        return;
479  	    }
480  	
481  	    search.cluster_layer_id = id;
482  	    search.name = pcmk__str_copy(node_name);    // May log after original freed
483  	
484  	    if (id > 0) {
485  	        criterion = pcmk__assert_asprintf("cluster layer ID %" PRIu32, id);
486  	
487  	    } else if (node_name != NULL) {
488  	        criterion = pcmk__assert_asprintf("name %s", node_name);
489  	    }
490  	
491  	    matches = g_hash_table_foreach_remove(pcmk__peer_cache,
492  	                                          should_forget_cluster_node, &search);
493  	    if (matches > 0) {
494  	        if (criterion != NULL) {
495  	            pcmk__notice("Removed %u inactive node%s with %s from the "
496  	                         "membership cache",
497  	                         matches, pcmk__plural_s(matches), criterion);
498  	        } else {
499  	            pcmk__notice("Removed all (%u) inactive cluster nodes from the "
500  	                         "membership cache",
501  	                         matches);
502  	        }
503  	
504  	    } else {
505  	        pcmk__info("No inactive cluster nodes%s%s to remove from the "
506  	                   "membership cache",
507  	                   ((criterion != NULL)? " with " : ""),
508  	                   pcmk__s(criterion, ""));
509  	    }
510  	
511  	    free(search.name);
512  	    free(criterion);
513  	}
514  	
515  	static void
516  	count_peer(gpointer key, gpointer value, gpointer user_data)
517  	{
518  	    unsigned int *count = user_data;
519  	    pcmk__node_status_t *node = value;
520  	
521  	    if (pcmk__cluster_is_node_active(node)) {
522  	        *count = *count + 1;
523  	    }
524  	}
525  	
526  	/*!
527  	 * \internal
528  	 * \brief Get the number of active cluster nodes that have been seen
529  	 *
530  	 * Remote nodes are never considered active. This guarantees that they can never
531  	 * become DC.
532  	 *
533  	 * \return Number of active nodes in the cluster node cache
534  	 */
535  	unsigned int
536  	pcmk__cluster_num_active_nodes(void)
537  	{
538  	    unsigned int count = 0;
539  	
540  	    if (pcmk__peer_cache != NULL) {
541  	        g_hash_table_foreach(pcmk__peer_cache, count_peer, &count);
542  	    }
543  	    return count;
544  	}
545  	
546  	static void
547  	destroy_crm_node(gpointer data)
548  	{
549  	    pcmk__node_status_t *node = data;
550  	
551  	    pcmk__trace("Destroying entry for node %" PRIu32 ": %s",
552  	                node->cluster_layer_id, node->name);
553  	
554  	    free(node->name);
555  	    free(node->state);
556  	    free(node->xml_id);
557  	    free(node->user_data);
558  	    free(node->expected);
559  	    free(node->conn_host);
560  	    free(node);
561  	}
562  	
563  	/*!
564  	 * \internal
565  	 * \brief Initialize node caches
566  	 */
567  	void
568  	pcmk__cluster_init_node_caches(void)
569  	{
570  	    if (pcmk__peer_cache == NULL) {
571  	        pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node);
572  	    }
573  	
574  	    if (pcmk__remote_peer_cache == NULL) {
575  	        pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
576  	    }
577  	
578  	    if (cluster_node_cib_cache == NULL) {
579  	        cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
580  	    }
581  	}
582  	
583  	/*!
584  	 * \internal
585  	 * \brief Initialize node caches
586  	 */
587  	void
588  	pcmk__cluster_destroy_node_caches(void)
589  	{
(1) Event path: Condition "_p", taking true branch.
590  	    g_clear_pointer(&pcmk__peer_cache, g_hash_table_destroy);
CID (unavailable; MK=1675383136d46547cc2f4682c005c034) (#2 of 3): Inconsistent C union access (INCONSISTENT_UNION_ACCESS):
(2) Event assign_union_field: The union field "in" of "_pp" is written.
(3) Event inconsistent_union_field_access: In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in".
591  	    g_clear_pointer(&pcmk__remote_peer_cache, g_hash_table_destroy);
592  	    g_clear_pointer(&cluster_node_cib_cache, g_hash_table_destroy);
593  	}
594  	
// Client-registered function called after peer status changes (may be NULL)
static void (*peer_status_callback)(enum pcmk__node_update,
                                    pcmk__node_status_t *,
                                    const void *) = NULL;
598  	
599  	/*!
600  	 * \internal
601  	 * \brief Set a client function that will be called after peer status changes
602  	 *
603  	 * \param[in] dispatch  Pointer to function to use as callback
604  	 *
605  	 * \note Client callbacks should do only client-specific handling. Callbacks
606  	 *       must not add or remove entries in the peer caches.
607  	 */
608  	void
609  	pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
610  	                                                   pcmk__node_status_t *,
611  	                                                   const void *))
612  	{
613  	    // @TODO Improve documentation of peer_status_callback
614  	    peer_status_callback = dispatch;
615  	}
616  	
617  	/*!
618  	 * \internal
619  	 * \brief Tell the library whether to automatically reap lost nodes
620  	 *
621  	 * If \c true (the default), calling \c crm_update_peer_proc() will also update
622  	 * the peer state to \c PCMK_VALUE_MEMBER or \c PCMK__VALUE_LOST, and updating
623  	 * the peer state will reap peers whose state changes to anything other than
624  	 * \c PCMK_VALUE_MEMBER.
625  	 *
626  	 * Callers should leave this enabled unless they plan to manage the cache
627  	 * separately on their own.
628  	 *
629  	 * \param[in] enable  \c true to enable automatic reaping, \c false to disable
630  	 */
631  	void
632  	pcmk__cluster_set_autoreap(bool enable)
633  	{
634  	    autoreap = enable;
635  	}
636  	
637  	static void
638  	dump_peer_hash(int level, const char *caller)
639  	{
640  	    GHashTableIter iter;
641  	    const char *id = NULL;
642  	    pcmk__node_status_t *node = NULL;
643  	
644  	    g_hash_table_iter_init(&iter, pcmk__peer_cache);
645  	    while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
646  	        do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s",
647  	                   caller, node->cluster_layer_id, node->name, node, id);
648  	    }
649  	}
650  	
651  	static gboolean
652  	hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
653  	{
654  	    return value == user_data;
655  	}
656  	
657  	/*!
658  	 * \internal
659  	 * \brief Search cluster member node cache
660  	 *
661  	 * \param[in] id     If not 0, cluster node ID to search for
662  	 * \param[in] uname  If not NULL, node name to search for
663  	 * \param[in] uuid   If not NULL while id is 0, node UUID instead of cluster
664  	 *                   node ID to search for
665  	 *
666  	 * \return Cluster node cache entry if found, otherwise NULL
667  	 */
668  	static pcmk__node_status_t *
669  	search_cluster_member_cache(unsigned int id, const char *uname,
670  	                            const char *uuid)
671  	{
672  	    GHashTableIter iter;
673  	    pcmk__node_status_t *node = NULL;
674  	    pcmk__node_status_t *by_id = NULL;
675  	    pcmk__node_status_t *by_name = NULL;
676  	
677  	    pcmk__assert((id > 0) || (uname != NULL));
678  	
679  	    pcmk__cluster_init_node_caches();
680  	
681  	    if (uname != NULL) {
682  	        g_hash_table_iter_init(&iter, pcmk__peer_cache);
683  	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
684  	            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
685  	                pcmk__trace("Name match: %s", node->name);
686  	                by_name = node;
687  	                break;
688  	            }
689  	        }
690  	    }
691  	
692  	    if (id > 0) {
693  	        g_hash_table_iter_init(&iter, pcmk__peer_cache);
694  	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
695  	            if (node->cluster_layer_id == id) {
696  	                pcmk__trace("ID match: %" PRIu32, node->cluster_layer_id);
697  	                by_id = node;
698  	                break;
699  	            }
700  	        }
701  	
702  	    } else if (uuid != NULL) {
703  	        g_hash_table_iter_init(&iter, pcmk__peer_cache);
704  	        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
705  	            const char *this_xml_id = pcmk__cluster_get_xml_id(node);
706  	
707  	            if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
708  	                pcmk__trace("Found cluster node cache entry by XML ID %s",
709  	                            this_xml_id);
710  	                by_id = node;
711  	                break;
712  	            }
713  	        }
714  	    }
715  	
716  	    node = by_id; /* Good default */
717  	    if(by_id == by_name) {
718  	        /* Nothing to do if they match (both NULL counts) */
719  	        pcmk__trace("Consistent: %p for %u/%s", by_id, id, uname);
720  	
721  	    } else if(by_id == NULL && by_name) {
722  	        pcmk__trace("Only one: %p for %u/%s", by_name, id, uname);
723  	
724  	        if (id && by_name->cluster_layer_id) {
725  	            dump_peer_hash(LOG_WARNING, __func__);
726  	            pcmk__crit("Nodes %u and %" PRIu32 " share the same name '%s'",
727  	                       id, by_name->cluster_layer_id, uname);
728  	            node = NULL; /* Create a new one */
729  	
730  	        } else {
731  	            node = by_name;
732  	        }
733  	
734  	    } else if(by_name == NULL && by_id) {
735  	        pcmk__trace("Only one: %p for %u/%s", by_id, id, uname);
736  	
737  	        if ((uname != NULL) && (by_id->name != NULL)) {
738  	            dump_peer_hash(LOG_WARNING, __func__);
739  	            pcmk__crit("Nodes '%s' and '%s' share the same cluster nodeid %u: "
740  	                       "assuming '%s' is correct",
741  	                       uname, by_id->name, id, uname);
742  	        }
743  	
744  	    } else if ((uname != NULL) && (by_id->name != NULL)) {
745  	        if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
746  	            pcmk__notice("Node '%s' has changed its cluster layer ID "
747  	                         "from %" PRIu32 " to %" PRIu32,
748  	                         by_id->name, by_name->cluster_layer_id,
749  	                         by_id->cluster_layer_id);
750  	            g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
751  	                                        by_name);
752  	
753  	        } else {
754  	            pcmk__warn("Nodes '%s' and '%s' share the same cluster nodeid: %u "
755  	                       "%s",
756  	                       by_id->name, by_name->name, id, uname);
757  	            dump_peer_hash(LOG_INFO, __func__);
758  	            crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
759  	                      TRUE);
760  	        }
761  	
762  	    } else if ((id > 0) && (by_name->cluster_layer_id > 0)) {
763  	        pcmk__warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: "
764  	                   "'%s'",
765  	                   by_id->cluster_layer_id, by_name->cluster_layer_id, uname);
766  	
767  	    } else {
768  	        /* Simple merge */
769  	
770  	        /* Only corosync-based clusters use node IDs. The functions that call
771  	         * pcmk__update_peer_state() and crm_update_peer_proc() only know
772  	         * nodeid, so 'by_id' is authoritative when merging.
773  	         */
774  	        dump_peer_hash(LOG_DEBUG, __func__);
775  	
776  	        pcmk__info("Merging %p into %p", by_name, by_id);
777  	        g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
778  	                                    by_name);
779  	    }
780  	
781  	    return node;
782  	}
783  	
784  	/*!
785  	 * \internal
786  	 * \brief Search caches for a node (cluster or Pacemaker Remote)
787  	 *
788  	 * \param[in] id      If not 0, cluster node ID to search for
789  	 * \param[in] uname   If not NULL, node name to search for
790  	 * \param[in] xml_id  If not NULL, CIB XML ID of node to search for
791  	 * \param[in] flags   Group of enum pcmk__node_search_flags
792  	 *
793  	 * \return Node cache entry if found, otherwise NULL
794  	 */
795  	pcmk__node_status_t *
796  	pcmk__search_node_caches(unsigned int id, const char *uname,
797  	                         const char *xml_id, uint32_t flags)
798  	{
799  	    pcmk__node_status_t *node = NULL;
800  	
801  	    pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL));
802  	
803  	    pcmk__cluster_init_node_caches();
804  	
805  	    if (pcmk__is_set(flags, pcmk__node_search_remote)) {
806  	        if (uname != NULL) {
807  	            node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
808  	        } else if (xml_id != NULL) {
809  	            node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id);
810  	        }
811  	    }
812  	
813  	    if ((node == NULL)
814  	        && pcmk__is_set(flags, pcmk__node_search_cluster_member)) {
815  	
816  	        node = search_cluster_member_cache(id, uname, xml_id);
817  	    }
818  	
819  	    if ((node == NULL) && pcmk__is_set(flags, pcmk__node_search_cluster_cib)) {
820  	        if (xml_id != NULL) {
821  	            node = find_cib_cluster_node(xml_id, uname);
822  	        } else {
823  	            // Assumes XML ID is node ID as string (as with Corosync)
824  	            char *id_str = (id == 0)? NULL : pcmk__assert_asprintf("%u", id);
825  	
826  	            node = find_cib_cluster_node(id_str, uname);
827  	            free(id_str);
828  	        }
829  	    }
830  	
831  	    return node;
832  	}
833  	
834  	/*!
835  	 * \internal
836  	 * \brief Purge a node from cache (both cluster and Pacemaker Remote)
837  	 *
838  	 * \param[in] node_name  If not NULL, purge only nodes with this name
839  	 * \param[in] node_id    If not 0, purge cluster nodes only if they have this ID
840  	 *
841  	 * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
842  	 *       If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
843  	 *       nodes that match \p node_name will be purged, and cluster nodes that
844  	 *       match both \p node_name and \p node_id will be purged.
845  	 * \note The caller must be careful not to use \p node_name after calling this
846  	 *       function if it might be a pointer into a cache entry being removed.
847  	 */
848  	void
849  	pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
850  	{
851  	    char *node_name_copy = NULL;
852  	
853  	    if ((node_name == NULL) && (node_id == 0U)) {
854  	        return;
855  	    }
856  	
857  	    // Purge from Pacemaker Remote node cache
858  	    if ((node_name != NULL)
859  	        && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) {
860  	        /* node_name could be a pointer into the cache entry being purged,
861  	         * so reassign it to a copy before the original gets freed
862  	         */
863  	        node_name_copy = pcmk__str_copy(node_name);
864  	        node_name = node_name_copy;
865  	
866  	        pcmk__trace("Purging %s from Pacemaker Remote node cache", node_name);
867  	        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
868  	    }
869  	
870  	    pcmk__cluster_forget_cluster_node(node_id, node_name);
871  	    free(node_name_copy);
872  	}
873  	
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Remove inactive cached peers whose name conflicts with a given node
 *
 * Remove any peer cache entry that has the same name as \p node but a
 * different nonzero cluster layer ID, as long as the entry is not an active
 * cluster member.
 *
 * \param[in] node  Node whose name and cluster layer ID to check against
 *
 * \return Number of cache entries removed
 *
 * \note This function should not be called within a peer cache iteration,
 *       because it removes entries (which would invalidate the iterator).
 */
static guint
remove_conflicting_peer(pcmk__node_status_t *node)
{
    guint matches = 0; // guint to match the return type (was a signed int)
    GHashTableIter iter;
    pcmk__node_status_t *existing_node = NULL;

    if ((node->cluster_layer_id == 0) || (node->name == NULL)) {
        return 0;
    }

    // Only proceed when Corosync is configured with a node list
    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if ((existing_node->cluster_layer_id > 0)
            && (existing_node->cluster_layer_id != node->cluster_layer_id)
            && pcmk__str_eq(existing_node->name, node->name, pcmk__str_casei)) {

            // Never remove an entry that is still an active cluster member
            if (pcmk__cluster_is_node_active(existing_node)) {
                continue;
            }

            pcmk__warn("Removing cached offline node %" PRIu32 "/%s which has "
                       "conflicting name with %" PRIu32,
                       existing_node->cluster_layer_id, existing_node->name,
                       node->cluster_layer_id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif
913  	
914  	/*!
915  	 * \internal
916  	 * \brief Get a cluster node cache entry, possibly creating one if not found
917  	 *
918  	 * If \c pcmk__node_search_cluster_member is set in \p flags, the return value
919  	 * is guaranteed not to be \c NULL. A new cache entry is created if one does not
920  	 * already exist.
921  	 *
922  	 * \param[in] id      If not 0, cluster node ID to search for
923  	 * \param[in] uname   If not NULL, node name to search for
924  	 * \param[in] xml_id  If not NULL while \p id is 0, search for this CIB XML ID
925  	 *                    instead of a cluster ID
926  	 * \param[in] flags   Group of enum pcmk__node_search_flags
927  	 *
928  	 * \return (Possibly newly created) cluster node cache entry
929  	 */
930  	/* coverity[-alloc] Memory is referenced in one or both hashtables */
931  	pcmk__node_status_t *
932  	pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
933  	               uint32_t flags)
934  	{
935  	    pcmk__node_status_t *node = NULL;
936  	    char *uname_lookup = NULL;
937  	
938  	    pcmk__assert((id > 0) || (uname != NULL));
939  	
940  	    pcmk__cluster_init_node_caches();
941  	
942  	    // Check the Pacemaker Remote node cache first
943  	    if (pcmk__is_set(flags, pcmk__node_search_remote)) {
944  	        node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
945  	        if (node != NULL) {
946  	            return node;
947  	        }
948  	    }
949  	
950  	    if (!pcmk__is_set(flags, pcmk__node_search_cluster_member)) {
951  	        return NULL;
952  	    }
953  	
954  	    node = search_cluster_member_cache(id, uname, xml_id);
955  	
956  	    /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
957  	     * we need to do a lookup of the node name using the id in the cluster membership. */
958  	    if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) {
959  	        uname_lookup = pcmk__cluster_node_name(id);
960  	    }
961  	
962  	    if (uname_lookup) {
963  	        uname = uname_lookup;
964  	        pcmk__trace("Inferred a name of '%s' for node %u", uname, id);
965  	
966  	        /* try to turn up the node one more time now that we know the uname. */
967  	        if (node == NULL) {
968  	            node = search_cluster_member_cache(id, uname, xml_id);
969  	        }
970  	    }
971  	
972  	    if (node == NULL) {
973  	        char *uniqueid = pcmk__generate_uuid();
974  	
975  	        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));
976  	
977  	        pcmk__info("Created entry %s/%p for node %s/%u (%d total)", uniqueid,
978  	                   node, uname, id, (1 + g_hash_table_size(pcmk__peer_cache)));
979  	        g_hash_table_replace(pcmk__peer_cache, uniqueid, node);
980  	    }
981  	
982  	    if ((id > 0) && (uname != NULL)
983  	        && ((node->cluster_layer_id == 0) || (node->name == NULL))) {
984  	        pcmk__info("Node %u is now known as %s", id, uname);
985  	    }
986  	
987  	    if ((id > 0) && (node->cluster_layer_id == 0)) {
988  	        node->cluster_layer_id = id;
989  	    }
990  	
991  	    if ((uname != NULL) && (node->name == NULL)) {
992  	        update_peer_uname(node, uname);
993  	    }
994  	
995  	    if ((xml_id == NULL) && (node->xml_id == NULL)) {
996  	        xml_id = pcmk__cluster_get_xml_id(node);
997  	        if (xml_id == NULL) {
998  	            pcmk__debug("Cannot obtain an XML ID for node %s[%u] at this time",
999  	                        node->name, id);
1000 	        } else {
1001 	            pcmk__info("Node %s[%u] has XML ID %s", node->name, id, xml_id);
1002 	        }
1003 	    }
1004 	
1005 	    free(uname_lookup);
1006 	
1007 	    return node;
1008 	}
1009 	
1010 	/*!
1011 	 * \internal
1012 	 * \brief Update a node's uname
1013 	 *
1014 	 * \param[in,out] node   Node object to update
1015 	 * \param[in]     uname  New name to set
1016 	 *
1017 	 * \note This function should not be called within a peer cache iteration,
1018 	 *       because in some cases it can remove conflicting cache entries,
1019 	 *       which would invalidate the iterator.
1020 	 */
1021 	static void
1022 	update_peer_uname(pcmk__node_status_t *node, const char *uname)
1023 	{
1024 	    CRM_CHECK(uname != NULL,
1025 	              pcmk__err("Bug: can't update node name without name"); return);
1026 	    CRM_CHECK(node != NULL,
1027 	              pcmk__err("Bug: can't update node name to %s without node",
1028 	                        uname);
1029 	              return);
1030 	
1031 	    if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) {
1032 	        pcmk__debug("Node name '%s' did not change", uname);
1033 	        return;
1034 	    }
1035 	
1036 	    for (const char *c = uname; *c; ++c) {
1037 	        if ((*c >= 'A') && (*c <= 'Z')) {
1038 	            pcmk__warn("Node names with capitals are discouraged, consider "
1039 	                       "changing '%s'",
1040 	                       uname);
1041 	            break;
1042 	        }
1043 	    }
1044 	
1045 	    pcmk__str_update(&node->name, uname);
1046 	
1047 	    if (peer_status_callback != NULL) {
1048 	        peer_status_callback(pcmk__node_update_name, node, NULL);
1049 	    }
1050 	
1051 	#if SUPPORT_COROSYNC
1052 	    if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
1053 	        && !pcmk__is_set(node->flags, pcmk__node_status_remote)) {
1054 	
1055 	        remove_conflicting_peer(node);
1056 	    }
1057 	#endif
1058 	}
1059 	
1060 	/*!
1061 	 * \internal
1062 	 * \brief Get log-friendly string equivalent of a process flag
1063 	 *
1064 	 * \param[in] proc  Process flag
1065 	 *
1066 	 * \return Log-friendly string equivalent of \p proc
1067 	 */
1068 	static inline const char *
1069 	proc2text(enum crm_proc_flag proc)
1070 	{
1071 	    switch (proc) {
1072 	        case crm_proc_none:
1073 	            return "none";
1074 	
1075 	        case crm_proc_cpg:
1076 	            return "corosync-cpg";
1077 	
1078 	        default:
1079 	            return "unknown";
1080 	    }
1081 	}
1082 	
1083 	/*!
1084 	 * \internal
1085 	 * \brief Update a node's process information (and potentially state)
1086 	 *
1087 	 * \param[in]     source  Caller's function name (for log messages)
1088 	 * \param[in,out] node    Node object to update
1089 	 * \param[in]     flag    Bitmask of new process information
1090 	 * \param[in]     status  node status (online, offline, etc.)
1091 	 *
1092 	 * \return NULL if any node was reaped from peer caches, value of node otherwise
1093 	 *
1094 	 * \note If this function returns NULL, the supplied node object was likely
1095 	 *       freed and should not be used again. This function should not be
1096 	 *       called within a cache iteration if reaping is possible, otherwise
1097 	 *       reaping could invalidate the iterator.
1098 	 */
1099 	pcmk__node_status_t *
1100 	crm_update_peer_proc(const char *source, pcmk__node_status_t *node,
1101 	                     uint32_t flag, const char *status)
1102 	{
1103 	    uint32_t last = 0;
1104 	    gboolean changed = FALSE;
1105 	
1106 	    CRM_CHECK(node != NULL,
1107 	              pcmk__err("%s: Could not set %s to %s for NULL", source,
1108 	                        proc2text(flag), status);
1109 	              return NULL);
1110 	
1111 	    /* Pacemaker doesn't spawn processes on remote nodes */
1112 	    if (pcmk__is_set(node->flags, pcmk__node_status_remote)) {
1113 	        return node;
1114 	    }
1115 	
1116 	    last = node->processes;
1117 	    if (status == NULL) {
1118 	        node->processes = flag;
1119 	        if (node->processes != last) {
1120 	            changed = TRUE;
1121 	        }
1122 	
1123 	    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1124 	        if ((node->processes & flag) != flag) {
1125 	            node->processes = pcmk__set_flags_as(__func__, __LINE__,
1126 	                                                 LOG_TRACE, "Peer process",
1127 	                                                 node->name, node->processes,
1128 	                                                 flag, "processes");
1129 	            changed = TRUE;
1130 	        }
1131 	
1132 	    } else if (node->processes & flag) {
1133 	        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
1134 	                                               LOG_TRACE, "Peer process",
1135 	                                               node->name, node->processes,
1136 	                                               flag, "processes");
1137 	        changed = TRUE;
1138 	    }
1139 	
1140 	    if (changed) {
1141 	        if (status == NULL && flag <= crm_proc_none) {
1142 	            pcmk__info("%s: Node %s[%" PRIu32 "] - all processes are now "
1143 	                       "offline",
1144 	                       source, node->name, node->cluster_layer_id);
1145 	        } else {
1146 	            pcmk__info("%s: Node %s[%" PRIu32 "] - %s is now %s", source,
1147 	                       node->name, node->cluster_layer_id, proc2text(flag),
1148 	                       status);
1149 	        }
1150 	
1151 	        if (pcmk__is_set(node->processes, crm_get_cluster_proc())) {
1152 	            node->when_online = time(NULL);
1153 	
1154 	        } else {
1155 	            node->when_online = 0;
1156 	        }
1157 	
1158 	        /* Call the client callback first, then update the peer state,
1159 	         * in case the node will be reaped
1160 	         */
1161 	        if (peer_status_callback != NULL) {
1162 	            peer_status_callback(pcmk__node_update_processes, node, &last);
1163 	        }
1164 	
1165 	        /* The client callback shouldn't touch the peer caches,
1166 	         * but as a safety net, bail if the peer cache was destroyed.
1167 	         */
1168 	        if (pcmk__peer_cache == NULL) {
1169 	            return NULL;
1170 	        }
1171 	
1172 	        if (autoreap) {
1173 	            const char *peer_state = NULL;
1174 	
1175 	            if (pcmk__is_set(node->processes, crm_get_cluster_proc())) {
1176 	                peer_state = PCMK_VALUE_MEMBER;
1177 	            } else {
1178 	                peer_state = PCMK__VALUE_LOST;
1179 	            }
1180 	            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1181 	        }
1182 	    } else {
1183 	        pcmk__trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)", source,
1184 	                    node->name, node->cluster_layer_id, proc2text(flag),
1185 	                    status);
1186 	    }
1187 	    return node;
1188 	}
1189 	
1190 	/*!
1191 	 * \internal
1192 	 * \brief Update a cluster node cache entry's expected join state
1193 	 *
1194 	 * \param[in]     function  Caller's function name (for logging)
1195 	 * \param[in,out] node      Node to update
1196 	 * \param[in]     expected  Node's new join state
1197 	 */
1198 	void
1199 	pcmk__update_peer_expected_as(const char *function, pcmk__node_status_t *node,
1200 	                              const char *expected)
1201 	{
1202 	    CRM_CHECK(node != NULL, return);
1203 	
1204 	    // Remote nodes don't participate in joins
1205 	    if (pcmk__is_set(node->flags, pcmk__node_status_remote)) {
1206 	        return;
1207 	    }
1208 	
1209 	    if ((expected != NULL)
1210 	        && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1211 	
1212 	        char *last = node->expected;
1213 	
1214 	        node->expected = pcmk__str_copy(expected);
1215 	        pcmk__info("%s: Node %s[%" PRIu32 "] - expected state is now %s "
1216 	                   "(was %s)", function, node->name, node->cluster_layer_id,
1217 	                   expected, last);
1218 	        free(last);
1219 	        return;
1220 	    }
1221 	
1222 	    pcmk__trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)",
1223 	                function, node->name, node->cluster_layer_id, expected);
1224 	}
1225 	
1226 	/*!
1227 	 * \internal
1228 	 * \brief Update a node's state and membership information
1229 	 *
1230 	 * \param[in]     source      Caller's function name (for log messages)
1231 	 * \param[in,out] node        Node object to update
1232 	 * \param[in]     state       Node's new state
1233 	 * \param[in]     membership  Node's new membership ID
1234 	 * \param[in,out] iter        If not NULL, pointer to node's peer cache iterator
1235 	 *
1236 	 * \return NULL if any node was reaped, value of node otherwise
1237 	 *
1238 	 * \note If this function returns NULL, the supplied node object was likely
1239 	 *       freed and should not be used again. This function may be called from
1240 	 *       within a peer cache iteration if the iterator is supplied.
1241 	 */
1242 	static pcmk__node_status_t *
1243 	update_peer_state_iter(const char *source, pcmk__node_status_t *node,
1244 	                       const char *state, uint64_t membership,
1245 	                       GHashTableIter *iter)
1246 	{
1247 	    gboolean is_member;
1248 	
1249 	    CRM_CHECK(node != NULL,
1250 	              pcmk__err("Could not set state for unknown host to %s "
1251 	                        QB_XS " source=%s",
1252 	                        state, source);
1253 	              return NULL);
1254 	
1255 	    is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none);
1256 	    if (is_member) {
1257 	        node->when_lost = 0;
1258 	        if (membership) {
1259 	            node->membership_id = membership;
1260 	        }
1261 	    }
1262 	
1263 	    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1264 	        char *last = node->state;
1265 	
1266 	        if (is_member) {
1267 	             node->when_member = time(NULL);
1268 	
1269 	        } else {
1270 	             node->when_member = 0;
1271 	        }
1272 	
1273 	        node->state = strdup(state);
1274 	        pcmk__notice("Node %s state is now %s "
1275 	                     QB_XS " nodeid=%" PRIu32 " previous=%s source=%s",
1276 	                     node->name, state, node->cluster_layer_id,
1277 	                     pcmk__s(last, "unknown"), source);
1278 	        if (peer_status_callback != NULL) {
1279 	            peer_status_callback(pcmk__node_update_state, node, last);
1280 	        }
1281 	        free(last);
1282 	
1283 	        if (autoreap && !is_member
1284 	            && !pcmk__is_set(node->flags, pcmk__node_status_remote)) {
1285 	            /* We only autoreap from the peer cache, not the remote peer cache,
1286 	             * because the latter should be managed only by
1287 	             * refresh_remote_nodes().
1288 	             */
1289 	            if(iter) {
1290 	                pcmk__notice("Purged 1 peer with cluster layer ID %" PRIu32
1291 	                             "and/or name=%s from the membership cache",
1292 	                             node->cluster_layer_id, node->name);
1293 	                g_hash_table_iter_remove(iter);
1294 	
1295 	            } else {
1296 	                pcmk__cluster_forget_cluster_node(node->cluster_layer_id,
1297 	                                                  node->name);
1298 	            }
1299 	            node = NULL;
1300 	        }
1301 	
1302 	    } else {
1303 	        pcmk__trace("Node %s state is unchanged (%s) " QB_XS
1304 	                    " nodeid=%" PRIu32 " source=%s",
1305 	                    node->name, state, node->cluster_layer_id, source);
1306 	    }
1307 	    return node;
1308 	}
1309 	
1310 	/*!
1311 	 * \brief Update a node's state and membership information
1312 	 *
1313 	 * \param[in]     source      Caller's function name (for log messages)
1314 	 * \param[in,out] node        Node object to update
1315 	 * \param[in]     state       Node's new state
1316 	 * \param[in]     membership  Node's new membership ID
1317 	 *
1318 	 * \return NULL if any node was reaped, value of node otherwise
1319 	 *
1320 	 * \note If this function returns NULL, the supplied node object was likely
1321 	 *       freed and should not be used again. This function should not be
1322 	 *       called within a cache iteration if reaping is possible,
1323 	 *       otherwise reaping could invalidate the iterator.
1324 	 */
1325 	pcmk__node_status_t *
1326 	pcmk__update_peer_state(const char *source, pcmk__node_status_t *node,
1327 	                        const char *state, uint64_t membership)
1328 	{
1329 	    return update_peer_state_iter(source, node, state, membership, NULL);
1330 	}
1331 	
1332 	/*!
1333 	 * \internal
1334 	 * \brief Reap all nodes from cache whose membership information does not match
1335 	 *
1336 	 * \param[in] membership  Membership ID of nodes to keep
1337 	 */
1338 	void
1339 	pcmk__reap_unseen_nodes(uint64_t membership)
1340 	{
1341 	    GHashTableIter iter;
1342 	    pcmk__node_status_t *node = NULL;
1343 	
1344 	    pcmk__trace("Reaping unseen nodes...");
1345 	    g_hash_table_iter_init(&iter, pcmk__peer_cache);
1346 	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1347 	        if (node->membership_id != membership) {
1348 	            if (node->state) {
1349 	                /* Calling update_peer_state_iter() allows us to remove the node
1350 	                 * from pcmk__peer_cache without invalidating our iterator
1351 	                 */
1352 	                update_peer_state_iter(__func__, node, PCMK__VALUE_LOST,
1353 	                                       membership, &iter);
1354 	
1355 	            } else {
1356 	                pcmk__info("State of node %s[%" PRIu32 "] is still unknown",
1357 	                           node->name, node->cluster_layer_id);
1358 	            }
1359 	        }
1360 	    }
1361 	}
1362 	
/*!
 * \internal
 * \brief Search the CIB-based cluster node cache by XML ID and/or node name
 *
 * \param[in] id     If not NULL, CIB XML ID to search for
 * \param[in] uname  If not NULL, node name to search for (case-insensitive)
 *
 * \return Matching cache entry, or NULL if no entry matches (or if the
 *         entries matched by ID and by name conflict irreconcilably)
 */
static pcmk__node_status_t *
find_cib_cluster_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    if (uname) {
        // Find the first entry whose name matches (case-insensitively)
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                pcmk__trace("Name match: %s = %p", node->name, node);
                by_name = node;
                break;
            }
        }
    }

    if (id) {
        // Find the first entry whose CIB XML ID matches (case-sensitively)
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
                             pcmk__str_none)) {
                pcmk__trace("ID match: %s= %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        pcmk__trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if (by_id == NULL && by_name) {
        pcmk__trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            // An ID was given but matched nothing, so treat as not found
            node = NULL;

        } else {
            node = by_name;
        }

    } else if (by_name == NULL && by_id) {
        pcmk__trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            // A name was given but matched nothing, so treat as not found
            node = NULL;
        }

    } else if ((uname != NULL) && (by_id->name != NULL)
               && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB.
         * Return by_id. */

    } else if ((id != NULL) && (by_name->xml_id != NULL)
               && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) {
        /* Multiple nodes have the same id in the CIB.
         * Return by_name. */
        node = by_name;

    } else {
        // The ID and name matched two unrelated entries: unresolvable conflict
        node = NULL;
    }

    if (node == NULL) {
        pcmk__debug("Couldn't find node%s%s%s%s", ((id != NULL)? " " : ""),
                    pcmk__s(id, ""), ((uname != NULL)? " with name " : ""),
                    pcmk__s(uname, ""));
    }

    return node;
}
1439 	
1440 	static void
1441 	cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1442 	{
1443 	    const char *id = pcmk__xe_get(xml_node, PCMK_XA_ID);
1444 	    const char *uname = pcmk__xe_get(xml_node, PCMK_XA_UNAME);
1445 	    pcmk__node_status_t * node =  NULL;
1446 	
1447 	    CRM_CHECK(id != NULL && uname !=NULL, return);
1448 	    node = find_cib_cluster_node(id, uname);
1449 	
1450 	    if (node == NULL) {
1451 	        char *uniqueid = pcmk__generate_uuid();
1452 	
1453 	        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));
1454 	
1455 	        node->name = pcmk__str_copy(uname);
1456 	        node->xml_id = pcmk__str_copy(id);
1457 	
1458 	        g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
1459 	
1460 	    } else if (pcmk__is_set(node->flags, pcmk__node_status_dirty)) {
1461 	        pcmk__str_update(&node->name, uname);
1462 	
1463 	        /* Node is in cache and hasn't been updated already, so mark it clean */
1464 	        clear_peer_flags(node, pcmk__node_status_dirty);
1465 	    }
1466 	
1467 	}
1468 	
/*!
 * \internal
 * \brief Rebuild the CIB-based cluster node cache from a CIB
 *
 * \param[in] cib  CIB XML to use as the source of node entries
 */
static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
    pcmk__cluster_init_node_caches();

    // Mark all existing entries dirty; the refresh below cleans seen ones
    g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);

    // Add or update a cache entry for each cluster node found in the CIB
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_MEMBER_NODE_CONFIG,
                               cluster_node_cib_cache_refresh_helper, NULL);

    // Remove all old cache entries that weren't seen in the CIB
    g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}
1482 	
/*!
 * \internal
 * \brief Refresh the Pacemaker Remote and CIB-based node caches from a CIB
 *
 * \param[in] cib  CIB XML to use as the source of node information
 */
void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
    refresh_remote_nodes(cib);
    refresh_cluster_node_cib_cache(cib);
}
1489 	
1490 	// Deprecated functions kept only for backward API compatibility
1491 	// LCOV_EXCL_START
1492 	
1493 	#include <crm/cluster/compat.h>
1494 	
/*!
 * \brief Initialize the node caches
 *
 * \deprecated Kept only for backward API compatibility; thin wrapper around
 *             pcmk__cluster_init_node_caches()
 */
void
crm_peer_init(void)
{
    pcmk__cluster_init_node_caches();
}
1500 	
1501 	// LCOV_EXCL_STOP
1502 	// End deprecated API
1503