1    	/*
2    	 * Copyright 2010-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <stdbool.h>
13   	#include <stdint.h>                     // uint64_t, uint32_t
14   	#include <stdlib.h>                     // NULL, free
15   	#include <string.h>                     // strerror
16   	#include <sys/types.h>                  // gid_t, uid_t, pid_t
17   	#include <syslog.h>                     // LOG_DEBUG
18   	#include <unistd.h>                     // sleep
19   	
20   	#include <corosync/cfg.h>               // corosync_cfg_*
21   	#include <corosync/cmap.h>              // cmap_finalize, cmap_fd_get
22   	#include <corosync/corotypes.h>         // CS_OK, CS_ERR_QUEUE_FULL
23   	#include <corosync/cpg.h>               // CPG_MODEL_V1, cpg_fd_get
24   	#include <qb/qblog.h>                   // QB_XS
25   	
26   	#include <crm/common/ipc.h>             // crm_ipc_is_authentic_process
27   	#include <crm/common/mainloop.h>        // mainloop_*
28   	#include <crm/cluster.h>                // pcmk_cluster_layer*
29   	#include <crm/common/options.h>         // PCMK_VALUE_*
30   	#include <crm/common/results.h>         // pcmk_rc_str, pcmk_rc_*
31   	#include <crm_config.h>                 // CRM_DAEMON_USER
32   	
33   	#include "pacemakerd.h"                 // restart_cluster_subdaemons
34   	#include "pcmkd_corosync.h"
35   	
36   	static corosync_cfg_handle_t cfg_handle = 0;
37   	static mainloop_timer_t *reconnect_timer = NULL;
38   	
39   	/* =::=::=::= CFG - Shutdown stuff =::=::=::= */
40   	
41   	static void
42   	cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
43   	{
44   	    const char *shutdown_s = NULL;
45   	
46   	    switch (flags) {
47   	        case COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE:
48   	            shutdown_s = "immediate";
49   	            break;
50   	        case COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS:
51   	            shutdown_s = "forced";
52   	            break;
53   	        default:
54   	            shutdown_s = "optional";
55   	            break;
56   	    }
57   	
58   	    pcmk__info("Corosync wants to shut down: %s", shutdown_s);
59   	
60   	    /* Never allow corosync to shut down while we're running */
61   	    corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
62   	}
63   	
64   	static corosync_cfg_callbacks_t cfg_callbacks = {
65   	    .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
66   	};
67   	
68   	static int
69   	pcmk_cfg_dispatch(gpointer user_data)
70   	{
71   	    corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
72   	    cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
73   	
74   	    if (rc != CS_OK) {
75   	        return -1;
76   	    }
77   	    return 0;
78   	}
79   	
80   	static void
81   	close_cfg(void)
82   	{
83   	    if (cfg_handle != 0) {
84   	#ifdef HAVE_COROSYNC_CFG_TRACKSTART
85   	        /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a
86   	         * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully,
87   	         * it's not necessary since we exit immediately after this.
88   	         */
89   	#endif
90   	        corosync_cfg_finalize(cfg_handle);
91   	        cfg_handle = 0;
92   	    }
93   	}
94   	
95   	static gboolean
96   	cluster_reconnect_cb(gpointer data)
97   	{
(1) Event path: Condition "cluster_connect_cfg()", taking true branch.
98   	    if (cluster_connect_cfg()) {
CID (unavailable; MK=8509ad2c6a113597a7485809db49b4ed) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS):
(2) Event assign_union_field: The union field "in" of "_pp" is written.
(3) Event inconsistent_union_field_access: In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in".
99   	        g_clear_pointer(&reconnect_timer, mainloop_timer_del);
100  	        pcmk__notice("Cluster reconnect succeeded");
101  	        pacemakerd_read_config();
102  	        restart_cluster_subdaemons();
103  	        return G_SOURCE_REMOVE;
104  	    } else {
105  	        pcmk__info("Cluster reconnect failed (connection will be reattempted "
106  	                   "once per second)");
107  	    }
108  	    /*
109  	     * In theory this will continue forever. In practice the CIB connection from
110  	     * attrd will timeout and shut down Pacemaker when it gets bored.
111  	     */
112  	    return G_SOURCE_CONTINUE;
113  	}
114  	
115  	
116  	static void
117  	cfg_connection_destroy(gpointer user_data)
118  	{
119  	    pcmk__warn("Lost connection to cluster layer (connection will be "
120  	               "reattempted once per second)");
121  	    corosync_cfg_finalize(cfg_handle);
122  	    cfg_handle = 0;
123  	    reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL);
124  	    mainloop_timer_start(reconnect_timer);
125  	}
126  	
127  	void
128  	cluster_disconnect_cfg(void)
129  	{
130  	    close_cfg();
131  	
132  	    /* The mainloop should be gone by this point, so this isn't necessary, but
133  	     * cleaning up memory should make valgrind happier.
134  	     */
135  	    g_clear_pointer(&reconnect_timer, mainloop_timer_del);
136  	}
137  	
/*!
 * \internal
 * \brief Run a Corosync call, retrying while it reports a temporary failure
 *
 * \a code is executed at least once. As long as it leaves \c rc set to
 * \c CS_ERR_TRY_AGAIN or \c CS_ERR_QUEUE_FULL, \a counter is incremented, the
 * caller sleeps for \a counter seconds, and \a code is executed again, until
 * \a counter reaches \a max.
 *
 * \param[in,out] counter  Integer lvalue counting the retries so far
 * \param[in]     max      Give up once \a counter reaches this value
 * \param[in]     code     Statement to execute; it must assign its result to
 *                         a variable named \c rc in the caller's scope
 *
 * \note The macro reads \c rc directly, so the caller must declare it.
 */
#define cs_repeat(counter, max, code) do {                                  \
        code;                                                               \
        if ((rc != CS_ERR_TRY_AGAIN) && (rc != CS_ERR_QUEUE_FULL)) {        \
            break;                                                          \
        }                                                                   \
        (counter)++;                                                        \
        pcmk__debug("Retrying Corosync operation after %ds", (counter));    \
        sleep(counter);                                                     \
    } while ((counter) < (max))
148  	
149  	gboolean
150  	cluster_connect_cfg(void)
151  	{
152  	    cs_error_t rc;
153  	    int fd = -1, retries = 0, rv;
154  	    uid_t found_uid = 0;
155  	    gid_t found_gid = 0;
156  	    pid_t found_pid = 0;
157  	    uint32_t nodeid;
158  	
159  	    static struct mainloop_fd_callbacks cfg_fd_callbacks = {
160  	        .dispatch = pcmk_cfg_dispatch,
161  	        .destroy = cfg_connection_destroy,
162  	    };
163  	
164  	    cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
165  	
166  	    if (rc != CS_OK) {
167  	        pcmk__crit("Could not connect to Corosync CFG: %s " QB_XS " rc=%d",
168  	                   pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
169  	        return FALSE;
170  	    }
171  	
172  	    rc = corosync_cfg_fd_get(cfg_handle, &fd);
173  	    if (rc != CS_OK) {
174  	        pcmk__crit("Could not get Corosync CFG descriptor: %s " QB_XS " rc=%d",
175  	                   pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
176  	        goto bail;
177  	    }
178  	
179  	    /* CFG provider run as root (in given user namespace, anyway)? */
180  	    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
181  	                                            &found_uid, &found_gid))) {
182  	        pcmk__crit("Rejecting Corosync CFG provider because process %lld "
183  	                   "is running as uid %lld gid %lld, not root",
184  	                   (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
185  	                   (long long) found_uid, (long long) found_gid);
186  	        goto bail;
187  	    } else if (rv < 0) {
188  	        pcmk__crit("Could not authenticate Corosync CFG provider: %s "
189  	                   QB_XS " rc=%d", strerror(-rv), -rv);
190  	        goto bail;
191  	    }
192  	
193  	    retries = 0;
194  	    cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
195  	    if (rc != CS_OK) {
196  	        pcmk__crit("Could not get local node ID from Corosync: %s "
197  	                   QB_XS " rc=%d", pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
198  	        goto bail;
199  	    }
200  	    pcmk__debug("Corosync reports local node ID is %" PRIu32, nodeid);
201  	
202  	#ifdef HAVE_COROSYNC_CFG_TRACKSTART
203  	    retries = 0;
204  	    cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
205  	    if (rc != CS_OK) {
206  	        pcmk__crit("Could not enable Corosync CFG shutdown tracker: %s "
207  	                   QB_XS " rc=%d",
208  	                   pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
209  	        goto bail;
210  	    }
211  	#endif
212  	
213  	    mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
214  	    return TRUE;
215  	
216  	  bail:
217  	    corosync_cfg_finalize(cfg_handle);
218  	    return FALSE;
219  	}
220  	
221  	void
222  	pcmkd_shutdown_corosync(void)
223  	{
224  	    cs_error_t rc;
225  	
226  	    if (cfg_handle == 0) {
227  	        pcmk__warn("Unable to shut down Corosync: No connection");
228  	        return;
229  	    }
230  	    pcmk__info("Asking Corosync to shut down");
231  	    rc = corosync_cfg_try_shutdown(cfg_handle,
232  	                                    COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
233  	    if (rc == CS_OK) {
234  	        close_cfg();
235  	    } else {
236  	        pcmk__warn("Corosync shutdown failed: %s " QB_XS " rc=%d",
237  	                   pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
238  	    }
239  	}
240  	
241  	bool
242  	pcmkd_corosync_connected(void)
243  	{
244  	    cpg_handle_t local_handle = 0;
245  	    cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
246  	    int fd = -1;
247  	
248  	    if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) {
249  	        return false;
250  	    }
251  	
252  	    if (cpg_fd_get(local_handle, &fd) != CS_OK) {
253  	        return false;
254  	    }
255  	
256  	    cpg_finalize(local_handle);
257  	
258  	    return true;
259  	}
260  	
261  	/* =::=::=::= Configuration =::=::=::= */
262  	static int
263  	get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
264  	               const char *fallback)
265  	{
266  	    int rc = 0, retries = 0;
267  	
268  	    cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
269  	    if (rc != CS_OK) {
270  	        pcmk__trace("Search for %s failed %d, defaulting to %s", key, rc,
271  	                    fallback);
272  	        pcmk__str_update(value, fallback);
273  	    }
274  	    pcmk__trace("%s: %s", key, *value);
275  	    return rc;
276  	}
277  	
278  	gboolean
279  	pacemakerd_read_config(void)
280  	{
281  	    cs_error_t rc = CS_OK;
282  	    int retries = 0;
283  	    cmap_handle_t local_handle;
284  	    uint64_t config = 0;
285  	    int fd = -1;
286  	    uid_t found_uid = 0;
287  	    gid_t found_gid = 0;
288  	    pid_t found_pid = 0;
289  	    int rv;
290  	    enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
291  	    const char *cluster_layer_s = NULL;
292  	
293  	    // There can be only one possibility
294  	    do {
295  	        rc = pcmk__init_cmap(&local_handle);
296  	        if (rc != CS_OK) {
297  	            retries++;
298  	            pcmk__info("Could not connect to Corosync CMAP: %s "
299  	                       "(retrying in %ds) " QB_XS " rc=%d",
300  	                       pcmk_rc_str(pcmk__corosync2rc(rc)), retries, rc);
301  	            sleep(retries);
302  	
303  	        } else {
304  	            break;
305  	        }
306  	
307  	    } while (retries < 5);
308  	
309  	    if (rc != CS_OK) {
310  	        pcmk__crit("Could not connect to Corosync CMAP: %s "
311  	                   QB_XS " rc=%d", pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
312  	        return FALSE;
313  	    }
314  	
315  	    rc = cmap_fd_get(local_handle, &fd);
316  	    if (rc != CS_OK) {
317  	        pcmk__crit("Could not get Corosync CMAP descriptor: %s " QB_XS " rc=%d",
318  	                   pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
319  	        cmap_finalize(local_handle);
320  	        return FALSE;
321  	    }
322  	
323  	    /* CMAP provider run as root (in given user namespace, anyway)? */
324  	    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
325  	                                            &found_uid, &found_gid))) {
326  	        pcmk__crit("Rejecting Corosync CMAP provider because process %lld "
327  	                   "is running as uid %lld gid %lld, not root",
328  	                   (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
329  	                   (long long) found_uid, (long long) found_gid);
330  	        cmap_finalize(local_handle);
331  	        return FALSE;
332  	    } else if (rv < 0) {
333  	        pcmk__crit("Could not authenticate Corosync CMAP provider: %s "
334  	                   QB_XS " rc=%d", strerror(-rv), -rv);
335  	        cmap_finalize(local_handle);
336  	        return FALSE;
337  	    }
338  	
339  	    cluster_layer = pcmk_get_cluster_layer();
340  	    cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
341  	
342  	    if (cluster_layer != pcmk_cluster_layer_corosync) {
343  	        pcmk__crit("Expected Corosync cluster layer but detected %s "
344  	                   QB_XS " cluster_layer=%d",
345  	                   cluster_layer_s, cluster_layer);
346  	        return FALSE;
347  	    }
348  	
349  	    pcmk__info("Reading configuration for %s cluster layer", cluster_layer_s);
350  	    pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, PCMK_VALUE_COROSYNC, true);
351  	
352  	    // If debug logging is not configured, check whether corosync has it
353  	    if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) {
354  	        char *debug_enabled = NULL;
355  	
356  	        get_config_opt(config, local_handle, "logging.debug", &debug_enabled,
357  	                       PCMK_VALUE_OFF);
358  	
359  	        if (pcmk__is_true(debug_enabled)) {
360  	            pcmk__set_env_option(PCMK__ENV_DEBUG, "1", true);
361  	            if (get_crm_log_level() < LOG_DEBUG) {
362  	                set_crm_log_level(LOG_DEBUG);
363  	            }
364  	
365  	        } else {
366  	            pcmk__set_env_option(PCMK__ENV_DEBUG, "0", true);
367  	        }
368  	
369  	        free(debug_enabled);
370  	    }
371  	
372  	    if(local_handle){
373  	        gid_t gid = 0;
374  	        if (pcmk__daemon_user(NULL, &gid) != pcmk_rc_ok) {
375  	            pcmk__warn("Could not authorize group with Corosync "
376  	                       QB_XS " No group found for user " CRM_DAEMON_USER);
377  	
378  	        } else {
379  	            char *key = pcmk__assert_asprintf("uidgid.gid.%lld",
380  	                                              (long long) gid);
381  	
382  	            rc = cmap_set_uint8(local_handle, key, 1);
383  	            free(key);
384  	
385  	            if (rc != CS_OK) {
386  	                pcmk__warn("Could not authorize group with Corosync: %s "
387  	                           QB_XS " group=%u rc=%d",
388  	                           pcmk_rc_str(pcmk__corosync2rc(rc)), gid, rc);
389  	            }
390  	        }
391  	    }
392  	    cmap_finalize(local_handle);
393  	
394  	    return TRUE;
395  	}
396