1 /*
2 * Copyright 2010-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdint.h> // uint64_t, uint32_t
14 #include <stdlib.h> // NULL, free
15 #include <string.h> // strerror
16 #include <sys/types.h> // gid_t, uid_t, pid_t
17 #include <syslog.h> // LOG_DEBUG
18 #include <unistd.h> // sleep
19
20 #include <corosync/cfg.h> // corosync_cfg_*
21 #include <corosync/cmap.h> // cmap_finalize, cmap_fd_get
22 #include <corosync/corotypes.h> // CS_OK, CS_ERR_QUEUE_FULL
23 #include <corosync/cpg.h> // CPG_MODEL_V1, cpg_fd_get
24 #include <qb/qblog.h> // QB_XS
25
26 #include <crm/common/ipc.h> // crm_ipc_is_authentic_process
27 #include <crm/common/mainloop.h> // mainloop_*
28 #include <crm/cluster.h> // pcmk_cluster_layer*
29 #include <crm/common/options.h> // PCMK_VALUE_*
30 #include <crm/common/results.h> // pcmk_rc_str, pcmk_rc_*
31 #include <crm_config.h> // CRM_DAEMON_USER
32
33 #include "pacemakerd.h" // restart_cluster_subdaemons
34 #include "pcmkd_corosync.h"
35
// Corosync CFG connection handle; 0 is treated as "no connection" in this file
static corosync_cfg_handle_t cfg_handle = 0;
// Timer that retries the CFG connection after it is lost; NULL when not set
static mainloop_timer_t *reconnect_timer = NULL;
38
39 /* =::=::=::= CFG - Shutdown stuff =::=::=::= */
40
41 static void
42 cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
43 {
44 const char *shutdown_s = NULL;
45
46 switch (flags) {
47 case COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE:
48 shutdown_s = "immediate";
49 break;
50 case COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS:
51 shutdown_s = "forced";
52 break;
53 default:
54 shutdown_s = "optional";
55 break;
56 }
57
58 pcmk__info("Corosync wants to shut down: %s", shutdown_s);
59
60 /* Never allow corosync to shut down while we're running */
61 corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
62 }
63
// Callback table handed to corosync_cfg_initialize() in cluster_connect_cfg()
static corosync_cfg_callbacks_t cfg_callbacks = {
    .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
};
67
68 static int
69 pcmk_cfg_dispatch(gpointer user_data)
70 {
71 corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
72 cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
73
74 if (rc != CS_OK) {
75 return -1;
76 }
77 return 0;
78 }
79
80 static void
81 close_cfg(void)
82 {
83 if (cfg_handle != 0) {
84 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
85 /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a
86 * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully,
87 * it's not necessary since we exit immediately after this.
88 */
89 #endif
90 corosync_cfg_finalize(cfg_handle);
91 cfg_handle = 0;
92 }
93 }
94
95 static gboolean
96 cluster_reconnect_cb(gpointer data)
97 {
|
(1) Event path: |
Condition "cluster_connect_cfg()", taking true branch. |
98 if (cluster_connect_cfg()) {
|
CID (unavailable; MK=8509ad2c6a113597a7485809db49b4ed) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(2) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(3) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
99 g_clear_pointer(&reconnect_timer, mainloop_timer_del);
100 pcmk__notice("Cluster reconnect succeeded");
101 pacemakerd_read_config();
102 restart_cluster_subdaemons();
103 return G_SOURCE_REMOVE;
104 } else {
105 pcmk__info("Cluster reconnect failed (connection will be reattempted "
106 "once per second)");
107 }
108 /*
109 * In theory this will continue forever. In practice the CIB connection from
110 * attrd will timeout and shut down Pacemaker when it gets bored.
111 */
112 return G_SOURCE_CONTINUE;
113 }
114
115
116 static void
117 cfg_connection_destroy(gpointer user_data)
118 {
119 pcmk__warn("Lost connection to cluster layer (connection will be "
120 "reattempted once per second)");
121 corosync_cfg_finalize(cfg_handle);
122 cfg_handle = 0;
123 reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL);
124 mainloop_timer_start(reconnect_timer);
125 }
126
127 void
128 cluster_disconnect_cfg(void)
129 {
130 close_cfg();
131
132 /* The mainloop should be gone by this point, so this isn't necessary, but
133 * cleaning up memory should make valgrind happier.
134 */
135 g_clear_pointer(&reconnect_timer, mainloop_timer_del);
136 }
137
/* Run a Corosync call, retrying while it reports a transient failure.
 *
 * counter: int lvalue counting retries; the caller resets it before each use
 * max:     stop retrying once counter reaches this value
 * code:    statement to run; it must assign its result to a variable named
 *          "rc" that is in scope at the point of use
 *
 * After each run of "code", if rc is CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL,
 * the counter is incremented and the caller sleeps for that many seconds
 * before the loop condition is checked (so a sleep also follows the final
 * failed attempt). Any other rc value ends the loop immediately.
 */
#define cs_repeat(counter, max, code) do {                                  \
        code;                                                               \
        if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {             \
            counter++;                                                      \
            pcmk__debug("Retrying Corosync operation after %ds", counter);  \
            sleep(counter);                                                 \
        } else {                                                            \
            break;                                                          \
        }                                                                   \
    } while(counter < max)
148
149 gboolean
150 cluster_connect_cfg(void)
151 {
152 cs_error_t rc;
153 int fd = -1, retries = 0, rv;
154 uid_t found_uid = 0;
155 gid_t found_gid = 0;
156 pid_t found_pid = 0;
157 uint32_t nodeid;
158
159 static struct mainloop_fd_callbacks cfg_fd_callbacks = {
160 .dispatch = pcmk_cfg_dispatch,
161 .destroy = cfg_connection_destroy,
162 };
163
164 cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
165
166 if (rc != CS_OK) {
167 pcmk__crit("Could not connect to Corosync CFG: %s " QB_XS " rc=%d",
168 pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
169 return FALSE;
170 }
171
172 rc = corosync_cfg_fd_get(cfg_handle, &fd);
173 if (rc != CS_OK) {
174 pcmk__crit("Could not get Corosync CFG descriptor: %s " QB_XS " rc=%d",
175 pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
176 goto bail;
177 }
178
179 /* CFG provider run as root (in given user namespace, anyway)? */
180 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
181 &found_uid, &found_gid))) {
182 pcmk__crit("Rejecting Corosync CFG provider because process %lld "
183 "is running as uid %lld gid %lld, not root",
184 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
185 (long long) found_uid, (long long) found_gid);
186 goto bail;
187 } else if (rv < 0) {
188 pcmk__crit("Could not authenticate Corosync CFG provider: %s "
189 QB_XS " rc=%d", strerror(-rv), -rv);
190 goto bail;
191 }
192
193 retries = 0;
194 cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
195 if (rc != CS_OK) {
196 pcmk__crit("Could not get local node ID from Corosync: %s "
197 QB_XS " rc=%d", pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
198 goto bail;
199 }
200 pcmk__debug("Corosync reports local node ID is %" PRIu32, nodeid);
201
202 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
203 retries = 0;
204 cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
205 if (rc != CS_OK) {
206 pcmk__crit("Could not enable Corosync CFG shutdown tracker: %s "
207 QB_XS " rc=%d",
208 pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
209 goto bail;
210 }
211 #endif
212
213 mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
214 return TRUE;
215
216 bail:
217 corosync_cfg_finalize(cfg_handle);
218 return FALSE;
219 }
220
221 void
222 pcmkd_shutdown_corosync(void)
223 {
224 cs_error_t rc;
225
226 if (cfg_handle == 0) {
227 pcmk__warn("Unable to shut down Corosync: No connection");
228 return;
229 }
230 pcmk__info("Asking Corosync to shut down");
231 rc = corosync_cfg_try_shutdown(cfg_handle,
232 COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
233 if (rc == CS_OK) {
234 close_cfg();
235 } else {
236 pcmk__warn("Corosync shutdown failed: %s " QB_XS " rc=%d",
237 pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
238 }
239 }
240
241 bool
242 pcmkd_corosync_connected(void)
243 {
244 cpg_handle_t local_handle = 0;
245 cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
246 int fd = -1;
247
248 if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) {
249 return false;
250 }
251
252 if (cpg_fd_get(local_handle, &fd) != CS_OK) {
253 return false;
254 }
255
256 cpg_finalize(local_handle);
257
258 return true;
259 }
260
261 /* =::=::=::= Configuration =::=::=::= */
262 static int
263 get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
264 const char *fallback)
265 {
266 int rc = 0, retries = 0;
267
268 cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
269 if (rc != CS_OK) {
270 pcmk__trace("Search for %s failed %d, defaulting to %s", key, rc,
271 fallback);
272 pcmk__str_update(value, fallback);
273 }
274 pcmk__trace("%s: %s", key, *value);
275 return rc;
276 }
277
278 gboolean
279 pacemakerd_read_config(void)
280 {
281 cs_error_t rc = CS_OK;
282 int retries = 0;
283 cmap_handle_t local_handle;
284 uint64_t config = 0;
285 int fd = -1;
286 uid_t found_uid = 0;
287 gid_t found_gid = 0;
288 pid_t found_pid = 0;
289 int rv;
290 enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
291 const char *cluster_layer_s = NULL;
292
293 // There can be only one possibility
294 do {
295 rc = pcmk__init_cmap(&local_handle);
296 if (rc != CS_OK) {
297 retries++;
298 pcmk__info("Could not connect to Corosync CMAP: %s "
299 "(retrying in %ds) " QB_XS " rc=%d",
300 pcmk_rc_str(pcmk__corosync2rc(rc)), retries, rc);
301 sleep(retries);
302
303 } else {
304 break;
305 }
306
307 } while (retries < 5);
308
309 if (rc != CS_OK) {
310 pcmk__crit("Could not connect to Corosync CMAP: %s "
311 QB_XS " rc=%d", pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
312 return FALSE;
313 }
314
315 rc = cmap_fd_get(local_handle, &fd);
316 if (rc != CS_OK) {
317 pcmk__crit("Could not get Corosync CMAP descriptor: %s " QB_XS " rc=%d",
318 pcmk_rc_str(pcmk__corosync2rc(rc)), rc);
319 cmap_finalize(local_handle);
320 return FALSE;
321 }
322
323 /* CMAP provider run as root (in given user namespace, anyway)? */
324 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
325 &found_uid, &found_gid))) {
326 pcmk__crit("Rejecting Corosync CMAP provider because process %lld "
327 "is running as uid %lld gid %lld, not root",
328 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
329 (long long) found_uid, (long long) found_gid);
330 cmap_finalize(local_handle);
331 return FALSE;
332 } else if (rv < 0) {
333 pcmk__crit("Could not authenticate Corosync CMAP provider: %s "
334 QB_XS " rc=%d", strerror(-rv), -rv);
335 cmap_finalize(local_handle);
336 return FALSE;
337 }
338
339 cluster_layer = pcmk_get_cluster_layer();
340 cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
341
342 if (cluster_layer != pcmk_cluster_layer_corosync) {
343 pcmk__crit("Expected Corosync cluster layer but detected %s "
344 QB_XS " cluster_layer=%d",
345 cluster_layer_s, cluster_layer);
346 return FALSE;
347 }
348
349 pcmk__info("Reading configuration for %s cluster layer", cluster_layer_s);
350 pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, PCMK_VALUE_COROSYNC, true);
351
352 // If debug logging is not configured, check whether corosync has it
353 if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) {
354 char *debug_enabled = NULL;
355
356 get_config_opt(config, local_handle, "logging.debug", &debug_enabled,
357 PCMK_VALUE_OFF);
358
359 if (pcmk__is_true(debug_enabled)) {
360 pcmk__set_env_option(PCMK__ENV_DEBUG, "1", true);
361 if (get_crm_log_level() < LOG_DEBUG) {
362 set_crm_log_level(LOG_DEBUG);
363 }
364
365 } else {
366 pcmk__set_env_option(PCMK__ENV_DEBUG, "0", true);
367 }
368
369 free(debug_enabled);
370 }
371
372 if(local_handle){
373 gid_t gid = 0;
374 if (pcmk__daemon_user(NULL, &gid) != pcmk_rc_ok) {
375 pcmk__warn("Could not authorize group with Corosync "
376 QB_XS " No group found for user " CRM_DAEMON_USER);
377
378 } else {
379 char *key = pcmk__assert_asprintf("uidgid.gid.%lld",
380 (long long) gid);
381
382 rc = cmap_set_uint8(local_handle, key, 1);
383 free(key);
384
385 if (rc != CS_OK) {
386 pcmk__warn("Could not authorize group with Corosync: %s "
387 QB_XS " group=%u rc=%d",
388 pcmk_rc_str(pcmk__corosync2rc(rc)), gid, rc);
389 }
390 }
391 }
392 cmap_finalize(local_handle);
393
394 return TRUE;
395 }
396