1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <signal.h>
16 #include <errno.h>
17
18 #include <sys/wait.h>
19
20 #include <crm/crm.h>
21 #include <crm/common/xml.h>
22 #include <crm/common/mainloop.h>
23
24 #include <qb/qbarray.h>
25
26 struct trigger_s {
27 GSource source;
28 gboolean running;
29 gboolean trigger;
30 void *user_data;
31 unsigned int id;
32
33 };
34
35 struct mainloop_timer_s {
36 unsigned int id;
37 unsigned int period_ms;
38 bool repeat;
39 char *name;
40 GSourceFunc cb;
41 void *userdata;
42 };
43
44 static gboolean
45 crm_trigger_prepare(GSource *source, int *timeout)
46 {
47 crm_trigger_t *trig = (crm_trigger_t *) source;
48
49 /* cluster-glue's FD and IPC related sources make use of
50 * g_source_add_poll() but do not set a timeout in their prepare
51 * functions
52 *
53 * This means mainloop's poll() will block until an event for one
54 * of these sources occurs - any /other/ type of source, such as
55 * this one or g_idle_*, that doesn't use g_source_add_poll() is
56 * S-O-L and won't be processed until there is something fd-based
57 * happens.
58 *
59 * Luckily the timeout we can set here affects all sources and
60 * puts an upper limit on how long poll() can take.
61 *
62 * So unconditionally set a small-ish timeout, not too small that
63 * we're in constant motion, which will act as an upper bound on
64 * how long the signal handling might be delayed for.
65 */
66 *timeout = 500; /* Timeout in ms */
67
68 return trig->trigger;
69 }
70
71 static gboolean
72 crm_trigger_check(GSource * source)
73 {
74 crm_trigger_t *trig = (crm_trigger_t *) source;
75
76 return trig->trigger;
77 }
78
79 /*!
80 * \internal
81 * \brief GSource dispatch function for crm_trigger_t
82 *
83 * \param[in] source crm_trigger_t being dispatched
84 * \param[in] callback Callback passed at source creation
85 * \param[in,out] userdata User data passed at source creation
86 *
87 * \return G_SOURCE_REMOVE to remove source, G_SOURCE_CONTINUE to keep it
88 */
89 static gboolean
90 crm_trigger_dispatch(GSource *source, GSourceFunc callback, void *userdata)
91 {
92 gboolean rc = G_SOURCE_CONTINUE;
93 crm_trigger_t *trig = (crm_trigger_t *) source;
94
95 if (trig->running) {
96 /* Wait until the existing job is complete before starting the next one */
97 return G_SOURCE_CONTINUE;
98 }
99 trig->trigger = FALSE;
100
101 if (callback) {
102 int callback_rc = callback(trig->user_data);
103
104 if (callback_rc < 0) {
105 pcmk__trace("Trigger handler %p not yet complete", trig);
106 trig->running = TRUE;
107 } else if (callback_rc == 0) {
108 rc = G_SOURCE_REMOVE;
109 }
110 }
111 return rc;
112 }
113
114 static void
115 crm_trigger_finalize(GSource * source)
116 {
117 pcmk__trace("Trigger %p destroyed", source);
118 }
119
120 static GSourceFuncs crm_trigger_funcs = {
121 crm_trigger_prepare,
122 crm_trigger_check,
123 crm_trigger_dispatch,
124 crm_trigger_finalize,
125 };
126
127 static crm_trigger_t *
128 mainloop_setup_trigger(GSource * source, int priority,
129 int (*dispatch)(void *user_data), void *userdata)
130 {
131 crm_trigger_t *trigger = NULL;
132
133 trigger = (crm_trigger_t *) source;
134
135 trigger->id = 0;
136 trigger->trigger = FALSE;
137 trigger->user_data = userdata;
138
139 if (dispatch) {
140 g_source_set_callback(source, dispatch, trigger, NULL);
141 }
142
143 g_source_set_priority(source, priority);
144 g_source_set_can_recurse(source, FALSE);
145
146 trigger->id = g_source_attach(source, NULL);
147 return trigger;
148 }
149
150 void
151 mainloop_trigger_complete(crm_trigger_t * trig)
152 {
153 pcmk__trace("Trigger handler %p complete", trig);
154 trig->running = FALSE;
155 }
156
157 /*!
158 * \brief Create a trigger to be used as a mainloop source
159 *
160 * \param[in] priority Relative priority of source (lower number is higher priority)
161 * \param[in] dispatch Trigger dispatch function (should return 0 to remove the
162 * trigger from the mainloop, -1 if the trigger should be
163 * kept but the job is still running and not complete, and
164 * 1 if the trigger should be kept and the job is complete)
165 * \param[in] userdata Pointer to pass to \p dispatch
166 *
167 * \return Newly allocated mainloop source for trigger
168 */
169 crm_trigger_t *
170 mainloop_add_trigger(int priority, int (*dispatch) (void *user_data),
171 void *userdata)
172 {
173 GSource *source = NULL;
174
175 pcmk__assert(sizeof(crm_trigger_t) > sizeof(GSource));
176 source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t));
177
178 return mainloop_setup_trigger(source, priority, dispatch, userdata);
179 }
180
181 void
182 mainloop_set_trigger(crm_trigger_t * source)
183 {
184 if(source) {
185 source->trigger = TRUE;
186 }
187 }
188
189 gboolean
190 mainloop_destroy_trigger(crm_trigger_t * source)
191 {
192 GSource *gs = NULL;
193
194 if(source == NULL) {
195 return TRUE;
196 }
197
198 gs = (GSource *)source;
199
200 g_source_destroy(gs); /* Remove from mainloop, ref_count-- */
201 g_source_unref(gs); /* The caller no longer carries a reference to source
202 *
203 * At this point the source should be free'd,
204 * unless we're currently processing said
205 * source, in which case mainloop holds an
206 * additional reference and it will be free'd
207 * once our processing completes
208 */
209 return TRUE;
210 }
211
212 // Define a custom glib source for signal handling
213
214 // Data structure for custom glib source
215 typedef struct {
216 crm_trigger_t trigger; // trigger that invoked source (must be first)
217 void (*handler) (int sig); // signal handler
218 int signal; // signal that was received
219 } crm_signal_t;
220
221 // Table to associate signal handlers with signal numbers
222 static crm_signal_t *crm_signals[NSIG];
223
224 /*!
225 * \internal
226 * \brief Dispatch an event from custom glib source for signals
227 *
228 * Given an signal event, clear the event trigger and call any registered
229 * signal handler.
230 *
231 * \param[in] source glib source that triggered this dispatch
232 * \param[in] callback (ignored)
233 * \param[in] userdata (ignored)
234 */
235 static gboolean
236 crm_signal_dispatch(GSource *source, GSourceFunc callback, void *userdata)
237 {
238 crm_signal_t *sig = (crm_signal_t *) source;
239
240 if(sig->signal != SIGCHLD) {
241 pcmk__notice("Caught '%s' signal " QB_XS " %d (%s handler)",
242 strsignal(sig->signal), sig->signal,
243 ((sig->handler != NULL)? "invoking" : "no"));
244 }
245
246 sig->trigger.trigger = FALSE;
247 if (sig->handler) {
248 sig->handler(sig->signal);
249 }
250 return TRUE;
251 }
252
253 /*!
254 * \internal
255 * \brief Handle a signal by setting a trigger for signal source
256 *
257 * \param[in] sig Signal number that was received
258 *
259 * \note This is the true signal handler for the mainloop signal source, and
260 * must be async-safe.
261 */
262 static void
263 mainloop_signal_handler(int sig)
264 {
265 if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) {
266 mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]);
267 }
268 }
269
270 // Functions implementing our custom glib source for signal handling
271 static GSourceFuncs crm_signal_funcs = {
272 crm_trigger_prepare,
273 crm_trigger_check,
274 crm_signal_dispatch,
275 crm_trigger_finalize,
276 };
277
278 /*!
279 * \internal
280 * \brief Set a true signal handler
281 *
282 * signal()-like interface to sigaction()
283 *
284 * \param[in] sig Signal number to register handler for
285 * \param[in] dispatch Signal handler
286 *
287 * \return The previous value of the signal handler, or SIG_ERR on error
288 * \note The dispatch function must be async-safe.
289 */
290 sighandler_t
291 crm_signal_handler(int sig, sighandler_t dispatch)
292 {
293 sigset_t mask;
294 struct sigaction sa;
295 struct sigaction old;
296
297 if (sigemptyset(&mask) < 0) {
298 pcmk__err("Could not %sset handler for signal %d: %s",
299 ((dispatch == NULL)? "un" : ""), sig, strerror(errno));
300 return SIG_ERR;
301 }
302
303 memset(&sa, 0, sizeof(struct sigaction));
304 sa.sa_handler = dispatch;
305 sa.sa_flags = SA_RESTART;
306 sa.sa_mask = mask;
307
308 if (sigaction(sig, &sa, &old) < 0) {
309 pcmk__err("Could not %sset handler for signal %d: %s",
310 ((dispatch == NULL)? "un" : ""), sig, strerror(errno));
311 return SIG_ERR;
312 }
313 return old.sa_handler;
314 }
315
316 static void
317 mainloop_destroy_signal_entry(int sig)
318 {
319 crm_signal_t *tmp = crm_signals[sig];
320
321 if (tmp != NULL) {
322 crm_signals[sig] = NULL;
323 pcmk__trace("Unregistering mainloop handler for signal %d", sig);
324 mainloop_destroy_trigger((crm_trigger_t *) tmp);
325 }
326 }
327
328 /*!
329 * \internal
330 * \brief Add a signal handler to a mainloop
331 *
332 * \param[in] sig Signal number to handle
333 * \param[in] dispatch Signal handler function (\c NULL to ignore the signal)
334 *
335 * \note The true signal handler merely sets a mainloop trigger to call this
336 * dispatch function via the mainloop. Therefore, the dispatch function
337 * does not need to be async-safe.
338 */
339 gboolean
340 mainloop_add_signal(int sig, void (*dispatch) (int sig))
341 {
342 GSource *source = NULL;
343 int priority = G_PRIORITY_HIGH - 1;
344
345 if (sig == SIGTERM) {
346 /* TERM is higher priority than other signals,
347 * signals are higher priority than other ipc.
348 * Yes, minus: smaller is "higher"
349 */
350 priority--;
351 }
352
353 if (sig >= NSIG || sig < 0) {
354 pcmk__err("Signal %d is out of range", sig);
355 return FALSE;
356
357 } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) {
358 pcmk__trace("Signal handler for %d is already installed", sig);
359 return TRUE;
360
361 } else if (crm_signals[sig] != NULL) {
362 pcmk__err("Different signal handler for %d is already installed", sig);
363 return FALSE;
364 }
365
366 pcmk__assert(sizeof(crm_signal_t) > sizeof(GSource));
367 source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t));
368
369 crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL);
370 pcmk__assert(crm_signals[sig] != NULL);
371
372 crm_signals[sig]->handler = dispatch;
373 crm_signals[sig]->signal = sig;
374
375 if (crm_signal_handler(sig, mainloop_signal_handler) == SIG_ERR) {
376 mainloop_destroy_signal_entry(sig);
377 return FALSE;
378 }
379
380 return TRUE;
381 }
382
383 gboolean
384 mainloop_destroy_signal(int sig)
385 {
386 if (sig >= NSIG || sig < 0) {
387 pcmk__err("Signal %d is out of range", sig);
388 return FALSE;
389
390 } else if (crm_signal_handler(sig, NULL) == SIG_ERR) {
391 // Error already logged
392 return FALSE;
393
394 } else if (crm_signals[sig] == NULL) {
395 return TRUE;
396 }
397 mainloop_destroy_signal_entry(sig);
398 return TRUE;
399 }
400
401 static qb_array_t *gio_map = NULL;
402
403 void
404 mainloop_cleanup(void)
405 {
406 g_clear_pointer(&gio_map, qb_array_free);
407
408 for (int sig = 0; sig < NSIG; ++sig) {
409 mainloop_destroy_signal_entry(sig);
410 }
411 }
412
413 /*
414 * libqb...
415 */
416 struct gio_to_qb_poll {
417 int32_t is_used;
418 unsigned int source;
419 int32_t events;
420 void *data;
421 qb_ipcs_dispatch_fn_t fn;
422 enum qb_loop_priority p;
423 };
424
425 static gboolean
426 gio_read_socket(GIOChannel * gio, GIOCondition condition, void *data)
427 {
428 struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
429 int fd = g_io_channel_unix_get_fd(gio);
430
431 pcmk__trace("%p.%d %d", data, fd, condition);
432
433 /* if this assert get's hit, then there is a race condition between
434 * when we destroy a fd and when mainloop actually gives it up */
435 pcmk__assert(adaptor->is_used > 0);
436
437 return (adaptor->fn(fd, condition, adaptor->data) == 0);
438 }
439
440 static void
441 gio_poll_destroy(void *data)
442 {
443 struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
444
445 adaptor->is_used--;
446 pcmk__assert(adaptor->is_used >= 0);
447
448 if (adaptor->is_used == 0) {
449 pcmk__trace("Marking adaptor %p unused", adaptor);
450 adaptor->source = 0;
451 }
452 }
453
454 /*!
455 * \internal
456 * \brief Convert libqb's poll priority into GLib's one
457 *
458 * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback)
459 *
460 * \return best matching GLib's priority
461 */
462 static int
463 conv_prio_libqb2glib(enum qb_loop_priority prio)
464 {
465 switch (prio) {
466 case QB_LOOP_LOW: return G_PRIORITY_LOW;
467 case QB_LOOP_HIGH: return G_PRIORITY_HIGH;
468 default: return G_PRIORITY_DEFAULT; // QB_LOOP_MED
469 }
470 }
471
472 /*!
473 * \internal
474 * \brief Convert libqb's poll priority to rate limiting spec
475 *
476 * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback)
477 *
478 * \return best matching rate limiting spec
479 * \note This is the inverse of libqb's qb_ipcs_request_rate_limit().
480 */
481 static enum qb_ipcs_rate_limit
482 conv_libqb_prio2ratelimit(enum qb_loop_priority prio)
483 {
484 switch (prio) {
485 case QB_LOOP_LOW: return QB_IPCS_RATE_SLOW;
486 case QB_LOOP_HIGH: return QB_IPCS_RATE_FAST;
487 default: return QB_IPCS_RATE_NORMAL; // QB_LOOP_MED
488 }
489 }
490
491 static int32_t
492 gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts,
493 void *data, qb_ipcs_dispatch_fn_t fn, int32_t add)
494 {
495 struct gio_to_qb_poll *adaptor;
496 GIOChannel *channel;
497 int32_t res = 0;
498
499 res = qb_array_index(gio_map, fd, (void **)&adaptor);
500 if (res < 0) {
501 pcmk__err("Array lookup failed for fd=%d: %d", fd, res);
502 return res;
503 }
504
505 pcmk__trace("Adding fd=%d to mainloop as adaptor %p", fd, adaptor);
506
507 if (add && adaptor->source) {
508 pcmk__err("Adaptor for descriptor %d is still in-use", fd);
509 return -EEXIST;
510 }
511 if (!add && !adaptor->is_used) {
512 pcmk__err("Adaptor for descriptor %d is not in-use", fd);
513 return -ENOENT;
514 }
515
516 /* channel is created with ref_count = 1 */
517 channel = g_io_channel_unix_new(fd);
518 if (!channel) {
519 pcmk__err("No memory left to add fd=%d", fd);
520 return -ENOMEM;
521 }
522
523 if (adaptor->source) {
524 g_source_remove(adaptor->source);
525 adaptor->source = 0;
526 }
527
528 /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */
529 evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR);
530
531 adaptor->fn = fn;
532 adaptor->events = evts;
533 adaptor->data = data;
534 adaptor->p = p;
535 adaptor->is_used++;
536 adaptor->source =
537 g_io_add_watch_full(channel, conv_prio_libqb2glib(p), evts,
538 gio_read_socket, adaptor, gio_poll_destroy);
539
540 /* Now that mainloop now holds a reference to channel,
541 * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
542 *
543 * This means that channel will be free'd by:
544 * g_main_context_dispatch()
545 * -> g_source_destroy_internal()
546 * -> g_source_callback_unref()
547 * shortly after gio_poll_destroy() completes
548 */
549 g_io_channel_unref(channel);
550
551 pcmk__trace("Added to mainloop with gsource id=%d", adaptor->source);
552 if (adaptor->source > 0) {
553 return 0;
554 }
555
556 return -EINVAL;
557 }
558
559 static int32_t
560 gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts,
561 void *data, qb_ipcs_dispatch_fn_t fn)
562 {
563 return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE);
564 }
565
566 static int32_t
567 gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts,
568 void *data, qb_ipcs_dispatch_fn_t fn)
569 {
570 return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE);
571 }
572
573 static int32_t
574 gio_poll_dispatch_del(int32_t fd)
575 {
576 struct gio_to_qb_poll *adaptor;
577
578 pcmk__trace("Looking for fd=%d", fd);
579 if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) {
580 if (adaptor->source) {
581 g_source_remove(adaptor->source);
582 adaptor->source = 0;
583 }
584 }
585 return 0;
586 }
587
588 struct qb_ipcs_poll_handlers gio_poll_funcs = {
589 .job_add = NULL,
590 .dispatch_add = gio_poll_dispatch_add,
591 .dispatch_mod = gio_poll_dispatch_mod,
592 .dispatch_del = gio_poll_dispatch_del,
593 };
594
595 static enum qb_ipc_type
596 pick_ipc_type(enum qb_ipc_type requested)
597 {
598 const char *env = pcmk__env_option(PCMK__ENV_IPC_TYPE);
599
600 if (env && strcmp("shared-mem", env) == 0) {
601 return QB_IPC_SHM;
602 } else if (env && strcmp("socket", env) == 0) {
603 return QB_IPC_SOCKET;
604 } else if (env && strcmp("posix", env) == 0) {
605 return QB_IPC_POSIX_MQ;
606 } else if (env && strcmp("sysv", env) == 0) {
607 return QB_IPC_SYSV_MQ;
608 } else if (requested == QB_IPC_NATIVE) {
609 /* We prefer shared memory because the server never blocks on
610 * send. If part of a message fits into the socket, libqb
611 * needs to block until the remainder can be sent also.
612 * Otherwise the client will wait forever for the remaining
613 * bytes.
614 */
615 return QB_IPC_SHM;
616 }
617 return requested;
618 }
619
620 qb_ipcs_service_t *
621 mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
622 struct qb_ipcs_service_handlers *callbacks)
623 {
624 return mainloop_add_ipc_server_with_prio(name, type, callbacks, QB_LOOP_MED);
625 }
626
627 qb_ipcs_service_t *
628 mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type,
629 struct qb_ipcs_service_handlers *callbacks,
630 enum qb_loop_priority prio)
631 {
632 int rc = 0;
633 qb_ipcs_service_t *server = NULL;
634
635 if (gio_map == NULL) {
636 gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1);
637 }
638
639 server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks);
640
641 if (server == NULL) {
642 pcmk__err("Could not create %s IPC server: %s (%d)", name,
643 pcmk_rc_str(errno), errno);
644 return NULL;
645 }
646
647 if (prio != QB_LOOP_MED) {
648 qb_ipcs_request_rate_limit(server, conv_libqb_prio2ratelimit(prio));
649 }
650
651 // Enforce a minimum IPC buffer size on all clients
652 qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size());
653 qb_ipcs_poll_handlers_set(server, &gio_poll_funcs);
654
655 rc = qb_ipcs_run(server);
656 if (rc < 0) {
657 pcmk__err("Could not start %s IPC server: %s (%d)", name,
658 pcmk_strerror(rc), rc);
659 return NULL; // qb_ipcs_run() destroys server on failure
660 }
661
662 return server;
663 }
664
665 void
666 mainloop_del_ipc_server(qb_ipcs_service_t * server)
667 {
668 if (server) {
669 qb_ipcs_destroy(server);
670 }
671 }
672
673 struct mainloop_io_s {
674 char *name;
675 void *userdata;
676
677 int fd;
678 unsigned int source;
679 crm_ipc_t *ipc;
680 GIOChannel *channel;
681
682 int (*dispatch_fn_ipc)(const char *buffer, ssize_t length, void *userdata);
683 int (*dispatch_fn_io)(void *userdata);
684 void (*destroy_fn)(void *userdata);
685 };
686
687 /*!
688 * \internal
689 * \brief I/O watch callback function (GIOFunc)
690 *
691 * \param[in] gio I/O channel being watched
692 * \param[in] condition I/O condition satisfied
693 * \param[in] data User data passed when source was created
694 *
695 * \return G_SOURCE_REMOVE to remove source, G_SOURCE_CONTINUE to keep it
696 */
697 static gboolean
698 mainloop_gio_callback(GIOChannel *gio, GIOCondition condition, void *data)
699 {
700 gboolean rc = G_SOURCE_CONTINUE;
701 mainloop_io_t *client = data;
702
703 pcmk__assert(client->fd == g_io_channel_unix_get_fd(gio));
704
705 if (condition & G_IO_IN) {
706 if (client->ipc) {
707 long read_rc = 0L;
708 int max = 10;
709
710 do {
711 read_rc = crm_ipc_read(client->ipc);
712 if (read_rc <= 0) {
713 pcmk__trace("Could not read IPC message from %s: %s (%ld)",
714 client->name, pcmk_strerror(read_rc), read_rc);
715
716 if (read_rc == -EAGAIN) {
717 continue;
718 }
719
720 } else if (client->dispatch_fn_ipc) {
721 const char *buffer = crm_ipc_buffer(client->ipc);
722
723 pcmk__trace("New %ld-byte IPC message from %s after I/O "
724 "condition %d",
725 read_rc, client->name, (int) condition);
726 if (client->dispatch_fn_ipc(buffer, read_rc, client->userdata) < 0) {
727 pcmk__trace("Connection to %s no longer required",
728 client->name);
729 rc = G_SOURCE_REMOVE;
730 }
731 }
732
733 pcmk__ipc_free_client_buffer(client->ipc);
734
735 } while ((rc == G_SOURCE_CONTINUE) && (--max > 0)
736 && ((read_rc > 0) || (read_rc == -EAGAIN)));
737
738 } else {
739 pcmk__trace("New I/O event for %s after I/O condition %d",
740 client->name, (int) condition);
741 if (client->dispatch_fn_io) {
742 if (client->dispatch_fn_io(client->userdata) < 0) {
743 pcmk__trace("Connection to %s no longer required",
744 client->name);
745 rc = G_SOURCE_REMOVE;
746 }
747 }
748 }
749 }
750
751 if (client->ipc && !crm_ipc_connected(client->ipc)) {
752 pcmk__err("Connection to %s closed " QB_XS " client=%p condition=%d",
753 client->name, client, condition);
754 rc = G_SOURCE_REMOVE;
755
756 } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) {
757 pcmk__trace("The connection %s[%p] has been closed (I/O condition=%d)",
758 client->name, client, condition);
759 rc = G_SOURCE_REMOVE;
760
761 } else if ((condition & G_IO_IN) == 0) {
762 /*
763 #define GLIB_SYSDEF_POLLIN =1
764 #define GLIB_SYSDEF_POLLPRI =2
765 #define GLIB_SYSDEF_POLLOUT =4
766 #define GLIB_SYSDEF_POLLERR =8
767 #define GLIB_SYSDEF_POLLHUP =16
768 #define GLIB_SYSDEF_POLLNVAL =32
769
770 typedef enum
771 {
772 G_IO_IN GLIB_SYSDEF_POLLIN,
773 G_IO_OUT GLIB_SYSDEF_POLLOUT,
774 G_IO_PRI GLIB_SYSDEF_POLLPRI,
775 G_IO_ERR GLIB_SYSDEF_POLLERR,
776 G_IO_HUP GLIB_SYSDEF_POLLHUP,
777 G_IO_NVAL GLIB_SYSDEF_POLLNVAL
778 } GIOCondition;
779
780 A bitwise combination representing a condition to watch for on an event source.
781
782 G_IO_IN There is data to read.
783 G_IO_OUT Data can be written (without blocking).
784 G_IO_PRI There is urgent data to read.
785 G_IO_ERR Error condition.
786 G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets).
787 G_IO_NVAL Invalid request. The file descriptor is not open.
788 */
789 pcmk__err("Strange condition: %d", condition);
790 }
791
792 /* G_SOURCE_REMOVE results in mainloop_gio_destroy() being called
793 * just before the source is removed from mainloop
794 */
795 return rc;
796 }
797
798 static void
|
(3) Event deallocator: |
Deallocator for "struct mainloop_io_s". |
| Also see events: |
[allocation][allocation] |
799 mainloop_gio_destroy(void *c)
800 {
801 mainloop_io_t *client = c;
802 char *c_name = strdup(client->name);
803
804 /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c
805 * client->channel will still have ref_count > 0... should be == 1
806 */
807 pcmk__trace("Destroying client %s[%p]", c_name, c);
808
809 if (client->ipc) {
810 crm_ipc_close(client->ipc);
811 }
812
813 if (client->destroy_fn) {
814 void (*destroy_fn)(void *userdata) = client->destroy_fn;
815
816 client->destroy_fn = NULL;
817 destroy_fn(client->userdata);
818 }
819
820 if (client->ipc) {
821 crm_ipc_t *ipc = client->ipc;
822
823 client->ipc = NULL;
824 crm_ipc_destroy(ipc);
825 }
826
827 pcmk__trace("Destroyed client %s[%p]", c_name, c);
828
829 free(client->name);
830 free(client);
831
832 free(c_name);
833 }
834
835 /*!
836 * \brief Connect to IPC and add it as a main loop source
837 *
838 * \param[in,out] ipc IPC connection to add
839 * \param[in] priority Event source priority to use for connection
840 * \param[in] userdata Data to register with callbacks
841 * \param[in] callbacks Dispatch and destroy callbacks for connection
842 * \param[out] source Newly allocated event source
843 *
844 * \return Standard Pacemaker return code
845 *
846 * \note On failure, the caller is still responsible for ipc. On success, the
847 * caller should call mainloop_del_ipc_client() when source is no longer
848 * needed, which will lead to the disconnection of the IPC later in the
849 * main loop if it is connected. However the IPC disconnects,
850 * mainloop_gio_destroy() will free ipc and source after calling the
851 * destroy callback.
852 */
853 int
854 pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata,
855 const struct ipc_client_callbacks *callbacks,
856 mainloop_io_t **source)
857 {
858 int rc = pcmk_rc_ok;
859 int fd = -1;
860 const char *ipc_name = NULL;
861
862 CRM_CHECK((ipc != NULL) && (callbacks != NULL), return EINVAL);
863
864 ipc_name = pcmk__s(crm_ipc_name(ipc), "Pacemaker");
865 rc = pcmk__connect_generic_ipc(ipc);
866 if (rc != pcmk_rc_ok) {
867 pcmk__debug("Connection to %s failed: %s", ipc_name, pcmk_rc_str(rc));
868 return rc;
869 }
870
871 rc = pcmk__ipc_fd(ipc, &fd);
872 if (rc != pcmk_rc_ok) {
873 pcmk__debug("Could not obtain file descriptor for %s IPC: %s", ipc_name,
874 pcmk_rc_str(rc));
875 crm_ipc_close(ipc);
876 return rc;
877 }
878
879 *source = mainloop_add_fd(ipc_name, priority, fd, userdata, NULL);
880 if (*source == NULL) {
881 rc = errno;
882 crm_ipc_close(ipc);
883 return rc;
884 }
885
886 (*source)->ipc = ipc;
887 (*source)->destroy_fn = callbacks->destroy;
888 (*source)->dispatch_fn_ipc = callbacks->dispatch;
889 return pcmk_rc_ok;
890 }
891
892 /*!
893 * \brief Get period for mainloop timer
894 *
895 * \param[in] timer Timer
896 *
897 * \return Period in ms
898 */
899 unsigned int
900 pcmk__mainloop_timer_get_period(const mainloop_timer_t *timer)
901 {
902 if (timer) {
903 return timer->period_ms;
904 }
905 return 0;
906 }
907
908 mainloop_io_t *
909 mainloop_add_ipc_client(const char *name, int priority, size_t max_size,
910 void *userdata, struct ipc_client_callbacks *callbacks)
911 {
912 crm_ipc_t *ipc = crm_ipc_new(name, 0);
913 mainloop_io_t *source = NULL;
914 int rc = pcmk__add_mainloop_ipc(ipc, priority, userdata, callbacks,
915 &source);
916
917 if (rc != pcmk_rc_ok) {
918 if (crm_log_level == PCMK__LOG_STDOUT) {
919 fprintf(stderr, "Connection to %s failed: %s",
920 name, pcmk_rc_str(rc));
921 }
922 crm_ipc_destroy(ipc);
923 if (rc > 0) {
924 errno = rc;
925 } else {
926 errno = ENOTCONN;
927 }
928 return NULL;
929 }
930 return source;
931 }
932
933 void
934 mainloop_del_ipc_client(mainloop_io_t * client)
935 {
936 mainloop_del_fd(client);
937 }
938
939 crm_ipc_t *
940 mainloop_get_ipc_client(mainloop_io_t * client)
941 {
942 if (client) {
943 return client->ipc;
944 }
945 return NULL;
946 }
947
948 mainloop_io_t *
949 mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
950 struct mainloop_fd_callbacks * callbacks)
951 {
952 mainloop_io_t *client = NULL;
953
954 if (fd >= 0) {
955 client = calloc(1, sizeof(mainloop_io_t));
956 if (client == NULL) {
957 return NULL;
958 }
959 client->name = strdup(name);
960 client->userdata = userdata;
961
962 if (callbacks) {
963 client->destroy_fn = callbacks->destroy;
964 client->dispatch_fn_io = callbacks->dispatch;
965 }
966
967 client->fd = fd;
|
CID (unavailable; MK=b13c37cce9c4cfa0c98303204ab57333) (#1 of 1): Resource not released (INCOMPLETE_DEALLOCATOR): |
|
(1) Event allocation: |
Memory is allocated. |
|
(2) Event allocation: |
The field "client->channel" is allocated, but not released in the identified deallocator. |
| Also see events: |
[deallocator] |
968 client->channel = g_io_channel_unix_new(fd);
969 client->source =
970 g_io_add_watch_full(client->channel, priority,
971 (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback,
972 client, mainloop_gio_destroy);
973
974 /* Now that mainloop now holds a reference to channel,
975 * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
976 *
977 * This means that channel will be free'd by:
978 * g_main_context_dispatch() or g_source_remove()
979 * -> g_source_destroy_internal()
980 * -> g_source_callback_unref()
981 * shortly after mainloop_gio_destroy() completes
982 */
983 g_io_channel_unref(client->channel);
984 pcmk__trace("Added connection %d for %s[%p].%d", client->source,
985 client->name, client, fd);
986 } else {
987 errno = EINVAL;
988 }
989
990 return client;
991 }
992
993 void
994 mainloop_del_fd(mainloop_io_t *client)
995 {
996 if ((client == NULL) || (client->source == 0)) {
997 return;
998 }
999
1000 pcmk__trace("Removing client %s[%p]", client->name, client);
1001
1002 // mainloop_gio_destroy() gets called during source removal
1003 g_source_remove(client->source);
1004 }
1005
1006 static GList *child_list = NULL;
1007
1008 pid_t
1009 mainloop_child_pid(mainloop_child_t * child)
1010 {
1011 return child->pid;
1012 }
1013
1014 const char *
1015 mainloop_child_name(mainloop_child_t * child)
1016 {
1017 return child->desc;
1018 }
1019
1020 int
1021 mainloop_child_timeout(mainloop_child_t * child)
1022 {
1023 return child->timeout;
1024 }
1025
1026 void *
1027 mainloop_child_userdata(mainloop_child_t * child)
1028 {
1029 return child->privatedata;
1030 }
1031
1032 void
1033 mainloop_clear_child_userdata(mainloop_child_t * child)
1034 {
1035 child->privatedata = NULL;
1036 }
1037
1038 /* good function name */
1039 static void
1040 child_free(mainloop_child_t *child)
1041 {
1042 if (child->timerid != 0) {
1043 pcmk__trace("Removing timer %d", child->timerid);
1044 g_source_remove(child->timerid);
1045 child->timerid = 0;
1046 }
1047 free(child->desc);
1048 free(child);
1049 }
1050
1051 /* terrible function name */
1052 static int
1053 child_kill_helper(mainloop_child_t *child)
1054 {
1055 int rc;
1056 if (child->flags & mainloop_leave_pid_group) {
1057 pcmk__debug("Killing PID %lld only. Leaving its process group intact.",
1058 (long long) child->pid);
1059 rc = kill(child->pid, SIGKILL);
1060 } else {
1061 pcmk__debug("Killing PID %lld's entire process group",
1062 (long long) child->pid);
1063 rc = kill(-child->pid, SIGKILL);
1064 }
1065
1066 if (rc < 0) {
1067 if (errno != ESRCH) {
1068 pcmk__err("kill(%d, KILL) failed: %s", child->pid, strerror(errno));
1069 }
1070 return -errno;
1071 }
1072 return 0;
1073 }
1074
1075 static gboolean
1076 child_timeout_callback(void *p)
1077 {
1078 mainloop_child_t *child = p;
1079 int rc = 0;
1080
1081 child->timerid = 0;
1082 if (child->timeout) {
1083 pcmk__warn("%s process (PID %lld) will not die!", child->desc,
1084 (long long) child->pid);
1085 return FALSE;
1086 }
1087
1088 rc = child_kill_helper(child);
1089 if (rc == -ESRCH) {
1090 /* Nothing left to do. pid doesn't exist */
1091 return FALSE;
1092 }
1093
1094 child->timeout = TRUE;
1095 pcmk__debug("%s process (PID %lld) timed out", child->desc,
1096 (long long) child->pid);
1097
1098 child->timerid = pcmk__create_timer(5000, child_timeout_callback, child);
1099 return FALSE;
1100 }
1101
1102 static bool
1103 child_waitpid(mainloop_child_t *child, int flags)
1104 {
1105 int rc = 0;
1106 int core = 0;
1107 int signo = 0;
1108 int status = 0;
1109 int exitcode = 0;
1110 bool callback_needed = true;
1111
1112 rc = waitpid(child->pid, &status, flags);
1113 if (rc == 0) { // WNOHANG in flags, and child status is not available
1114 pcmk__trace("Child process %lld (%s) still active",
1115 (long long) child->pid, child->desc);
1116 callback_needed = false;
1117
1118 } else if (rc != child->pid) {
1119 /* According to POSIX, possible conditions:
1120 * - child->pid was non-positive (process group or any child),
1121 * and rc is specific child
1122 * - errno ECHILD (pid does not exist or is not child)
1123 * - errno EINVAL (invalid flags)
1124 * - errno EINTR (caller interrupted by signal)
1125 *
1126 * @TODO Handle these cases more specifically.
1127 */
1128 signo = SIGCHLD;
1129 exitcode = 1;
1130 pcmk__notice("Wait for child process %d (%s) interrupted: %s",
1131 child->pid, child->desc, pcmk_rc_str(errno));
1132
1133 } else if (WIFEXITED(status)) {
1134 exitcode = WEXITSTATUS(status);
1135 pcmk__trace("Child process %lld (%s) exited with status %d",
1136 (long long) child->pid, child->desc, exitcode);
1137
1138 } else if (WIFSIGNALED(status)) {
1139 signo = WTERMSIG(status);
1140 pcmk__trace("Child process %lld (%s) exited with signal %d (%s)",
1141 (long long) child->pid, child->desc, signo,
1142 strsignal(signo));
1143
1144 #ifdef WCOREDUMP // AIX, SunOS, maybe others
1145 } else if (WCOREDUMP(status)) {
1146 core = 1;
1147 pcmk__err("Child process %d (%s) dumped core", child->pid, child->desc);
1148 #endif
1149
1150 } else { // flags must contain WUNTRACED and/or WCONTINUED to reach this
1151 pcmk__trace("Child process %lld (%s) stopped or continued",
1152 (long long) child->pid, child->desc);
1153 callback_needed = false;
1154 }
1155
1156 if (callback_needed && child->exit_fn) {
1157 child->exit_fn(child, core, signo, exitcode);
1158 }
1159 return callback_needed;
1160 }
1161
1162 static void
1163 child_death_dispatch(int signal)
1164 {
1165 for (GList *iter = child_list; iter; ) {
1166 GList *saved = iter;
1167 mainloop_child_t *child = iter->data;
1168
1169 iter = iter->next;
1170 if (child_waitpid(child, WNOHANG)) {
1171 pcmk__trace("Removing completed process %lld from child list",
1172 (long long) child->pid);
1173 child_list = g_list_remove_link(child_list, saved);
1174 g_list_free(saved);
1175 child_free(child);
1176 }
1177 }
1178 }
1179
1180 static gboolean
1181 child_signal_init(void *p)
1182 {
1183 pcmk__trace("Installed SIGCHLD handler");
1184 /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */
1185 mainloop_add_signal(SIGCHLD, child_death_dispatch);
1186
1187 /* In case they terminated before the signal handler was installed */
1188 child_death_dispatch(SIGCHLD);
1189 return FALSE;
1190 }
1191
1192 gboolean
1193 mainloop_child_kill(pid_t pid)
1194 {
1195 GList *iter;
1196 mainloop_child_t *child = NULL;
1197 mainloop_child_t *match = NULL;
1198 /* It is impossible to block SIGKILL, this allows us to
1199 * call waitpid without WNOHANG flag.*/
1200 int waitflags = 0, rc = 0;
1201
1202 for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) {
1203 child = iter->data;
1204 if (pid == child->pid) {
1205 match = child;
1206 }
1207 }
1208
1209 if (match == NULL) {
1210 return FALSE;
1211 }
1212
1213 rc = child_kill_helper(match);
1214 if(rc == -ESRCH) {
1215 /* It's gone, but hasn't shown up in waitpid() yet. Wait until we get
1216 * SIGCHLD and let handler clean it up as normal (so we get the correct
1217 * return code/status). The blocking alternative would be to call
1218 * child_waitpid(match, 0).
1219 */
1220 pcmk__trace("Waiting for signal that child process %lld completed",
1221 (long long) match->pid);
1222 return TRUE;
1223
1224 } else if(rc != 0) {
1225 /* If KILL for some other reason set the WNOHANG flag since we
1226 * can't be certain what happened.
1227 */
1228 waitflags = WNOHANG;
1229 }
1230
1231 if (!child_waitpid(match, waitflags)) {
1232 /* not much we can do if this occurs */
1233 return FALSE;
1234 }
1235
1236 child_list = g_list_remove(child_list, match);
1237 child_free(match);
1238 return TRUE;
1239 }
1240
1241 /* Create/Log a new tracked process
1242 * To track a process group, use -pid
1243 *
1244 * @TODO Using a non-positive pid (i.e. any child, or process group) would
1245 * likely not be useful since we will free the child after the first
1246 * completed process.
1247 */
1248 void
1249 mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc,
1250 void *privatedata,
1251 enum mainloop_child_flags flags,
1252 pcmk__mainloop_child_exit_fn_t exit_fn)
1253 {
1254 static bool need_init = TRUE;
1255 mainloop_child_t *child = pcmk__assert_alloc(1, sizeof(mainloop_child_t));
1256
1257 child->pid = pid;
1258 child->timerid = 0;
1259 child->timeout = FALSE;
1260 child->privatedata = privatedata;
1261 child->exit_fn = exit_fn;
1262 child->flags = flags;
1263 child->desc = pcmk__str_copy(desc);
1264
1265 if (timeout) {
1266 child->timerid = pcmk__create_timer(timeout, child_timeout_callback, child);
1267 }
1268
1269 child_list = g_list_append(child_list, child);
1270
1271 if(need_init) {
1272 need_init = FALSE;
1273 /* SIGCHLD processing has to be invoked from mainloop.
1274 * We do not want it to be possible to both add a child pid
1275 * to mainloop, and have the pid's exit callback invoked within
1276 * the same callstack. */
1277 pcmk__create_timer(1, child_signal_init, NULL);
1278 }
1279 }
1280
1281 void
1282 mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata,
1283 pcmk__mainloop_child_exit_fn_t exit_fn)
1284 {
1285 mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, exit_fn);
1286 }
1287
1288 static gboolean
1289 mainloop_timer_cb(void *user_data)
1290 {
1291 int id = 0;
1292 bool repeat = FALSE;
1293 struct mainloop_timer_s *t = user_data;
1294
1295 pcmk__assert(t != NULL);
1296
1297 id = t->id;
1298 t->id = 0; /* Ensure it's unset during callbacks so that
1299 * mainloop_timer_running() works as expected
1300 */
1301
1302 if(t->cb) {
1303 pcmk__trace("Invoking callbacks for timer %s", t->name);
1304 repeat = t->repeat;
1305 if(t->cb(t->userdata) == FALSE) {
1306 pcmk__trace("Timer %s complete", t->name);
1307 repeat = FALSE;
1308 }
1309 }
1310
1311 if(repeat) {
1312 /* Restore if repeating */
1313 t->id = id;
1314 }
1315
1316 return repeat;
1317 }
1318
1319 bool
1320 mainloop_timer_running(mainloop_timer_t *t)
1321 {
1322 if(t && t->id != 0) {
1323 return TRUE;
1324 }
1325 return FALSE;
1326 }
1327
1328 void
1329 mainloop_timer_start(mainloop_timer_t *t)
1330 {
1331 mainloop_timer_stop(t);
1332 if(t && t->period_ms > 0) {
1333 pcmk__trace("Starting timer %s", t->name);
1334 t->id = pcmk__create_timer(t->period_ms, mainloop_timer_cb, t);
1335 }
1336 }
1337
1338 void
1339 mainloop_timer_stop(mainloop_timer_t *t)
1340 {
1341 if(t && t->id != 0) {
1342 pcmk__trace("Stopping timer %s", t->name);
1343 g_source_remove(t->id);
1344 t->id = 0;
1345 }
1346 }
1347
1348 unsigned int
1349 mainloop_timer_set_period(mainloop_timer_t *t, unsigned int period_ms)
1350 {
1351 unsigned int last = 0;
1352
1353 if(t) {
1354 last = t->period_ms;
1355 t->period_ms = period_ms;
1356 }
1357
1358 if(t && t->id != 0 && last != t->period_ms) {
1359 mainloop_timer_start(t);
1360 }
1361 return last;
1362 }
1363
1364 mainloop_timer_t *
1365 mainloop_timer_add(const char *name, unsigned int period_ms, bool repeat,
1366 GSourceFunc cb, void *userdata)
1367 {
1368 mainloop_timer_t *t = pcmk__assert_alloc(1, sizeof(mainloop_timer_t));
1369
1370 if (name != NULL) {
1371 t->name = pcmk__assert_asprintf("%s-%u-%d", name, period_ms, repeat);
1372 } else {
1373 t->name = pcmk__assert_asprintf("%p-%u-%d", t, period_ms, repeat);
1374 }
1375 t->id = 0;
1376 t->period_ms = period_ms;
1377 t->repeat = repeat;
1378 t->cb = cb;
1379 t->userdata = userdata;
1380 pcmk__trace("Created timer %s with %p %p", t->name, userdata, t->userdata);
1381 return t;
1382 }
1383
1384 void
1385 mainloop_timer_del(mainloop_timer_t *t)
1386 {
1387 if(t) {
1388 pcmk__trace("Destroying timer %s", t->name);
1389 mainloop_timer_stop(t);
1390 free(t->name);
1391 free(t);
1392 }
1393 }
1394
1395 /*
1396 * Helpers to make sure certain events aren't lost at shutdown
1397 */
1398
1399 static gboolean
1400 drain_timeout_cb(void *user_data)
1401 {
1402 bool *timeout_popped = (bool*) user_data;
1403
1404 *timeout_popped = TRUE;
1405 return FALSE;
1406 }
1407
1408 /*!
1409 * \brief Drain some remaining main loop events then quit it
1410 *
1411 * \param[in,out] mloop Main loop to drain and quit
1412 * \param[in] n Drain up to this many pending events
1413 */
1414 void
1415 pcmk_quit_main_loop(GMainLoop *mloop, unsigned int n)
1416 {
1417 if ((mloop != NULL) && g_main_loop_is_running(mloop)) {
1418 GMainContext *ctx = g_main_loop_get_context(mloop);
1419
1420 /* Drain up to n events in case some memory clean-up is pending
1421 * (helpful to reduce noise in valgrind output).
1422 */
1423 for (int i = 0; (i < n) && g_main_context_pending(ctx); ++i) {
1424 g_main_context_dispatch(ctx);
1425 }
1426 g_main_loop_quit(mloop);
1427 }
1428 }
1429
1430 /*!
1431 * \brief Process main loop events while a certain condition is met
1432 *
1433 * \param[in,out] mloop Main loop to process
1434 * \param[in] timer_ms Don't process longer than this amount of time
1435 * \param[in] check Function that returns true if events should be
1436 * processed
1437 *
1438 * \note This function is intended to be called at shutdown if certain important
1439 * events should not be missed. The caller would likely quit the main loop
1440 * or exit after calling this function. The check() function will be
1441 * passed the remaining timeout in milliseconds.
1442 */
1443 void
1444 pcmk_drain_main_loop(GMainLoop *mloop, unsigned int timer_ms,
1445 bool (*check)(unsigned int))
1446 {
1447 bool timeout_popped = FALSE;
1448 unsigned int timer = 0;
1449 GMainContext *ctx = NULL;
1450
1451 CRM_CHECK(mloop && check, return);
1452
1453 ctx = g_main_loop_get_context(mloop);
1454 if (ctx) {
1455 time_t start_time = time(NULL);
1456
1457 timer = pcmk__create_timer(timer_ms, drain_timeout_cb, &timeout_popped);
1458 while (!timeout_popped
1459 && check(timer_ms - (time(NULL) - start_time) * 1000)) {
1460 g_main_context_iteration(ctx, TRUE);
1461 }
1462 }
1463 if (!timeout_popped && (timer > 0)) {
1464 g_source_remove(timer);
1465 }
1466 }
1467