1 /*
2 * Copyright 2009-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdbool.h> // bool
14 #include <stdio.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #include <sys/utsname.h>
20
21 #include <stdlib.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <ctype.h>
25
26 #include <libxml/tree.h> // xmlNode
27 #include <libxml/xpath.h> // xmlXPathObject, etc.
28
29 #include <crm/crm.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/common/mainloop.h>
33
34 #include <crm/stonith-ng.h>
35 #include <crm/fencing/internal.h>
36 #include <crm/common/xml.h>
37
38 #include <pacemaker-fenced.h>
39
40 static GHashTable *device_table = NULL;
41
42 GHashTable *topology = NULL;
43 static GList *cmd_list = NULL;
44
45 static GHashTable *fenced_handlers = NULL;
46
47 struct device_search_s {
48 /* target of fence action */
49 char *host;
50 /* requested fence action */
51 char *action;
52 /* timeout to use if a device is queried dynamically for possible targets */
53 // @TODO This name is misleading now, it's the value of fencing-timeout
54 int per_device_timeout;
55 /* number of registered fencing devices at time of request */
56 int replies_needed;
57 /* number of device replies received so far */
58 int replies_received;
59 /* whether the target is eligible to perform requested action (or off) */
60 bool allow_self;
61
62 /* private data to pass to search callback function */
63 void *user_data;
64 /* function to call when all replies have been received */
65 void (*callback) (GList * devices, void *user_data);
66 /* devices capable of performing requested action (or off if remapping) */
67 GList *capable;
68 /* Whether to perform searches that support the action */
69 uint32_t support_action_only;
70 };
71
72 static gboolean stonith_device_dispatch(gpointer user_data);
73 static void st_child_done(int pid, const pcmk__action_result_t *result,
74 void *user_data);
75
76 static void search_devices_record_result(struct device_search_s *search, const char *device,
77 gboolean can_fence);
78
79 static int get_agent_metadata(const char *agent, xmlNode **metadata);
80 static void read_action_metadata(fenced_device_t *device);
81 static enum fenced_target_by unpack_level_kind(const xmlNode *level);
82
83 typedef struct {
84 int id;
85 uint32_t options;
86 int default_timeout; /* seconds */
87 int timeout; /* seconds */
88
89 int start_delay; // seconds (-1 means disable static/random fencing delays)
90 int delay_id;
91
92 char *op;
93 char *origin;
94 char *client;
95 char *client_name;
96 char *remote_op_id;
97
98 char *target;
99 char *action;
100 char *device;
101
102 //! Head of device list (used only for freeing list with command object)
103 GList *device_list;
104
105 //! Next item to process in \c device_list
106 GList *next_device_iter;
107
108 void *internal_user_data;
109 void (*done_cb) (int pid, const pcmk__action_result_t *result,
110 void *user_data);
111
112 fenced_device_t *active_on;
113 fenced_device_t *activating_on;
114 } async_command_t;
115
116 static xmlNode *construct_async_reply(const async_command_t *cmd,
117 const pcmk__action_result_t *result);
118
119 /*!
120 * \internal
121 * \brief Set a bad fencer API request error in a result object
122 *
123 * \param[out] result Result to set
124 */
125 static inline void
126 set_bad_request_result(pcmk__action_result_t *result)
127 {
128 pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
129 "Fencer API request missing required information (bug?)");
130 }
131
132 /*!
133 * \internal
134 * \brief Check whether the fencer's device table contains a watchdog device
135 *
136 * \retval \c true If the device table contains a watchdog device
137 * \retval \c false Otherwise
138 */
139 bool
140 fenced_has_watchdog_device(void)
141 {
142 return (device_table != NULL)
143 && (g_hash_table_lookup(device_table, STONITH_WATCHDOG_ID) != NULL);
144 }
145
146 /*!
147 * \internal
148 * \brief Call a function for each known fence device
149 *
150 * \param[in] fn Function to call for each device
151 * \param[in,out] user_data User data
152 */
153 void
154 fenced_foreach_device(GHFunc fn, gpointer user_data)
155 {
156 pcmk__assert(fn != NULL);
157
158 if (device_table == NULL) {
159 return;
160 }
161
162 g_hash_table_foreach(device_table, fn, user_data);
163 }
164
165 /*!
166 * \internal
167 * \brief Remove each known fence device matching a given predicate
168 *
169 * \param[in] fn Function that returns \c TRUE to remove a fence device or
170 * \c FALSE to keep it
171 */
172 void
173 fenced_foreach_device_remove(GHRFunc fn)
174 {
175 pcmk__assert(fn != NULL);
176
177 if (device_table == NULL) {
178 return;
179 }
180
181 g_hash_table_foreach_remove(device_table, fn, NULL);
182 }
183
184 static gboolean
185 is_action_required(const char *action, const fenced_device_t *device)
186 {
187 return (device != NULL)
188 && pcmk__is_set(device->flags, fenced_df_auto_unfence)
189 && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
190 }
191
192 static int
193 get_action_delay_max(const fenced_device_t *device, const char *action)
194 {
195 const char *value = NULL;
196 guint delay_max = 0U;
197
198 if (!pcmk__is_fencing_action(action)) {
199 return 0;
200 }
201
202 value = g_hash_table_lookup(device->params, PCMK_FENCING_DELAY_MAX);
203 if (value != NULL) {
204 pcmk_parse_interval_spec(value, &delay_max);
205 delay_max /= 1000;
206 }
207
208 return (int) delay_max;
209 }
210
211 /*!
212 * \internal
213 * \brief If a mapping matches the given target, return its port value
214 *
215 * \param[in] target Fencing target node
216 * \param[in] mapping Target-to-port mapping (delimited by a colon)
217 *
218 * \return The port from \p mapping if it matches \p target, or \c NULL
219 * if \p mapping is malformed or is not a match.
220 */
221 static gchar *
222 get_value_if_matching(const char *target, const char *mapping)
223 {
224 gchar **nvpair = NULL;
225 gchar *value = NULL;
226
227 if (pcmk__str_empty(mapping)) {
228 goto done;
229 }
230
231 nvpair = g_strsplit(mapping, ":", 2);
232
233 if ((g_strv_length(nvpair) != 2)
234 || pcmk__str_empty(nvpair[0]) || pcmk__str_empty(nvpair[1])) {
235
236 pcmk__err(PCMK_FENCING_DELAY_BASE ": Malformed mapping '%s'", mapping);
237 goto done;
238 }
239
240 if (!pcmk__str_eq(target, nvpair[0], pcmk__str_casei)) {
241 goto done;
242 }
243
244 // Take ownership so that we don't free nvpair[1] with nvpair
245 value = nvpair[1];
246 nvpair[1] = NULL;
247
248 pcmk__debug(PCMK_FENCING_DELAY_BASE " mapped to %s for %s", value, target);
249
250 done:
251 g_strfreev(nvpair);
252 return value;
253 }
254
255 /*!
256 * \internal
257 * \brief If a mapping exists from the target node to a port, return the port
258 *
259 * \param[in] target Fencing target node
260 * \param[in] values List of target-to-port mappings (delimited by semicolon,
261 * space, or tab characters), or a single interval spec
262 *
263 * \return Port to which \p target is mapped, or \c NULL if no such mapping
264 * exists
265 *
266 * \note The caller is responsible for freeing the return value using
267 * \c g_free().
268 */
269 static gchar *
270 get_value_for_target(const char *target, const char *values)
271 {
272 gchar *value = NULL;
273 gchar **mappings = NULL;
274
275 /* If there are no colons, don't try to parse as a list of mappings.
276 * The caller will try to parse the values string as an interval spec.
277 */
278 if (strchr(values, ':') == NULL) {
279 return NULL;
280 }
281
282 mappings = g_strsplit_set(values, "; \t", 0);
283
284 for (gchar **mapping = mappings; (*mapping != NULL) && (value == NULL);
285 mapping++) {
286
287 value = get_value_if_matching(target, *mapping);
288 }
289
290 g_strfreev(mappings);
291 return value;
292 }
293
294 /* @TODO Consolidate some of this with build_port_aliases(). But keep in
295 * mind that build_port_aliases()/pcmk__host_map supports either '=' or ':'
296 * as a mapping separator, while pcmk_delay_base supports only ':'.
297 */
298 static int
299 get_action_delay_base(const fenced_device_t *device, const char *action,
300 const char *target)
301 {
302 const char *param = NULL;
303 gchar *stripped = NULL;
304 gchar *delay_base_s = NULL;
305 guint delay_base = 0U;
306
307 if (!pcmk__is_fencing_action(action)) {
308 return 0;
309 }
310
311 param = g_hash_table_lookup(device->params, PCMK_FENCING_DELAY_BASE);
312 if (param == NULL) {
313 return 0;
314 }
315
316 stripped = g_strstrip(g_strdup(param));
317
318 if (target != NULL) {
319 delay_base_s = get_value_for_target(target, stripped);
320 }
321
322 if (delay_base_s == NULL) {
323 /* Either target is NULL or we didn't find a mapping for it. Try to
324 * parse the entire stripped value as an interval spec. Take ownership
325 * so that we don't free stripped twice.
326 *
327 * We can't tell based on which characters are present whether stripped
328 * was a list of mappings or an interval spec. An ISO 8601 interval may
329 * contain a colon, and a Pacemaker time-and-units string may contain
330 * whitespace.
331 */
332 delay_base_s = stripped;
333 stripped = NULL;
334 }
335
336 /* @COMPAT Should we accept only a simple time-and-units string, rather than
337 * an interval spec?
338 */
339 pcmk_parse_interval_spec(delay_base_s, &delay_base);
340 delay_base /= 1000;
341
342 g_free(stripped);
343 g_free(delay_base_s);
344 return (int) delay_base;
345 }
346
347 /*!
348 * \internal
349 * \brief Override STONITH timeout with pcmk_*_timeout if available
350 *
351 * \param[in] device STONITH device to use
352 * \param[in] action STONITH action name
353 * \param[in] default_timeout Timeout to use if device does not have
354 * a pcmk_*_timeout parameter for action
355 *
356 * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
357 * \note For consistency, it would be nice if reboot/off/on timeouts could be
358 * set the same way as start/stop/monitor timeouts, i.e. with an
359 * <operation> entry in the fencing resource configuration. However that
360 * is insufficient because fencing devices may be registered directly via
361 * the fencer's register_device() API instead of going through the CIB
362 * (e.g. stonith_admin uses it for its -R option, and the executor uses it
363 * to ensure a device is registered when a command is issued). As device
364 * properties, pcmk_*_timeout parameters can be grabbed by the fencer when
365 * the device is registered, whether by CIB change or API call.
366 */
367 static int
368 get_action_timeout(const fenced_device_t *device, const char *action,
369 int default_timeout)
370 {
371 char *timeout_param = NULL;
372 const char *value = NULL;
373 long long timeout_ms = 0;
374 int timeout_sec = 0;
375
376 if ((action == NULL) || (device == NULL) || (device->params == NULL)) {
377 return default_timeout;
378 }
379
380 /* If "reboot" was requested but the device does not support it,
381 * we will remap to "off", so check timeout for "off" instead
382 */
383 if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
384 && !pcmk__is_set(device->flags, fenced_df_supports_reboot)) {
385 pcmk__trace("%s doesn't support reboot, using timeout for off instead",
386 device->id);
387 action = PCMK_ACTION_OFF;
388 }
389
390 /* If the device config specified an action-specific timeout, use it */
391 timeout_param = pcmk__assert_asprintf("pcmk_%s_timeout", action);
392 value = g_hash_table_lookup(device->params, timeout_param);
393 free(timeout_param);
394
395 if (value == NULL) {
396 return default_timeout;
397 }
398
399 if ((pcmk__parse_ms(value, &timeout_ms) != pcmk_rc_ok)
400 || (timeout_ms < 0)) {
401 return default_timeout;
402 }
403
404 timeout_ms = QB_MIN(timeout_ms, UINT_MAX);
405 timeout_sec = pcmk__timeout_ms2s((guint) timeout_ms);
406
407 return QB_MIN(timeout_sec, INT_MAX);
408 }
409
410 /*!
411 * \internal
412 * \brief Get the currently executing device for a fencing operation
413 *
414 * \param[in] cmd Fencing operation to check
415 *
416 * \return Currently executing device for \p cmd if any, otherwise NULL
417 */
418 static fenced_device_t *
419 cmd_device(const async_command_t *cmd)
420 {
421 if ((cmd == NULL) || (cmd->device == NULL) || (device_table == NULL)) {
422 return NULL;
423 }
424 return g_hash_table_lookup(device_table, cmd->device);
425 }
426
427 /*!
428 * \internal
429 * \brief Return the configured reboot action for a given device
430 *
431 * \param[in] device_id Device ID
432 *
433 * \return Configured reboot action for \p device_id
434 */
435 const char *
436 fenced_device_reboot_action(const char *device_id)
437 {
438 fenced_device_t *device = NULL;
439 const char *action = NULL;
440
441 if ((device_table == NULL) || (device_id == NULL)) {
442 return PCMK_ACTION_REBOOT;
443 }
444
445 device = g_hash_table_lookup(device_table, device_id);
446
447 if ((device != NULL) && (device->params != NULL)) {
448 action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
449 }
450
451 return pcmk__s(action, PCMK_ACTION_REBOOT);
452 }
453
454 /*!
455 * \internal
456 * \brief Check whether a given device supports the "on" action
457 *
458 * \param[in] device_id Device ID
459 *
460 * \return true if \p device_id supports "on", otherwise false
461 */
462 bool
463 fenced_device_supports_on(const char *device_id)
464 {
465 fenced_device_t *device = NULL;
466
467 if ((device_table == NULL) || (device_id == NULL)) {
468 return false;
469 }
470
471 device = g_hash_table_lookup(device_table, device_id);
472
473 if (device != NULL) {
474 return pcmk__is_set(device->flags, fenced_df_supports_on);
475 }
476
477 return false;
478 }
479
480 static void
481 free_async_command(async_command_t * cmd)
482 {
483 if (cmd == NULL) {
484 return;
485 }
486
487 if (cmd->delay_id != 0) {
488 g_source_remove(cmd->delay_id);
489 }
490
491 cmd_list = g_list_remove(cmd_list, cmd);
492
493 g_list_free_full(cmd->device_list, free);
494 free(cmd->device);
495 free(cmd->action);
496 free(cmd->target);
497 free(cmd->remote_op_id);
498 free(cmd->client);
499 free(cmd->client_name);
500 free(cmd->origin);
501 free(cmd->op);
502 free(cmd);
503 }
504
505 /*!
506 * \internal
507 * \brief Create a new asynchronous fencing operation from request XML
508 *
509 * \param[in] msg Fencing request XML (from IPC or CPG)
510 *
511 * \return Newly allocated fencing operation on success, otherwise NULL
512 *
513 * \note This asserts on memory errors, so a NULL return indicates an
514 * unparseable message.
515 */
516 static async_command_t *
517 create_async_command(xmlNode *msg)
518 {
519 xmlNode *op = NULL;
520 async_command_t *cmd = NULL;
521 int rc = pcmk_rc_ok;
522
523 if (msg == NULL) {
524 return NULL;
525 }
526
527 op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
528 LOG_ERR);
529 if (op == NULL) {
530 return NULL;
531 }
532
533 cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
534
535 // All messages must include these
536 cmd->action = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ACTION);
537 cmd->op = pcmk__xe_get_copy(msg, PCMK__XA_ST_OP);
538 cmd->client = pcmk__xe_get_copy(msg, PCMK__XA_ST_CLIENTID);
539 if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
540 free_async_command(cmd);
541 return NULL;
542 }
543
544 pcmk__xe_get_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
545 pcmk__xe_get_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
546 pcmk__xe_get_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
547 cmd->timeout = cmd->default_timeout;
548
549 rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options),
550 st_opt_none);
551 if (rc != pcmk_rc_ok) {
552 pcmk__warn("Couldn't parse options from request: %s", pcmk_rc_str(rc));
553 }
554
555 cmd->origin = pcmk__xe_get_copy(msg, PCMK__XA_SRC);
556 cmd->remote_op_id = pcmk__xe_get_copy(msg, PCMK__XA_ST_REMOTE_OP);
557 cmd->client_name = pcmk__xe_get_copy(msg, PCMK__XA_ST_CLIENTNAME);
558 cmd->target = pcmk__xe_get_copy(op, PCMK__XA_ST_TARGET);
559 cmd->device = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ID);
560
561 cmd->done_cb = st_child_done;
562
563 // Track in global command list
564 cmd_list = g_list_append(cmd_list, cmd);
565
566 return cmd;
567 }
568
569 static int
570 get_action_limit(fenced_device_t *device)
571 {
572 const char *value = NULL;
573 int action_limit = 1;
574
575 value = g_hash_table_lookup(device->params, PCMK_FENCING_ACTION_LIMIT);
576 if ((value == NULL)
577 || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
578 || (action_limit == 0)) {
579 action_limit = 1;
580 }
581 return action_limit;
582 }
583
584 static int
585 get_active_cmds(fenced_device_t *device)
586 {
587 int counter = 0;
588 GList *gIter = NULL;
589 GList *gIterNext = NULL;
590
591 CRM_CHECK(device != NULL, return 0);
592
593 for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
594 async_command_t *cmd = gIter->data;
595
596 gIterNext = gIter->next;
597
598 if (cmd->active_on == device) {
599 counter++;
600 }
601 }
602
603 return counter;
604 }
605
606 static void
607 fork_cb(int pid, void *user_data)
608 {
609 async_command_t *cmd = (async_command_t *) user_data;
610 fenced_device_t *device = cmd->activating_on;
611
612 if (device == NULL) {
613 /* In case of a retry, we've done the move from activating_on to
614 * active_on already
615 */
616 device = cmd->active_on;
617 }
618
619 pcmk__assert(device != NULL);
620 pcmk__debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
621 cmd->action, pid,
622 ((cmd->target != NULL)? " targeting " : ""),
623 pcmk__s(cmd->target, ""), device->id, cmd->timeout);
624 cmd->active_on = device;
625 cmd->activating_on = NULL;
626 }
627
628 static int
629 get_agent_metadata_cb(gpointer data) {
630 fenced_device_t *device = data;
631 guint period_ms;
632 int rc = get_agent_metadata(device->agent, &device->agent_metadata);
633
634 if (rc == pcmk_rc_ok) {
635 if (device->agent_metadata != NULL) {
636 read_action_metadata(device);
637 device->default_host_arg =
638 stonith__default_host_arg(device->agent_metadata);
639 }
640
641 return G_SOURCE_REMOVE;
642 }
643
644 if (rc == EAGAIN) {
645 period_ms = pcmk__mainloop_timer_get_period(device->timer);
646 if (period_ms < 160 * 1000) {
647 mainloop_timer_set_period(device->timer, 2 * period_ms);
648 }
649
650 return G_SOURCE_CONTINUE;
651 }
652
653 return G_SOURCE_REMOVE;
654 }
655
656 /*!
657 * \internal
658 * \brief Call a command's action callback for an internal (not library) result
659 *
660 * \param[in,out] cmd Command to report result for
661 * \param[in] execution_status Execution status to use for result
662 * \param[in] exit_status Exit status to use for result
663 * \param[in] exit_reason Exit reason to use for result
664 */
665 static void
666 report_internal_result(async_command_t *cmd, int exit_status,
667 int execution_status, const char *exit_reason)
668 {
669 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
670
671 pcmk__set_result(&result, exit_status, execution_status, exit_reason);
672 cmd->done_cb(0, &result, cmd);
673 pcmk__reset_result(&result);
674 }
675
676 static gboolean
677 stonith_device_execute(fenced_device_t *device)
678 {
679 int exec_rc = 0;
680 const char *action_str = NULL;
681 async_command_t *cmd = NULL;
682 stonith_action_t *action = NULL;
683 int active_cmds = 0;
684 int action_limit = 0;
685 GList *iter = NULL;
686
687 CRM_CHECK(device != NULL, return FALSE);
688
689 active_cmds = get_active_cmds(device);
690 action_limit = get_action_limit(device);
691 if (action_limit > -1 && active_cmds >= action_limit) {
692 pcmk__trace("%s is over its action limit of %d (%u active action%s)",
693 device->id, action_limit, active_cmds,
694 pcmk__plural_s(active_cmds));
695 return TRUE;
696 }
697
698 iter = device->pending_ops;
699
700 while (iter != NULL) {
701 GList *next = iter->next;
702 async_command_t *pending_op = iter->data;
703
704 if ((pending_op != NULL) && (pending_op->delay_id != 0)) {
705 pcmk__trace("Operation '%s'%s%s using %s was asked to run too "
706 "early, waiting for start delay of %ds",
707 pending_op->action,
708 ((pending_op->target == NULL)? "" : " targeting "),
709 pcmk__s(pending_op->target, ""),
710 device->id, pending_op->start_delay);
711 iter = next;
712 continue;
713 }
714
715 device->pending_ops = g_list_remove_link(device->pending_ops, iter);
716 g_list_free_1(iter);
717
718 cmd = pending_op;
719 break;
720 }
721
722 if (cmd == NULL) {
723 pcmk__trace("No actions using %s are needed", device->id);
724 return TRUE;
725 }
726
727 if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
728 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
729 if (pcmk__is_fencing_action(cmd->action)) {
730 if (node_does_watchdog_fencing(fenced_get_local_node())) {
731 pcmk__panic("Watchdog self-fencing required");
732 goto done;
733 }
734 } else {
735 pcmk__info("Faking success for %s watchdog operation", cmd->action);
736 report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
737 goto done;
738 }
739 }
740
741 #if PCMK__ENABLE_CIBSECRETS
742 exec_rc = pcmk__substitute_secrets(device->id, device->params);
743 if (exec_rc != pcmk_rc_ok) {
744 if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
745 pcmk__info("Proceeding with stop operation for %s despite being "
746 "unable to load CIB secrets (%s)",
747 device->id, pcmk_rc_str(exec_rc));
748 } else {
749 pcmk__err("Considering %s unconfigured because unable to load CIB "
750 "secrets: %s",
751 device->id, pcmk_rc_str(exec_rc));
752 report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
753 "Failed to get CIB secrets");
754 goto done;
755 }
756 }
757 #endif
758
759 action_str = cmd->action;
760 if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
761 && !pcmk__is_set(device->flags, fenced_df_supports_reboot)) {
762
763 pcmk__notice("Remapping 'reboot' action%s%s using %s to 'off' because "
764 "agent '%s' does not support reboot",
765 ((cmd->target == NULL)? "" : " targeting "),
766 pcmk__s(cmd->target, ""), device->id, device->agent);
767 action_str = PCMK_ACTION_OFF;
768 }
769
770 action = stonith__action_create(device->agent, action_str, cmd->target,
771 cmd->timeout, device->params,
772 device->aliases, device->default_host_arg);
773
774 /* for async exec, exec_rc is negative for early error exit
775 otherwise handling of success/errors is done via callbacks */
776 cmd->activating_on = device;
777 exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
778 fork_cb);
779 if (exec_rc < 0) {
780 cmd->activating_on = NULL;
781 cmd->done_cb(0, stonith__action_result(action), cmd);
782 stonith__destroy_action(action);
783 }
784
785 done:
786 /* Device might get triggered to work by multiple fencing commands
787 * simultaneously. Trigger the device again to make sure any
788 * remaining concurrent commands get executed. */
789 if (device->pending_ops != NULL) {
790 mainloop_set_trigger(device->work);
791 }
792 return TRUE;
793 }
794
795 static gboolean
796 stonith_device_dispatch(gpointer user_data)
797 {
798 return stonith_device_execute(user_data);
799 }
800
801 static gboolean
802 start_delay_helper(gpointer data)
803 {
804 async_command_t *cmd = data;
805 fenced_device_t *device = cmd_device(cmd);
806
807 cmd->delay_id = 0;
808 if (device != NULL) {
809 mainloop_set_trigger(device->work);
810 }
811
812 return FALSE;
813 }
814
815 static void
816 schedule_stonith_command(async_command_t *cmd, fenced_device_t *device)
817 {
818 int delay_max = 0;
819 int delay_base = 0;
820 int requested_delay = cmd->start_delay;
821
822 CRM_CHECK(cmd != NULL, return);
823 CRM_CHECK(device != NULL, return);
824
825 if (cmd->device != NULL) {
826 free(cmd->device);
827 }
828
829 cmd->device = pcmk__str_copy(device->id);
830 cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
831
832 if (cmd->remote_op_id != NULL) {
833 pcmk__debug("Scheduling '%s' action%s%s using %s for remote peer %s "
834 "with op id %.8s and timeout %ds",
835 cmd->action,
836 (cmd->target == NULL)? "" : " targeting ",
837 pcmk__s(cmd->target, ""),
838 device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
839
840 } else {
841 pcmk__debug("Scheduling '%s' action%s%s using %s for %s with timeout "
842 "%ds",
843 cmd->action,
844 ((cmd->target != NULL)? " targeting " : ""),
845 pcmk__s(cmd->target, ""),
846 device->id, cmd->client, cmd->timeout);
847 }
848
849 device->pending_ops = g_list_append(device->pending_ops, cmd);
850 mainloop_set_trigger(device->work);
851
852 // Value -1 means disable any static/random fencing delays
853 if (requested_delay < 0) {
854 return;
855 }
856
857 delay_max = get_action_delay_max(device, cmd->action);
858 delay_base = get_action_delay_base(device, cmd->action, cmd->target);
859 if (delay_max == 0) {
860 delay_max = delay_base;
861 }
862 if (delay_max < delay_base) {
863 pcmk__warn(PCMK_FENCING_DELAY_BASE " (%ds) is larger than "
864 PCMK_FENCING_DELAY_MAX " (%ds) for %s using %s "
865 "(limiting to maximum delay)",
866 delay_base, delay_max, cmd->action, device->id);
867 delay_base = delay_max;
868 }
869 if (delay_max > 0) {
870 cmd->start_delay += delay_base;
871
872 // Add random offset so that delay_base <= cmd->start_delay <= delay_max
873 if (delay_max > delay_base) {
874 // coverity[dont_call] Doesn't matter that rand() is predictable
875 cmd->start_delay += rand() % (delay_max - delay_base + 1);
876 }
877 }
878
879 if (cmd->start_delay > 0) {
880 pcmk__notice("Delaying '%s' action%s%s using %s for %ds "
881 QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds",
882 cmd->action, (cmd->target == NULL)? "" : " targeting ",
883 pcmk__s(cmd->target, ""), device->id, cmd->start_delay,
884 cmd->timeout, requested_delay, delay_base, delay_max);
885 cmd->delay_id =
886 pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd);
887 }
888 }
889
890 static void
891 free_device(gpointer data)
892 {
893 fenced_device_t *device = data;
894
895 g_hash_table_destroy(device->params);
896 g_hash_table_destroy(device->aliases);
897
898 for (GList *iter = device->pending_ops; iter != NULL; iter = iter->next) {
899 async_command_t *cmd = iter->data;
900
901 pcmk__warn("Removal of device '%s' purged operation '%s'", device->id,
902 cmd->action);
903 report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
904 "Device was removed before action could be executed");
905 }
906 g_list_free(device->pending_ops);
907
908 g_list_free_full(device->targets, free);
909
910 if (device->timer != NULL) {
911 mainloop_timer_stop(device->timer);
912 mainloop_timer_del(device->timer);
913 }
914
915 mainloop_destroy_trigger(device->work);
916
917 pcmk__xml_free(device->agent_metadata);
918 free(device->namespace);
919 g_strfreev(device->on_target_actions);
920 free(device->agent);
921 free(device->id);
922 free(device);
923 }
924
925 /*!
926 * \internal
927 * \brief Initialize the table of known fence devices
928 */
929 void
930 fenced_init_device_table(void)
931 {
932 if (device_table != NULL) {
933 return;
934 }
935
936 device_table = pcmk__strkey_table(NULL, free_device);
937 }
938
939 /*!
940 * \internal
941 * \brief Free the table of known fence devices
942 */
943 void
944 fenced_free_device_table(void)
945 {
946 g_clear_pointer(&device_table, g_hash_table_destroy);
947 }
948
949 static GHashTable *
950 build_port_aliases(const char *hostmap, GList **targets)
951 {
952 GHashTable *aliases = pcmk__strikey_table(free, free);
953 gchar *stripped = NULL;
954 gchar **mappings = NULL;
955
956 if (pcmk__str_empty(hostmap)) {
957 goto done;
958 }
959
960 stripped = g_strstrip(g_strdup(hostmap));
961 mappings = g_strsplit_set(stripped, "; \t", 0);
962
963 for (gchar **mapping = mappings; *mapping != NULL; mapping++) {
964 gchar **nvpair = NULL;
965
966 if (pcmk__str_empty(*mapping)) {
967 continue;
968 }
969
970 // @COMPAT Drop support for '=' as delimiter
971 nvpair = g_strsplit_set(*mapping, ":=", 2);
972
973 if (pcmk__str_empty(nvpair[0]) || pcmk__str_empty(nvpair[1])) {
974 pcmk__err(PCMK_FENCING_HOST_MAP ": Malformed mapping '%s'",
975 *mapping);
976
977 } else {
978 pcmk__debug("Adding alias '%s'='%s'", nvpair[0], nvpair[1]);
979 pcmk__insert_dup(aliases, nvpair[0], nvpair[1]);
980 *targets = g_list_append(*targets, pcmk__str_copy(nvpair[1]));
981 }
982 g_strfreev(nvpair);
983 }
984
985 done:
986 g_free(stripped);
987 g_strfreev(mappings);
988 return aliases;
989 }
990
991 GHashTable *metadata_cache = NULL;
992
993 void
994 free_metadata_cache(void)
995 {
996 g_clear_pointer(&metadata_cache, g_hash_table_destroy);
997 }
998
999 static void
1000 init_metadata_cache(void)
1001 {
1002 if (metadata_cache != NULL) {
1003 return;
1004 }
1005
1006 metadata_cache = pcmk__strkey_table(free, free);
1007 }
1008
1009 int
1010 get_agent_metadata(const char *agent, xmlNode ** metadata)
1011 {
1012 char *buffer = NULL;
1013 stonith_t *st = NULL;
1014 int rc = pcmk_ok;
1015
1016 if (metadata == NULL) {
1017 return EINVAL;
1018 }
1019
1020 *metadata = NULL;
1021
1022 if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
1023 return pcmk_rc_ok;
1024 }
1025
1026 init_metadata_cache();
1027 buffer = g_hash_table_lookup(metadata_cache, agent);
1028
1029 if (buffer != NULL) {
1030 goto done;
1031 }
1032
1033 st = stonith__api_new();
1034
1035 if (st == NULL) {
1036 pcmk__warn("Could not get agent meta-data: API memory allocation "
1037 "failed");
1038 return EAGAIN;
1039 }
1040
1041 rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
1042 stonith__api_free(st);
1043
1044 if ((rc != pcmk_ok) || (buffer == NULL)) {
1045 pcmk__err("Could not retrieve metadata for fencing agent %s", agent);
1046 return EAGAIN;
1047 }
1048
1049 g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer);
1050
1051 done:
1052 *metadata = pcmk__xml_parse(buffer);
1053 return pcmk_rc_ok;
1054 }
1055
1056 static void
1057 read_action_metadata(fenced_device_t *device)
1058 {
1059 xmlXPathObject *xpath = NULL;
1060 int max = 0;
1061
1062 // @TODO Use GStrvBuilder when we require glib 2.68
1063 GPtrArray *on_target_actions = NULL;
1064
1065 if (device->agent_metadata == NULL) {
1066 return;
1067 }
1068
1069 xpath = pcmk__xpath_search(device->agent_metadata->doc,
1070 "//" PCMK_XE_ACTION);
1071 max = pcmk__xpath_num_results(xpath);
1072
1073 if (max == 0) {
1074 xmlXPathFreeObject(xpath);
1075 return;
1076 }
1077
1078 for (int i = 0; i < max; i++) {
1079 const char *action = NULL;
1080 xmlNode *match = pcmk__xpath_result(xpath, i);
1081
1082 CRM_LOG_ASSERT(match != NULL);
1083 if(match == NULL) { continue; };
1084
1085 action = pcmk__xe_get(match, PCMK_XA_NAME);
1086
1087 if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
1088 fenced_device_set_flags(device, fenced_df_supports_list);
1089
1090 } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
1091 fenced_device_set_flags(device, fenced_df_supports_status);
1092
1093 } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
1094 fenced_device_set_flags(device, fenced_df_supports_reboot);
1095
1096 } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
1097 /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
1098 * joins.
1099 *
1100 * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
1101 * PCMK_XA_AUTOMATIC.
1102 */
1103 if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
1104 || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
1105
1106 fenced_device_set_flags(device, fenced_df_auto_unfence);
1107 }
1108 fenced_device_set_flags(device, fenced_df_supports_on);
1109 }
1110
1111 if ((action != NULL)
1112 && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {
1113
1114 if (on_target_actions == NULL) {
1115 on_target_actions = g_ptr_array_new();
1116 }
1117 g_ptr_array_add(on_target_actions, g_strdup(action));
1118 }
1119 }
1120
1121 if (on_target_actions != NULL) {
1122 g_ptr_array_add(on_target_actions, NULL);
1123 device->on_target_actions =
1124 (gchar **) g_ptr_array_free(on_target_actions, FALSE);
1125 }
1126 xmlXPathFreeObject(xpath);
1127 }
1128
1129 static const char *
1130 target_list_type(fenced_device_t *dev)
1131 {
1132 const char *check_type = g_hash_table_lookup(dev->params,
1133 PCMK_FENCING_HOST_CHECK);
1134
1135 if (check_type != NULL) {
1136 return check_type;
1137 }
1138
1139 if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_LIST) != NULL) {
1140 return PCMK_VALUE_STATIC_LIST;
1141 }
1142
1143 if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_MAP) != NULL) {
1144 return PCMK_VALUE_STATIC_LIST;
1145 }
1146
1147 if (pcmk__is_set(dev->flags, fenced_df_supports_list)) {
1148 return PCMK_VALUE_DYNAMIC_LIST;
1149 }
1150
1151 if (pcmk__is_set(dev->flags, fenced_df_supports_status)) {
1152 return PCMK_VALUE_STATUS;
1153 }
1154
1155 return PCMK_VALUE_NONE;
1156 }
1157
1158 static fenced_device_t *
1159 build_device_from_xml(const xmlNode *dev)
1160 {
1161 const char *value;
1162 fenced_device_t *device = NULL;
1163 char *agent = pcmk__xe_get_copy(dev, PCMK_XA_AGENT);
1164 int rc = pcmk_rc_ok;
1165
1166 CRM_CHECK(agent != NULL, return device);
1167
1168 device = pcmk__assert_alloc(1, sizeof(fenced_device_t));
1169
1170 device->id = pcmk__xe_get_copy(dev, PCMK_XA_ID);
1171 device->agent = agent;
1172 device->namespace = pcmk__xe_get_copy(dev, PCMK__XA_NAMESPACE);
1173 device->params = xml2list(dev);
1174
1175 value = g_hash_table_lookup(device->params, PCMK_FENCING_HOST_LIST);
1176 if (value != NULL) {
1177 device->targets = stonith__parse_targets(value);
1178 }
1179
1180 value = g_hash_table_lookup(device->params, PCMK_FENCING_HOST_MAP);
1181 device->aliases = build_port_aliases(value, &(device->targets));
1182
1183 value = target_list_type(device);
1184 if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)
1185 && (device->targets != NULL)) {
1186
1187 // device->targets is necessary only with PCMK_VALUE_STATIC_LIST
1188 g_list_free_full(device->targets, free);
1189 device->targets = NULL;
1190 }
1191
1192 rc = get_agent_metadata(device->agent, &device->agent_metadata);
1193
1194 if ((rc == pcmk_rc_ok) && (device->agent_metadata != NULL)) {
1195 read_action_metadata(device);
1196 device->default_host_arg =
1197 stonith__default_host_arg(device->agent_metadata);
1198
1199 } else if (rc == EAGAIN) {
1200 if (device->timer == NULL) {
1201 device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
1202 TRUE, get_agent_metadata_cb,
1203 device);
1204 }
1205
1206 if (!mainloop_timer_running(device->timer)) {
1207 mainloop_timer_start(device->timer);
1208 }
1209 }
1210
1211 value = pcmk__xe_get(dev, PCMK__XA_RSC_PROVIDES);
1212 if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
1213 fenced_device_set_flags(device, fenced_df_auto_unfence);
1214 }
1215
1216 if (is_action_required(PCMK_ACTION_ON, device)) {
1217 pcmk__info("Fencing device '%s' requires unfencing", device->id);
1218 }
1219
1220 if (device->on_target_actions != NULL) {
1221 gchar *on_target_actions = g_strjoinv(" ", device->on_target_actions);
1222
1223 pcmk__info("Fencing device '%s' requires actions (%s) to be executed "
1224 "on target", device->id, on_target_actions);
1225 g_free(on_target_actions);
1226 }
1227
1228 device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
1229
1230 return device;
1231 }
1232
1233 static void
1234 schedule_internal_command(const char *origin, fenced_device_t *device,
1235 const char *action, const char *target, int timeout,
1236 void *internal_user_data,
1237 void (*done_cb) (int pid,
1238 const pcmk__action_result_t *result,
1239 void *user_data))
1240 {
1241 async_command_t *cmd = NULL;
1242
1243 cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
1244
1245 cmd->id = -1;
1246 cmd->default_timeout = timeout ? timeout : 60;
1247 cmd->timeout = cmd->default_timeout;
1248 cmd->action = pcmk__str_copy(action);
1249 cmd->target = pcmk__str_copy(target);
1250 cmd->device = pcmk__str_copy(device->id);
1251 cmd->origin = pcmk__str_copy(origin);
1252 cmd->client = pcmk__str_copy(crm_system_name);
1253 cmd->client_name = pcmk__str_copy(crm_system_name);
1254
1255 cmd->internal_user_data = internal_user_data;
1256 cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
1257
1258 schedule_stonith_command(cmd, device);
1259 }
1260
// Fence agent status commands use custom exit status codes
enum fence_status_code {
    /* Not referenced in the nearby callbacks; presumably a placeholder for
     * "no valid status" -- TODO confirm against other users
     */
    fence_status_invalid    = -1,

    // status_search_cb() treats this as "device can fence the target"
    fence_status_active     = 0,

    // status_search_cb() treats this as "device cannot fence the target"
    fence_status_unknown    = 1,

    // status_search_cb() treats this as "device can fence the target"
    fence_status_inactive   = 2,
};
1268
1269 static void
1270 status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
1271 {
1272 async_command_t *cmd = user_data;
1273 struct device_search_s *search = cmd->internal_user_data;
1274 fenced_device_t *dev = cmd_device(cmd);
1275 gboolean can = FALSE;
1276
1277 free_async_command(cmd);
1278
1279 if (dev == NULL) {
1280 search_devices_record_result(search, NULL, FALSE);
1281 return;
1282 }
1283
1284 mainloop_set_trigger(dev->work);
1285
1286 if (result->execution_status != PCMK_EXEC_DONE) {
1287 const char *reason = result->exit_reason;
1288
1289 pcmk__warn("Assuming %s cannot fence %s because status could not be "
1290 "executed: %s%s%s%s",
1291 dev->id, search->host,
1292 pcmk_exec_status_str(result->execution_status),
1293 ((reason != NULL)? " (" : ""), pcmk__s(reason, ""),
1294 ((reason != NULL)? ")" : ""));
1295 search_devices_record_result(search, dev->id, FALSE);
1296 return;
1297 }
1298
1299 switch (result->exit_status) {
1300 case fence_status_unknown:
1301 pcmk__trace("%s reported it cannot fence %s", dev->id,
1302 search->host);
1303 break;
1304
1305 case fence_status_active:
1306 case fence_status_inactive:
1307 pcmk__trace("%s reported it can fence %s", dev->id, search->host);
1308 can = TRUE;
1309 break;
1310
1311 default:
1312 pcmk__warn("Assuming %s cannot fence %s (status returned unknown "
1313 "code %d)",
1314 dev->id, search->host, result->exit_status);
1315 break;
1316 }
1317 search_devices_record_result(search, dev->id, can);
1318 }
1319
1320 static void
1321 dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
1322 void *user_data)
1323 {
1324 async_command_t *cmd = user_data;
1325 struct device_search_s *search = cmd->internal_user_data;
1326 fenced_device_t *dev = cmd_device(cmd);
1327 gboolean can_fence = FALSE;
1328
1329 free_async_command(cmd);
1330
1331 /* Host/alias must be in the list output to be eligible to be fenced
1332 *
1333 * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
1334 * if the guest is still listed despite being moved to another machine
1335 */
1336 if (dev == NULL) {
1337 search_devices_record_result(search, NULL, FALSE);
1338 return;
1339 }
1340
1341 mainloop_set_trigger(dev->work);
1342
1343 if (pcmk__result_ok(result)) {
1344 pcmk__info("Refreshing target list for %s", dev->id);
1345 g_list_free_full(dev->targets, free);
1346 dev->targets = stonith__parse_targets(result->action_stdout);
1347 dev->targets_age = time(NULL);
1348
1349 } else if (dev->targets != NULL) {
1350 if (result->execution_status == PCMK_EXEC_DONE) {
1351 pcmk__info("Reusing most recent target list for %s because list "
1352 "returned error code %d",
1353 dev->id, result->exit_status);
1354 } else {
1355 const char *reason = result->exit_reason;
1356
1357 pcmk__info("Reusing most recent target list for %s because list "
1358 "could not be executed: %s%s%s%s",
1359 dev->id, pcmk_exec_status_str(result->execution_status),
1360 ((reason != NULL)? " (" : ""), pcmk__s(reason, ""),
1361 ((reason != NULL)? ")" : ""));
1362 }
1363
1364 } else { // We have never successfully executed list
1365 if (result->execution_status == PCMK_EXEC_DONE) {
1366 pcmk__warn("Assuming %s cannot fence %s because list returned "
1367 "error code %d",
1368 dev->id, search->host, result->exit_status);
1369 } else {
1370 const char *reason = result->exit_reason;
1371
1372 pcmk__warn("Assuming %s cannot fence %s because list could not be "
1373 "executed: %s%s%s%s",
1374 dev->id, search->host,
1375 pcmk_exec_status_str(result->execution_status),
1376 ((reason != NULL)? " (" : ""), pcmk__s(reason, ""),
1377 ((reason != NULL)? ")" : ""));
1378 }
1379
1380 /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't
1381 * explicitly specify PCMK_VALUE_DYNAMIC_LIST
1382 */
1383 if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_CHECK) == NULL) {
1384 pcmk__notice("Switching to pcmk_host_check='status' for %s",
1385 dev->id);
1386 pcmk__insert_dup(dev->params, PCMK_FENCING_HOST_CHECK,
1387 PCMK_VALUE_STATUS);
1388 }
1389 }
1390
1391 if (dev->targets != NULL) {
1392 const char *alias = g_hash_table_lookup(dev->aliases, search->host);
1393
1394 if (alias == NULL) {
1395 alias = search->host;
1396 }
1397 if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
1398 can_fence = TRUE;
1399 }
1400 }
1401 search_devices_record_result(search, dev->id, can_fence);
1402 }
1403
1404 /*!
1405 * \internal
1406 * \brief Returns true if any key in first is not in second or second has a different value for key
1407 */
1408 static bool
1409 device_params_diff(GHashTable *first, GHashTable *second) {
1410 char *key = NULL;
1411 char *value = NULL;
1412 GHashTableIter gIter;
1413
1414 g_hash_table_iter_init(&gIter, first);
1415 while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
1416 const char *other_value = NULL;
1417
1418 if (g_str_has_prefix(key, CRM_META "_")
1419 || pcmk__str_eq(key, PCMK_XA_CRM_FEATURE_SET, pcmk__str_none)) {
1420 continue;
1421 }
1422
1423 other_value = g_hash_table_lookup(second, key);
1424
1425 if ((other_value == NULL)
1426 || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
1427 pcmk__trace("Different value for %s: %s != %s", key,
1428 pcmk__s(other_value, "<null>"), value);
1429 return true;
1430 }
1431 }
1432
1433 return false;
1434 }
1435
1436 /*!
1437 * \internal
1438 * \brief Checks to see if an identical device already exists in the table
1439 */
1440 static fenced_device_t *
1441 device_has_duplicate(const fenced_device_t *device)
1442 {
1443 fenced_device_t *dup = g_hash_table_lookup(device_table, device->id);
1444
1445 if (dup == NULL) {
1446 pcmk__trace("No match for %s", device->id);
1447 return NULL;
1448
1449 } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
1450 pcmk__trace("Different agent: %s != %s", dup->agent, device->agent);
1451 return NULL;
1452 }
1453
1454 // Find a way to share logic with pcmk__digest_op_params() here?
1455 if (device_params_diff(device->params, dup->params) ||
1456 device_params_diff(dup->params, device->params)) {
1457 return NULL;
1458 }
1459
1460 pcmk__trace("Match");
1461 return dup;
1462 }
1463
1464 int
1465 fenced_device_register(const xmlNode *dev, bool from_cib)
1466 {
1467 const char *local_node_name = fenced_get_local_node();
1468 fenced_device_t *dup = NULL;
1469 fenced_device_t *device = build_device_from_xml(dev);
1470 int rc = pcmk_rc_ok;
1471
1472 CRM_CHECK(device != NULL, return ENOMEM);
1473
1474 /* do we have a watchdog-device? */
1475 if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)
1476 || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
1477 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
1478
1479 if (fencing_watchdog_timeout_ms <= 0) {
1480 pcmk__err("Ignoring watchdog fence device without "
1481 PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " set");
1482 rc = ENODEV;
1483 goto done;
1484 }
1485 if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
1486 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
1487 pcmk__err("Ignoring watchdog fence device with unknown agent '%s' "
1488 "rather than '" STONITH_WATCHDOG_AGENT "'",
1489 pcmk__s(device->agent, ""));
1490 rc = ENODEV;
1491 goto done;
1492 }
1493 if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
1494 pcmk__err("Ignoring watchdog fence device named '%s' rather than "
1495 "'" STONITH_WATCHDOG_ID "'",
1496 pcmk__s(device->id, ""));
1497 rc = ENODEV;
1498 goto done;
1499 }
1500
1501 if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
1502 pcmk__str_none)) {
1503 /* This has either an empty list or the targets configured for
1504 * watchdog fencing
1505 */
1506 g_list_free_full(stonith_watchdog_targets, free);
1507 stonith_watchdog_targets = device->targets;
1508 device->targets = NULL;
1509 }
1510
1511 if (!node_does_watchdog_fencing(local_node_name)) {
1512 pcmk__debug("Skip registration of watchdog fence device on node "
1513 "not in host list");
1514 device->targets = NULL;
1515 stonith_device_remove(device->id, from_cib);
1516 goto done;
1517 }
1518
1519 // Proceed as with any other fencing device
1520 g_list_free_full(device->targets, free);
1521 device->targets = stonith__parse_targets(local_node_name);
1522 pcmk__insert_dup(device->params, PCMK_FENCING_HOST_LIST,
1523 local_node_name);
1524 }
1525
1526 dup = device_has_duplicate(device);
1527 if (dup != NULL) {
1528 guint ndevices = g_hash_table_size(device_table);
1529
1530 pcmk__debug("Device '%s' already in device list (%d active device%s)",
1531 device->id, ndevices, pcmk__plural_s(ndevices));
1532 free_device(device);
1533 device = dup;
1534 fenced_device_clear_flags(device, fenced_df_dirty);
1535
1536 } else {
1537 guint ndevices = 0;
1538 fenced_device_t *old = g_hash_table_lookup(device_table, device->id);
1539
1540 if (from_cib && (old != NULL)
1541 && pcmk__is_set(old->flags, fenced_df_api_registered)) {
1542 /* If the CIB is writing over an entry that is shared with a stonith
1543 * client, copy any pending ops that currently exist on the old
1544 * entry to the new one. Otherwise the pending ops will be reported
1545 * as failures.
1546 */
1547 pcmk__info("Overwriting existing entry for %s from CIB",
1548 device->id);
1549 device->pending_ops = old->pending_ops;
1550 fenced_device_set_flags(device, fenced_df_api_registered);
1551 old->pending_ops = NULL;
1552 if (device->pending_ops != NULL) {
1553 mainloop_set_trigger(device->work);
1554 }
1555 }
1556 g_hash_table_replace(device_table, device->id, device);
1557
1558 ndevices = g_hash_table_size(device_table);
1559 pcmk__notice("Added '%s' to device list (%d active device%s)",
1560 device->id, ndevices, pcmk__plural_s(ndevices));
1561 }
1562
1563 if (from_cib) {
1564 fenced_device_set_flags(device, fenced_df_cib_registered);
1565 } else {
1566 fenced_device_set_flags(device, fenced_df_api_registered);
1567 }
1568
1569 done:
1570 if (rc != pcmk_rc_ok) {
1571 free_device(device);
1572 }
1573 return rc;
1574 }
1575
1576 void
1577 stonith_device_remove(const char *id, bool from_cib)
1578 {
1579 fenced_device_t *device = g_hash_table_lookup(device_table, id);
1580 guint ndevices = 0;
1581
1582 if (device == NULL) {
1583 ndevices = g_hash_table_size(device_table);
1584 pcmk__info("Device '%s' not found (%u active device%s)", id, ndevices,
1585 pcmk__plural_s(ndevices));
1586 return;
1587 }
1588
1589 if (from_cib) {
1590 fenced_device_clear_flags(device, fenced_df_cib_registered);
1591 } else {
1592 fenced_device_clear_flags(device,
1593 fenced_df_api_registered|fenced_df_verified);
1594 }
1595
1596 if (!pcmk__any_flags_set(device->flags,
1597 fenced_df_api_registered
1598 |fenced_df_cib_registered)) {
1599
1600 g_hash_table_remove(device_table, id);
1601 ndevices = g_hash_table_size(device_table);
1602 pcmk__info("Removed '%s' from device list (%u active device%s)", id,
1603 ndevices, pcmk__plural_s(ndevices));
1604
1605 } else {
1606 // Exactly one is true at this point
1607 const bool cib_registered = pcmk__is_set(device->flags,
1608 fenced_df_cib_registered);
1609
1610 pcmk__trace("Not removing '%s' from device list (%u active) because "
1611 "still registered via %s",
1612 id, g_hash_table_size(device_table),
1613 (cib_registered? "CIB" : "API"));
1614 }
1615 }
1616
1617 /*!
1618 * \internal
1619 * \brief Return the number of stonith levels registered for a node
1620 *
1621 * \param[in] tp Node's topology table entry
1622 *
1623 * \return Number of non-NULL levels in topology entry
1624 * \note This function is used only for log messages.
1625 */
1626 static int
1627 count_active_levels(const stonith_topology_t *tp)
1628 {
1629 int count = 0;
1630
1631 for (int i = 0; i < ST__LEVEL_COUNT; i++) {
1632 if (tp->levels[i] != NULL) {
1633 count++;
1634 }
1635 }
1636
1637 return count;
1638 }
1639
1640 static void
1641 free_topology_entry(gpointer data)
1642 {
1643 stonith_topology_t *tp = data;
1644
1645 for (int i = 0; i < ST__LEVEL_COUNT; i++) {
1646 g_list_free_full(tp->levels[i], free);
1647 }
1648
1649 free(tp->target);
1650 free(tp->target_value);
1651 free(tp->target_pattern);
1652 free(tp->target_attribute);
1653 free(tp);
1654 }
1655
1656 void
1657 free_topology_list(void)
1658 {
|
CID (unavailable; MK=2c70abe85bd9a2aa82771411531953de) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(1) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(2) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
1659 g_clear_pointer(&topology, g_hash_table_destroy);
1660 }
1661
1662 void
1663 init_topology_list(void)
1664 {
1665 if (topology != NULL) {
1666 return;
1667 }
1668
1669 topology = pcmk__strkey_table(NULL, free_topology_entry);
1670 }
1671
1672 char *
1673 stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
1674 {
1675 if (mode == fenced_target_by_unknown) {
1676 mode = unpack_level_kind(level);
1677 }
1678 switch (mode) {
1679 case fenced_target_by_name:
1680 return pcmk__xe_get_copy(level, PCMK_XA_TARGET);
1681
1682 case fenced_target_by_pattern:
1683 return pcmk__xe_get_copy(level, PCMK_XA_TARGET_PATTERN);
1684
1685 case fenced_target_by_attribute:
1686 return pcmk__assert_asprintf("%s=%s",
1687 pcmk__xe_get(level,
1688 PCMK_XA_TARGET_ATTRIBUTE),
1689 pcmk__xe_get(level,
1690 PCMK_XA_TARGET_VALUE));
1691
1692 default:
1693 return pcmk__assert_asprintf("unknown-%s", pcmk__xe_id(level));
1694 }
1695 }
1696
1697 /*!
1698 * \internal
1699 * \brief Parse target identification from topology level XML
1700 *
1701 * \param[in] level Topology level XML to parse
1702 *
1703 * \return How to identify target of \p level
1704 */
1705 static enum fenced_target_by
1706 unpack_level_kind(const xmlNode *level)
1707 {
1708 if (pcmk__xe_get(level, PCMK_XA_TARGET) != NULL) {
1709 return fenced_target_by_name;
1710 }
1711 if (pcmk__xe_get(level, PCMK_XA_TARGET_PATTERN) != NULL) {
1712 return fenced_target_by_pattern;
1713 }
1714 if ((pcmk__xe_get(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL)
1715 && (pcmk__xe_get(level, PCMK_XA_TARGET_VALUE) != NULL)) {
1716 return fenced_target_by_attribute;
1717 }
1718 return fenced_target_by_unknown;
1719 }
1720
1721 /*!
1722 * \internal
1723 * \brief Unpack essential information from topology request XML
1724 *
1725 * \param[in] xml Request XML to search
1726 * \param[out] mode If not NULL, where to store level kind
1727 * \param[out] target If not NULL, where to store representation of target
1728 * \param[out] id If not NULL, where to store level number
1729 *
1730 * \return Topology level XML from within \p xml, or NULL if not found
1731 * \note The caller is responsible for freeing \p *target if set.
1732 */
1733 static xmlNode *
1734 unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
1735 int *id)
1736 {
1737 enum fenced_target_by local_mode = fenced_target_by_unknown;
1738 char *local_target = NULL;
1739 int local_id = 0;
1740
1741 /* The level element can be the top element or lower. If top level, don't
1742 * search by xpath, because it might give multiple hits if the XML is the
1743 * CIB.
1744 */
1745 if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
1746 xml = pcmk__xpath_find_one(xml->doc, "//" PCMK_XE_FENCING_LEVEL,
1747 LOG_WARNING);
1748 }
1749
1750 if (xml != NULL) {
1751 local_mode = unpack_level_kind(xml);
1752 local_target = stonith_level_key(xml, local_mode);
1753 pcmk__xe_get_int(xml, PCMK_XA_INDEX, &local_id);
1754 }
1755
1756 if (mode != NULL) {
1757 *mode = local_mode;
1758 }
1759 if (id != NULL) {
1760 *id = local_id;
1761 }
1762
1763 if (target != NULL) {
1764 *target = local_target;
1765 } else {
1766 free(local_target);
1767 }
1768
1769 return xml;
1770 }
1771
/*!
 * \internal
 * \brief Register a fencing topology level for a target
 *
 * Given an XML request specifying the target name, level index, and device IDs
 * for the level, this will create an entry for the target in the global topology
 * table if one does not already exist, then append the specified device IDs to
 * the entry's device list for the specified level.
 *
 * The request is rejected (and \p result set accordingly) if it contains no
 * level element, if the level has no ID, if the level's target kind cannot be
 * determined, or if the level index is outside the range ST__LEVEL_MIN to
 * ST__LEVEL_MAX.
 *
 * \param[in]  msg     XML request for STONITH level registration
 * \param[out] result  Where to set result of registration (can be \c NULL)
 */
void
fenced_register_level(xmlNode *msg, pcmk__action_result_t *result)
{
    int nlevels = 0;
    int id = 0;
    xmlNode *level;
    enum fenced_target_by mode;
    char *target;

    stonith_topology_t *tp;
    const char *value = NULL;

    CRM_CHECK(msg != NULL, return);

    // From here on, this function owns target until it is freed or stored
    level = unpack_level_request(msg, &mode, &target, &id);
    if (level == NULL) {
        // target is NULL when no level was found, so there is nothing to free
        set_bad_request_result(result);
        return;
    }

    // Ensure an ID was given (even the client API adds an ID)
    if (pcmk__str_empty(pcmk__xe_id(level))) {
        pcmk__warn("Ignoring registration for topology level without ID");
        free(target);
        pcmk__log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Topology level is invalid without ID");
        return;
    }

    // Ensure a valid target was specified
    if (mode == fenced_target_by_unknown) {
        pcmk__warn("Ignoring registration for topology level '%s' without "
                   "valid target",
                   pcmk__xe_id(level));
        free(target);
        pcmk__log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Invalid target for topology level '%s'",
                            pcmk__xe_id(level));
        return;
    }

    // Ensure level ID is in allowed range
    if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
        pcmk__warn("Ignoring topology registration for %s with invalid level "
                   "%d",
                   target, id);
        free(target);
        pcmk__log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Invalid level number '%s' for topology level '%s'",
                            pcmk__s(pcmk__xe_get(level, PCMK_XA_INDEX), ""),
                            pcmk__xe_id(level));
        return;
    }

    /* Find or create topology table entry */
    tp = g_hash_table_lookup(topology, target);
    if (tp == NULL) {
        tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t));

        tp->kind = mode;

        // The new entry takes ownership of target (it doubles as the table key)
        tp->target = target;
        tp->target_value = pcmk__xe_get_copy(level, PCMK_XA_TARGET_VALUE);
        tp->target_pattern = pcmk__xe_get_copy(level, PCMK_XA_TARGET_PATTERN);
        tp->target_attribute = pcmk__xe_get_copy(level, PCMK_XA_TARGET_ATTRIBUTE);

        g_hash_table_replace(topology, tp->target, tp);
        pcmk__trace("Added %s (%d) to the topology (%u active entries)", target,
                    (int) mode, g_hash_table_size(topology));
    } else {
        // The existing entry already has its own copy of the key
        free(target);
    }

    // target must not be used below this point; use tp->target instead

    if (tp->levels[id] != NULL) {
        pcmk__info("Adding to the existing %s[%d] topology entry", tp->target,
                   id);
    }

    value = pcmk__xe_get(level, PCMK_XA_DEVICES);
    if (value != NULL) {
        /* Empty string and whitespace are not possible with schema validation
         * enabled. Don't bother handling them specially here.
         */
        gchar **devices = g_strsplit(value, ",", 0);

        // Append a copy of each comma-separated device ID to the level's list
        for (char **dev = devices; (dev != NULL) && (*dev != NULL); dev++) {
            pcmk__trace("Adding device '%s' for %s[%d]", *dev, tp->target, id);
            tp->levels[id] = g_list_append(tp->levels[id],
                                           pcmk__str_copy(*dev));
        }
        g_strfreev(devices);
    }

    nlevels = count_active_levels(tp);

    pcmk__info("Target %s has %d active fencing level%s", tp->target, nlevels,
               pcmk__plural_s(nlevels));

    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
1886
/*!
 * \internal
 * \brief Unregister a fencing topology level for a target
 *
 * Given an XML request specifying the target name and level index (or 0 for all
 * levels), this will remove any corresponding entry for the target from the
 * global topology table.
 *
 * A request for a target or level that is not registered is not an error; the
 * result is still set to success. Only a request without a level element or
 * with an out-of-range level index is rejected.
 *
 * \param[in]  msg     XML request for STONITH level registration
 * \param[out] result  Where to set result of unregistration (can be \c NULL)
 */
void
fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result)
{
    int id = -1;
    stonith_topology_t *tp;
    char *target;
    xmlNode *level = NULL;

    // This sets id to the level's index, or to 0 if the level has none
    level = unpack_level_request(msg, NULL, &target, &id);
    if (level == NULL) {
        // target is NULL when no level was found, so there is nothing to free
        set_bad_request_result(result);
        return;
    }

    // Ensure level ID is in allowed range
    if ((id < 0) || (id >= ST__LEVEL_COUNT)) {
        pcmk__warn("Ignoring topology unregistration for %s with invalid level "
                   "%d",
                   target, id);
        free(target);
        pcmk__log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Invalid level number '%s' for topology level %s",
                            pcmk__s(pcmk__xe_get(level, PCMK_XA_INDEX),
                                    "<null>"),

                            // Client API doesn't add ID to unregistration XML
                            pcmk__s(pcmk__xe_id(level), ""));
        return;
    }

    tp = g_hash_table_lookup(topology, target);
    if (tp == NULL) {
        guint nentries = g_hash_table_size(topology);

        pcmk__info("No fencing topology found for %s (%d active %s)", target,
                   nentries, pcmk__plural_alt(nentries, "entry", "entries"));

    // Level 0 means all levels: drop the target's whole entry (tp is invalid
    // once the removal succeeds)
    } else if (id == 0 && g_hash_table_remove(topology, target)) {
        guint nentries = g_hash_table_size(topology);

        pcmk__info("Removed all fencing topology entries related to %s (%d "
                   "active %s remaining)",
                   target, nentries,
                   pcmk__plural_alt(nentries, "entry", "entries"));

    // Otherwise clear just the requested level, leaving the entry in place
    } else if (tp->levels[id] != NULL) {
        guint nlevels;

        g_list_free_full(tp->levels[id], free);
        tp->levels[id] = NULL;

        nlevels = count_active_levels(tp);
        pcmk__info("Removed level %d from fencing topology for %s (%d "
                   "active level%s remaining)",
                   id, target, nlevels, pcmk__plural_s(nlevels));
    }

    free(target);
    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
1959
1960 static char *
1961 list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
1962 {
1963 int max = g_list_length(list);
1964 size_t delim_len = delim?strlen(delim):0;
1965 size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
1966 char *rv;
1967
1968 char *pos = NULL;
1969 const char *lead_delim = "";
1970
1971 for (const GList *iter = list; iter != NULL; iter = iter->next) {
1972 const char *value = (const char *) iter->data;
1973
1974 alloc_size += strlen(value);
1975 }
1976
1977 rv = pcmk__assert_alloc(alloc_size, sizeof(char));
1978 pos = rv;
1979
1980 for (const GList *iter = list; iter != NULL; iter = iter->next) {
1981 const char *value = (const char *) iter->data;
1982
1983 pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
1984 lead_delim = delim;
1985 }
1986
1987 if ((max != 0) && terminate_with_delim) {
1988 sprintf(pos, "%s", delim);
1989 }
1990
1991 return rv;
1992 }
1993
1994 /*!
1995 * \internal
1996 * \brief Execute a fence agent action directly (and asynchronously)
1997 *
1998 * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
1999 * directly on a specified device. Only list, monitor, and status actions are
2000 * expected to use this call, though it should work with any agent command.
2001 *
2002 * \param[in] msg Request XML specifying action
2003 * \param[out] result Where to store result of action
2004 *
2005 * \note If the action is monitor, the device must be registered via the API
2006 * (CIB registration is not sufficient), because monitor should not be
2007 * possible unless the device is "started" (API registered).
2008 */
2009 static void
2010 execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
2011 {
2012 xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_DEVICE_ID,
2013 LOG_ERR);
2014 xmlNode *op = pcmk__xpath_find_one(msg->doc,
2015 "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
2016 LOG_ERR);
2017 const char *id = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
2018 const char *action = pcmk__xe_get(op, PCMK__XA_ST_DEVICE_ACTION);
2019 async_command_t *cmd = NULL;
2020 fenced_device_t *device = NULL;
2021
2022 if ((id == NULL) || (action == NULL)) {
2023 pcmk__info("Malformed API action request: device %s, action %s",
2024 pcmk__s(id, "not specified"),
2025 pcmk__s(action, "not specified"));
2026 set_bad_request_result(result);
2027 return;
2028 }
2029
2030 if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
2031 // Watchdog agent actions are implemented internally
2032 if (fencing_watchdog_timeout_ms <= 0) {
2033 pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2034 "Watchdog fence device not configured");
2035 return;
2036
2037 } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
2038 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2039 pcmk__set_result_output(result,
2040 list_to_string(stonith_watchdog_targets,
2041 "\n", TRUE),
2042 NULL);
2043 return;
2044
2045 } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
2046 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2047 return;
2048 }
2049 }
2050
2051 device = g_hash_table_lookup(device_table, id);
2052 if (device == NULL) {
2053 pcmk__info("Ignoring API '%s' action request because device %s not "
2054 "found",
2055 action, id);
2056 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2057 "'%s' not found", id);
2058 return;
2059
2060 } else if (!pcmk__is_set(device->flags, fenced_df_api_registered)
2061 && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
2062 // Monitors may run only on "started" (API-registered) devices
2063 pcmk__info("Ignoring API '%s' action request because device %s not "
2064 "active",
2065 action, id);
2066 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2067 "'%s' not active", id);
2068 return;
2069 }
2070
2071 cmd = create_async_command(msg);
2072 if (cmd == NULL) {
2073 pcmk__log_xml_warn(msg, "invalid");
2074 set_bad_request_result(result);
2075 return;
2076 }
2077
2078 schedule_stonith_command(cmd, device);
2079 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
2080 }
2081
2082 static void
2083 search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
2084 {
2085 search->replies_received++;
2086 if (can_fence && (device != NULL)) {
2087 if (search->support_action_only != fenced_df_none) {
2088 fenced_device_t *dev = g_hash_table_lookup(device_table, device);
2089
2090 if ((dev != NULL) && !pcmk__is_set(dev->flags, search->support_action_only)) {
2091 return;
2092 }
2093 }
2094 search->capable = g_list_append(search->capable,
2095 pcmk__str_copy(device));
2096 }
2097
2098 if (search->replies_needed == search->replies_received) {
2099
2100 guint ndevices = g_list_length(search->capable);
2101
2102 pcmk__debug("Search found %d device%s that can perform '%s' targeting "
2103 "%s",
2104 ndevices, pcmk__plural_s(ndevices),
2105 pcmk__s(search->action, "unknown action"),
2106 pcmk__s(search->host, "any node"));
2107
2108 search->callback(search->capable, search->user_data);
2109 free(search->host);
2110 free(search->action);
2111 free(search);
2112 }
2113 }
2114
2115 /*!
2116 * \internal
2117 * \brief Check whether the local host is allowed to execute a fencing action
2118 *
2119 * \param[in] device Fence device to check
2120 * \param[in] action Fence action to check
2121 * \param[in] target Hostname of fence target
2122 * \param[in] allow_self Whether self-fencing is allowed for this operation
2123 *
2124 * \return \c true if local host is allowed to execute action, or \c false
2125 * otherwise
2126 */
2127 static bool
2128 localhost_is_eligible(const fenced_device_t *device, const char *action,
2129 const char *target, bool allow_self)
2130 {
2131 bool localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(),
2132 pcmk__str_casei);
2133
2134 CRM_CHECK(action != NULL, return true);
2135
2136 if ((device != NULL) && (device->on_target_actions != NULL)
2137 && pcmk__g_strv_contains(device->on_target_actions, action)) {
2138
2139 if (!localhost_is_target) {
2140 pcmk__trace("Operation '%s' using %s can only be executed for "
2141 "local host, not %s", action, device->id, target);
2142 return false;
2143 }
2144
2145 } else if (localhost_is_target && !allow_self) {
2146 pcmk__trace("'%s' operation does not support self-fencing", action);
2147 return false;
2148 }
2149 return true;
2150 }
2151
2152 /*!
2153 * \internal
2154 * \brief Check if local node is allowed to execute (possibly remapped) action
2155 *
2156 * \param[in] device Fence device to check
2157 * \param[in] action Fence action to check
2158 * \param[in] target Node name of fence target
2159 * \param[in] allow_self Whether self-fencing is allowed for this operation
2160 *
2161 * \return true if local node is allowed to execute \p action or any actions it
2162 * might be remapped to, otherwise false
2163 */
2164 static bool
2165 localhost_is_eligible_with_remap(const fenced_device_t *device,
2166 const char *action, const char *target,
2167 bool allow_self)
2168 {
2169 // Check exact action
2170 if (localhost_is_eligible(device, action, target, allow_self)) {
2171 return true;
2172 }
2173
2174 // Check potential remaps
2175
2176 if (!pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
2177 return false;
2178 }
2179
2180 /* "reboot" might get remapped to "off" then "on", so even if reboot is
2181 * disallowed, return true if either of those is allowed. We'll report
2182 * the disallowed actions with the results. We never allow self-fencing
2183 * for remapped "on" actions because the target is off at that point.
2184 */
2185 if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
2186 || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
2187 return true;
2188 }
2189
2190 return false;
2191 }
2192
2193 /*!
2194 * \internal
2195 * \brief Check whether we can use a device's cached target list
2196 *
2197 * \param[in] dev Fencing device to check
2198 *
2199 * \return \c true if \p dev cached its targets less than a minute ago,
2200 * otherwise \c false
2201 */
2202 static inline bool
2203 can_use_target_cache(const fenced_device_t *dev)
2204 {
2205 return (dev->targets != NULL) && (time(NULL) < (dev->targets_age + 60));
2206 }
2207
2208 static void
2209 can_fence_host_with_device(fenced_device_t *dev,
2210 struct device_search_s *search)
2211 {
2212 gboolean can = FALSE;
2213 const char *dev_id = "Unspecified device";
2214 const char *action = NULL;
2215 const char *target = NULL;
2216 const char *check_type = "Internal bug";
2217 const char *alias = NULL;
2218
2219 CRM_CHECK((dev != NULL) && (search != NULL) && (search->action != NULL),
2220 goto search_report_results);
2221
2222 if (dev->id != NULL) {
2223 dev_id = dev->id;
2224 }
2225
2226 action = search->action;
2227
2228 target = search->host;
2229 if (target == NULL) {
2230 can = TRUE;
2231 check_type = "No target";
2232 goto search_report_results;
2233 }
2234
2235 /* Answer immediately if the device does not support the action
2236 * or the local node is not allowed to perform it
2237 */
2238 if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
2239 && !pcmk__is_set(dev->flags, fenced_df_supports_on)) {
2240 check_type = "Agent does not support 'on'";
2241 goto search_report_results;
2242
2243 } else if (!localhost_is_eligible_with_remap(dev, action, target,
2244 search->allow_self)) {
2245 check_type = "This node is not allowed to execute action";
2246 goto search_report_results;
2247 }
2248
2249 // Check eligibility as specified by pcmk_host_check
2250 check_type = target_list_type(dev);
2251 alias = g_hash_table_lookup(dev->aliases, target);
2252 if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
2253 can = TRUE;
2254
2255 } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
2256 pcmk__str_casei)) {
2257
2258 if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
2259 can = TRUE;
2260 } else if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_MAP)
2261 && g_hash_table_lookup(dev->aliases, target)) {
2262 can = TRUE;
2263 }
2264
2265 } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST,
2266 pcmk__str_casei)) {
2267 if (!can_use_target_cache(dev)) {
2268 int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
2269 search->per_device_timeout);
2270
2271 if (device_timeout > search->per_device_timeout) {
2272 pcmk__notice("Since the pcmk_list_timeout (%ds) parameter of "
2273 "%s is larger than " PCMK_OPT_FENCING_TIMEOUT " "
2274 "(%ds), timeout may occur",
2275 device_timeout, dev_id,
2276 search->per_device_timeout);
2277 }
2278
2279 pcmk__trace("Running '%s' to check whether %s is eligible to fence "
2280 "%s (%s)",
2281 check_type, dev_id, target, action);
2282
2283 schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
2284 search->per_device_timeout, search, dynamic_list_search_cb);
2285
2286 /* we'll respond to this search request async in the cb */
2287 return;
2288 }
2289
2290 if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
2291 pcmk__str_casei)) {
2292 can = TRUE;
2293 }
2294
2295 } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) {
2296 int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
2297
2298 if (device_timeout > search->per_device_timeout) {
2299 pcmk__notice("Since the pcmk_status_timeout (%ds) parameter of %s "
2300 "is larger than " PCMK_OPT_FENCING_TIMEOUT " (%ds), "
2301 "timeout may occur",
2302 device_timeout, dev_id, search->per_device_timeout);
2303 }
2304
2305 pcmk__trace("Running '%s' to check whether %s is eligible to fence %s "
2306 "(%s)",
2307 check_type, dev_id, target, action);
2308 schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
2309 search->per_device_timeout, search, status_search_cb);
2310 /* we'll respond to this search request async in the cb */
2311 return;
2312 } else {
2313 pcmk__err("Invalid value for " PCMK_FENCING_HOST_CHECK ": %s",
2314 check_type);
2315 check_type = "Invalid " PCMK_FENCING_HOST_CHECK;
2316 }
2317
2318 search_report_results:
2319 pcmk__info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
2320 dev_id, (can? "" : " not"),
2321 pcmk__s(action, "unspecified action"),
2322 pcmk__s(target, "unspecified target"),
2323 ((alias != NULL)? " (as '" : ""), pcmk__s(alias, ""),
2324 ((alias != NULL)? "')" : ""), check_type);
2325 search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
2326 }
2327
2328 static void
2329 search_devices(gpointer key, gpointer value, gpointer user_data)
2330 {
2331 fenced_device_t *dev = value;
2332 struct device_search_s *search = user_data;
2333
2334 can_fence_host_with_device(dev, search);
2335 }
2336
2337 #define DEFAULT_QUERY_TIMEOUT 20
2338 static void
2339 get_capable_devices(const char *host, const char *action, int timeout,
2340 bool allow_self, void *user_data,
2341 void (*callback) (GList * devices, void *user_data),
2342 uint32_t support_action_only)
2343 {
2344 struct device_search_s *search;
2345 guint ndevices = g_hash_table_size(device_table);
2346
2347 if (ndevices == 0) {
2348 callback(NULL, user_data);
2349 return;
2350 }
2351
2352 search = pcmk__assert_alloc(1, sizeof(struct device_search_s));
2353
2354 search->host = pcmk__str_copy(host);
2355 search->action = pcmk__str_copy(action);
2356 search->per_device_timeout = timeout;
2357 search->allow_self = allow_self;
2358 search->callback = callback;
2359 search->user_data = user_data;
2360 search->support_action_only = support_action_only;
2361
2362 /* We are guaranteed this many replies, even if a device is
2363 * unregistered while the search is in progress.
2364 */
2365 search->replies_needed = ndevices;
2366
2367 pcmk__debug("Searching %d device%s to see which can execute '%s' "
2368 "targeting %s", ndevices, pcmk__plural_s(ndevices),
2369 pcmk__s(search->action, "unknown action"),
2370 pcmk__s(search->host, "any node"));
2371 fenced_foreach_device(search_devices, search);
2372 }
2373
/* State carried through a device query until its reply is sent. The query
 * callback frees every pointer member and the struct itself when done.
 */
struct st_query_data {
    xmlNode *reply;     // Reply XML that query results get packed into
    char *remote_peer;  // Peer node to send the reply to; NULL for a local reply
    char *client_id;    // ID used to look up the requesting client, if any
    char *target;       // Fence target; NULL means device parameters are wanted
    char *action;       // Fence action being queried
    int call_options;   // Call options (checked for st_opt_allow_self_fencing)
};
2382
2383 /*!
2384 * \internal
2385 * \brief Add action-specific attributes to query reply XML
2386 *
2387 * \param[in,out] xml XML to add attributes to
2388 * \param[in] action Fence action
2389 * \param[in] device Fence device
2390 * \param[in] target Fence target
2391 */
2392 static void
2393 add_action_specific_attributes(xmlNode *xml, const char *action,
2394 const fenced_device_t *device,
2395 const char *target)
2396 {
2397 int action_specific_timeout;
2398 int delay_max;
2399 int delay_base;
2400
2401 CRM_CHECK(xml && action && device, return);
2402
2403 // PCMK__XA_ST_REQUIRED is currently used only for unfencing
2404 if (is_action_required(action, device)) {
2405 pcmk__trace("Action '%s' is required using %s", action, device->id);
2406 pcmk__xe_set_int(xml, PCMK__XA_ST_REQUIRED, 1);
2407 }
2408
2409 // pcmk_<action>_timeout if configured
2410 action_specific_timeout = get_action_timeout(device, action, 0);
2411 if (action_specific_timeout != 0) {
2412 pcmk__trace("Action '%s' has timeout %ds using %s",
2413 action, action_specific_timeout, device->id);
2414 pcmk__xe_set_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
2415 action_specific_timeout);
2416 }
2417
2418 delay_max = get_action_delay_max(device, action);
2419 if (delay_max > 0) {
2420 pcmk__trace("Action '%s' has maximum random delay %ds using %s", action,
2421 delay_max, device->id);
2422 pcmk__xe_set_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
2423 }
2424
2425 delay_base = get_action_delay_base(device, action, target);
2426 if (delay_base > 0) {
2427 pcmk__xe_set_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
2428 }
2429
2430 if ((delay_max > 0) && (delay_base == 0)) {
2431 pcmk__trace("Action '%s' has maximum random delay %ds using %s", action,
2432 delay_max, device->id);
2433 } else if ((delay_max == 0) && (delay_base > 0)) {
2434 pcmk__trace("Action '%s' has a static delay of %ds using %s", action,
2435 delay_base, device->id);
2436 } else if ((delay_max > 0) && (delay_base > 0)) {
2437 pcmk__trace("Action '%s' has a minimum delay of %ds and a randomly "
2438 "chosen maximum delay of %ds using %s",
2439 action, delay_base, delay_max, device->id);
2440 }
2441 }
2442
2443 /*!
2444 * \internal
2445 * \brief Add "disallowed" attribute to query reply XML if appropriate
2446 *
2447 * \param[in,out] xml XML to add attribute to
2448 * \param[in] action Fence action
2449 * \param[in] device Fence device
2450 * \param[in] target Fence target
2451 * \param[in] allow_self Whether self-fencing is allowed
2452 */
2453 static void
2454 add_disallowed(xmlNode *xml, const char *action, const fenced_device_t *device,
2455 const char *target, bool allow_self)
2456 {
2457 if (localhost_is_eligible(device, action, target, allow_self)) {
2458 return;
2459 }
2460
2461 pcmk__trace("Action '%s' using %s is disallowed for local host", action,
2462 device->id);
2463 pcmk__xe_set_bool(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
2464 }
2465
2466 /*!
2467 * \internal
2468 * \brief Add child element with action-specific values to query reply XML
2469 *
2470 * \param[in,out] xml XML to add attribute to
2471 * \param[in] action Fence action
2472 * \param[in] device Fence device
2473 * \param[in] target Fence target
2474 * \param[in] allow_self Whether self-fencing is allowed
2475 */
2476 static void
2477 add_action_reply(xmlNode *xml, const char *action,
2478 const fenced_device_t *device, const char *target,
2479 bool allow_self)
2480 {
2481 xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION);
2482
2483 pcmk__xe_set(child, PCMK_XA_ID, action);
2484 add_action_specific_attributes(child, action, device, target);
2485 add_disallowed(child, action, device, target, allow_self);
2486 }
2487
2488 /*!
2489 * \internal
2490 * \brief Send a reply to a CPG peer or IPC client
2491 *
2492 * \param[in] reply XML reply to send
2493 * \param[in] call_options Send synchronously if st_opt_sync_call is set
2494 * \param[in] remote_peer If not NULL, name of peer node to send CPG reply
2495 * \param[in,out] client If not NULL, client to send IPC reply
2496 */
2497 static void
2498 stonith_send_reply(const xmlNode *reply, int call_options,
2499 const char *remote_peer, pcmk__client_t *client)
2500 {
2501 const pcmk__node_status_t *node = NULL;
2502
2503 CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
2504 return);
2505
2506 if (remote_peer == NULL) {
2507 do_local_reply(reply, client, call_options);
2508 return;
2509 }
2510
2511 node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member);
2512 pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply);
2513 }
2514
2515 static void
2516 stonith_query_capable_device_cb(GList * devices, void *user_data)
2517 {
2518 struct st_query_data *query = user_data;
2519 int available_devices = 0;
2520 xmlNode *wrapper = NULL;
2521 xmlNode *list = NULL;
2522 pcmk__client_t *client = NULL;
2523
2524 if (query->client_id != NULL) {
2525 client = pcmk__find_client_by_id(query->client_id);
2526 if ((client == NULL) && (query->remote_peer == NULL)) {
2527 pcmk__trace("Skipping reply to %s: no longer a client",
2528 query->client_id);
2529 goto done;
2530 }
2531 }
2532
2533 // Pack the results into XML
2534 wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
2535 list = pcmk__xe_create(wrapper, __func__);
2536 pcmk__xe_set(list, PCMK__XA_ST_TARGET, query->target);
2537
2538 for (const GList *iter = devices; iter != NULL; iter = iter->next) {
2539 fenced_device_t *device = g_hash_table_lookup(device_table, iter->data);
2540 const char *action = query->action;
2541 xmlNode *dev = NULL;
2542
2543 if (device == NULL) {
2544 /* It is possible the device got unregistered while
2545 * determining who can fence the target */
2546 continue;
2547 }
2548
2549 available_devices++;
2550
2551 dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
2552 pcmk__xe_set(dev, PCMK_XA_ID, device->id);
2553 pcmk__xe_set(dev, PCMK__XA_NAMESPACE, device->namespace);
2554 pcmk__xe_set(dev, PCMK_XA_AGENT, device->agent);
2555
2556 // Has had successful monitor, list, or status on this node
2557 pcmk__xe_set_int(dev, PCMK__XA_ST_MONITOR_VERIFIED,
2558 pcmk__is_set(device->flags, fenced_df_verified));
2559
2560 pcmk__xe_set_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);
2561
2562 /* If the originating fencer wants to reboot the node, and we have a
2563 * capable device that doesn't support "reboot", remap to "off" instead.
2564 */
2565 if (!pcmk__is_set(device->flags, fenced_df_supports_reboot)
2566 && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
2567 pcmk__str_none)) {
2568 pcmk__trace("%s doesn't support reboot, using values for off "
2569 "instead",
2570 device->id);
2571 action = PCMK_ACTION_OFF;
2572 }
2573
2574 /* Add action-specific values if available */
2575 add_action_specific_attributes(dev, action, device, query->target);
2576 if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
2577 /* A "reboot" *might* get remapped to "off" then "on", so after
2578 * sending the "reboot"-specific values in the main element, we add
2579 * sub-elements for "off" and "on" values.
2580 *
2581 * We short-circuited earlier if "reboot", "off" and "on" are all
2582 * disallowed for the local host. However if only one or two are
2583 * disallowed, we send back the results and mark which ones are
2584 * disallowed. If "reboot" is disallowed, this might cause problems
2585 * with older fencer versions, which won't check for it. Older
2586 * versions will ignore "off" and "on", so they are not a problem.
2587 */
2588 add_disallowed(dev, action, device, query->target,
2589 pcmk__is_set(query->call_options,
2590 st_opt_allow_self_fencing));
2591 add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
2592 pcmk__is_set(query->call_options,
2593 st_opt_allow_self_fencing));
2594 add_action_reply(dev, PCMK_ACTION_ON, device, query->target, false);
2595 }
2596
2597 /* A query without a target wants device parameters */
2598 if (query->target == NULL) {
2599 xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);
2600
2601 g_hash_table_foreach(device->params, hash2field, attrs);
2602 }
2603 }
2604
2605 pcmk__xe_set_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
2606 if (query->target != NULL) {
2607 pcmk__debug("Found %d matching device%s for target '%s'",
2608 available_devices, pcmk__plural_s(available_devices),
2609 query->target);
2610 } else {
2611 pcmk__debug("%d device%s installed", available_devices,
2612 pcmk__plural_s(available_devices));
2613 }
2614
2615 pcmk__log_xml_trace(list, "query-result");
2616
2617 stonith_send_reply(query->reply, query->call_options, query->remote_peer,
2618 client);
2619
2620 done:
2621 pcmk__xml_free(query->reply);
2622 free(query->remote_peer);
2623 free(query->client_id);
2624 free(query->target);
2625 free(query->action);
2626 free(query);
2627 g_list_free_full(devices, free);
2628 }
2629
2630 /*!
2631 * \internal
2632 * \brief Log the result of an asynchronous command
2633 *
2634 * \param[in] cmd Command the result is for
2635 * \param[in] result Result of command
2636 * \param[in] pid Process ID of command, if available
2637 * \param[in] next Alternate device that will be tried if command failed
2638 * \param[in] op_merged Whether this command was merged with an earlier one
2639 */
2640 static void
2641 log_async_result(const async_command_t *cmd,
2642 const pcmk__action_result_t *result,
2643 int pid, const char *next, bool op_merged)
2644 {
2645 int log_level = LOG_ERR;
2646 int output_log_level = PCMK__LOG_NEVER;
2647 guint devices_remaining = g_list_length(cmd->next_device_iter);
2648
2649 GString *msg = g_string_sized_new(80); // Reasonable starting size
2650
2651 // Choose log levels appropriately if we have a result
2652 if (pcmk__result_ok(result)) {
2653 log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
2654 if ((result->action_stdout != NULL)
2655 && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
2656 pcmk__str_none)) {
2657 output_log_level = LOG_DEBUG;
2658 }
2659 next = NULL;
2660 } else {
2661 log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
2662 if ((result->action_stdout != NULL)
2663 && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
2664 pcmk__str_none)) {
2665 output_log_level = LOG_WARNING;
2666 }
2667 }
2668
2669 // Build the log message piece by piece
2670 pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
2671 if (pid != 0) {
2672 g_string_append_printf(msg, "[%d] ", pid);
2673 }
2674 if (cmd->target != NULL) {
2675 pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
2676 }
2677 if (cmd->device != NULL) {
2678 pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
2679 }
2680
2681 // Add exit status or execution status as appropriate
2682 if (result->execution_status == PCMK_EXEC_DONE) {
2683 g_string_append_printf(msg, "returned %d", result->exit_status);
2684 } else {
2685 pcmk__g_strcat(msg, "could not be executed: ",
2686 pcmk_exec_status_str(result->execution_status), NULL);
2687 }
2688
2689 // Add exit reason and next device if appropriate
2690 if (result->exit_reason != NULL) {
2691 pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
2692 }
2693 if (next != NULL) {
2694 pcmk__g_strcat(msg, ", retrying with ", next, NULL);
2695 }
2696 if (devices_remaining > 0) {
2697 g_string_append_printf(msg, " (%u device%s remaining)",
2698 (unsigned int) devices_remaining,
2699 pcmk__plural_s(devices_remaining));
2700 }
2701 g_string_append_printf(msg, " " QB_XS " %scall %d from %s",
2702 (op_merged? "merged " : ""), cmd->id,
2703 cmd->client_name);
2704
2705 // Log the result
2706 do_crm_log(log_level, "%s", msg->str);
2707 g_string_free(msg, TRUE);
2708
2709 // Log the output (which may have multiple lines), if appropriate
2710 if (output_log_level != PCMK__LOG_NEVER) {
2711 char *prefix = pcmk__assert_asprintf("%s[%d]", cmd->device, pid);
2712
2713 crm_log_output(output_log_level, prefix, result->action_stdout);
2714 free(prefix);
2715 }
2716 }
2717
2718 /*!
2719 * \internal
2720 * \brief Reply to requester after asynchronous command completion
2721 *
2722 * \param[in] cmd Command that completed
2723 * \param[in] result Result of command
2724 * \param[in] pid Process ID of command, if available
2725 * \param[in] merged If true, command was merged with another, not executed
2726 */
2727 static void
2728 send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
2729 int pid, bool merged)
2730 {
2731 xmlNode *reply = NULL;
2732 pcmk__client_t *client = NULL;
2733
2734 CRM_CHECK((cmd != NULL) && (result != NULL), return);
2735
2736 log_async_result(cmd, result, pid, NULL, merged);
2737
2738 if (cmd->client != NULL) {
2739 client = pcmk__find_client_by_id(cmd->client);
2740 if ((client == NULL) && (cmd->origin == NULL)) {
2741 pcmk__trace("Skipping reply to %s: no longer a client",
2742 cmd->client);
2743 return;
2744 }
2745 }
2746
2747 reply = construct_async_reply(cmd, result);
2748 if (merged) {
2749 pcmk__xe_set_bool(reply, PCMK__XA_ST_OP_MERGED, true);
2750 }
2751
2752 if (pcmk__is_fencing_action(cmd->action)
2753 && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
2754 /* The target was also the originator, so broadcast the result on its
2755 * behalf (since it will be unable to).
2756 */
2757 pcmk__trace("Broadcast '%s' result for %s (target was also originator)",
2758 cmd->action, cmd->target);
2759 pcmk__xe_set(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
2760 pcmk__xe_set(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
2761 pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply);
2762 } else {
2763 // Reply only to the originator
2764 stonith_send_reply(reply, cmd->options, cmd->origin, client);
2765 }
2766
2767 pcmk__log_xml_trace(reply, "Reply");
2768 pcmk__xml_free(reply);
2769 }
2770
2771 static void
2772 cancel_stonith_command(async_command_t * cmd)
2773 {
2774 fenced_device_t *device = cmd_device(cmd);
2775
2776 if (device == NULL) {
2777 return;
2778 }
2779
2780 pcmk__trace("Cancel scheduled '%s' action using %s", cmd->action,
2781 device->id);
2782 device->pending_ops = g_list_remove(device->pending_ops, cmd);
2783 }
2784
2785 /*!
2786 * \internal
2787 * \brief Cancel and reply to any duplicates of a just-completed operation
2788 *
2789 * Check whether any fencing operations are scheduled to do the same thing as
2790 * one that just succeeded. If so, rather than performing the same operation
2791 * twice, return the result of this operation for all matching pending commands.
2792 *
2793 * \param[in,out] cmd Fencing operation that just succeeded
2794 * \param[in] result Result of \p cmd
2795 * \param[in] pid If nonzero, process ID of agent invocation (for logs)
2796 *
2797 * \note Duplicate merging will do the right thing for either type of remapped
2798 * reboot. If the executing fencer remapped an unsupported reboot to off,
2799 * then cmd->action will be "reboot" and will be merged with any other
2800 * reboot requests. If the originating fencer remapped a topology reboot
2801 * to off then on, we will get here once with cmd->action "off" and once
2802 * with "on", and they will be merged separately with similar requests.
2803 */
2804 static void
2805 reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
2806 int pid)
2807 {
2808 GList *next = NULL;
2809
2810 for (GList *iter = cmd_list; iter != NULL; iter = next) {
2811 async_command_t *cmd_other = iter->data;
2812
2813 next = iter->next; // We might delete this entry, so grab next now
2814
2815 if (cmd == cmd_other) {
2816 continue;
2817 }
2818
2819 /* A pending operation matches if:
2820 * 1. The client connections are different.
2821 * 2. The target is the same.
2822 * 3. The fencing action is the same.
2823 * 4. The device scheduled to execute the action is the same.
2824 */
2825 if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
2826 !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
2827 !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
2828 !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
2829
2830 continue;
2831 }
2832
2833 pcmk__notice("Merging fencing action '%s'%s%s originating from client "
2834 "%s with identical fencing request from client %s",
2835 cmd_other->action,
2836 (cmd_other->target == NULL)? "" : " targeting ",
2837 pcmk__s(cmd_other->target, ""), cmd_other->client_name,
2838 cmd->client_name);
2839
2840 // Stop tracking the duplicate, send its result, and cancel it
2841 cmd_list = g_list_remove_link(cmd_list, iter);
2842 send_async_reply(cmd_other, result, pid, true);
2843 cancel_stonith_command(cmd_other);
2844
2845 free_async_command(cmd_other);
2846 g_list_free_1(iter);
2847 }
2848 }
2849
2850 /*!
2851 * \internal
2852 * \brief Return the next required device (if any) for an operation
2853 *
2854 * \param[in,out] cmd Fencing operation that just succeeded
2855 *
2856 * \return Next device required for action if any, otherwise NULL
2857 */
2858 static fenced_device_t *
2859 next_required_device(async_command_t *cmd)
2860 {
2861 for (const GList *iter = cmd->next_device_iter; iter != NULL;
2862 iter = iter->next) {
2863 fenced_device_t *next_device = g_hash_table_lookup(device_table,
2864 iter->data);
2865
2866 if (!is_action_required(cmd->action, next_device)) {
2867 continue;
2868 }
2869
2870 /* This is only called for successful actions, so it's OK to skip
2871 * non-required devices.
2872 */
2873 cmd->next_device_iter = iter->next;
2874 return next_device;
2875 }
2876
2877 return NULL;
2878 }
2879
2880 static void
2881 st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
2882 {
2883 async_command_t *cmd = user_data;
2884
2885 fenced_device_t *device = NULL;
2886 fenced_device_t *next_device = NULL;
2887
2888 CRM_CHECK(cmd != NULL, return);
2889
2890 device = cmd_device(cmd);
2891 cmd->active_on = NULL;
2892
2893 /* The device is ready to do something else now */
2894 if (device != NULL) {
2895 if (!pcmk__is_set(device->flags, fenced_df_verified)
2896 && pcmk__result_ok(result)
2897 && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
2898 PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
2899 NULL)) {
2900
2901 fenced_device_set_flags(device, fenced_df_verified);
2902 }
2903
2904 mainloop_set_trigger(device->work);
2905 }
2906
2907 if (pcmk__result_ok(result)) {
2908 next_device = next_required_device(cmd);
2909
2910 } else if ((cmd->next_device_iter != NULL)
2911 && !is_action_required(cmd->action, device)) {
2912 /* if this device didn't work out, see if there are any others we can try.
2913 * if the failed device was 'required', we can't pick another device. */
2914 next_device = g_hash_table_lookup(device_table,
2915 cmd->next_device_iter->data);
2916 cmd->next_device_iter = cmd->next_device_iter->next;
2917 }
2918
2919 if (next_device == NULL) {
2920 send_async_reply(cmd, result, pid, false);
2921 if (pcmk__result_ok(result)) {
2922 reply_to_duplicates(cmd, result, pid);
2923 }
2924 free_async_command(cmd);
2925
2926 } else { // This operation requires more fencing
2927 log_async_result(cmd, result, pid, next_device->id, false);
2928 schedule_stonith_command(cmd, next_device);
2929 }
2930 }
2931
2932 static void
2933 stonith_fence_get_devices_cb(GList * devices, void *user_data)
2934 {
2935 async_command_t *cmd = user_data;
2936 fenced_device_t *device = NULL;
2937 guint ndevices = g_list_length(devices);
2938
2939 pcmk__info("Found %d matching device%s for target '%s'", ndevices,
2940 pcmk__plural_s(ndevices), cmd->target);
2941
2942 if (devices != NULL) {
2943 device = g_hash_table_lookup(device_table, devices->data);
2944 }
2945
2946 if (device == NULL) { // No device found
2947 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2948
2949 pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2950 "No device configured for target '%s'",
2951 cmd->target);
2952 send_async_reply(cmd, &result, 0, false);
2953 pcmk__reset_result(&result);
2954 free_async_command(cmd);
2955 g_list_free_full(devices, free);
2956
2957 } else {
2958 /* Device found. Schedule a fencing command for it.
2959 *
2960 * Assign devices to device_list so that it will be freed with cmd.
2961 */
2962 cmd->device_list = devices;
2963 cmd->next_device_iter = devices->next;
2964 schedule_stonith_command(cmd, device);
2965 }
2966 }
2967
2968 /*!
2969 * \internal
2970 * \brief Execute a fence action via the local node
2971 *
2972 * \param[in] msg Fencing request
2973 * \param[out] result Where to store result of fence action
2974 */
2975 static void
2976 fence_locally(xmlNode *msg, pcmk__action_result_t *result)
2977 {
2978 const char *device_id = NULL;
2979 fenced_device_t *device = NULL;
2980 async_command_t *cmd = NULL;
2981 xmlNode *dev = NULL;
2982
2983 CRM_CHECK((msg != NULL) && (result != NULL), return);
2984
2985 dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]",
2986 LOG_ERR);
2987
2988 cmd = create_async_command(msg);
2989 if (cmd == NULL) {
2990 pcmk__log_xml_warn(msg, "invalid");
2991 set_bad_request_result(result);
2992 return;
2993 }
2994
2995 device_id = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
2996 if (device_id != NULL) {
2997 device = g_hash_table_lookup(device_table, device_id);
2998 if (device == NULL) {
2999 pcmk__err("Requested device '%s' is not available", device_id);
3000 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
3001 "Requested device '%s' not found", device_id);
3002 return;
3003 }
3004 schedule_stonith_command(cmd, device);
3005
3006 } else {
3007 const char *host = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3008
3009 if (pcmk__is_set(cmd->options, st_opt_cs_nodeid)) {
3010 int nodeid = 0;
3011 pcmk__node_status_t *node = NULL;
3012
3013 pcmk__scan_min_int(host, &nodeid, 0);
3014 node = pcmk__search_node_caches(nodeid, NULL, NULL,
3015 pcmk__node_search_any
3016 |pcmk__node_search_cluster_cib);
3017 if (node != NULL) {
3018 host = node->name;
3019 }
3020 }
3021
3022 /* If we get to here, then self-fencing is implicitly allowed */
3023 get_capable_devices(host, cmd->action, cmd->default_timeout,
3024 TRUE, cmd, stonith_fence_get_devices_cb,
3025 fenced_support_flag(cmd->action));
3026 }
3027
3028 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3029 }
3030
3031 /*!
3032 * \internal
3033 * \brief Build an XML reply for a fencing operation
3034 *
3035 * \param[in] request Request that reply is for
3036 * \param[in] data If not NULL, add to reply as call data
3037 * \param[in] result Full result of fencing operation
3038 *
3039 * \return Newly created XML reply
3040 * \note The caller is responsible for freeing the result.
3041 * \note This has some overlap with construct_async_reply(), but that copies
3042 * values from an async_command_t, whereas this one copies them from the
3043 * request.
3044 */
3045 xmlNode *
3046 fenced_construct_reply(const xmlNode *request, xmlNode *data,
3047 const pcmk__action_result_t *result)
3048 {
3049 xmlNode *reply = NULL;
3050
3051 reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
3052
3053 pcmk__xe_set(reply, PCMK__XA_ST_ORIGIN, __func__);
3054 pcmk__xe_set(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
3055 stonith__xe_set_result(reply, result);
3056
3057 if (request == NULL) {
3058 /* Most likely, this is the result of a stonith operation that was
3059 * initiated before we came up. Unfortunately that means we lack enough
3060 * information to provide clients with a full result.
3061 *
3062 * @TODO Maybe synchronize this information at start-up?
3063 */
3064 pcmk__warn("Missing request information for client notifications for "
3065 "operation with result '%s' (initiated before we came up?)",
3066 pcmk_exec_status_str(result->execution_status));
3067
3068 } else {
3069 const char *name = NULL;
3070 const char *value = NULL;
3071
3072 // Attributes to copy from request to reply
3073 const char *names[] = {
3074 PCMK__XA_ST_OP,
3075 PCMK__XA_ST_CALLID,
3076 PCMK__XA_ST_CLIENTID,
3077 PCMK__XA_ST_CLIENTNAME,
3078 PCMK__XA_ST_REMOTE_OP,
3079 PCMK__XA_ST_CALLOPT,
3080 };
3081
3082 for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
3083 name = names[lpc];
3084 value = pcmk__xe_get(request, name);
3085 pcmk__xe_set(reply, name, value);
3086 }
3087 if (data != NULL) {
3088 xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA);
3089
3090 pcmk__xml_copy(wrapper, data);
3091 }
3092 }
3093 return reply;
3094 }
3095
3096 /*!
3097 * \internal
3098 * \brief Build an XML reply to an asynchronous fencing command
3099 *
3100 * \param[in] cmd Fencing command that reply is for
3101 * \param[in] result Command result
3102 */
3103 static xmlNode *
3104 construct_async_reply(const async_command_t *cmd,
3105 const pcmk__action_result_t *result)
3106 {
3107 xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
3108
3109 pcmk__xe_set(reply, PCMK__XA_ST_ORIGIN, __func__);
3110 pcmk__xe_set(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
3111 pcmk__xe_set(reply, PCMK__XA_ST_OP, cmd->op);
3112 pcmk__xe_set(reply, PCMK__XA_ST_DEVICE_ID, cmd->device);
3113 pcmk__xe_set(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
3114 pcmk__xe_set(reply, PCMK__XA_ST_CLIENTID, cmd->client);
3115 pcmk__xe_set(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name);
3116 pcmk__xe_set(reply, PCMK__XA_ST_TARGET, cmd->target);
3117 pcmk__xe_set(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op);
3118 pcmk__xe_set(reply, PCMK__XA_ST_ORIGIN, cmd->origin);
3119 pcmk__xe_set_int(reply, PCMK__XA_ST_CALLID, cmd->id);
3120 pcmk__xe_set_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);
3121
3122 stonith__xe_set_result(reply, result);
3123 return reply;
3124 }
3125
3126 bool
3127 fencing_peer_active(pcmk__node_status_t *peer)
3128 {
3129 return (peer != NULL) && (peer->name != NULL)
3130 && pcmk__is_set(peer->processes, crm_get_cluster_proc());
3131 }
3132
3133 void
3134 set_fencing_completed(remote_fencing_op_t *op)
3135 {
3136 struct timespec tv;
3137
3138 qb_util_timespec_from_epoch_get(&tv);
3139 op->completed = tv.tv_sec;
3140 op->completed_nsec = tv.tv_nsec;
3141 }
3142
3143 /*!
3144 * \internal
3145 * \brief Look for alternate node needed if local node shouldn't fence target
3146 *
3147 * \param[in] target Node that must be fenced
3148 *
3149 * \return Name of an alternate node that should fence \p target if any,
3150 * or NULL otherwise
3151 */
3152 static const char *
3153 check_alternate_host(const char *target)
3154 {
3155 GHashTableIter gIter;
3156 pcmk__node_status_t *entry = NULL;
3157
3158 if (!pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
3159 return NULL;
3160 }
3161
3162 g_hash_table_iter_init(&gIter, pcmk__peer_cache);
3163 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
3164 if (!fencing_peer_active(entry)
3165 || pcmk__str_eq(entry->name, target, pcmk__str_casei)) {
3166 continue;
3167 }
3168
3169 pcmk__notice("Forwarding self-fencing request to %s", entry->name);
3170 return entry->name;
3171 }
3172
3173 pcmk__warn("Will handle own fencing because no peer can");
3174 return NULL;
3175 }
3176
3177 static void
3178 remove_relay_op(xmlNode * request)
3179 {
3180 xmlNode *dev = pcmk__xpath_find_one(request->doc,
3181 "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
3182 LOG_TRACE);
3183 const char *relay_op_id = NULL;
3184 const char *op_id = NULL;
3185 const char *client_name = NULL;
3186 const char *target = NULL;
3187 remote_fencing_op_t *relay_op = NULL;
3188 remote_fencing_op_t *list_op = NULL;
3189 GHashTableIter iter;
3190
3191 if (dev != NULL) {
3192 target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3193 }
3194
3195 relay_op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP_RELAY);
3196 op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP);
3197 client_name = pcmk__xe_get(request, PCMK__XA_ST_CLIENTNAME);
3198
3199 if ((relay_op_id == NULL) || (target == NULL)
3200 || !pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
3201 return;
3202 }
3203
3204 /* Delete RELAY operation. */
3205 relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
3206
3207 if (relay_op == NULL) {
3208 return;
3209 }
3210
3211 g_hash_table_iter_init(&iter, stonith_remote_op_list);
3212
3213 /* If the operation to be deleted is registered as a duplicate, delete the registration. */
3214 while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
3215 if (list_op == relay_op) {
3216 continue;
3217 }
3218
3219 for (GList *dup_iter = list_op->duplicates; dup_iter != NULL;
3220 dup_iter = dup_iter->next) {
3221 remote_fencing_op_t *other = dup_iter->data;
3222
3223 if (other != relay_op) {
3224 continue;
3225 }
3226
3227 other->duplicates = g_list_remove(other->duplicates, relay_op);
3228 break;
3229 }
3230 }
3231
3232 pcmk__debug("Deleting relay op %s ('%s'%s%s for %s), "
3233 "replaced by op %s ('%s'%s%s for %s)",
3234 relay_op->id, relay_op->action,
3235 (relay_op->target == NULL)? "" : " targeting ",
3236 pcmk__s(relay_op->target, ""),
3237 relay_op->client_name, op_id, relay_op->action,
3238 (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
3239 client_name);
3240
3241 g_hash_table_remove(stonith_remote_op_list, relay_op_id);
3242 }
3243
3244 /*!
3245 * \internal
3246 * \brief Check whether an API request was sent by a privileged user
3247 *
3248 * API commands related to fencing configuration may be done only by privileged
3249 * IPC users (i.e. root or hacluster), because all other users should go through
3250 * the CIB to have ACLs applied. If no client was given, this is a peer request,
3251 * which is always allowed.
3252 *
3253 * \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
3254 * \param[in] op Requested API operation (for logging only)
3255 *
3256 * \return true if sender is peer or privileged client, otherwise false
3257 */
3258 static inline bool
3259 is_privileged(const pcmk__client_t *c, const char *op)
3260 {
3261 if ((c == NULL) || pcmk__is_set(c->flags, pcmk__client_privileged)) {
3262 return true;
3263 }
3264
3265 pcmk__warn("Rejecting IPC request '%s' from unprivileged client %s",
3266 pcmk__s(op, ""), pcmk__client_name(c));
3267 return false;
3268 }
3269
3270 static xmlNode *
3271 handle_unknown_request(pcmk__request_t *request)
3272 {
3273 pcmk__err("Unknown %s request %s from %s %s",
3274 (request->ipc_client != NULL) ? "IPC" : "CPG",
3275 request->op, pcmk__request_origin_type(request),
3276 pcmk__request_origin(request));
3277 pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
3278 "Unknown request type '%s' (bug?)",
3279 pcmk__s(request->op, ""));
3280 return fenced_construct_reply(request->xml, NULL, &request->result);
3281 }
3282
3283 // CRM_OP_REGISTER
3284 static xmlNode *
3285 handle_register_request(pcmk__request_t *request)
3286 {
3287 xmlNode *reply = NULL;
3288
3289 if (request->peer != NULL) {
3290 return handle_unknown_request(request);
3291 }
3292
3293 reply = pcmk__xe_create(NULL, "reply");
3294 pcmk__xe_set(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER);
3295 pcmk__xe_set(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id);
3296 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3297 pcmk__set_request_flags(request, pcmk__request_reuse_options);
3298 return reply;
3299 }
3300
3301 // STONITH_OP_EXEC
3302 static xmlNode *
3303 handle_agent_request(pcmk__request_t *request)
3304 {
3305 execute_agent_action(request->xml, &request->result);
3306 if (request->result.execution_status == PCMK_EXEC_PENDING) {
3307 return NULL;
3308 }
3309 return fenced_construct_reply(request->xml, NULL, &request->result);
3310 }
3311
3312 // STONITH_OP_TIMEOUT_UPDATE
3313 static xmlNode *
3314 handle_update_timeout_request(pcmk__request_t *request)
3315 {
3316 const char *call_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CALLID);
3317 const char *client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID);
3318 int op_timeout = 0;
3319
3320 pcmk__xe_get_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout);
3321 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
3322 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3323 return NULL;
3324 }
3325
3326 // STONITH_OP_QUERY
3327 static xmlNode *
3328 handle_query_request(pcmk__request_t *request)
3329 {
3330 int timeout = 0;
3331 xmlNode *dev = NULL;
3332 const char *action = NULL;
3333 const char *target = NULL;
3334 const char *client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID);
3335 struct st_query_data *query = NULL;
3336
3337 if (request->peer != NULL) {
3338 // Record it for the future notification
3339 create_remote_stonith_op(client_id, request->xml, TRUE);
3340 }
3341
3342 /* Delete the DC node RELAY operation. */
3343 remove_relay_op(request->xml);
3344
3345 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3346
3347 dev = pcmk__xpath_find_one(request->xml->doc,
3348 "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
3349 PCMK__LOG_NEVER);
3350 if (dev != NULL) {
3351 const char *device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
3352
3353 if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
3354 return NULL; // No query or reply necessary
3355 }
3356 target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3357 action = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION);
3358 }
3359
3360 pcmk__log_xml_trace(request->xml, "Query");
3361
3362 query = pcmk__assert_alloc(1, sizeof(struct st_query_data));
3363
3364 query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
3365 query->remote_peer = pcmk__str_copy(request->peer);
3366 query->client_id = pcmk__str_copy(client_id);
3367 query->target = pcmk__str_copy(target);
3368 query->action = pcmk__str_copy(action);
3369 query->call_options = request->call_options;
3370
3371 pcmk__xe_get_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
3372 get_capable_devices(target, action, timeout,
3373 pcmk__is_set(query->call_options,
3374 st_opt_allow_self_fencing),
3375 query, stonith_query_capable_device_cb, fenced_df_none);
3376 return NULL;
3377 }
3378
3379 // STONITH_OP_NOTIFY
3380 static xmlNode *
3381 handle_notify_request(pcmk__request_t *request)
3382 {
3383 const char *flag_name = NULL;
3384
3385 if (request->peer != NULL) {
3386 return handle_unknown_request(request);
3387 }
3388
3389 flag_name = pcmk__xe_get(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
3390 if (flag_name != NULL) {
3391 pcmk__debug("Enabling %s callbacks for client %s", flag_name,
3392 pcmk__request_origin(request));
3393 pcmk__set_client_flags(request->ipc_client,
3394 fenced_parse_notify_flag(flag_name));
3395 }
3396
3397 flag_name = pcmk__xe_get(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
3398 if (flag_name != NULL) {
3399 pcmk__debug("Disabling %s callbacks for client %s", flag_name,
3400 pcmk__request_origin(request));
3401 pcmk__clear_client_flags(request->ipc_client,
3402 fenced_parse_notify_flag(flag_name));
3403 }
3404
3405 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3406 pcmk__set_request_flags(request, pcmk__request_reuse_options);
3407
3408 return pcmk__ipc_create_ack(request->ipc_flags, NULL, CRM_EX_OK);
3409 }
3410
3411 // STONITH_OP_RELAY
3412 static xmlNode *
3413 handle_relay_request(pcmk__request_t *request)
3414 {
3415 xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
3416 "//*[@" PCMK__XA_ST_TARGET "]",
3417 LOG_TRACE);
3418
3419 pcmk__notice("Received forwarded fencing request from %s %s to fence (%s) "
3420 "peer %s",
3421 pcmk__request_origin_type(request),
3422 pcmk__request_origin(request),
3423 pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION),
3424 pcmk__xe_get(dev, PCMK__XA_ST_TARGET));
3425
3426 if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
3427 set_bad_request_result(&request->result);
3428 return fenced_construct_reply(request->xml, NULL, &request->result);
3429 }
3430
3431 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3432 return NULL;
3433 }
3434
3435 // STONITH_OP_FENCE
3436 static xmlNode *
3437 handle_fence_request(pcmk__request_t *request)
3438 {
3439 const char *alternate_host = NULL;
3440 xmlNode *dev = NULL;
3441 const char *target = NULL;
3442 const char *action = NULL;
3443 const char *device = NULL;
3444
3445 if (request->peer != NULL) {
3446 fence_locally(request->xml, &request->result);
3447 goto done;
3448 }
3449
3450 if (pcmk__is_set(request->call_options, st_opt_manual_ack)) {
3451 int rc = fenced_handle_manual_confirmation(request->ipc_client,
3452 request->xml);
3453
3454 if (rc == pcmk_rc_ok) {
3455 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3456 } else if (rc == EINPROGRESS) {
3457 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
3458 NULL);
3459 } else {
3460 set_bad_request_result(&request->result);
3461 }
3462
3463 goto done;
3464 }
3465
3466 dev = pcmk__xpath_find_one(request->xml->doc,
3467 "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE);
3468 target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3469 action = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION);
3470 device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
3471
3472 if (request->ipc_client != NULL) {
3473 int tolerance = 0;
3474
3475 pcmk__notice("Client %s wants to fence (%s) %s using %s",
3476 pcmk__request_origin(request), action, target,
3477 (device? device : "any device"));
3478 pcmk__xe_get_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
3479 if (stonith_check_fence_tolerance(tolerance, target, action)) {
3480 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3481 return fenced_construct_reply(request->xml, NULL, &request->result);
3482 }
3483 alternate_host = check_alternate_host(target);
3484
3485 } else {
3486 pcmk__notice("Peer %s wants to fence (%s) '%s' with device '%s'",
3487 request->peer, action, target,
3488 (device == NULL)? "(any)" : device);
3489 }
3490
3491 if (alternate_host != NULL) {
3492 const char *client_id = NULL;
3493 remote_fencing_op_t *op = NULL;
3494 pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL,
3495 pcmk__node_search_cluster_member);
3496
3497 if (request->ipc_client->id == 0) {
3498 client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID);
3499 } else {
3500 client_id = request->ipc_client->id;
3501 }
3502
3503 /* Create a duplicate fencing operation to relay with the client ID.
3504 * When a query response is received, this operation should be
3505 * deleted to avoid keeping the duplicate around.
3506 */
3507 op = create_remote_stonith_op(client_id, request->xml, FALSE);
3508
3509 pcmk__xe_set(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
3510 pcmk__xe_set(request->xml, PCMK__XA_ST_CLIENTID,
3511 request->ipc_client->id);
3512 pcmk__xe_set(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
3513
3514 // @TODO On failure, fail request immediately, or maybe panic
3515 pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml);
3516
3517 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3518
3519 } else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
3520 FALSE) == NULL) {
3521 set_bad_request_result(&request->result);
3522
3523 } else {
3524 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3525 }
3526
3527 done:
3528 if (request->result.execution_status == PCMK_EXEC_PENDING) {
3529 return NULL;
3530 }
3531
3532 return fenced_construct_reply(request->xml, NULL, &request->result);
3533 }
3534
3535 // STONITH_OP_FENCE_HISTORY
3536 static xmlNode *
3537 handle_history_request(pcmk__request_t *request)
3538 {
3539 xmlNode *reply = NULL;
3540 xmlNode *data = NULL;
3541
3542 stonith_fence_history(request->xml, &data, request->peer,
3543 request->call_options);
3544 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3545 if (!pcmk__is_set(request->call_options, st_opt_discard_reply)) {
3546 /* When the local node broadcasts its history, it sets
3547 * st_opt_discard_reply and doesn't need a reply.
3548 */
3549 reply = fenced_construct_reply(request->xml, data, &request->result);
3550 }
3551 pcmk__xml_free(data);
3552 return reply;
3553 }
3554
3555 // STONITH_OP_DEVICE_ADD
3556 static xmlNode *
3557 handle_device_add_request(pcmk__request_t *request)
3558 {
3559 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3560 xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
3561 "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR);
3562
3563 if (is_privileged(request->ipc_client, op)) {
3564 int rc = fenced_device_register(dev, false);
3565
3566 rc = pcmk_rc2legacy(rc);
3567 pcmk__set_result(&request->result,
3568 ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
3569 stonith__legacy2status(rc),
3570 ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
3571 } else {
3572 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3573 PCMK_EXEC_INVALID,
3574 "Unprivileged users must register device via CIB");
3575 }
3576 fenced_send_config_notification(op, &request->result,
3577 (dev == NULL)? NULL : pcmk__xe_id(dev));
3578 return fenced_construct_reply(request->xml, NULL, &request->result);
3579 }
3580
3581 // STONITH_OP_DEVICE_DEL
3582 static xmlNode *
3583 handle_device_delete_request(pcmk__request_t *request)
3584 {
3585 xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
3586 "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR);
3587 const char *device_id = pcmk__xe_get(dev, PCMK_XA_ID);
3588 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3589
3590 if (is_privileged(request->ipc_client, op)) {
3591 stonith_device_remove(device_id, false);
3592 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3593 } else {
3594 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3595 PCMK_EXEC_INVALID,
3596 "Unprivileged users must delete device via CIB");
3597 }
3598 fenced_send_config_notification(op, &request->result, device_id);
3599 return fenced_construct_reply(request->xml, NULL, &request->result);
3600 }
3601
3602 // STONITH_OP_LEVEL_ADD
3603 static xmlNode *
3604 handle_level_add_request(pcmk__request_t *request)
3605 {
3606 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3607
3608 if (is_privileged(request->ipc_client, op)) {
3609 fenced_register_level(request->xml, &request->result);
3610 } else {
3611 unpack_level_request(request->xml, NULL, NULL, NULL);
3612 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3613 PCMK_EXEC_INVALID,
3614 "Unprivileged users must add level via CIB");
3615 }
3616 return fenced_construct_reply(request->xml, NULL, &request->result);
3617 }
3618
3619 // STONITH_OP_LEVEL_DEL
3620 static xmlNode *
3621 handle_level_delete_request(pcmk__request_t *request)
3622 {
3623 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3624
3625 if (is_privileged(request->ipc_client, op)) {
3626 fenced_unregister_level(request->xml, &request->result);
3627 } else {
3628 unpack_level_request(request->xml, NULL, NULL, NULL);
3629 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3630 PCMK_EXEC_INVALID,
3631 "Unprivileged users must delete level via CIB");
3632 }
3633 return fenced_construct_reply(request->xml, NULL, &request->result);
3634 }
3635
3636 // CRM_OP_RM_NODE_CACHE
3637 static xmlNode *
3638 handle_cache_request(pcmk__request_t *request)
3639 {
3640 int node_id = 0;
3641 const char *name = NULL;
3642
3643 pcmk__xe_get_int(request->xml, PCMK_XA_ID, &node_id);
3644 name = pcmk__xe_get(request->xml, PCMK_XA_UNAME);
3645 pcmk__cluster_forget_cluster_node(node_id, name);
3646 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3647 return NULL;
3648 }
3649
3650 static void
3651 fenced_register_handlers(void)
3652 {
3653 pcmk__server_command_t handlers[] = {
3654 { CRM_OP_REGISTER, handle_register_request },
3655 { STONITH_OP_EXEC, handle_agent_request },
3656 { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
3657 { STONITH_OP_QUERY, handle_query_request },
3658 { STONITH_OP_NOTIFY, handle_notify_request },
3659 { STONITH_OP_RELAY, handle_relay_request },
3660 { STONITH_OP_FENCE, handle_fence_request },
3661 { STONITH_OP_FENCE_HISTORY, handle_history_request },
3662 { STONITH_OP_DEVICE_ADD, handle_device_add_request },
3663 { STONITH_OP_DEVICE_DEL, handle_device_delete_request },
3664 { STONITH_OP_LEVEL_ADD, handle_level_add_request },
3665 { STONITH_OP_LEVEL_DEL, handle_level_delete_request },
3666 { CRM_OP_RM_NODE_CACHE, handle_cache_request },
3667 { NULL, handle_unknown_request },
3668 };
3669
3670 fenced_handlers = pcmk__register_handlers(handlers);
3671 }
3672
3673 void
3674 fenced_unregister_handlers(void)
3675 {
3676 g_clear_pointer(&fenced_handlers, g_hash_table_destroy);
3677 }
3678
3679 void
3680 fenced_handle_request(pcmk__request_t *request)
3681 {
3682 xmlNode *reply = NULL;
3683 char *log_msg = NULL;
3684 const char *exec_status_s = NULL;
3685 const char *reason = NULL;
3686
3687 if (fenced_handlers == NULL) {
3688 fenced_register_handlers();
3689 }
3690
3691 reply = pcmk__process_request(request, fenced_handlers);
3692
3693 if (reply != NULL) {
3694 pcmk__log_xml_trace(reply, "Reply");
3695
3696 if (pcmk__is_set(request->flags, pcmk__request_reuse_options)
3697 && (request->ipc_client != NULL)) {
3698 /* Certain IPC-only commands must reuse the call options from the
3699 * original request rather than the ones set by stonith_send_reply()
3700 * -> do_local_reply().
3701 */
3702 pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
3703 request->ipc_flags);
3704 request->ipc_client->request_id = 0;
3705
3706 } else {
3707 stonith_send_reply(reply, request->call_options,
3708 request->peer, request->ipc_client);
3709 }
3710 pcmk__xml_free(reply);
3711 }
3712
3713 exec_status_s = pcmk_exec_status_str(request->result.execution_status);
3714 reason = request->result.exit_reason;
3715 log_msg = pcmk__assert_asprintf("Processed %s request from %s %s: %s%s%s%s",
3716 request->op,
3717 pcmk__request_origin_type(request),
3718 pcmk__request_origin(request),
3719 exec_status_s,
3720 (reason == NULL)? "" : " (",
3721 pcmk__s(reason, ""),
3722 (reason == NULL)? "" : ")");
3723
3724 if (!pcmk__result_ok(&request->result)) {
3725 pcmk__warn("%s", log_msg);
3726 } else {
3727 pcmk__debug("%s", log_msg);
3728 }
3729
3730 free(log_msg);
3731 pcmk__reset_request(request);
3732 }
3733