1 /*
2 * Copyright 2009-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdbool.h> // bool
14 #include <stdio.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #include <sys/utsname.h>
20
21 #include <stdlib.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <ctype.h>
25
26 #include <libxml/tree.h> // xmlNode
27 #include <libxml/xpath.h> // xmlXPathObject, etc.
28
29 #include <crm/crm.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/common/mainloop.h>
33
34 #include <crm/stonith-ng.h>
35 #include <crm/fencing/internal.h>
36 #include <crm/common/xml.h>
37
38 #include <pacemaker-fenced.h>
39
/* Known fence devices, looked up by device ID. Values are released with
 * free_device() (see fenced_init_device_table()).
 */
static GHashTable *device_table = NULL;

// NOTE(review): not referenced in this part of the file; presumably the
// configured fencing topology -- confirm against its users
GHashTable *topology = NULL;

// Every command created by create_async_command() that has not yet been freed
static GList *cmd_list = NULL;

// NOTE(review): not referenced in this part of the file; presumably maps
// request types to handler functions -- confirm against its users
static GHashTable *fenced_handlers = NULL;
46
/* State of one search for devices capable of fencing a target. Results are
 * recorded per device via search_devices_record_result().
 */
struct device_search_s {
    /* target of fence action */
    char *host;
    /* requested fence action */
    char *action;
    /* timeout to use if a device is queried dynamically for possible targets */
    // @TODO This name is misleading now, it's the value of fencing-timeout
    int per_device_timeout;
    /* number of registered fencing devices at time of request */
    int replies_needed;
    /* number of device replies received so far */
    int replies_received;
    /* whether the target is eligible to perform requested action (or off) */
    bool allow_self;

    /* private data to pass to search callback function */
    void *user_data;
    /* function to call when all replies have been received */
    void (*callback) (GList * devices, void *user_data);
    /* devices capable of performing requested action (or off if remapping) */
    GList *capable;
    /* Whether to perform searches that support the action */
    // NOTE(review): declared as uint32_t rather than bool; presumably holds
    // flag bits -- confirm against the code that sets and tests it
    uint32_t support_action_only;
};
71
// Forward declarations of static functions that are used before they are
// defined in this file

static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(int pid, const pcmk__action_result_t *result,
                          void *user_data);

static void search_devices_record_result(struct device_search_s *search, const char *device,
                                         gboolean can_fence);

static int get_agent_metadata(const char *agent, xmlNode **metadata);
static void read_action_metadata(fenced_device_t *device);
static enum fenced_target_by unpack_level_kind(const xmlNode *level);
82
/* One asynchronous device command. Objects are created from request XML by
 * create_async_command() and released by free_async_command().
 */
typedef struct {
    // Call ID from the request (PCMK__XA_ST_CALLID)
    int id;
    // Call options from the request (PCMK__XA_ST_CALLOPT)
    uint32_t options;
    // Timeout from the request (PCMK__XA_ST_TIMEOUT)
    int default_timeout; /* seconds */
    // Timeout in effect; schedule_stonith_command() sets it per device using
    // get_action_timeout()
    int timeout; /* seconds */

    int start_delay; // seconds (-1 means disable static/random fencing delays)
    // Source ID of the start-delay timer, or 0 if no timer is pending
    int delay_id;

    // Operation name from the request (PCMK__XA_ST_OP)
    char *op;
    // Value of PCMK__XA_SRC from the request
    char *origin;
    // Client ID from the request (PCMK__XA_ST_CLIENTID)
    char *client;
    // Client name from the request (PCMK__XA_ST_CLIENTNAME)
    char *client_name;
    // Remote operation ID from the request (PCMK__XA_ST_REMOTE_OP), if any
    char *remote_op_id;

    // Target of the action (may be NULL)
    char *target;
    // Device action to perform
    char *action;
    // ID of the device the command is scheduled on (key into device_table)
    char *device;

    //! Head of device list (used only for freeing list with command object)
    GList *device_list;

    //! Next item to process in \c device_list
    GList *next_device_iter;

    void *internal_user_data;
    // Called with the action's result; defaults to st_child_done()
    void (*done_cb) (int pid, const pcmk__action_result_t *result,
                     void *user_data);

    // Device the command is running on (set in fork_cb())
    fenced_device_t *active_on;
    // Device the command is being started on, until fork_cb() runs
    fenced_device_t *activating_on;
} async_command_t;
115
// Forward declaration (needs async_command_t, so it can't be declared earlier)
static xmlNode *construct_async_reply(const async_command_t *cmd,
                                      const pcmk__action_result_t *result);
118
119 /*!
120 * \internal
121 * \brief Set a bad fencer API request error in a result object
122 *
123 * \param[out] result Result to set
124 */
125 static inline void
126 set_bad_request_result(pcmk__action_result_t *result)
127 {
128 pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
129 "Fencer API request missing required information (bug?)");
130 }
131
132 /*!
133 * \internal
134 * \brief Check whether the fencer's device table contains a watchdog device
135 *
136 * \retval \c true If the device table contains a watchdog device
137 * \retval \c false Otherwise
138 */
139 bool
140 fenced_has_watchdog_device(void)
141 {
142 return (device_table != NULL)
143 && (g_hash_table_lookup(device_table, STONITH_WATCHDOG_ID) != NULL);
144 }
145
146 /*!
147 * \internal
148 * \brief Call a function for each known fence device
149 *
150 * \param[in] fn Function to call for each device
151 * \param[in,out] user_data User data
152 */
153 void
154 fenced_foreach_device(GHFunc fn, gpointer user_data)
155 {
156 if (device_table == NULL) {
157 return;
158 }
159
160 g_hash_table_foreach(device_table, fn, user_data);
161 }
162
163 /*!
164 * \internal
165 * \brief Remove each known fence device matching a given predicate
166 *
167 * \param[in] fn Function that returns \c TRUE to remove a fence device or
168 * \c FALSE to keep it
169 */
170 void
171 fenced_foreach_device_remove(GHRFunc fn)
172 {
173 if (device_table == NULL) {
174 return;
175 }
176
177 g_hash_table_foreach_remove(device_table, fn, NULL);
178 }
179
180 static gboolean
181 is_action_required(const char *action, const fenced_device_t *device)
182 {
183 return (device != NULL)
184 && pcmk__is_set(device->flags, fenced_df_auto_unfence)
185 && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
186 }
187
188 static int
189 get_action_delay_max(const fenced_device_t *device, const char *action)
190 {
191 const char *value = NULL;
192 guint delay_max = 0U;
193
194 if (!pcmk__is_fencing_action(action)) {
195 return 0;
196 }
197
198 value = g_hash_table_lookup(device->params, PCMK_FENCING_DELAY_MAX);
199 if (value != NULL) {
200 pcmk_parse_interval_spec(value, &delay_max);
201 delay_max /= 1000;
202 }
203
204 return (int) delay_max;
205 }
206
207 /*!
208 * \internal
209 * \brief If a mapping matches the given target, return its port value
210 *
211 * \param[in] target Fencing target node
212 * \param[in] mapping Target-to-port mapping (delimited by a colon)
213 *
214 * \return The port from \p mapping if it matches \p target, or \c NULL
215 * if \p mapping is malformed or is not a match.
216 */
217 static gchar *
218 get_value_if_matching(const char *target, const char *mapping)
219 {
220 gchar **nvpair = NULL;
221 gchar *value = NULL;
222
223 if (pcmk__str_empty(mapping)) {
224 goto done;
225 }
226
227 nvpair = g_strsplit(mapping, ":", 2);
228
229 if ((g_strv_length(nvpair) != 2)
230 || pcmk__str_empty(nvpair[0]) || pcmk__str_empty(nvpair[1])) {
231
232 pcmk__err(PCMK_FENCING_DELAY_BASE ": Malformed mapping '%s'", mapping);
233 goto done;
234 }
235
236 if (!pcmk__str_eq(target, nvpair[0], pcmk__str_casei)) {
237 goto done;
238 }
239
240 // Take ownership so that we don't free nvpair[1] with nvpair
241 value = nvpair[1];
242 nvpair[1] = NULL;
243
244 pcmk__debug(PCMK_FENCING_DELAY_BASE " mapped to %s for %s", value, target);
245
246 done:
247 g_strfreev(nvpair);
248 return value;
249 }
250
251 /*!
252 * \internal
253 * \brief If a mapping exists from the target node to a port, return the port
254 *
255 * \param[in] target Fencing target node
256 * \param[in] values List of target-to-port mappings (delimited by semicolon,
257 * space, or tab characters), or a single interval spec
258 *
259 * \return Port to which \p target is mapped, or \c NULL if no such mapping
260 * exists
261 *
262 * \note The caller is responsible for freeing the return value using
263 * \c g_free().
264 */
265 static gchar *
266 get_value_for_target(const char *target, const char *values)
267 {
268 gchar *value = NULL;
269 gchar **mappings = NULL;
270
271 /* If there are no colons, don't try to parse as a list of mappings.
272 * The caller will try to parse the values string as an interval spec.
273 */
274 if (strchr(values, ':') == NULL) {
275 return NULL;
276 }
277
278 mappings = g_strsplit_set(values, "; \t", 0);
279
280 for (gchar **mapping = mappings; (*mapping != NULL) && (value == NULL);
281 mapping++) {
282
283 value = get_value_if_matching(target, *mapping);
284 }
285
286 g_strfreev(mappings);
287 return value;
288 }
289
290 /* @TODO Consolidate some of this with build_port_aliases(). But keep in
291 * mind that build_port_aliases()/pcmk__host_map supports either '=' or ':'
292 * as a mapping separator, while pcmk_delay_base supports only ':'.
293 */
294 static int
295 get_action_delay_base(const fenced_device_t *device, const char *action,
296 const char *target)
297 {
298 const char *param = NULL;
299 gchar *stripped = NULL;
300 gchar *delay_base_s = NULL;
301 guint delay_base = 0U;
302
303 if (!pcmk__is_fencing_action(action)) {
304 return 0;
305 }
306
307 param = g_hash_table_lookup(device->params, PCMK_FENCING_DELAY_BASE);
308 if (param == NULL) {
309 return 0;
310 }
311
312 stripped = g_strstrip(g_strdup(param));
313
314 if (target != NULL) {
315 delay_base_s = get_value_for_target(target, stripped);
316 }
317
318 if (delay_base_s == NULL) {
319 /* Either target is NULL or we didn't find a mapping for it. Try to
320 * parse the entire stripped value as an interval spec. Take ownership
321 * so that we don't free stripped twice.
322 *
323 * We can't tell based on which characters are present whether stripped
324 * was a list of mappings or an interval spec. An ISO 8601 interval may
325 * contain a colon, and a Pacemaker time-and-units string may contain
326 * whitespace.
327 */
328 delay_base_s = stripped;
329 stripped = NULL;
330 }
331
332 /* @COMPAT Should we accept only a simple time-and-units string, rather than
333 * an interval spec?
334 */
335 pcmk_parse_interval_spec(delay_base_s, &delay_base);
336 delay_base /= 1000;
337
338 g_free(stripped);
339 g_free(delay_base_s);
340 return (int) delay_base;
341 }
342
343 /*!
344 * \internal
345 * \brief Override STONITH timeout with pcmk_*_timeout if available
346 *
347 * \param[in] device STONITH device to use
348 * \param[in] action STONITH action name
349 * \param[in] default_timeout Timeout to use if device does not have
350 * a pcmk_*_timeout parameter for action
351 *
352 * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
353 * \note For consistency, it would be nice if reboot/off/on timeouts could be
354 * set the same way as start/stop/monitor timeouts, i.e. with an
355 * <operation> entry in the fencing resource configuration. However that
356 * is insufficient because fencing devices may be registered directly via
357 * the fencer's register_device() API instead of going through the CIB
358 * (e.g. stonith_admin uses it for its -R option, and the executor uses it
359 * to ensure a device is registered when a command is issued). As device
360 * properties, pcmk_*_timeout parameters can be grabbed by the fencer when
361 * the device is registered, whether by CIB change or API call.
362 */
363 static int
364 get_action_timeout(const fenced_device_t *device, const char *action,
365 int default_timeout)
366 {
367 char *timeout_param = NULL;
368 const char *value = NULL;
369 long long timeout_ms = 0;
370 int timeout_sec = 0;
371
372 if ((action == NULL) || (device == NULL) || (device->params == NULL)) {
373 return default_timeout;
374 }
375
376 /* If "reboot" was requested but the device does not support it,
377 * we will remap to "off", so check timeout for "off" instead
378 */
379 if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
380 && !pcmk__is_set(device->flags, fenced_df_supports_reboot)) {
381 pcmk__trace("%s doesn't support reboot, using timeout for off instead",
382 device->id);
383 action = PCMK_ACTION_OFF;
384 }
385
386 /* If the device config specified an action-specific timeout, use it */
387 timeout_param = pcmk__assert_asprintf("pcmk_%s_timeout", action);
388 value = g_hash_table_lookup(device->params, timeout_param);
389 free(timeout_param);
390
391 if (value == NULL) {
392 return default_timeout;
393 }
394
395 if ((pcmk__parse_ms(value, &timeout_ms) != pcmk_rc_ok)
396 || (timeout_ms < 0)) {
397 return default_timeout;
398 }
399
400 timeout_ms = QB_MIN(timeout_ms, UINT_MAX);
401 timeout_sec = pcmk__timeout_ms2s((guint) timeout_ms);
402
403 return QB_MIN(timeout_sec, INT_MAX);
404 }
405
406 /*!
407 * \internal
408 * \brief Get the currently executing device for a fencing operation
409 *
410 * \param[in] cmd Fencing operation to check
411 *
412 * \return Currently executing device for \p cmd if any, otherwise NULL
413 */
414 static fenced_device_t *
415 cmd_device(const async_command_t *cmd)
416 {
417 if ((cmd == NULL) || (cmd->device == NULL) || (device_table == NULL)) {
418 return NULL;
419 }
420 return g_hash_table_lookup(device_table, cmd->device);
421 }
422
423 /*!
424 * \internal
425 * \brief Return the configured reboot action for a given device
426 *
427 * \param[in] device_id Device ID
428 *
429 * \return Configured reboot action for \p device_id
430 */
431 const char *
432 fenced_device_reboot_action(const char *device_id)
433 {
434 fenced_device_t *device = NULL;
435 const char *action = NULL;
436
437 if ((device_table == NULL) || (device_id == NULL)) {
438 return PCMK_ACTION_REBOOT;
439 }
440
441 device = g_hash_table_lookup(device_table, device_id);
442
443 if ((device != NULL) && (device->params != NULL)) {
444 action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
445 }
446
447 return pcmk__s(action, PCMK_ACTION_REBOOT);
448 }
449
450 /*!
451 * \internal
452 * \brief Check whether a given device supports the "on" action
453 *
454 * \param[in] device_id Device ID
455 *
456 * \return true if \p device_id supports "on", otherwise false
457 */
458 bool
459 fenced_device_supports_on(const char *device_id)
460 {
461 fenced_device_t *device = NULL;
462
463 if ((device_table == NULL) || (device_id == NULL)) {
464 return false;
465 }
466
467 device = g_hash_table_lookup(device_table, device_id);
468
469 if (device != NULL) {
470 return pcmk__is_set(device->flags, fenced_df_supports_on);
471 }
472
473 return false;
474 }
475
476 static void
477 free_async_command(async_command_t * cmd)
478 {
479 if (cmd == NULL) {
480 return;
481 }
482
483 if (cmd->delay_id != 0) {
484 g_source_remove(cmd->delay_id);
485 }
486
487 cmd_list = g_list_remove(cmd_list, cmd);
488
489 g_list_free_full(cmd->device_list, free);
490 free(cmd->device);
491 free(cmd->action);
492 free(cmd->target);
493 free(cmd->remote_op_id);
494 free(cmd->client);
495 free(cmd->client_name);
496 free(cmd->origin);
497 free(cmd->op);
498 free(cmd);
499 }
500
501 /*!
502 * \internal
503 * \brief Create a new asynchronous fencing operation from request XML
504 *
505 * \param[in] msg Fencing request XML (from IPC or CPG)
506 *
507 * \return Newly allocated fencing operation on success, otherwise NULL
508 *
509 * \note This asserts on memory errors, so a NULL return indicates an
510 * unparseable message.
511 */
512 static async_command_t *
513 create_async_command(xmlNode *msg)
514 {
515 xmlNode *op = NULL;
516 async_command_t *cmd = NULL;
517 int rc = pcmk_rc_ok;
518
519 if (msg == NULL) {
520 return NULL;
521 }
522
523 op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
524 LOG_ERR);
525 if (op == NULL) {
526 return NULL;
527 }
528
529 cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
530
531 // All messages must include these
532 cmd->action = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ACTION);
533 cmd->op = pcmk__xe_get_copy(msg, PCMK__XA_ST_OP);
534 cmd->client = pcmk__xe_get_copy(msg, PCMK__XA_ST_CLIENTID);
535 if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
536 free_async_command(cmd);
537 return NULL;
538 }
539
540 pcmk__xe_get_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
541 pcmk__xe_get_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
542 pcmk__xe_get_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
543 cmd->timeout = cmd->default_timeout;
544
545 rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options),
546 st_opt_none);
547 if (rc != pcmk_rc_ok) {
548 pcmk__warn("Couldn't parse options from request: %s", pcmk_rc_str(rc));
549 }
550
551 cmd->origin = pcmk__xe_get_copy(msg, PCMK__XA_SRC);
552 cmd->remote_op_id = pcmk__xe_get_copy(msg, PCMK__XA_ST_REMOTE_OP);
553 cmd->client_name = pcmk__xe_get_copy(msg, PCMK__XA_ST_CLIENTNAME);
554 cmd->target = pcmk__xe_get_copy(op, PCMK__XA_ST_TARGET);
555 cmd->device = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ID);
556
557 cmd->done_cb = st_child_done;
558
559 // Track in global command list
560 cmd_list = g_list_append(cmd_list, cmd);
561
562 return cmd;
563 }
564
565 static int
566 get_action_limit(fenced_device_t *device)
567 {
568 const char *value = NULL;
569 int action_limit = 1;
570
571 value = g_hash_table_lookup(device->params, PCMK_FENCING_ACTION_LIMIT);
572 if ((value == NULL)
573 || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
574 || (action_limit == 0)) {
575 action_limit = 1;
576 }
577 return action_limit;
578 }
579
580 static int
581 get_active_cmds(fenced_device_t *device)
582 {
583 int counter = 0;
584 GList *gIter = NULL;
585 GList *gIterNext = NULL;
586
587 CRM_CHECK(device != NULL, return 0);
588
589 for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
590 async_command_t *cmd = gIter->data;
591
592 gIterNext = gIter->next;
593
594 if (cmd->active_on == device) {
595 counter++;
596 }
597 }
598
599 return counter;
600 }
601
602 static void
603 fork_cb(int pid, void *user_data)
604 {
605 async_command_t *cmd = (async_command_t *) user_data;
606 fenced_device_t *device = cmd->activating_on;
607
608 if (device == NULL) {
609 /* In case of a retry, we've done the move from activating_on to
610 * active_on already
611 */
612 device = cmd->active_on;
613 }
614
615 pcmk__assert(device != NULL);
616 pcmk__debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
617 cmd->action, pid,
618 ((cmd->target != NULL)? " targeting " : ""),
619 pcmk__s(cmd->target, ""), device->id, cmd->timeout);
620 cmd->active_on = device;
621 cmd->activating_on = NULL;
622 }
623
624 static int
625 get_agent_metadata_cb(gpointer data) {
626 fenced_device_t *device = data;
627 guint period_ms;
628 int rc = get_agent_metadata(device->agent, &device->agent_metadata);
629
630 if (rc == pcmk_rc_ok) {
631 if (device->agent_metadata != NULL) {
632 read_action_metadata(device);
633 device->default_host_arg =
634 stonith__default_host_arg(device->agent_metadata);
635 }
636
637 return G_SOURCE_REMOVE;
638 }
639
640 if (rc == EAGAIN) {
641 period_ms = pcmk__mainloop_timer_get_period(device->timer);
642 if (period_ms < 160 * 1000) {
643 mainloop_timer_set_period(device->timer, 2 * period_ms);
644 }
645
646 return G_SOURCE_CONTINUE;
647 }
648
649 return G_SOURCE_REMOVE;
650 }
651
/*!
 * \internal
 * \brief Call a command's action callback for an internal (not library) result
 *
 * \param[in,out] cmd               Command to report result for
 * \param[in]     exit_status       Exit status to use for result
 * \param[in]     execution_status  Execution status to use for result
 * \param[in]     exit_reason       Exit reason to use for result
 */
static void
report_internal_result(async_command_t *cmd, int exit_status,
                       int execution_status, const char *exit_reason)
{
    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

    pcmk__set_result(&result, exit_status, execution_status, exit_reason);

    // A PID of 0 is passed because no process was run to produce this result
    cmd->done_cb(0, &result, cmd);
    pcmk__reset_result(&result);
}
671
672 static gboolean
673 stonith_device_execute(fenced_device_t *device)
674 {
675 int exec_rc = 0;
676 const char *action_str = NULL;
677 async_command_t *cmd = NULL;
678 stonith_action_t *action = NULL;
679 int active_cmds = 0;
680 int action_limit = 0;
681 GList *iter = NULL;
682
683 CRM_CHECK(device != NULL, return FALSE);
684
685 active_cmds = get_active_cmds(device);
686 action_limit = get_action_limit(device);
687 if (action_limit > -1 && active_cmds >= action_limit) {
688 pcmk__trace("%s is over its action limit of %d (%u active action%s)",
689 device->id, action_limit, active_cmds,
690 pcmk__plural_s(active_cmds));
691 return TRUE;
692 }
693
694 iter = device->pending_ops;
695
696 while (iter != NULL) {
697 GList *next = iter->next;
698 async_command_t *pending_op = iter->data;
699
700 if ((pending_op != NULL) && (pending_op->delay_id != 0)) {
701 pcmk__trace("Operation '%s'%s%s using %s was asked to run too "
702 "early, waiting for start delay of %ds",
703 pending_op->action,
704 ((pending_op->target == NULL)? "" : " targeting "),
705 pcmk__s(pending_op->target, ""),
706 device->id, pending_op->start_delay);
707 iter = next;
708 continue;
709 }
710
711 device->pending_ops = g_list_remove_link(device->pending_ops, iter);
712 g_list_free_1(iter);
713
714 cmd = pending_op;
715 break;
716 }
717
718 if (cmd == NULL) {
719 pcmk__trace("No actions using %s are needed", device->id);
720 return TRUE;
721 }
722
723 if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
724 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
725 if (pcmk__is_fencing_action(cmd->action)) {
726 if (node_does_watchdog_fencing(fenced_get_local_node())) {
727 pcmk__panic("Watchdog self-fencing required");
728 goto done;
729 }
730 } else {
731 pcmk__info("Faking success for %s watchdog operation", cmd->action);
732 report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
733 goto done;
734 }
735 }
736
737 #if PCMK__ENABLE_CIBSECRETS
738 exec_rc = pcmk__substitute_secrets(device->id, device->params);
739 if (exec_rc != pcmk_rc_ok) {
740 if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
741 pcmk__info("Proceeding with stop operation for %s despite being "
742 "unable to load CIB secrets (%s)",
743 device->id, pcmk_rc_str(exec_rc));
744 } else {
745 pcmk__err("Considering %s unconfigured because unable to load CIB "
746 "secrets: %s",
747 device->id, pcmk_rc_str(exec_rc));
748 report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
749 "Failed to get CIB secrets");
750 goto done;
751 }
752 }
753 #endif
754
755 action_str = cmd->action;
756 if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
757 && !pcmk__is_set(device->flags, fenced_df_supports_reboot)) {
758
759 pcmk__notice("Remapping 'reboot' action%s%s using %s to 'off' because "
760 "agent '%s' does not support reboot",
761 ((cmd->target == NULL)? "" : " targeting "),
762 pcmk__s(cmd->target, ""), device->id, device->agent);
763 action_str = PCMK_ACTION_OFF;
764 }
765
766 action = stonith__action_create(device->agent, action_str, cmd->target,
767 cmd->timeout, device->params,
768 device->aliases, device->default_host_arg);
769
770 /* for async exec, exec_rc is negative for early error exit
771 otherwise handling of success/errors is done via callbacks */
772 cmd->activating_on = device;
773 exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
774 fork_cb);
775 if (exec_rc < 0) {
776 cmd->activating_on = NULL;
777 cmd->done_cb(0, stonith__action_result(action), cmd);
778 stonith__destroy_action(action);
779 }
780
781 done:
782 /* Device might get triggered to work by multiple fencing commands
783 * simultaneously. Trigger the device again to make sure any
784 * remaining concurrent commands get executed. */
785 if (device->pending_ops != NULL) {
786 mainloop_set_trigger(device->work);
787 }
788 return TRUE;
789 }
790
791 static gboolean
792 stonith_device_dispatch(gpointer user_data)
793 {
794 return stonith_device_execute(user_data);
795 }
796
797 static gboolean
798 start_delay_helper(gpointer data)
799 {
800 async_command_t *cmd = data;
801 fenced_device_t *device = cmd_device(cmd);
802
803 cmd->delay_id = 0;
804 if (device != NULL) {
805 mainloop_set_trigger(device->work);
806 }
807
808 return FALSE;
809 }
810
811 static void
812 schedule_stonith_command(async_command_t *cmd, fenced_device_t *device)
813 {
814 int delay_max = 0;
815 int delay_base = 0;
816 int requested_delay = cmd->start_delay;
817
818 CRM_CHECK(cmd != NULL, return);
819 CRM_CHECK(device != NULL, return);
820
821 if (cmd->device != NULL) {
822 free(cmd->device);
823 }
824
825 cmd->device = pcmk__str_copy(device->id);
826 cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
827
828 if (cmd->remote_op_id != NULL) {
829 pcmk__debug("Scheduling '%s' action%s%s using %s for remote peer %s "
830 "with op id %.8s and timeout %ds",
831 cmd->action,
832 (cmd->target == NULL)? "" : " targeting ",
833 pcmk__s(cmd->target, ""),
834 device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
835
836 } else {
837 pcmk__debug("Scheduling '%s' action%s%s using %s for %s with timeout "
838 "%ds",
839 cmd->action,
840 ((cmd->target != NULL)? " targeting " : ""),
841 pcmk__s(cmd->target, ""),
842 device->id, cmd->client, cmd->timeout);
843 }
844
845 device->pending_ops = g_list_append(device->pending_ops, cmd);
846 mainloop_set_trigger(device->work);
847
848 // Value -1 means disable any static/random fencing delays
849 if (requested_delay < 0) {
850 return;
851 }
852
853 delay_max = get_action_delay_max(device, cmd->action);
854 delay_base = get_action_delay_base(device, cmd->action, cmd->target);
855 if (delay_max == 0) {
856 delay_max = delay_base;
857 }
858 if (delay_max < delay_base) {
859 pcmk__warn(PCMK_FENCING_DELAY_BASE " (%ds) is larger than "
860 PCMK_FENCING_DELAY_MAX " (%ds) for %s using %s "
861 "(limiting to maximum delay)",
862 delay_base, delay_max, cmd->action, device->id);
863 delay_base = delay_max;
864 }
865 if (delay_max > 0) {
866 cmd->start_delay += delay_base;
867
868 // Add random offset so that delay_base <= cmd->start_delay <= delay_max
869 if (delay_max > delay_base) {
870 // coverity[dont_call] Doesn't matter that rand() is predictable
871 cmd->start_delay += rand() % (delay_max - delay_base + 1);
872 }
873 }
874
875 if (cmd->start_delay > 0) {
876 pcmk__notice("Delaying '%s' action%s%s using %s for %ds "
877 QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds",
878 cmd->action, (cmd->target == NULL)? "" : " targeting ",
879 pcmk__s(cmd->target, ""), device->id, cmd->start_delay,
880 cmd->timeout, requested_delay, delay_base, delay_max);
881 cmd->delay_id =
882 pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd);
883 }
884 }
885
/*!
 * \internal
 * \brief Free a fence device (value destructor for the device table)
 *
 * Every command still pending on the device is reported as failed with
 * \c PCMK_EXEC_NO_FENCE_DEVICE before the device's resources are released.
 *
 * \param[in,out] data  Device (\c fenced_device_t) to free
 */
static void
free_device(gpointer data)
{
    fenced_device_t *device = data;

    g_hash_table_destroy(device->params);
    g_hash_table_destroy(device->aliases);

    // Fail each command that never got to run on this device
    for (GList *iter = device->pending_ops; iter != NULL; iter = iter->next) {
        async_command_t *cmd = iter->data;

        pcmk__warn("Removal of device '%s' purged operation '%s'", device->id,
                   cmd->action);
        report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                               "Device was removed before action could be executed");
    }

    // Only the list cells are freed here, not the commands themselves
    // NOTE(review): presumably each command is released via its done_cb --
    // confirm in st_child_done()
    g_list_free(device->pending_ops);

    g_list_free_full(device->targets, free);

    if (device->timer != NULL) {
        mainloop_timer_stop(device->timer);
        mainloop_timer_del(device->timer);
    }

    mainloop_destroy_trigger(device->work);

    pcmk__xml_free(device->agent_metadata);
    free(device->namespace);
    g_strfreev(device->on_target_actions);
    free(device->agent);
    free(device->id);
    free(device);
}
920
921 /*!
922 * \internal
923 * \brief Initialize the table of known fence devices
924 */
925 void
926 fenced_init_device_table(void)
927 {
928 if (device_table != NULL) {
929 return;
930 }
931
932 device_table = pcmk__strkey_table(NULL, free_device);
933 }
934
935 /*!
936 * \internal
937 * \brief Free the table of known fence devices
938 */
939 void
940 fenced_free_device_table(void)
941 {
|
CID (unavailable; MK=cd07f2030d25bf24ef225d75003e9faf) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(1) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(2) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
942 g_clear_pointer(&device_table, g_hash_table_destroy);
943 }
944
945 static GHashTable *
946 build_port_aliases(const char *hostmap, GList **targets)
947 {
948 GHashTable *aliases = pcmk__strikey_table(free, free);
949 gchar *stripped = NULL;
950 gchar **mappings = NULL;
951
952 if (pcmk__str_empty(hostmap)) {
953 goto done;
954 }
955
956 stripped = g_strstrip(g_strdup(hostmap));
957 mappings = g_strsplit_set(stripped, "; \t", 0);
958
959 for (gchar **mapping = mappings; *mapping != NULL; mapping++) {
960 gchar **nvpair = NULL;
961
962 if (pcmk__str_empty(*mapping)) {
963 continue;
964 }
965
966 // @COMPAT Drop support for '=' as delimiter
967 nvpair = g_strsplit_set(*mapping, ":=", 2);
968
969 if (pcmk__str_empty(nvpair[0]) || pcmk__str_empty(nvpair[1])) {
970 pcmk__err(PCMK_FENCING_HOST_MAP ": Malformed mapping '%s'",
971 *mapping);
972
973 } else {
974 pcmk__debug("Adding alias '%s'='%s'", nvpair[0], nvpair[1]);
975 pcmk__insert_dup(aliases, nvpair[0], nvpair[1]);
976 *targets = g_list_append(*targets, pcmk__str_copy(nvpair[1]));
977 }
978 g_strfreev(nvpair);
979 }
980
981 done:
982 g_free(stripped);
983 g_strfreev(mappings);
984 return aliases;
985 }
986
// Cache of agent metadata text keyed by agent name (see get_agent_metadata())
GHashTable *metadata_cache = NULL;
988
989 void
990 free_metadata_cache(void)
991 {
992 g_clear_pointer(&metadata_cache, g_hash_table_destroy);
993 }
994
995 static void
996 init_metadata_cache(void)
997 {
998 if (metadata_cache != NULL) {
999 return;
1000 }
1001
1002 metadata_cache = pcmk__strkey_table(free, free);
1003 }
1004
1005 int
1006 get_agent_metadata(const char *agent, xmlNode ** metadata)
1007 {
1008 char *buffer = NULL;
1009 stonith_t *st = NULL;
1010 int rc = pcmk_ok;
1011
1012 if (metadata == NULL) {
1013 return EINVAL;
1014 }
1015
1016 *metadata = NULL;
1017
1018 if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
1019 return pcmk_rc_ok;
1020 }
1021
1022 init_metadata_cache();
1023 buffer = g_hash_table_lookup(metadata_cache, agent);
1024
1025 if (buffer != NULL) {
1026 goto done;
1027 }
1028
1029 st = stonith__api_new();
1030
1031 if (st == NULL) {
1032 pcmk__warn("Could not get agent meta-data: API memory allocation "
1033 "failed");
1034 return EAGAIN;
1035 }
1036
1037 rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
1038 stonith__api_free(st);
1039
1040 if ((rc != pcmk_ok) || (buffer == NULL)) {
1041 pcmk__err("Could not retrieve metadata for fencing agent %s", agent);
1042 return EAGAIN;
1043 }
1044
1045 g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer);
1046
1047 done:
1048 *metadata = pcmk__xml_parse(buffer);
1049 return pcmk_rc_ok;
1050 }
1051
1052 static void
1053 read_action_metadata(fenced_device_t *device)
1054 {
1055 xmlXPathObject *xpath = NULL;
1056 int max = 0;
1057
1058 // @TODO Use GStrvBuilder when we require glib 2.68
1059 GPtrArray *on_target_actions = NULL;
1060
1061 if (device->agent_metadata == NULL) {
1062 return;
1063 }
1064
1065 xpath = pcmk__xpath_search(device->agent_metadata->doc,
1066 "//" PCMK_XE_ACTION);
1067 max = pcmk__xpath_num_results(xpath);
1068
1069 if (max == 0) {
1070 xmlXPathFreeObject(xpath);
1071 return;
1072 }
1073
1074 for (int i = 0; i < max; i++) {
1075 const char *action = NULL;
1076 xmlNode *match = pcmk__xpath_result(xpath, i);
1077
1078 CRM_LOG_ASSERT(match != NULL);
1079 if(match == NULL) { continue; };
1080
1081 action = pcmk__xe_get(match, PCMK_XA_NAME);
1082
1083 if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
1084 fenced_device_set_flags(device, fenced_df_supports_list);
1085
1086 } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
1087 fenced_device_set_flags(device, fenced_df_supports_status);
1088
1089 } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
1090 fenced_device_set_flags(device, fenced_df_supports_reboot);
1091
1092 } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
1093 /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
1094 * joins.
1095 *
1096 * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
1097 * PCMK_XA_AUTOMATIC.
1098 */
1099 if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
1100 || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
1101
1102 fenced_device_set_flags(device, fenced_df_auto_unfence);
1103 }
1104 fenced_device_set_flags(device, fenced_df_supports_on);
1105 }
1106
1107 if ((action != NULL)
1108 && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {
1109
1110 if (on_target_actions == NULL) {
1111 on_target_actions = g_ptr_array_new();
1112 }
1113 g_ptr_array_add(on_target_actions, g_strdup(action));
1114 }
1115 }
1116
1117 if (on_target_actions != NULL) {
1118 g_ptr_array_add(on_target_actions, NULL);
1119 device->on_target_actions =
1120 (gchar **) g_ptr_array_free(on_target_actions, FALSE);
1121 }
1122 xmlXPathFreeObject(xpath);
1123 }
1124
1125 static const char *
1126 target_list_type(fenced_device_t *dev)
1127 {
1128 const char *check_type = g_hash_table_lookup(dev->params,
1129 PCMK_FENCING_HOST_CHECK);
1130
1131 if (check_type != NULL) {
1132 return check_type;
1133 }
1134
1135 if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_LIST) != NULL) {
1136 return PCMK_VALUE_STATIC_LIST;
1137 }
1138
1139 if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_MAP) != NULL) {
1140 return PCMK_VALUE_STATIC_LIST;
1141 }
1142
1143 if (pcmk__is_set(dev->flags, fenced_df_supports_list)) {
1144 return PCMK_VALUE_DYNAMIC_LIST;
1145 }
1146
1147 if (pcmk__is_set(dev->flags, fenced_df_supports_status)) {
1148 return PCMK_VALUE_STATUS;
1149 }
1150
1151 return PCMK_VALUE_NONE;
1152 }
1153
1154 static fenced_device_t *
1155 build_device_from_xml(const xmlNode *dev)
1156 {
1157 const char *value;
1158 fenced_device_t *device = NULL;
1159 char *agent = pcmk__xe_get_copy(dev, PCMK_XA_AGENT);
1160 int rc = pcmk_rc_ok;
1161
1162 CRM_CHECK(agent != NULL, return device);
1163
1164 device = pcmk__assert_alloc(1, sizeof(fenced_device_t));
1165
1166 device->id = pcmk__xe_get_copy(dev, PCMK_XA_ID);
1167 device->agent = agent;
1168 device->namespace = pcmk__xe_get_copy(dev, PCMK__XA_NAMESPACE);
1169 device->params = xml2list(dev);
1170
1171 value = g_hash_table_lookup(device->params, PCMK_FENCING_HOST_LIST);
1172 if (value != NULL) {
1173 device->targets = stonith__parse_targets(value);
1174 }
1175
1176 value = g_hash_table_lookup(device->params, PCMK_FENCING_HOST_MAP);
1177 device->aliases = build_port_aliases(value, &(device->targets));
1178
1179 value = target_list_type(device);
1180 if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)
1181 && (device->targets != NULL)) {
1182
1183 // device->targets is necessary only with PCMK_VALUE_STATIC_LIST
1184 g_list_free_full(device->targets, free);
1185 device->targets = NULL;
1186 }
1187
1188 rc = get_agent_metadata(device->agent, &device->agent_metadata);
1189
1190 if ((rc == pcmk_rc_ok) && (device->agent_metadata != NULL)) {
1191 read_action_metadata(device);
1192 device->default_host_arg =
1193 stonith__default_host_arg(device->agent_metadata);
1194
1195 } else if (rc == EAGAIN) {
1196 if (device->timer == NULL) {
1197 device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
1198 TRUE, get_agent_metadata_cb,
1199 device);
1200 }
1201
1202 if (!mainloop_timer_running(device->timer)) {
1203 mainloop_timer_start(device->timer);
1204 }
1205 }
1206
1207 value = pcmk__xe_get(dev, PCMK__XA_RSC_PROVIDES);
1208 if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
1209 fenced_device_set_flags(device, fenced_df_auto_unfence);
1210 }
1211
1212 if (is_action_required(PCMK_ACTION_ON, device)) {
1213 pcmk__info("Fencing device '%s' requires unfencing", device->id);
1214 }
1215
1216 if (device->on_target_actions != NULL) {
1217 gchar *on_target_actions = g_strjoinv(" ", device->on_target_actions);
1218
1219 pcmk__info("Fencing device '%s' requires actions (%s) to be executed "
1220 "on target", device->id, on_target_actions);
1221 g_free(on_target_actions);
1222 }
1223
1224 device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
1225
1226 return device;
1227 }
1228
1229 static void
1230 schedule_internal_command(const char *origin, fenced_device_t *device,
1231 const char *action, const char *target, int timeout,
1232 void *internal_user_data,
1233 void (*done_cb) (int pid,
1234 const pcmk__action_result_t *result,
1235 void *user_data))
1236 {
1237 async_command_t *cmd = NULL;
1238
1239 cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
1240
1241 cmd->id = -1;
1242 cmd->default_timeout = timeout ? timeout : 60;
1243 cmd->timeout = cmd->default_timeout;
1244 cmd->action = pcmk__str_copy(action);
1245 cmd->target = pcmk__str_copy(target);
1246 cmd->device = pcmk__str_copy(device->id);
1247 cmd->origin = pcmk__str_copy(origin);
1248 cmd->client = pcmk__str_copy(crm_system_name);
1249 cmd->client_name = pcmk__str_copy(crm_system_name);
1250
1251 cmd->internal_user_data = internal_user_data;
1252 cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
1253
1254 schedule_stonith_command(cmd, device);
1255 }
1256
/* Exit status codes specific to the status action of fence agents. The values
 * are spelled out explicitly because they are compared against agent exit
 * statuses.
 */
enum fence_status_code {
    fence_status_invalid    = -1,
    fence_status_active     =  0,
    fence_status_unknown    =  1,
    fence_status_inactive   =  2,
};
1264
1265 static void
1266 status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
1267 {
1268 async_command_t *cmd = user_data;
1269 struct device_search_s *search = cmd->internal_user_data;
1270 fenced_device_t *dev = cmd_device(cmd);
1271 gboolean can = FALSE;
1272
1273 free_async_command(cmd);
1274
1275 if (dev == NULL) {
1276 search_devices_record_result(search, NULL, FALSE);
1277 return;
1278 }
1279
1280 mainloop_set_trigger(dev->work);
1281
1282 if (result->execution_status != PCMK_EXEC_DONE) {
1283 const char *reason = result->exit_reason;
1284
1285 pcmk__warn("Assuming %s cannot fence %s because status could not be "
1286 "executed: %s%s%s%s",
1287 dev->id, search->host,
1288 pcmk_exec_status_str(result->execution_status),
1289 ((reason != NULL)? " (" : ""), pcmk__s(reason, ""),
1290 ((reason != NULL)? ")" : ""));
1291 search_devices_record_result(search, dev->id, FALSE);
1292 return;
1293 }
1294
1295 switch (result->exit_status) {
1296 case fence_status_unknown:
1297 pcmk__trace("%s reported it cannot fence %s", dev->id,
1298 search->host);
1299 break;
1300
1301 case fence_status_active:
1302 case fence_status_inactive:
1303 pcmk__trace("%s reported it can fence %s", dev->id, search->host);
1304 can = TRUE;
1305 break;
1306
1307 default:
1308 pcmk__warn("Assuming %s cannot fence %s (status returned unknown "
1309 "code %d)",
1310 dev->id, search->host, result->exit_status);
1311 break;
1312 }
1313 search_devices_record_result(search, dev->id, can);
1314 }
1315
1316 static void
1317 dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
1318 void *user_data)
1319 {
1320 async_command_t *cmd = user_data;
1321 struct device_search_s *search = cmd->internal_user_data;
1322 fenced_device_t *dev = cmd_device(cmd);
1323 gboolean can_fence = FALSE;
1324
1325 free_async_command(cmd);
1326
1327 /* Host/alias must be in the list output to be eligible to be fenced
1328 *
1329 * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
1330 * if the guest is still listed despite being moved to another machine
1331 */
1332 if (dev == NULL) {
1333 search_devices_record_result(search, NULL, FALSE);
1334 return;
1335 }
1336
1337 mainloop_set_trigger(dev->work);
1338
1339 if (pcmk__result_ok(result)) {
1340 pcmk__info("Refreshing target list for %s", dev->id);
1341 g_list_free_full(dev->targets, free);
1342 dev->targets = stonith__parse_targets(result->action_stdout);
1343 dev->targets_age = time(NULL);
1344
1345 } else if (dev->targets != NULL) {
1346 if (result->execution_status == PCMK_EXEC_DONE) {
1347 pcmk__info("Reusing most recent target list for %s because list "
1348 "returned error code %d",
1349 dev->id, result->exit_status);
1350 } else {
1351 const char *reason = result->exit_reason;
1352
1353 pcmk__info("Reusing most recent target list for %s because list "
1354 "could not be executed: %s%s%s%s",
1355 dev->id, pcmk_exec_status_str(result->execution_status),
1356 ((reason != NULL)? " (" : ""), pcmk__s(reason, ""),
1357 ((reason != NULL)? ")" : ""));
1358 }
1359
1360 } else { // We have never successfully executed list
1361 if (result->execution_status == PCMK_EXEC_DONE) {
1362 pcmk__warn("Assuming %s cannot fence %s because list returned "
1363 "error code %d",
1364 dev->id, search->host, result->exit_status);
1365 } else {
1366 const char *reason = result->exit_reason;
1367
1368 pcmk__warn("Assuming %s cannot fence %s because list could not be "
1369 "executed: %s%s%s%s",
1370 dev->id, search->host,
1371 pcmk_exec_status_str(result->execution_status),
1372 ((reason != NULL)? " (" : ""), pcmk__s(reason, ""),
1373 ((reason != NULL)? ")" : ""));
1374 }
1375
1376 /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't
1377 * explicitly specify PCMK_VALUE_DYNAMIC_LIST
1378 */
1379 if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_CHECK) == NULL) {
1380 pcmk__notice("Switching to pcmk_host_check='status' for %s",
1381 dev->id);
1382 pcmk__insert_dup(dev->params, PCMK_FENCING_HOST_CHECK,
1383 PCMK_VALUE_STATUS);
1384 }
1385 }
1386
1387 if (dev->targets != NULL) {
1388 const char *alias = g_hash_table_lookup(dev->aliases, search->host);
1389
1390 if (alias == NULL) {
1391 alias = search->host;
1392 }
1393 if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
1394 can_fence = TRUE;
1395 }
1396 }
1397 search_devices_record_result(search, dev->id, can_fence);
1398 }
1399
1400 /*!
1401 * \internal
1402 * \brief Returns true if any key in first is not in second or second has a different value for key
1403 */
1404 static bool
1405 device_params_diff(GHashTable *first, GHashTable *second) {
1406 char *key = NULL;
1407 char *value = NULL;
1408 GHashTableIter gIter;
1409
1410 g_hash_table_iter_init(&gIter, first);
1411 while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
1412 const char *other_value = NULL;
1413
1414 if (g_str_has_prefix(key, CRM_META "_")
1415 || pcmk__str_eq(key, PCMK_XA_CRM_FEATURE_SET, pcmk__str_none)) {
1416 continue;
1417 }
1418
1419 other_value = g_hash_table_lookup(second, key);
1420
1421 if ((other_value == NULL)
1422 || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
1423 pcmk__trace("Different value for %s: %s != %s", key,
1424 pcmk__s(other_value, "<null>"), value);
1425 return true;
1426 }
1427 }
1428
1429 return false;
1430 }
1431
1432 /*!
1433 * \internal
1434 * \brief Checks to see if an identical device already exists in the table
1435 */
1436 static fenced_device_t *
1437 device_has_duplicate(const fenced_device_t *device)
1438 {
1439 fenced_device_t *dup = g_hash_table_lookup(device_table, device->id);
1440
1441 if (dup == NULL) {
1442 pcmk__trace("No match for %s", device->id);
1443 return NULL;
1444
1445 } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
1446 pcmk__trace("Different agent: %s != %s", dup->agent, device->agent);
1447 return NULL;
1448 }
1449
1450 // Find a way to share logic with pcmk__digest_op_params() here?
1451 if (device_params_diff(device->params, dup->params) ||
1452 device_params_diff(dup->params, device->params)) {
1453 return NULL;
1454 }
1455
1456 pcmk__trace("Match");
1457 return dup;
1458 }
1459
1460 int
1461 fenced_device_register(const xmlNode *dev, bool from_cib)
1462 {
1463 const char *local_node_name = fenced_get_local_node();
1464 fenced_device_t *dup = NULL;
1465 fenced_device_t *device = build_device_from_xml(dev);
1466 int rc = pcmk_rc_ok;
1467
1468 CRM_CHECK(device != NULL, return ENOMEM);
1469
1470 /* do we have a watchdog-device? */
1471 if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)
1472 || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
1473 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
1474
1475 if (fencing_watchdog_timeout_ms <= 0) {
1476 pcmk__err("Ignoring watchdog fence device without "
1477 PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " set");
1478 rc = ENODEV;
1479 goto done;
1480 }
1481 if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
1482 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
1483 pcmk__err("Ignoring watchdog fence device with unknown agent '%s' "
1484 "rather than '" STONITH_WATCHDOG_AGENT "'",
1485 pcmk__s(device->agent, ""));
1486 rc = ENODEV;
1487 goto done;
1488 }
1489 if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
1490 pcmk__err("Ignoring watchdog fence device named '%s' rather than "
1491 "'" STONITH_WATCHDOG_ID "'",
1492 pcmk__s(device->id, ""));
1493 rc = ENODEV;
1494 goto done;
1495 }
1496
1497 if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
1498 pcmk__str_none)) {
1499 /* This has either an empty list or the targets configured for
1500 * watchdog fencing
1501 */
1502 g_list_free_full(stonith_watchdog_targets, free);
1503 stonith_watchdog_targets = device->targets;
1504 device->targets = NULL;
1505 }
1506
1507 if (!node_does_watchdog_fencing(local_node_name)) {
1508 pcmk__debug("Skip registration of watchdog fence device on node "
1509 "not in host list");
1510 device->targets = NULL;
1511 stonith_device_remove(device->id, from_cib);
1512 goto done;
1513 }
1514
1515 // Proceed as with any other fencing device
1516 g_list_free_full(device->targets, free);
1517 device->targets = stonith__parse_targets(local_node_name);
1518 pcmk__insert_dup(device->params, PCMK_FENCING_HOST_LIST,
1519 local_node_name);
1520 }
1521
1522 dup = device_has_duplicate(device);
1523 if (dup != NULL) {
1524 guint ndevices = g_hash_table_size(device_table);
1525
1526 pcmk__debug("Device '%s' already in device list (%d active device%s)",
1527 device->id, ndevices, pcmk__plural_s(ndevices));
1528 free_device(device);
1529 device = dup;
1530 fenced_device_clear_flags(device, fenced_df_dirty);
1531
1532 } else {
1533 guint ndevices = 0;
1534 fenced_device_t *old = g_hash_table_lookup(device_table, device->id);
1535
1536 if (from_cib && (old != NULL)
1537 && pcmk__is_set(old->flags, fenced_df_api_registered)) {
1538 /* If the CIB is writing over an entry that is shared with a stonith
1539 * client, copy any pending ops that currently exist on the old
1540 * entry to the new one. Otherwise the pending ops will be reported
1541 * as failures.
1542 */
1543 pcmk__info("Overwriting existing entry for %s from CIB",
1544 device->id);
1545 device->pending_ops = old->pending_ops;
1546 fenced_device_set_flags(device, fenced_df_api_registered);
1547 old->pending_ops = NULL;
1548 if (device->pending_ops != NULL) {
1549 mainloop_set_trigger(device->work);
1550 }
1551 }
1552 g_hash_table_replace(device_table, device->id, device);
1553
1554 ndevices = g_hash_table_size(device_table);
1555 pcmk__notice("Added '%s' to device list (%d active device%s)",
1556 device->id, ndevices, pcmk__plural_s(ndevices));
1557 }
1558
1559 if (from_cib) {
1560 fenced_device_set_flags(device, fenced_df_cib_registered);
1561 } else {
1562 fenced_device_set_flags(device, fenced_df_api_registered);
1563 }
1564
1565 done:
1566 if (rc != pcmk_rc_ok) {
1567 free_device(device);
1568 }
1569 return rc;
1570 }
1571
1572 void
1573 stonith_device_remove(const char *id, bool from_cib)
1574 {
1575 fenced_device_t *device = g_hash_table_lookup(device_table, id);
1576 guint ndevices = 0;
1577
1578 if (device == NULL) {
1579 ndevices = g_hash_table_size(device_table);
1580 pcmk__info("Device '%s' not found (%u active device%s)", id, ndevices,
1581 pcmk__plural_s(ndevices));
1582 return;
1583 }
1584
1585 if (from_cib) {
1586 fenced_device_clear_flags(device, fenced_df_cib_registered);
1587 } else {
1588 fenced_device_clear_flags(device,
1589 fenced_df_api_registered|fenced_df_verified);
1590 }
1591
1592 if (!pcmk__any_flags_set(device->flags,
1593 fenced_df_api_registered
1594 |fenced_df_cib_registered)) {
1595
1596 g_hash_table_remove(device_table, id);
1597 ndevices = g_hash_table_size(device_table);
1598 pcmk__info("Removed '%s' from device list (%u active device%s)", id,
1599 ndevices, pcmk__plural_s(ndevices));
1600
1601 } else {
1602 // Exactly one is true at this point
1603 const bool cib_registered = pcmk__is_set(device->flags,
1604 fenced_df_cib_registered);
1605
1606 pcmk__trace("Not removing '%s' from device list (%u active) because "
1607 "still registered via %s",
1608 id, g_hash_table_size(device_table),
1609 (cib_registered? "CIB" : "API"));
1610 }
1611 }
1612
1613 /*!
1614 * \internal
1615 * \brief Return the number of stonith levels registered for a node
1616 *
1617 * \param[in] tp Node's topology table entry
1618 *
1619 * \return Number of non-NULL levels in topology entry
1620 * \note This function is used only for log messages.
1621 */
1622 static int
1623 count_active_levels(const stonith_topology_t *tp)
1624 {
1625 int count = 0;
1626
1627 for (int i = 0; i < ST__LEVEL_COUNT; i++) {
1628 if (tp->levels[i] != NULL) {
1629 count++;
1630 }
1631 }
1632
1633 return count;
1634 }
1635
1636 static void
1637 free_topology_entry(gpointer data)
1638 {
1639 stonith_topology_t *tp = data;
1640
1641 for (int i = 0; i < ST__LEVEL_COUNT; i++) {
1642 g_list_free_full(tp->levels[i], free);
1643 }
1644
1645 free(tp->target);
1646 free(tp->target_value);
1647 free(tp->target_pattern);
1648 free(tp->target_attribute);
1649 free(tp);
1650 }
1651
1652 void
1653 free_topology_list(void)
1654 {
1655 g_clear_pointer(&topology, g_hash_table_destroy);
1656 }
1657
1658 void
1659 init_topology_list(void)
1660 {
1661 if (topology != NULL) {
1662 return;
1663 }
1664
1665 topology = pcmk__strkey_table(NULL, free_topology_entry);
1666 }
1667
1668 char *
1669 stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
1670 {
1671 if (mode == fenced_target_by_unknown) {
1672 mode = unpack_level_kind(level);
1673 }
1674 switch (mode) {
1675 case fenced_target_by_name:
1676 return pcmk__xe_get_copy(level, PCMK_XA_TARGET);
1677
1678 case fenced_target_by_pattern:
1679 return pcmk__xe_get_copy(level, PCMK_XA_TARGET_PATTERN);
1680
1681 case fenced_target_by_attribute:
1682 return pcmk__assert_asprintf("%s=%s",
1683 pcmk__xe_get(level,
1684 PCMK_XA_TARGET_ATTRIBUTE),
1685 pcmk__xe_get(level,
1686 PCMK_XA_TARGET_VALUE));
1687
1688 default:
1689 return pcmk__assert_asprintf("unknown-%s", pcmk__xe_id(level));
1690 }
1691 }
1692
1693 /*!
1694 * \internal
1695 * \brief Parse target identification from topology level XML
1696 *
1697 * \param[in] level Topology level XML to parse
1698 *
1699 * \return How to identify target of \p level
1700 */
1701 static enum fenced_target_by
1702 unpack_level_kind(const xmlNode *level)
1703 {
1704 if (pcmk__xe_get(level, PCMK_XA_TARGET) != NULL) {
1705 return fenced_target_by_name;
1706 }
1707 if (pcmk__xe_get(level, PCMK_XA_TARGET_PATTERN) != NULL) {
1708 return fenced_target_by_pattern;
1709 }
1710 if ((pcmk__xe_get(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL)
1711 && (pcmk__xe_get(level, PCMK_XA_TARGET_VALUE) != NULL)) {
1712 return fenced_target_by_attribute;
1713 }
1714 return fenced_target_by_unknown;
1715 }
1716
1717 /*!
1718 * \internal
1719 * \brief Unpack essential information from topology request XML
1720 *
1721 * \param[in] xml Request XML to search
1722 * \param[out] mode If not NULL, where to store level kind
1723 * \param[out] target If not NULL, where to store representation of target
1724 * \param[out] id If not NULL, where to store level number
1725 *
1726 * \return Topology level XML from within \p xml, or NULL if not found
1727 * \note The caller is responsible for freeing \p *target if set.
1728 */
1729 static xmlNode *
1730 unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
1731 int *id)
1732 {
1733 enum fenced_target_by local_mode = fenced_target_by_unknown;
1734 char *local_target = NULL;
1735 int local_id = 0;
1736
1737 /* The level element can be the top element or lower. If top level, don't
1738 * search by xpath, because it might give multiple hits if the XML is the
1739 * CIB.
1740 */
1741 if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
1742 xml = pcmk__xpath_find_one(xml->doc, "//" PCMK_XE_FENCING_LEVEL,
1743 LOG_WARNING);
1744 }
1745
1746 if (xml != NULL) {
1747 local_mode = unpack_level_kind(xml);
1748 local_target = stonith_level_key(xml, local_mode);
1749 pcmk__xe_get_int(xml, PCMK_XA_INDEX, &local_id);
1750 }
1751
1752 if (mode != NULL) {
1753 *mode = local_mode;
1754 }
1755 if (id != NULL) {
1756 *id = local_id;
1757 }
1758
1759 if (target != NULL) {
1760 *target = local_target;
1761 } else {
1762 free(local_target);
1763 }
1764
1765 return xml;
1766 }
1767
1768 /*!
1769 * \internal
1770 * \brief Register a fencing topology level for a target
1771 *
1772 * Given an XML request specifying the target name, level index, and device IDs
1773 * for the level, this will create an entry for the target in the global topology
1774 * table if one does not already exist, then append the specified device IDs to
1775 * the entry's device list for the specified level.
1776 *
1777 * \param[in] msg XML request for STONITH level registration
1778 * \param[out] result Where to set result of registration (can be \c NULL)
1779 */
1780 void
1781 fenced_register_level(xmlNode *msg, pcmk__action_result_t *result)
1782 {
1783 int nlevels = 0;
1784 int id = 0;
1785 xmlNode *level;
1786 enum fenced_target_by mode;
1787 char *target;
1788
1789 stonith_topology_t *tp;
1790 const char *value = NULL;
1791
1792 CRM_CHECK(msg != NULL, return);
1793
1794 level = unpack_level_request(msg, &mode, &target, &id);
1795 if (level == NULL) {
1796 set_bad_request_result(result);
1797 return;
1798 }
1799
1800 // Ensure an ID was given (even the client API adds an ID)
1801 if (pcmk__str_empty(pcmk__xe_id(level))) {
1802 pcmk__warn("Ignoring registration for topology level without ID");
1803 free(target);
1804 pcmk__log_xml_trace(level, "Bad level");
1805 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1806 "Topology level is invalid without ID");
1807 return;
1808 }
1809
1810 // Ensure a valid target was specified
1811 if (mode == fenced_target_by_unknown) {
1812 pcmk__warn("Ignoring registration for topology level '%s' without "
1813 "valid target",
1814 pcmk__xe_id(level));
1815 free(target);
1816 pcmk__log_xml_trace(level, "Bad level");
1817 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1818 "Invalid target for topology level '%s'",
1819 pcmk__xe_id(level));
1820 return;
1821 }
1822
1823 // Ensure level ID is in allowed range
1824 if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
1825 pcmk__warn("Ignoring topology registration for %s with invalid level "
1826 "%d",
1827 target, id);
1828 free(target);
1829 pcmk__log_xml_trace(level, "Bad level");
1830 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1831 "Invalid level number '%s' for topology level '%s'",
1832 pcmk__s(pcmk__xe_get(level, PCMK_XA_INDEX), ""),
1833 pcmk__xe_id(level));
1834 return;
1835 }
1836
1837 /* Find or create topology table entry */
1838 tp = g_hash_table_lookup(topology, target);
1839 if (tp == NULL) {
1840 tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t));
1841
1842 tp->kind = mode;
1843 tp->target = target;
1844 tp->target_value = pcmk__xe_get_copy(level, PCMK_XA_TARGET_VALUE);
1845 tp->target_pattern = pcmk__xe_get_copy(level, PCMK_XA_TARGET_PATTERN);
1846 tp->target_attribute = pcmk__xe_get_copy(level, PCMK_XA_TARGET_ATTRIBUTE);
1847
1848 g_hash_table_replace(topology, tp->target, tp);
1849 pcmk__trace("Added %s (%d) to the topology (%u active entries)", target,
1850 (int) mode, g_hash_table_size(topology));
1851 } else {
1852 free(target);
1853 }
1854
1855 if (tp->levels[id] != NULL) {
1856 pcmk__info("Adding to the existing %s[%d] topology entry", tp->target,
1857 id);
1858 }
1859
1860 value = pcmk__xe_get(level, PCMK_XA_DEVICES);
1861 if (value != NULL) {
1862 /* Empty string and whitespace are not possible with schema validation
1863 * enabled. Don't bother handling them specially here.
1864 */
1865 gchar **devices = g_strsplit(value, ",", 0);
1866
1867 for (char **dev = devices; (dev != NULL) && (*dev != NULL); dev++) {
1868 pcmk__trace("Adding device '%s' for %s[%d]", *dev, tp->target, id);
1869 tp->levels[id] = g_list_append(tp->levels[id],
1870 pcmk__str_copy(*dev));
1871 }
1872 g_strfreev(devices);
1873 }
1874
1875 nlevels = count_active_levels(tp);
1876
1877 pcmk__info("Target %s has %d active fencing level%s", tp->target, nlevels,
1878 pcmk__plural_s(nlevels));
1879
1880 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1881 }
1882
1883 /*!
1884 * \internal
1885 * \brief Unregister a fencing topology level for a target
1886 *
1887 * Given an XML request specifying the target name and level index (or 0 for all
1888 * levels), this will remove any corresponding entry for the target from the
1889 * global topology table.
1890 *
1891 * \param[in] msg XML request for STONITH level registration
1892 * \param[out] result Where to set result of unregistration (can be \c NULL)
1893 */
1894 void
1895 fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result)
1896 {
1897 int id = -1;
1898 stonith_topology_t *tp;
1899 char *target;
1900 xmlNode *level = NULL;
1901
1902 level = unpack_level_request(msg, NULL, &target, &id);
1903 if (level == NULL) {
1904 set_bad_request_result(result);
1905 return;
1906 }
1907
1908 // Ensure level ID is in allowed range
1909 if ((id < 0) || (id >= ST__LEVEL_COUNT)) {
1910 pcmk__warn("Ignoring topology unregistration for %s with invalid level "
1911 "%d",
1912 target, id);
1913 free(target);
1914 pcmk__log_xml_trace(level, "Bad level");
1915 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1916 "Invalid level number '%s' for topology level %s",
1917 pcmk__s(pcmk__xe_get(level, PCMK_XA_INDEX),
1918 "<null>"),
1919
1920 // Client API doesn't add ID to unregistration XML
1921 pcmk__s(pcmk__xe_id(level), ""));
1922 return;
1923 }
1924
1925 tp = g_hash_table_lookup(topology, target);
1926 if (tp == NULL) {
1927 guint nentries = g_hash_table_size(topology);
1928
1929 pcmk__info("No fencing topology found for %s (%d active %s)", target,
1930 nentries, pcmk__plural_alt(nentries, "entry", "entries"));
1931
1932 } else if (id == 0 && g_hash_table_remove(topology, target)) {
1933 guint nentries = g_hash_table_size(topology);
1934
1935 pcmk__info("Removed all fencing topology entries related to %s (%d "
1936 "active %s remaining)",
1937 target, nentries,
1938 pcmk__plural_alt(nentries, "entry", "entries"));
1939
1940 } else if (tp->levels[id] != NULL) {
1941 guint nlevels;
1942
1943 g_list_free_full(tp->levels[id], free);
1944 tp->levels[id] = NULL;
1945
1946 nlevels = count_active_levels(tp);
1947 pcmk__info("Removed level %d from fencing topology for %s (%d "
1948 "active level%s remaining)",
1949 id, target, nlevels, pcmk__plural_s(nlevels));
1950 }
1951
1952 free(target);
1953 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1954 }
1955
1956 static char *
1957 list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
1958 {
1959 int max = g_list_length(list);
1960 size_t delim_len = delim?strlen(delim):0;
1961 size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
1962 char *rv;
1963
1964 char *pos = NULL;
1965 const char *lead_delim = "";
1966
1967 for (const GList *iter = list; iter != NULL; iter = iter->next) {
1968 const char *value = (const char *) iter->data;
1969
1970 alloc_size += strlen(value);
1971 }
1972
1973 rv = pcmk__assert_alloc(alloc_size, sizeof(char));
1974 pos = rv;
1975
1976 for (const GList *iter = list; iter != NULL; iter = iter->next) {
1977 const char *value = (const char *) iter->data;
1978
1979 pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
1980 lead_delim = delim;
1981 }
1982
1983 if ((max != 0) && terminate_with_delim) {
1984 sprintf(pos, "%s", delim);
1985 }
1986
1987 return rv;
1988 }
1989
1990 /*!
1991 * \internal
1992 * \brief Execute a fence agent action directly (and asynchronously)
1993 *
1994 * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
1995 * directly on a specified device. Only list, monitor, and status actions are
1996 * expected to use this call, though it should work with any agent command.
1997 *
1998 * \param[in] msg Request XML specifying action
1999 * \param[out] result Where to store result of action
2000 *
2001 * \note If the action is monitor, the device must be registered via the API
2002 * (CIB registration is not sufficient), because monitor should not be
2003 * possible unless the device is "started" (API registered).
2004 */
2005 static void
2006 execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
2007 {
2008 xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_DEVICE_ID,
2009 LOG_ERR);
2010 xmlNode *op = pcmk__xpath_find_one(msg->doc,
2011 "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
2012 LOG_ERR);
2013 const char *id = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
2014 const char *action = pcmk__xe_get(op, PCMK__XA_ST_DEVICE_ACTION);
2015 async_command_t *cmd = NULL;
2016 fenced_device_t *device = NULL;
2017
2018 if ((id == NULL) || (action == NULL)) {
2019 pcmk__info("Malformed API action request: device %s, action %s",
2020 pcmk__s(id, "not specified"),
2021 pcmk__s(action, "not specified"));
2022 set_bad_request_result(result);
2023 return;
2024 }
2025
2026 if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
2027 // Watchdog agent actions are implemented internally
2028 if (fencing_watchdog_timeout_ms <= 0) {
2029 pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2030 "Watchdog fence device not configured");
2031 return;
2032
2033 } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
2034 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2035 pcmk__set_result_output(result,
2036 list_to_string(stonith_watchdog_targets,
2037 "\n", TRUE),
2038 NULL);
2039 return;
2040
2041 } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
2042 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2043 return;
2044 }
2045 }
2046
2047 device = g_hash_table_lookup(device_table, id);
2048 if (device == NULL) {
2049 pcmk__info("Ignoring API '%s' action request because device %s not "
2050 "found",
2051 action, id);
2052 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2053 "'%s' not found", id);
2054 return;
2055
2056 } else if (!pcmk__is_set(device->flags, fenced_df_api_registered)
2057 && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
2058 // Monitors may run only on "started" (API-registered) devices
2059 pcmk__info("Ignoring API '%s' action request because device %s not "
2060 "active",
2061 action, id);
2062 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2063 "'%s' not active", id);
2064 return;
2065 }
2066
2067 cmd = create_async_command(msg);
2068 if (cmd == NULL) {
2069 pcmk__log_xml_warn(msg, "invalid");
2070 set_bad_request_result(result);
2071 return;
2072 }
2073
2074 schedule_stonith_command(cmd, device);
2075 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
2076 }
2077
2078 static void
2079 search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
2080 {
2081 search->replies_received++;
2082 if (can_fence && (device != NULL)) {
2083 if (search->support_action_only != fenced_df_none) {
2084 fenced_device_t *dev = g_hash_table_lookup(device_table, device);
2085
2086 if ((dev != NULL) && !pcmk__is_set(dev->flags, search->support_action_only)) {
2087 return;
2088 }
2089 }
2090 search->capable = g_list_append(search->capable,
2091 pcmk__str_copy(device));
2092 }
2093
2094 if (search->replies_needed == search->replies_received) {
2095
2096 guint ndevices = g_list_length(search->capable);
2097
2098 pcmk__debug("Search found %d device%s that can perform '%s' targeting "
2099 "%s",
2100 ndevices, pcmk__plural_s(ndevices),
2101 pcmk__s(search->action, "unknown action"),
2102 pcmk__s(search->host, "any node"));
2103
2104 search->callback(search->capable, search->user_data);
2105 free(search->host);
2106 free(search->action);
2107 free(search);
2108 }
2109 }
2110
2111 /*!
2112 * \internal
2113 * \brief Check whether the local host is allowed to execute a fencing action
2114 *
2115 * \param[in] device Fence device to check
2116 * \param[in] action Fence action to check
2117 * \param[in] target Hostname of fence target
2118 * \param[in] allow_self Whether self-fencing is allowed for this operation
2119 *
2120 * \return \c true if local host is allowed to execute action, or \c false
2121 * otherwise
2122 */
2123 static bool
2124 localhost_is_eligible(const fenced_device_t *device, const char *action,
2125 const char *target, bool allow_self)
2126 {
2127 bool localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(),
2128 pcmk__str_casei);
2129
2130 CRM_CHECK(action != NULL, return true);
2131
2132 if ((device != NULL) && (device->on_target_actions != NULL)
2133 && pcmk__g_strv_contains(device->on_target_actions, action)) {
2134
2135 if (!localhost_is_target) {
2136 pcmk__trace("Operation '%s' using %s can only be executed for "
2137 "local host, not %s", action, device->id, target);
2138 return false;
2139 }
2140
2141 } else if (localhost_is_target && !allow_self) {
2142 pcmk__trace("'%s' operation does not support self-fencing", action);
2143 return false;
2144 }
2145 return true;
2146 }
2147
2148 /*!
2149 * \internal
2150 * \brief Check if local node is allowed to execute (possibly remapped) action
2151 *
2152 * \param[in] device Fence device to check
2153 * \param[in] action Fence action to check
2154 * \param[in] target Node name of fence target
2155 * \param[in] allow_self Whether self-fencing is allowed for this operation
2156 *
2157 * \return true if local node is allowed to execute \p action or any actions it
2158 * might be remapped to, otherwise false
2159 */
2160 static bool
2161 localhost_is_eligible_with_remap(const fenced_device_t *device,
2162 const char *action, const char *target,
2163 bool allow_self)
2164 {
2165 // Check exact action
2166 if (localhost_is_eligible(device, action, target, allow_self)) {
2167 return true;
2168 }
2169
2170 // Check potential remaps
2171
2172 if (!pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
2173 return false;
2174 }
2175
2176 /* "reboot" might get remapped to "off" then "on", so even if reboot is
2177 * disallowed, return true if either of those is allowed. We'll report
2178 * the disallowed actions with the results. We never allow self-fencing
2179 * for remapped "on" actions because the target is off at that point.
2180 */
2181 if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
2182 || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
2183 return true;
2184 }
2185
2186 return false;
2187 }
2188
2189 /*!
2190 * \internal
2191 * \brief Check whether we can use a device's cached target list
2192 *
2193 * \param[in] dev Fencing device to check
2194 *
2195 * \return \c true if \p dev cached its targets less than a minute ago,
2196 * otherwise \c false
2197 */
2198 static inline bool
2199 can_use_target_cache(const fenced_device_t *dev)
2200 {
2201 return (dev->targets != NULL) && (time(NULL) < (dev->targets_age + 60));
2202 }
2203
2204 static void
2205 can_fence_host_with_device(fenced_device_t *dev,
2206 struct device_search_s *search)
2207 {
2208 gboolean can = FALSE;
2209 const char *dev_id = "Unspecified device";
2210 const char *action = NULL;
2211 const char *target = NULL;
2212 const char *check_type = "Internal bug";
2213 const char *alias = NULL;
2214
2215 CRM_CHECK((dev != NULL) && (search != NULL) && (search->action != NULL),
2216 goto search_report_results);
2217
2218 if (dev->id != NULL) {
2219 dev_id = dev->id;
2220 }
2221
2222 action = search->action;
2223
2224 target = search->host;
2225 if (target == NULL) {
2226 can = TRUE;
2227 check_type = "No target";
2228 goto search_report_results;
2229 }
2230
2231 /* Answer immediately if the device does not support the action
2232 * or the local node is not allowed to perform it
2233 */
2234 if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
2235 && !pcmk__is_set(dev->flags, fenced_df_supports_on)) {
2236 check_type = "Agent does not support 'on'";
2237 goto search_report_results;
2238
2239 } else if (!localhost_is_eligible_with_remap(dev, action, target,
2240 search->allow_self)) {
2241 check_type = "This node is not allowed to execute action";
2242 goto search_report_results;
2243 }
2244
2245 // Check eligibility as specified by pcmk_host_check
2246 check_type = target_list_type(dev);
2247 alias = g_hash_table_lookup(dev->aliases, target);
2248 if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
2249 can = TRUE;
2250
2251 } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
2252 pcmk__str_casei)) {
2253
2254 if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
2255 can = TRUE;
2256 } else if (g_hash_table_lookup(dev->params, PCMK_FENCING_HOST_MAP)
2257 && g_hash_table_lookup(dev->aliases, target)) {
2258 can = TRUE;
2259 }
2260
2261 } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST,
2262 pcmk__str_casei)) {
2263 if (!can_use_target_cache(dev)) {
2264 int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
2265 search->per_device_timeout);
2266
2267 if (device_timeout > search->per_device_timeout) {
2268 pcmk__notice("Since the pcmk_list_timeout (%ds) parameter of "
2269 "%s is larger than " PCMK_OPT_FENCING_TIMEOUT " "
2270 "(%ds), timeout may occur",
2271 device_timeout, dev_id,
2272 search->per_device_timeout);
2273 }
2274
2275 pcmk__trace("Running '%s' to check whether %s is eligible to fence "
2276 "%s (%s)",
2277 check_type, dev_id, target, action);
2278
2279 schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
2280 search->per_device_timeout, search, dynamic_list_search_cb);
2281
2282 /* we'll respond to this search request async in the cb */
2283 return;
2284 }
2285
2286 if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
2287 pcmk__str_casei)) {
2288 can = TRUE;
2289 }
2290
2291 } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) {
2292 int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
2293
2294 if (device_timeout > search->per_device_timeout) {
2295 pcmk__notice("Since the pcmk_status_timeout (%ds) parameter of %s "
2296 "is larger than " PCMK_OPT_FENCING_TIMEOUT " (%ds), "
2297 "timeout may occur",
2298 device_timeout, dev_id, search->per_device_timeout);
2299 }
2300
2301 pcmk__trace("Running '%s' to check whether %s is eligible to fence %s "
2302 "(%s)",
2303 check_type, dev_id, target, action);
2304 schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
2305 search->per_device_timeout, search, status_search_cb);
2306 /* we'll respond to this search request async in the cb */
2307 return;
2308 } else {
2309 pcmk__err("Invalid value for " PCMK_FENCING_HOST_CHECK ": %s",
2310 check_type);
2311 check_type = "Invalid " PCMK_FENCING_HOST_CHECK;
2312 }
2313
2314 search_report_results:
2315 pcmk__info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
2316 dev_id, (can? "" : " not"),
2317 pcmk__s(action, "unspecified action"),
2318 pcmk__s(target, "unspecified target"),
2319 ((alias != NULL)? " (as '" : ""), pcmk__s(alias, ""),
2320 ((alias != NULL)? "')" : ""), check_type);
2321 search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
2322 }
2323
2324 static void
2325 search_devices(gpointer key, gpointer value, gpointer user_data)
2326 {
2327 fenced_device_t *dev = value;
2328 struct device_search_s *search = user_data;
2329
2330 can_fence_host_with_device(dev, search);
2331 }
2332
2333 #define DEFAULT_QUERY_TIMEOUT 20
2334 static void
2335 get_capable_devices(const char *host, const char *action, int timeout,
2336 bool allow_self, void *user_data,
2337 void (*callback) (GList * devices, void *user_data),
2338 uint32_t support_action_only)
2339 {
2340 struct device_search_s *search;
2341 guint ndevices = g_hash_table_size(device_table);
2342
2343 if (ndevices == 0) {
2344 callback(NULL, user_data);
2345 return;
2346 }
2347
2348 search = pcmk__assert_alloc(1, sizeof(struct device_search_s));
2349
2350 search->host = pcmk__str_copy(host);
2351 search->action = pcmk__str_copy(action);
2352 search->per_device_timeout = timeout;
2353 search->allow_self = allow_self;
2354 search->callback = callback;
2355 search->user_data = user_data;
2356 search->support_action_only = support_action_only;
2357
2358 /* We are guaranteed this many replies, even if a device is
2359 * unregistered while the search is in progress.
2360 */
2361 search->replies_needed = ndevices;
2362
2363 pcmk__debug("Searching %d device%s to see which can execute '%s' "
2364 "targeting %s", ndevices, pcmk__plural_s(ndevices),
2365 pcmk__s(search->action, "unknown action"),
2366 pcmk__s(search->host, "any node"));
2367 fenced_foreach_device(search_devices, search);
2368 }
2369
/* State carried through an asynchronous device query until its callback
 * (stonith_query_capable_device_cb()) runs. The callback frees every pointer
 * member as well as the structure itself.
 */
struct st_query_data {
    /* Reply XML that the query results are packed into before sending */
    xmlNode *reply;
    /* If not NULL, name of the peer node to send the reply to */
    char *remote_peer;
    /* If not NULL, ID used to look up the client to send the reply to */
    char *client_id;
    /* Fence target being queried about (NULL requests device parameters) */
    char *target;
    /* Fence action being queried about */
    char *action;
    /* Call options of the query (checked for st_opt_allow_self_fencing and
     * passed along when sending the reply)
     */
    int call_options;
};
2378
2379 /*!
2380 * \internal
2381 * \brief Add action-specific attributes to query reply XML
2382 *
2383 * \param[in,out] xml XML to add attributes to
2384 * \param[in] action Fence action
2385 * \param[in] device Fence device
2386 * \param[in] target Fence target
2387 */
2388 static void
2389 add_action_specific_attributes(xmlNode *xml, const char *action,
2390 const fenced_device_t *device,
2391 const char *target)
2392 {
2393 int action_specific_timeout;
2394 int delay_max;
2395 int delay_base;
2396
2397 CRM_CHECK(xml && action && device, return);
2398
2399 // PCMK__XA_ST_REQUIRED is currently used only for unfencing
2400 if (is_action_required(action, device)) {
2401 pcmk__trace("Action '%s' is required using %s", action, device->id);
2402 pcmk__xe_set_int(xml, PCMK__XA_ST_REQUIRED, 1);
2403 }
2404
2405 // pcmk_<action>_timeout if configured
2406 action_specific_timeout = get_action_timeout(device, action, 0);
2407 if (action_specific_timeout != 0) {
2408 pcmk__trace("Action '%s' has timeout %ds using %s",
2409 action, action_specific_timeout, device->id);
2410 pcmk__xe_set_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
2411 action_specific_timeout);
2412 }
2413
2414 delay_max = get_action_delay_max(device, action);
2415 if (delay_max > 0) {
2416 pcmk__trace("Action '%s' has maximum random delay %ds using %s", action,
2417 delay_max, device->id);
2418 pcmk__xe_set_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
2419 }
2420
2421 delay_base = get_action_delay_base(device, action, target);
2422 if (delay_base > 0) {
2423 pcmk__xe_set_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
2424 }
2425
2426 if ((delay_max > 0) && (delay_base == 0)) {
2427 pcmk__trace("Action '%s' has maximum random delay %ds using %s", action,
2428 delay_max, device->id);
2429 } else if ((delay_max == 0) && (delay_base > 0)) {
2430 pcmk__trace("Action '%s' has a static delay of %ds using %s", action,
2431 delay_base, device->id);
2432 } else if ((delay_max > 0) && (delay_base > 0)) {
2433 pcmk__trace("Action '%s' has a minimum delay of %ds and a randomly "
2434 "chosen maximum delay of %ds using %s",
2435 action, delay_base, delay_max, device->id);
2436 }
2437 }
2438
2439 /*!
2440 * \internal
2441 * \brief Add "disallowed" attribute to query reply XML if appropriate
2442 *
2443 * \param[in,out] xml XML to add attribute to
2444 * \param[in] action Fence action
2445 * \param[in] device Fence device
2446 * \param[in] target Fence target
2447 * \param[in] allow_self Whether self-fencing is allowed
2448 */
2449 static void
2450 add_disallowed(xmlNode *xml, const char *action, const fenced_device_t *device,
2451 const char *target, bool allow_self)
2452 {
2453 if (localhost_is_eligible(device, action, target, allow_self)) {
2454 return;
2455 }
2456
2457 pcmk__trace("Action '%s' using %s is disallowed for local host", action,
2458 device->id);
2459 pcmk__xe_set_bool(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
2460 }
2461
2462 /*!
2463 * \internal
2464 * \brief Add child element with action-specific values to query reply XML
2465 *
2466 * \param[in,out] xml XML to add attribute to
2467 * \param[in] action Fence action
2468 * \param[in] device Fence device
2469 * \param[in] target Fence target
2470 * \param[in] allow_self Whether self-fencing is allowed
2471 */
2472 static void
2473 add_action_reply(xmlNode *xml, const char *action,
2474 const fenced_device_t *device, const char *target,
2475 bool allow_self)
2476 {
2477 xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION);
2478
2479 pcmk__xe_set(child, PCMK_XA_ID, action);
2480 add_action_specific_attributes(child, action, device, target);
2481 add_disallowed(child, action, device, target, allow_self);
2482 }
2483
2484 /*!
2485 * \internal
2486 * \brief Send a reply to a CPG peer or IPC client
2487 *
2488 * \param[in] reply XML reply to send
2489 * \param[in] call_options Send synchronously if st_opt_sync_call is set
2490 * \param[in] remote_peer If not NULL, name of peer node to send CPG reply
2491 * \param[in,out] client If not NULL, client to send IPC reply
2492 */
2493 static void
2494 stonith_send_reply(const xmlNode *reply, int call_options,
2495 const char *remote_peer, pcmk__client_t *client)
2496 {
2497 const pcmk__node_status_t *node = NULL;
2498
2499 CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
2500 return);
2501
2502 if (remote_peer == NULL) {
2503 do_local_reply(reply, client, call_options);
2504 return;
2505 }
2506
2507 node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member);
2508 pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply);
2509 }
2510
2511 static void
2512 stonith_query_capable_device_cb(GList * devices, void *user_data)
2513 {
2514 struct st_query_data *query = user_data;
2515 int available_devices = 0;
2516 xmlNode *wrapper = NULL;
2517 xmlNode *list = NULL;
2518 pcmk__client_t *client = NULL;
2519
2520 if (query->client_id != NULL) {
2521 client = pcmk__find_client_by_id(query->client_id);
2522 if ((client == NULL) && (query->remote_peer == NULL)) {
2523 pcmk__trace("Skipping reply to %s: no longer a client",
2524 query->client_id);
2525 goto done;
2526 }
2527 }
2528
2529 // Pack the results into XML
2530 wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
2531 list = pcmk__xe_create(wrapper, __func__);
2532 pcmk__xe_set(list, PCMK__XA_ST_TARGET, query->target);
2533
2534 for (const GList *iter = devices; iter != NULL; iter = iter->next) {
2535 fenced_device_t *device = g_hash_table_lookup(device_table, iter->data);
2536 const char *action = query->action;
2537 xmlNode *dev = NULL;
2538
2539 if (device == NULL) {
2540 /* It is possible the device got unregistered while
2541 * determining who can fence the target */
2542 continue;
2543 }
2544
2545 available_devices++;
2546
2547 dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
2548 pcmk__xe_set(dev, PCMK_XA_ID, device->id);
2549 pcmk__xe_set(dev, PCMK__XA_NAMESPACE, device->namespace);
2550 pcmk__xe_set(dev, PCMK_XA_AGENT, device->agent);
2551
2552 // Has had successful monitor, list, or status on this node
2553 pcmk__xe_set_int(dev, PCMK__XA_ST_MONITOR_VERIFIED,
2554 pcmk__is_set(device->flags, fenced_df_verified));
2555
2556 pcmk__xe_set_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);
2557
2558 /* If the originating fencer wants to reboot the node, and we have a
2559 * capable device that doesn't support "reboot", remap to "off" instead.
2560 */
2561 if (!pcmk__is_set(device->flags, fenced_df_supports_reboot)
2562 && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
2563 pcmk__str_none)) {
2564 pcmk__trace("%s doesn't support reboot, using values for off "
2565 "instead",
2566 device->id);
2567 action = PCMK_ACTION_OFF;
2568 }
2569
2570 /* Add action-specific values if available */
2571 add_action_specific_attributes(dev, action, device, query->target);
2572 if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
2573 /* A "reboot" *might* get remapped to "off" then "on", so after
2574 * sending the "reboot"-specific values in the main element, we add
2575 * sub-elements for "off" and "on" values.
2576 *
2577 * We short-circuited earlier if "reboot", "off" and "on" are all
2578 * disallowed for the local host. However if only one or two are
2579 * disallowed, we send back the results and mark which ones are
2580 * disallowed. If "reboot" is disallowed, this might cause problems
2581 * with older fencer versions, which won't check for it. Older
2582 * versions will ignore "off" and "on", so they are not a problem.
2583 */
2584 add_disallowed(dev, action, device, query->target,
2585 pcmk__is_set(query->call_options,
2586 st_opt_allow_self_fencing));
2587 add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
2588 pcmk__is_set(query->call_options,
2589 st_opt_allow_self_fencing));
2590 add_action_reply(dev, PCMK_ACTION_ON, device, query->target, false);
2591 }
2592
2593 /* A query without a target wants device parameters */
2594 if (query->target == NULL) {
2595 xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);
2596
2597 g_hash_table_foreach(device->params, hash2field, attrs);
2598 }
2599 }
2600
2601 pcmk__xe_set_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
2602 if (query->target != NULL) {
2603 pcmk__debug("Found %d matching device%s for target '%s'",
2604 available_devices, pcmk__plural_s(available_devices),
2605 query->target);
2606 } else {
2607 pcmk__debug("%d device%s installed", available_devices,
2608 pcmk__plural_s(available_devices));
2609 }
2610
2611 pcmk__log_xml_trace(list, "query-result");
2612
2613 stonith_send_reply(query->reply, query->call_options, query->remote_peer,
2614 client);
2615
2616 done:
2617 pcmk__xml_free(query->reply);
2618 free(query->remote_peer);
2619 free(query->client_id);
2620 free(query->target);
2621 free(query->action);
2622 free(query);
2623 g_list_free_full(devices, free);
2624 }
2625
2626 /*!
2627 * \internal
2628 * \brief Log the result of an asynchronous command
2629 *
2630 * \param[in] cmd Command the result is for
2631 * \param[in] result Result of command
2632 * \param[in] pid Process ID of command, if available
2633 * \param[in] next Alternate device that will be tried if command failed
2634 * \param[in] op_merged Whether this command was merged with an earlier one
2635 */
2636 static void
2637 log_async_result(const async_command_t *cmd,
2638 const pcmk__action_result_t *result,
2639 int pid, const char *next, bool op_merged)
2640 {
2641 int log_level = LOG_ERR;
2642 int output_log_level = PCMK__LOG_NEVER;
2643 guint devices_remaining = g_list_length(cmd->next_device_iter);
2644
2645 GString *msg = g_string_sized_new(80); // Reasonable starting size
2646
2647 // Choose log levels appropriately if we have a result
2648 if (pcmk__result_ok(result)) {
2649 log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
2650 if ((result->action_stdout != NULL)
2651 && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
2652 pcmk__str_none)) {
2653 output_log_level = LOG_DEBUG;
2654 }
2655 next = NULL;
2656 } else {
2657 log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
2658 if ((result->action_stdout != NULL)
2659 && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
2660 pcmk__str_none)) {
2661 output_log_level = LOG_WARNING;
2662 }
2663 }
2664
2665 // Build the log message piece by piece
2666 pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
2667 if (pid != 0) {
2668 g_string_append_printf(msg, "[%d] ", pid);
2669 }
2670 if (cmd->target != NULL) {
2671 pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
2672 }
2673 if (cmd->device != NULL) {
2674 pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
2675 }
2676
2677 // Add exit status or execution status as appropriate
2678 if (result->execution_status == PCMK_EXEC_DONE) {
2679 g_string_append_printf(msg, "returned %d", result->exit_status);
2680 } else {
2681 pcmk__g_strcat(msg, "could not be executed: ",
2682 pcmk_exec_status_str(result->execution_status), NULL);
2683 }
2684
2685 // Add exit reason and next device if appropriate
2686 if (result->exit_reason != NULL) {
2687 pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
2688 }
2689 if (next != NULL) {
2690 pcmk__g_strcat(msg, ", retrying with ", next, NULL);
2691 }
2692 if (devices_remaining > 0) {
2693 g_string_append_printf(msg, " (%u device%s remaining)",
2694 (unsigned int) devices_remaining,
2695 pcmk__plural_s(devices_remaining));
2696 }
2697 g_string_append_printf(msg, " " QB_XS " %scall %d from %s",
2698 (op_merged? "merged " : ""), cmd->id,
2699 cmd->client_name);
2700
2701 // Log the result
2702 do_crm_log(log_level, "%s", msg->str);
2703 g_string_free(msg, TRUE);
2704
2705 // Log the output (which may have multiple lines), if appropriate
2706 if (output_log_level != PCMK__LOG_NEVER) {
2707 char *prefix = pcmk__assert_asprintf("%s[%d]", cmd->device, pid);
2708
2709 crm_log_output(output_log_level, prefix, result->action_stdout);
2710 free(prefix);
2711 }
2712 }
2713
2714 /*!
2715 * \internal
2716 * \brief Reply to requester after asynchronous command completion
2717 *
2718 * \param[in] cmd Command that completed
2719 * \param[in] result Result of command
2720 * \param[in] pid Process ID of command, if available
2721 * \param[in] merged If true, command was merged with another, not executed
2722 */
2723 static void
2724 send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
2725 int pid, bool merged)
2726 {
2727 xmlNode *reply = NULL;
2728 pcmk__client_t *client = NULL;
2729
2730 CRM_CHECK((cmd != NULL) && (result != NULL), return);
2731
2732 log_async_result(cmd, result, pid, NULL, merged);
2733
2734 if (cmd->client != NULL) {
2735 client = pcmk__find_client_by_id(cmd->client);
2736 if ((client == NULL) && (cmd->origin == NULL)) {
2737 pcmk__trace("Skipping reply to %s: no longer a client",
2738 cmd->client);
2739 return;
2740 }
2741 }
2742
2743 reply = construct_async_reply(cmd, result);
2744 if (merged) {
2745 pcmk__xe_set_bool(reply, PCMK__XA_ST_OP_MERGED, true);
2746 }
2747
2748 if (pcmk__is_fencing_action(cmd->action)
2749 && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
2750 /* The target was also the originator, so broadcast the result on its
2751 * behalf (since it will be unable to).
2752 */
2753 pcmk__trace("Broadcast '%s' result for %s (target was also originator)",
2754 cmd->action, cmd->target);
2755 pcmk__xe_set(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
2756 pcmk__xe_set(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
2757 pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply);
2758 } else {
2759 // Reply only to the originator
2760 stonith_send_reply(reply, cmd->options, cmd->origin, client);
2761 }
2762
2763 pcmk__log_xml_trace(reply, "Reply");
2764 pcmk__xml_free(reply);
2765 }
2766
2767 static void
2768 cancel_stonith_command(async_command_t * cmd)
2769 {
2770 fenced_device_t *device = cmd_device(cmd);
2771
2772 if (device == NULL) {
2773 return;
2774 }
2775
2776 pcmk__trace("Cancel scheduled '%s' action using %s", cmd->action,
2777 device->id);
2778 device->pending_ops = g_list_remove(device->pending_ops, cmd);
2779 }
2780
2781 /*!
2782 * \internal
2783 * \brief Cancel and reply to any duplicates of a just-completed operation
2784 *
2785 * Check whether any fencing operations are scheduled to do the same thing as
2786 * one that just succeeded. If so, rather than performing the same operation
2787 * twice, return the result of this operation for all matching pending commands.
2788 *
2789 * \param[in,out] cmd Fencing operation that just succeeded
2790 * \param[in] result Result of \p cmd
2791 * \param[in] pid If nonzero, process ID of agent invocation (for logs)
2792 *
2793 * \note Duplicate merging will do the right thing for either type of remapped
2794 * reboot. If the executing fencer remapped an unsupported reboot to off,
2795 * then cmd->action will be "reboot" and will be merged with any other
2796 * reboot requests. If the originating fencer remapped a topology reboot
2797 * to off then on, we will get here once with cmd->action "off" and once
2798 * with "on", and they will be merged separately with similar requests.
2799 */
2800 static void
2801 reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
2802 int pid)
2803 {
2804 GList *next = NULL;
2805
2806 for (GList *iter = cmd_list; iter != NULL; iter = next) {
2807 async_command_t *cmd_other = iter->data;
2808
2809 next = iter->next; // We might delete this entry, so grab next now
2810
2811 if (cmd == cmd_other) {
2812 continue;
2813 }
2814
2815 /* A pending operation matches if:
2816 * 1. The client connections are different.
2817 * 2. The target is the same.
2818 * 3. The fencing action is the same.
2819 * 4. The device scheduled to execute the action is the same.
2820 */
2821 if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
2822 !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
2823 !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
2824 !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
2825
2826 continue;
2827 }
2828
2829 pcmk__notice("Merging fencing action '%s'%s%s originating from client "
2830 "%s with identical fencing request from client %s",
2831 cmd_other->action,
2832 (cmd_other->target == NULL)? "" : " targeting ",
2833 pcmk__s(cmd_other->target, ""), cmd_other->client_name,
2834 cmd->client_name);
2835
2836 // Stop tracking the duplicate, send its result, and cancel it
2837 cmd_list = g_list_remove_link(cmd_list, iter);
2838 send_async_reply(cmd_other, result, pid, true);
2839 cancel_stonith_command(cmd_other);
2840
2841 free_async_command(cmd_other);
2842 g_list_free_1(iter);
2843 }
2844 }
2845
2846 /*!
2847 * \internal
2848 * \brief Return the next required device (if any) for an operation
2849 *
2850 * \param[in,out] cmd Fencing operation that just succeeded
2851 *
2852 * \return Next device required for action if any, otherwise NULL
2853 */
2854 static fenced_device_t *
2855 next_required_device(async_command_t *cmd)
2856 {
2857 for (const GList *iter = cmd->next_device_iter; iter != NULL;
2858 iter = iter->next) {
2859 fenced_device_t *next_device = g_hash_table_lookup(device_table,
2860 iter->data);
2861
2862 if (!is_action_required(cmd->action, next_device)) {
2863 continue;
2864 }
2865
2866 /* This is only called for successful actions, so it's OK to skip
2867 * non-required devices.
2868 */
2869 cmd->next_device_iter = iter->next;
2870 return next_device;
2871 }
2872
2873 return NULL;
2874 }
2875
2876 static void
2877 st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
2878 {
2879 async_command_t *cmd = user_data;
2880
2881 fenced_device_t *device = NULL;
2882 fenced_device_t *next_device = NULL;
2883
2884 CRM_CHECK(cmd != NULL, return);
2885
2886 device = cmd_device(cmd);
2887 cmd->active_on = NULL;
2888
2889 /* The device is ready to do something else now */
2890 if (device != NULL) {
2891 if (!pcmk__is_set(device->flags, fenced_df_verified)
2892 && pcmk__result_ok(result)
2893 && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
2894 PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
2895 NULL)) {
2896
2897 fenced_device_set_flags(device, fenced_df_verified);
2898 }
2899
2900 mainloop_set_trigger(device->work);
2901 }
2902
2903 if (pcmk__result_ok(result)) {
2904 next_device = next_required_device(cmd);
2905
2906 } else if ((cmd->next_device_iter != NULL)
2907 && !is_action_required(cmd->action, device)) {
2908 /* if this device didn't work out, see if there are any others we can try.
2909 * if the failed device was 'required', we can't pick another device. */
2910 next_device = g_hash_table_lookup(device_table,
2911 cmd->next_device_iter->data);
2912 cmd->next_device_iter = cmd->next_device_iter->next;
2913 }
2914
2915 if (next_device == NULL) {
2916 send_async_reply(cmd, result, pid, false);
2917 if (pcmk__result_ok(result)) {
2918 reply_to_duplicates(cmd, result, pid);
2919 }
2920 free_async_command(cmd);
2921
2922 } else { // This operation requires more fencing
2923 log_async_result(cmd, result, pid, next_device->id, false);
2924 schedule_stonith_command(cmd, next_device);
2925 }
2926 }
2927
2928 static void
2929 stonith_fence_get_devices_cb(GList * devices, void *user_data)
2930 {
2931 async_command_t *cmd = user_data;
2932 fenced_device_t *device = NULL;
2933 guint ndevices = g_list_length(devices);
2934
2935 pcmk__info("Found %d matching device%s for target '%s'", ndevices,
2936 pcmk__plural_s(ndevices), cmd->target);
2937
2938 if (devices != NULL) {
2939 device = g_hash_table_lookup(device_table, devices->data);
2940 }
2941
2942 if (device == NULL) { // No device found
2943 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2944
2945 pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2946 "No device configured for target '%s'",
2947 cmd->target);
2948 send_async_reply(cmd, &result, 0, false);
2949 pcmk__reset_result(&result);
2950 free_async_command(cmd);
2951 g_list_free_full(devices, free);
2952
2953 } else {
2954 /* Device found. Schedule a fencing command for it.
2955 *
2956 * Assign devices to device_list so that it will be freed with cmd.
2957 */
2958 cmd->device_list = devices;
2959 cmd->next_device_iter = devices->next;
2960 schedule_stonith_command(cmd, device);
2961 }
2962 }
2963
2964 /*!
2965 * \internal
2966 * \brief Execute a fence action via the local node
2967 *
2968 * \param[in] msg Fencing request
2969 * \param[out] result Where to store result of fence action
2970 */
2971 static void
2972 fence_locally(xmlNode *msg, pcmk__action_result_t *result)
2973 {
2974 const char *device_id = NULL;
2975 fenced_device_t *device = NULL;
2976 async_command_t *cmd = NULL;
2977 xmlNode *dev = NULL;
2978
2979 CRM_CHECK((msg != NULL) && (result != NULL), return);
2980
2981 dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]",
2982 LOG_ERR);
2983
2984 cmd = create_async_command(msg);
2985 if (cmd == NULL) {
2986 pcmk__log_xml_warn(msg, "invalid");
2987 set_bad_request_result(result);
2988 return;
2989 }
2990
2991 device_id = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
2992 if (device_id != NULL) {
2993 device = g_hash_table_lookup(device_table, device_id);
2994 if (device == NULL) {
2995 pcmk__err("Requested device '%s' is not available", device_id);
2996 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2997 "Requested device '%s' not found", device_id);
2998 return;
2999 }
3000 schedule_stonith_command(cmd, device);
3001
3002 } else {
3003 const char *host = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3004
3005 if (pcmk__is_set(cmd->options, st_opt_cs_nodeid)) {
3006 int nodeid = 0;
3007 pcmk__node_status_t *node = NULL;
3008
3009 pcmk__scan_min_int(host, &nodeid, 0);
3010 node = pcmk__search_node_caches(nodeid, NULL, NULL,
3011 pcmk__node_search_any
3012 |pcmk__node_search_cluster_cib);
3013 if (node != NULL) {
3014 host = node->name;
3015 }
3016 }
3017
3018 /* If we get to here, then self-fencing is implicitly allowed */
3019 get_capable_devices(host, cmd->action, cmd->default_timeout,
3020 TRUE, cmd, stonith_fence_get_devices_cb,
3021 fenced_support_flag(cmd->action));
3022 }
3023
3024 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3025 }
3026
3027 /*!
3028 * \internal
3029 * \brief Build an XML reply for a fencing operation
3030 *
3031 * \param[in] request Request that reply is for
3032 * \param[in] data If not NULL, add to reply as call data
3033 * \param[in] result Full result of fencing operation
3034 *
3035 * \return Newly created XML reply
3036 * \note The caller is responsible for freeing the result.
3037 * \note This has some overlap with construct_async_reply(), but that copies
3038 * values from an async_command_t, whereas this one copies them from the
3039 * request.
3040 */
3041 xmlNode *
3042 fenced_construct_reply(const xmlNode *request, xmlNode *data,
3043 const pcmk__action_result_t *result)
3044 {
3045 xmlNode *reply = NULL;
3046
3047 reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
3048
3049 pcmk__xe_set(reply, PCMK__XA_ST_ORIGIN, __func__);
3050 pcmk__xe_set(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
3051 stonith__xe_set_result(reply, result);
3052
3053 if (request == NULL) {
3054 /* Most likely, this is the result of a stonith operation that was
3055 * initiated before we came up. Unfortunately that means we lack enough
3056 * information to provide clients with a full result.
3057 *
3058 * @TODO Maybe synchronize this information at start-up?
3059 */
3060 pcmk__warn("Missing request information for client notifications for "
3061 "operation with result '%s' (initiated before we came up?)",
3062 pcmk_exec_status_str(result->execution_status));
3063
3064 } else {
3065 const char *name = NULL;
3066 const char *value = NULL;
3067
3068 // Attributes to copy from request to reply
3069 const char *names[] = {
3070 PCMK__XA_ST_OP,
3071 PCMK__XA_ST_CALLID,
3072 PCMK__XA_ST_CLIENTID,
3073 PCMK__XA_ST_CLIENTNAME,
3074 PCMK__XA_ST_REMOTE_OP,
3075 PCMK__XA_ST_CALLOPT,
3076 };
3077
3078 for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
3079 name = names[lpc];
3080 value = pcmk__xe_get(request, name);
3081 pcmk__xe_set(reply, name, value);
3082 }
3083 if (data != NULL) {
3084 xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA);
3085
3086 pcmk__xml_copy(wrapper, data);
3087 }
3088 }
3089 return reply;
3090 }
3091
3092 /*!
3093 * \internal
3094 * \brief Build an XML reply to an asynchronous fencing command
3095 *
3096 * \param[in] cmd Fencing command that reply is for
3097 * \param[in] result Command result
3098 */
3099 static xmlNode *
3100 construct_async_reply(const async_command_t *cmd,
3101 const pcmk__action_result_t *result)
3102 {
3103 xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
3104
3105 pcmk__xe_set(reply, PCMK__XA_ST_ORIGIN, __func__);
3106 pcmk__xe_set(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
3107 pcmk__xe_set(reply, PCMK__XA_ST_OP, cmd->op);
3108 pcmk__xe_set(reply, PCMK__XA_ST_DEVICE_ID, cmd->device);
3109 pcmk__xe_set(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
3110 pcmk__xe_set(reply, PCMK__XA_ST_CLIENTID, cmd->client);
3111 pcmk__xe_set(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name);
3112 pcmk__xe_set(reply, PCMK__XA_ST_TARGET, cmd->target);
3113 pcmk__xe_set(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op);
3114 pcmk__xe_set(reply, PCMK__XA_ST_ORIGIN, cmd->origin);
3115 pcmk__xe_set_int(reply, PCMK__XA_ST_CALLID, cmd->id);
3116 pcmk__xe_set_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);
3117
3118 stonith__xe_set_result(reply, result);
3119 return reply;
3120 }
3121
3122 bool
3123 fencing_peer_active(pcmk__node_status_t *peer)
3124 {
3125 return (peer != NULL) && (peer->name != NULL)
3126 && pcmk__is_set(peer->processes, crm_get_cluster_proc());
3127 }
3128
3129 void
3130 set_fencing_completed(remote_fencing_op_t *op)
3131 {
3132 struct timespec tv;
3133
3134 qb_util_timespec_from_epoch_get(&tv);
3135 op->completed = tv.tv_sec;
3136 op->completed_nsec = tv.tv_nsec;
3137 }
3138
3139 /*!
3140 * \internal
3141 * \brief Look for alternate node needed if local node shouldn't fence target
3142 *
3143 * \param[in] target Node that must be fenced
3144 *
3145 * \return Name of an alternate node that should fence \p target if any,
3146 * or NULL otherwise
3147 */
3148 static const char *
3149 check_alternate_host(const char *target)
3150 {
3151 GHashTableIter gIter;
3152 pcmk__node_status_t *entry = NULL;
3153
3154 if (!pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
3155 return NULL;
3156 }
3157
3158 g_hash_table_iter_init(&gIter, pcmk__peer_cache);
3159 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
3160 if (!fencing_peer_active(entry)
3161 || pcmk__str_eq(entry->name, target, pcmk__str_casei)) {
3162 continue;
3163 }
3164
3165 pcmk__notice("Forwarding self-fencing request to %s", entry->name);
3166 return entry->name;
3167 }
3168
3169 pcmk__warn("Will handle own fencing because no peer can");
3170 return NULL;
3171 }
3172
3173 static void
3174 remove_relay_op(xmlNode * request)
3175 {
3176 xmlNode *dev = pcmk__xpath_find_one(request->doc,
3177 "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
3178 LOG_TRACE);
3179 const char *relay_op_id = NULL;
3180 const char *op_id = NULL;
3181 const char *client_name = NULL;
3182 const char *target = NULL;
3183 remote_fencing_op_t *relay_op = NULL;
3184 remote_fencing_op_t *list_op = NULL;
3185 GHashTableIter iter;
3186
3187 if (dev != NULL) {
3188 target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3189 }
3190
3191 relay_op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP_RELAY);
3192 op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP);
3193 client_name = pcmk__xe_get(request, PCMK__XA_ST_CLIENTNAME);
3194
3195 if ((relay_op_id == NULL) || (target == NULL)
3196 || !pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
3197 return;
3198 }
3199
3200 /* Delete RELAY operation. */
3201 relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
3202
3203 if (relay_op == NULL) {
3204 return;
3205 }
3206
3207 g_hash_table_iter_init(&iter, stonith_remote_op_list);
3208
3209 /* If the operation to be deleted is registered as a duplicate, delete the registration. */
3210 while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
3211 if (list_op == relay_op) {
3212 continue;
3213 }
3214
3215 for (GList *dup_iter = list_op->duplicates; dup_iter != NULL;
3216 dup_iter = dup_iter->next) {
3217 remote_fencing_op_t *other = dup_iter->data;
3218
3219 if (other != relay_op) {
3220 continue;
3221 }
3222
3223 other->duplicates = g_list_remove(other->duplicates, relay_op);
3224 break;
3225 }
3226 }
3227
3228 pcmk__debug("Deleting relay op %s ('%s'%s%s for %s), "
3229 "replaced by op %s ('%s'%s%s for %s)",
3230 relay_op->id, relay_op->action,
3231 (relay_op->target == NULL)? "" : " targeting ",
3232 pcmk__s(relay_op->target, ""),
3233 relay_op->client_name, op_id, relay_op->action,
3234 (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
3235 client_name);
3236
3237 g_hash_table_remove(stonith_remote_op_list, relay_op_id);
3238 }
3239
3240 /*!
3241 * \internal
3242 * \brief Check whether an API request was sent by a privileged user
3243 *
3244 * API commands related to fencing configuration may be done only by privileged
3245 * IPC users (i.e. root or hacluster), because all other users should go through
3246 * the CIB to have ACLs applied. If no client was given, this is a peer request,
3247 * which is always allowed.
3248 *
3249 * \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
3250 * \param[in] op Requested API operation (for logging only)
3251 *
3252 * \return true if sender is peer or privileged client, otherwise false
3253 */
3254 static inline bool
3255 is_privileged(const pcmk__client_t *c, const char *op)
3256 {
3257 if ((c == NULL) || pcmk__is_set(c->flags, pcmk__client_privileged)) {
3258 return true;
3259 }
3260
3261 pcmk__warn("Rejecting IPC request '%s' from unprivileged client %s",
3262 pcmk__s(op, ""), pcmk__client_name(c));
3263 return false;
3264 }
3265
3266 static xmlNode *
3267 handle_unknown_request(pcmk__request_t *request)
3268 {
3269 pcmk__err("Unknown %s request %s from %s %s",
3270 (request->ipc_client != NULL) ? "IPC" : "CPG",
3271 request->op, pcmk__request_origin_type(request),
3272 pcmk__request_origin(request));
3273 pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
3274 "Unknown request type '%s' (bug?)",
3275 pcmk__s(request->op, ""));
3276 return fenced_construct_reply(request->xml, NULL, &request->result);
3277 }
3278
3279 // CRM_OP_REGISTER
3280 static xmlNode *
3281 handle_register_request(pcmk__request_t *request)
3282 {
3283 xmlNode *reply = NULL;
3284
3285 if (request->peer != NULL) {
3286 return handle_unknown_request(request);
3287 }
3288
3289 reply = pcmk__xe_create(NULL, "reply");
3290 pcmk__xe_set(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER);
3291 pcmk__xe_set(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id);
3292 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3293 pcmk__set_request_flags(request, pcmk__request_reuse_options);
3294 return reply;
3295 }
3296
3297 // STONITH_OP_EXEC
3298 static xmlNode *
3299 handle_agent_request(pcmk__request_t *request)
3300 {
3301 execute_agent_action(request->xml, &request->result);
3302 if (request->result.execution_status == PCMK_EXEC_PENDING) {
3303 return NULL;
3304 }
3305 return fenced_construct_reply(request->xml, NULL, &request->result);
3306 }
3307
3308 // STONITH_OP_TIMEOUT_UPDATE
3309 static xmlNode *
3310 handle_update_timeout_request(pcmk__request_t *request)
3311 {
3312 const char *call_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CALLID);
3313 const char *client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID);
3314 int op_timeout = 0;
3315
3316 pcmk__xe_get_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout);
3317 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
3318 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3319 return NULL;
3320 }
3321
3322 // STONITH_OP_QUERY
3323 static xmlNode *
3324 handle_query_request(pcmk__request_t *request)
3325 {
3326 int timeout = 0;
3327 xmlNode *dev = NULL;
3328 const char *action = NULL;
3329 const char *target = NULL;
3330 const char *client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID);
3331 struct st_query_data *query = NULL;
3332
3333 if (request->peer != NULL) {
3334 // Record it for the future notification
3335 create_remote_stonith_op(client_id, request->xml, TRUE);
3336 }
3337
3338 /* Delete the DC node RELAY operation. */
3339 remove_relay_op(request->xml);
3340
3341 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3342
3343 dev = pcmk__xpath_find_one(request->xml->doc,
3344 "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
3345 PCMK__LOG_NEVER);
3346 if (dev != NULL) {
3347 const char *device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
3348
3349 if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
3350 return NULL; // No query or reply necessary
3351 }
3352 target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3353 action = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION);
3354 }
3355
3356 pcmk__log_xml_trace(request->xml, "Query");
3357
3358 query = pcmk__assert_alloc(1, sizeof(struct st_query_data));
3359
3360 query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
3361 query->remote_peer = pcmk__str_copy(request->peer);
3362 query->client_id = pcmk__str_copy(client_id);
3363 query->target = pcmk__str_copy(target);
3364 query->action = pcmk__str_copy(action);
3365 query->call_options = request->call_options;
3366
3367 pcmk__xe_get_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
3368 get_capable_devices(target, action, timeout,
3369 pcmk__is_set(query->call_options,
3370 st_opt_allow_self_fencing),
3371 query, stonith_query_capable_device_cb, fenced_df_none);
3372 return NULL;
3373 }
3374
3375 // STONITH_OP_NOTIFY
3376 static xmlNode *
3377 handle_notify_request(pcmk__request_t *request)
3378 {
3379 const char *flag_name = NULL;
3380
3381 if (request->peer != NULL) {
3382 return handle_unknown_request(request);
3383 }
3384
3385 flag_name = pcmk__xe_get(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
3386 if (flag_name != NULL) {
3387 pcmk__debug("Enabling %s callbacks for client %s", flag_name,
3388 pcmk__request_origin(request));
3389 pcmk__set_client_flags(request->ipc_client,
3390 fenced_parse_notify_flag(flag_name));
3391 }
3392
3393 flag_name = pcmk__xe_get(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
3394 if (flag_name != NULL) {
3395 pcmk__debug("Disabling %s callbacks for client %s", flag_name,
3396 pcmk__request_origin(request));
3397 pcmk__clear_client_flags(request->ipc_client,
3398 fenced_parse_notify_flag(flag_name));
3399 }
3400
3401 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3402 pcmk__set_request_flags(request, pcmk__request_reuse_options);
3403
3404 return pcmk__ipc_create_ack(request->ipc_flags, NULL, CRM_EX_OK);
3405 }
3406
3407 // STONITH_OP_RELAY
3408 static xmlNode *
3409 handle_relay_request(pcmk__request_t *request)
3410 {
3411 xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
3412 "//*[@" PCMK__XA_ST_TARGET "]",
3413 LOG_TRACE);
3414
3415 pcmk__notice("Received forwarded fencing request from %s %s to fence (%s) "
3416 "peer %s",
3417 pcmk__request_origin_type(request),
3418 pcmk__request_origin(request),
3419 pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION),
3420 pcmk__xe_get(dev, PCMK__XA_ST_TARGET));
3421
3422 if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
3423 set_bad_request_result(&request->result);
3424 return fenced_construct_reply(request->xml, NULL, &request->result);
3425 }
3426
3427 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3428 return NULL;
3429 }
3430
3431 // STONITH_OP_FENCE
3432 static xmlNode *
3433 handle_fence_request(pcmk__request_t *request)
3434 {
3435 const char *alternate_host = NULL;
3436 xmlNode *dev = NULL;
3437 const char *target = NULL;
3438 const char *action = NULL;
3439 const char *device = NULL;
3440
3441 if (request->peer != NULL) {
3442 fence_locally(request->xml, &request->result);
3443 goto done;
3444 }
3445
3446 if (pcmk__is_set(request->call_options, st_opt_manual_ack)) {
3447 int rc = fenced_handle_manual_confirmation(request->ipc_client,
3448 request->xml);
3449
3450 if (rc == pcmk_rc_ok) {
3451 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3452 } else if (rc == EINPROGRESS) {
3453 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
3454 NULL);
3455 } else {
3456 set_bad_request_result(&request->result);
3457 }
3458
3459 goto done;
3460 }
3461
3462 dev = pcmk__xpath_find_one(request->xml->doc,
3463 "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE);
3464 target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET);
3465 action = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION);
3466 device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID);
3467
3468 if (request->ipc_client != NULL) {
3469 int tolerance = 0;
3470
3471 pcmk__notice("Client %s wants to fence (%s) %s using %s",
3472 pcmk__request_origin(request), action, target,
3473 (device? device : "any device"));
3474 pcmk__xe_get_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
3475 if (stonith_check_fence_tolerance(tolerance, target, action)) {
3476 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3477 return fenced_construct_reply(request->xml, NULL, &request->result);
3478 }
3479 alternate_host = check_alternate_host(target);
3480
3481 } else {
3482 pcmk__notice("Peer %s wants to fence (%s) '%s' with device '%s'",
3483 request->peer, action, target,
3484 (device == NULL)? "(any)" : device);
3485 }
3486
3487 if (alternate_host != NULL) {
3488 const char *client_id = NULL;
3489 remote_fencing_op_t *op = NULL;
3490 pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL,
3491 pcmk__node_search_cluster_member);
3492
3493 if (request->ipc_client->id == 0) {
3494 client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID);
3495 } else {
3496 client_id = request->ipc_client->id;
3497 }
3498
3499 /* Create a duplicate fencing operation to relay with the client ID.
3500 * When a query response is received, this operation should be
3501 * deleted to avoid keeping the duplicate around.
3502 */
3503 op = create_remote_stonith_op(client_id, request->xml, FALSE);
3504
3505 pcmk__xe_set(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
3506 pcmk__xe_set(request->xml, PCMK__XA_ST_CLIENTID,
3507 request->ipc_client->id);
3508 pcmk__xe_set(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
3509
3510 // @TODO On failure, fail request immediately, or maybe panic
3511 pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml);
3512
3513 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3514
3515 } else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
3516 FALSE) == NULL) {
3517 set_bad_request_result(&request->result);
3518
3519 } else {
3520 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3521 }
3522
3523 done:
3524 if (request->result.execution_status == PCMK_EXEC_PENDING) {
3525 return NULL;
3526 }
3527
3528 return fenced_construct_reply(request->xml, NULL, &request->result);
3529 }
3530
3531 // STONITH_OP_FENCE_HISTORY
3532 static xmlNode *
3533 handle_history_request(pcmk__request_t *request)
3534 {
3535 xmlNode *reply = NULL;
3536 xmlNode *data = NULL;
3537
3538 stonith_fence_history(request->xml, &data, request->peer,
3539 request->call_options);
3540 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3541 if (!pcmk__is_set(request->call_options, st_opt_discard_reply)) {
3542 /* When the local node broadcasts its history, it sets
3543 * st_opt_discard_reply and doesn't need a reply.
3544 */
3545 reply = fenced_construct_reply(request->xml, data, &request->result);
3546 }
3547 pcmk__xml_free(data);
3548 return reply;
3549 }
3550
3551 // STONITH_OP_DEVICE_ADD
3552 static xmlNode *
3553 handle_device_add_request(pcmk__request_t *request)
3554 {
3555 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3556 xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
3557 "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR);
3558
3559 if (is_privileged(request->ipc_client, op)) {
3560 int rc = fenced_device_register(dev, false);
3561
3562 rc = pcmk_rc2legacy(rc);
3563 pcmk__set_result(&request->result,
3564 ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
3565 stonith__legacy2status(rc),
3566 ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
3567 } else {
3568 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3569 PCMK_EXEC_INVALID,
3570 "Unprivileged users must register device via CIB");
3571 }
3572 fenced_send_config_notification(op, &request->result,
3573 (dev == NULL)? NULL : pcmk__xe_id(dev));
3574 return fenced_construct_reply(request->xml, NULL, &request->result);
3575 }
3576
3577 // STONITH_OP_DEVICE_DEL
3578 static xmlNode *
3579 handle_device_delete_request(pcmk__request_t *request)
3580 {
3581 xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
3582 "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR);
3583 const char *device_id = pcmk__xe_get(dev, PCMK_XA_ID);
3584 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3585
3586 if (is_privileged(request->ipc_client, op)) {
3587 stonith_device_remove(device_id, false);
3588 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3589 } else {
3590 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3591 PCMK_EXEC_INVALID,
3592 "Unprivileged users must delete device via CIB");
3593 }
3594 fenced_send_config_notification(op, &request->result, device_id);
3595 return fenced_construct_reply(request->xml, NULL, &request->result);
3596 }
3597
3598 // STONITH_OP_LEVEL_ADD
3599 static xmlNode *
3600 handle_level_add_request(pcmk__request_t *request)
3601 {
3602 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3603
3604 if (is_privileged(request->ipc_client, op)) {
3605 fenced_register_level(request->xml, &request->result);
3606 } else {
3607 unpack_level_request(request->xml, NULL, NULL, NULL);
3608 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3609 PCMK_EXEC_INVALID,
3610 "Unprivileged users must add level via CIB");
3611 }
3612 return fenced_construct_reply(request->xml, NULL, &request->result);
3613 }
3614
3615 // STONITH_OP_LEVEL_DEL
3616 static xmlNode *
3617 handle_level_delete_request(pcmk__request_t *request)
3618 {
3619 const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP);
3620
3621 if (is_privileged(request->ipc_client, op)) {
3622 fenced_unregister_level(request->xml, &request->result);
3623 } else {
3624 unpack_level_request(request->xml, NULL, NULL, NULL);
3625 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3626 PCMK_EXEC_INVALID,
3627 "Unprivileged users must delete level via CIB");
3628 }
3629 return fenced_construct_reply(request->xml, NULL, &request->result);
3630 }
3631
3632 // CRM_OP_RM_NODE_CACHE
3633 static xmlNode *
3634 handle_cache_request(pcmk__request_t *request)
3635 {
3636 int node_id = 0;
3637 const char *name = NULL;
3638
3639 pcmk__xe_get_int(request->xml, PCMK_XA_ID, &node_id);
3640 name = pcmk__xe_get(request->xml, PCMK_XA_UNAME);
3641 pcmk__cluster_forget_cluster_node(node_id, name);
3642 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3643 return NULL;
3644 }
3645
3646 static void
3647 fenced_register_handlers(void)
3648 {
3649 pcmk__server_command_t handlers[] = {
3650 { CRM_OP_REGISTER, handle_register_request },
3651 { STONITH_OP_EXEC, handle_agent_request },
3652 { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
3653 { STONITH_OP_QUERY, handle_query_request },
3654 { STONITH_OP_NOTIFY, handle_notify_request },
3655 { STONITH_OP_RELAY, handle_relay_request },
3656 { STONITH_OP_FENCE, handle_fence_request },
3657 { STONITH_OP_FENCE_HISTORY, handle_history_request },
3658 { STONITH_OP_DEVICE_ADD, handle_device_add_request },
3659 { STONITH_OP_DEVICE_DEL, handle_device_delete_request },
3660 { STONITH_OP_LEVEL_ADD, handle_level_add_request },
3661 { STONITH_OP_LEVEL_DEL, handle_level_delete_request },
3662 { CRM_OP_RM_NODE_CACHE, handle_cache_request },
3663 { NULL, handle_unknown_request },
3664 };
3665
3666 fenced_handlers = pcmk__register_handlers(handlers);
3667 }
3668
3669 void
3670 fenced_unregister_handlers(void)
3671 {
3672 g_clear_pointer(&fenced_handlers, g_hash_table_destroy);
3673 }
3674
3675 void
3676 fenced_handle_request(pcmk__request_t *request)
3677 {
3678 xmlNode *reply = NULL;
3679 char *log_msg = NULL;
3680 const char *exec_status_s = NULL;
3681 const char *reason = NULL;
3682
3683 if (fenced_handlers == NULL) {
3684 fenced_register_handlers();
3685 }
3686
3687 reply = pcmk__process_request(request, fenced_handlers);
3688
3689 if (reply != NULL) {
3690 pcmk__log_xml_trace(reply, "Reply");
3691
3692 if (pcmk__is_set(request->flags, pcmk__request_reuse_options)
3693 && (request->ipc_client != NULL)) {
3694 /* Certain IPC-only commands must reuse the call options from the
3695 * original request rather than the ones set by stonith_send_reply()
3696 * -> do_local_reply().
3697 */
3698 pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
3699 request->ipc_flags);
3700 request->ipc_client->request_id = 0;
3701
3702 } else {
3703 stonith_send_reply(reply, request->call_options,
3704 request->peer, request->ipc_client);
3705 }
3706 pcmk__xml_free(reply);
3707 }
3708
3709 exec_status_s = pcmk_exec_status_str(request->result.execution_status);
3710 reason = request->result.exit_reason;
3711 log_msg = pcmk__assert_asprintf("Processed %s request from %s %s: %s%s%s%s",
3712 request->op,
3713 pcmk__request_origin_type(request),
3714 pcmk__request_origin(request),
3715 exec_status_s,
3716 (reason == NULL)? "" : " (",
3717 pcmk__s(reason, ""),
3718 (reason == NULL)? "" : ")");
3719
3720 if (!pcmk__result_ok(&request->result)) {
3721 pcmk__warn("%s", log_msg);
3722 } else {
3723 pcmk__debug("%s", log_msg);
3724 }
3725
3726 free(log_msg);
3727 pcmk__reset_request(request);
3728 }
3729