1 /*
2 * Copyright 2009-2024 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdio.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <sys/utsname.h>
19
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <ctype.h>
24
25 #include <crm/crm.h>
26 #include <crm/common/ipc.h>
27 #include <crm/common/ipc_internal.h>
28 #include <crm/cluster/internal.h>
29 #include <crm/common/mainloop.h>
30
31 #include <crm/stonith-ng.h>
32 #include <crm/fencing/internal.h>
33 #include <crm/common/xml.h>
34
35 #include <pacemaker-fenced.h>
36
37 GHashTable *device_list = NULL;
38 GHashTable *topology = NULL;
39 static GList *cmd_list = NULL;
40
41 static GHashTable *fenced_handlers = NULL;
42
43 struct device_search_s {
44 /* target of fence action */
45 char *host;
46 /* requested fence action */
47 char *action;
48 /* timeout to use if a device is queried dynamically for possible targets */
49 // @TODO This name is misleading now, it's the value of stonith-timeout
50 int per_device_timeout;
51 /* number of registered fencing devices at time of request */
52 int replies_needed;
53 /* number of device replies received so far */
54 int replies_received;
55 /* whether the target is eligible to perform requested action (or off) */
56 bool allow_self;
57
58 /* private data to pass to search callback function */
59 void *user_data;
60 /* function to call when all replies have been received */
61 void (*callback) (GList * devices, void *user_data);
62 /* devices capable of performing requested action (or off if remapping) */
63 GList *capable;
64 /* Whether to perform searches that support the action */
65 uint32_t support_action_only;
66 };
67
68 static gboolean stonith_device_dispatch(gpointer user_data);
69 static void st_child_done(int pid, const pcmk__action_result_t *result,
70 void *user_data);
71
72 static void search_devices_record_result(struct device_search_s *search, const char *device,
73 gboolean can_fence);
74
75 static int get_agent_metadata(const char *agent, xmlNode **metadata);
76 static void read_action_metadata(stonith_device_t *device);
77 static enum fenced_target_by unpack_level_kind(const xmlNode *level);
78
79 typedef struct async_command_s {
80
81 int id;
82 int pid;
83 int fd_stdout;
84 uint32_t options;
85 int default_timeout; /* seconds */
86 int timeout; /* seconds */
87
88 int start_delay; // seconds (-1 means disable static/random fencing delays)
89 int delay_id;
90
91 char *op;
92 char *origin;
93 char *client;
94 char *client_name;
95 char *remote_op_id;
96
97 char *target;
98 uint32_t target_nodeid;
99 char *action;
100 char *device;
101
102 GList *device_list;
103 GList *next_device_iter; // device_list entry for next device to execute
104
105 void *internal_user_data;
106 void (*done_cb) (int pid, const pcmk__action_result_t *result,
107 void *user_data);
108 guint timer_sigterm;
109 guint timer_sigkill;
110 /*! If the operation timed out, this is the last signal
111 * we sent to the process to get it to terminate */
112 int last_timeout_signo;
113
114 stonith_device_t *active_on;
115 stonith_device_t *activating_on;
116 } async_command_t;
117
118 static xmlNode *construct_async_reply(const async_command_t *cmd,
119 const pcmk__action_result_t *result);
120
121 static gboolean
122 is_action_required(const char *action, const stonith_device_t *device)
123 {
124 return (device != NULL) && device->automatic_unfencing
125 && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
126 }
127
128 static int
129 get_action_delay_max(const stonith_device_t *device, const char *action)
130 {
131 const char *value = NULL;
132 guint delay_max = 0U;
133
134 if (!pcmk__is_fencing_action(action)) {
135 return 0;
136 }
137
138 value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
139 if (value) {
140 pcmk_parse_interval_spec(value, &delay_max);
141 delay_max /= 1000;
142 }
143
144 return (int) delay_max;
145 }
146
147 static int
148 get_action_delay_base(const stonith_device_t *device, const char *action,
149 const char *target)
150 {
151 char *hash_value = NULL;
152 guint delay_base = 0U;
153
154 if (!pcmk__is_fencing_action(action)) {
155 return 0;
156 }
157
158 hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
159
160 if (hash_value) {
161 char *value = pcmk__str_copy(hash_value);
162 char *valptr = value;
163
164 if (target != NULL) {
165 for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
166 char *mapval = strchr(val, ':');
167
168 if (mapval == NULL || mapval[1] == 0) {
169 crm_err("pcmk_delay_base: empty value in mapping", val);
170 continue;
171 }
172
173 if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) {
174 value = mapval + 1;
175 crm_debug("pcmk_delay_base mapped to %s for %s",
176 value, target);
177 break;
178 }
179 }
180 }
181
182 if (strchr(value, ':') == 0) {
183 pcmk_parse_interval_spec(value, &delay_base);
184 delay_base /= 1000;
185 }
186
187 free(valptr);
188 }
189
190 return (int) delay_base;
191 }
192
193 /*!
194 * \internal
195 * \brief Override STONITH timeout with pcmk_*_timeout if available
196 *
197 * \param[in] device STONITH device to use
198 * \param[in] action STONITH action name
199 * \param[in] default_timeout Timeout to use if device does not have
200 * a pcmk_*_timeout parameter for action
201 *
202 * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
203 * \note For consistency, it would be nice if reboot/off/on timeouts could be
204 * set the same way as start/stop/monitor timeouts, i.e. with an
205 * <operation> entry in the fencing resource configuration. However that
206 * is insufficient because fencing devices may be registered directly via
207 * the fencer's register_device() API instead of going through the CIB
208 * (e.g. stonith_admin uses it for its -R option, and the executor uses it
209 * to ensure a device is registered when a command is issued). As device
210 * properties, pcmk_*_timeout parameters can be grabbed by the fencer when
211 * the device is registered, whether by CIB change or API call.
212 */
213 static int
214 get_action_timeout(const stonith_device_t *device, const char *action,
215 int default_timeout)
216 {
217 if (action && device && device->params) {
218 char buffer[64] = { 0, };
219 const char *value = NULL;
220
221 /* If "reboot" was requested but the device does not support it,
222 * we will remap to "off", so check timeout for "off" instead
223 */
224 if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
225 && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
226 crm_trace("%s doesn't support reboot, using timeout for off instead",
227 device->id);
228 action = PCMK_ACTION_OFF;
229 }
230
231 /* If the device config specified an action-specific timeout, use it */
232 snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
233 value = g_hash_table_lookup(device->params, buffer);
234 if (value) {
235 long long timeout_ms = crm_get_msec(value);
236 return (int) QB_MIN(pcmk__timeout_ms2s(timeout_ms), INT_MAX);
237 }
238 }
239 return default_timeout;
240 }
241
242 /*!
243 * \internal
244 * \brief Get the currently executing device for a fencing operation
245 *
246 * \param[in] cmd Fencing operation to check
247 *
248 * \return Currently executing device for \p cmd if any, otherwise NULL
249 */
250 static stonith_device_t *
251 cmd_device(const async_command_t *cmd)
252 {
253 if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) {
254 return NULL;
255 }
256 return g_hash_table_lookup(device_list, cmd->device);
257 }
258
259 /*!
260 * \internal
261 * \brief Return the configured reboot action for a given device
262 *
263 * \param[in] device_id Device ID
264 *
265 * \return Configured reboot action for \p device_id
266 */
267 const char *
268 fenced_device_reboot_action(const char *device_id)
269 {
270 const char *action = NULL;
271
272 if ((device_list != NULL) && (device_id != NULL)) {
273 stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
274
275 if ((device != NULL) && (device->params != NULL)) {
276 action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
277 }
278 }
279 return pcmk__s(action, PCMK_ACTION_REBOOT);
280 }
281
282 /*!
283 * \internal
284 * \brief Check whether a given device supports the "on" action
285 *
286 * \param[in] device_id Device ID
287 *
288 * \return true if \p device_id supports "on", otherwise false
289 */
290 bool
291 fenced_device_supports_on(const char *device_id)
292 {
293 if ((device_list != NULL) && (device_id != NULL)) {
294 stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
295
296 if (device != NULL) {
297 return pcmk_is_set(device->flags, st_device_supports_on);
298 }
299 }
300 return false;
301 }
302
303 static void
304 free_async_command(async_command_t * cmd)
305 {
306 if (!cmd) {
307 return;
308 }
309
310 if (cmd->delay_id) {
311 g_source_remove(cmd->delay_id);
312 }
313
314 cmd_list = g_list_remove(cmd_list, cmd);
315
316 g_list_free_full(cmd->device_list, free);
317 free(cmd->device);
318 free(cmd->action);
319 free(cmd->target);
320 free(cmd->remote_op_id);
321 free(cmd->client);
322 free(cmd->client_name);
323 free(cmd->origin);
324 free(cmd->op);
325 free(cmd);
326 }
327
328 /*!
329 * \internal
330 * \brief Create a new asynchronous fencing operation from request XML
331 *
332 * \param[in] msg Fencing request XML (from IPC or CPG)
333 *
334 * \return Newly allocated fencing operation on success, otherwise NULL
335 *
336 * \note This asserts on memory errors, so a NULL return indicates an
337 * unparseable message.
338 */
339 static async_command_t *
340 create_async_command(xmlNode *msg)
341 {
342 xmlNode *op = NULL;
343 async_command_t *cmd = NULL;
344 int rc = pcmk_rc_ok;
345
346 if (msg == NULL) {
347 return NULL;
348 }
349
350 op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR);
351 if (op == NULL) {
352 return NULL;
353 }
354
355 cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
356
357 // All messages must include these
358 cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
359 cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP);
360 cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID);
361 if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
362 free_async_command(cmd);
363 return NULL;
364 }
365
366 crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
367 crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
368 crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
369 cmd->timeout = cmd->default_timeout;
370
371 rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options),
372 st_opt_none);
373 if (rc != pcmk_rc_ok) {
374 crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc));
375 }
376
377 cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC);
378 cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP);
379 cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME);
380 cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
381 cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID);
382
383 cmd->done_cb = st_child_done;
384
385 // Track in global command list
386 cmd_list = g_list_append(cmd_list, cmd);
387
388 return cmd;
389 }
390
391 static int
392 get_action_limit(stonith_device_t * device)
393 {
394 const char *value = NULL;
395 int action_limit = 1;
396
397 value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
398 if ((value == NULL)
399 || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
400 || (action_limit == 0)) {
401 action_limit = 1;
402 }
403 return action_limit;
404 }
405
406 static int
407 get_active_cmds(stonith_device_t * device)
408 {
409 int counter = 0;
410 GList *gIter = NULL;
411 GList *gIterNext = NULL;
412
413 CRM_CHECK(device != NULL, return 0);
414
415 for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
416 async_command_t *cmd = gIter->data;
417
418 gIterNext = gIter->next;
419
420 if (cmd->active_on == device) {
421 counter++;
422 }
423 }
424
425 return counter;
426 }
427
428 static void
429 fork_cb(int pid, void *user_data)
430 {
431 async_command_t *cmd = (async_command_t *) user_data;
432 stonith_device_t * device =
433 /* in case of a retry we've done the move from
434 activating_on to active_on already
435 */
436 cmd->activating_on?cmd->activating_on:cmd->active_on;
437
438 pcmk__assert(device != NULL);
439 crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
440 cmd->action, pid,
441 ((cmd->target == NULL)? "" : " targeting "),
442 pcmk__s(cmd->target, ""), device->id, cmd->timeout);
443 cmd->active_on = device;
444 cmd->activating_on = NULL;
445 }
446
447 static int
448 get_agent_metadata_cb(gpointer data) {
449 stonith_device_t *device = data;
450 guint period_ms;
451
452 switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
453 case pcmk_rc_ok:
454 if (device->agent_metadata) {
455 read_action_metadata(device);
456 stonith__device_parameter_flags(&(device->flags), device->id,
457 device->agent_metadata);
458 }
459 return G_SOURCE_REMOVE;
460
461 case EAGAIN:
462 period_ms = pcmk__mainloop_timer_get_period(device->timer);
463 if (period_ms < 160 * 1000) {
464 mainloop_timer_set_period(device->timer, 2 * period_ms);
465 }
466 return G_SOURCE_CONTINUE;
467
468 default:
469 return G_SOURCE_REMOVE;
470 }
471 }
472
473 /*!
474 * \internal
475 * \brief Call a command's action callback for an internal (not library) result
476 *
477 * \param[in,out] cmd Command to report result for
478 * \param[in] execution_status Execution status to use for result
479 * \param[in] exit_status Exit status to use for result
480 * \param[in] exit_reason Exit reason to use for result
481 */
482 static void
483 report_internal_result(async_command_t *cmd, int exit_status,
484 int execution_status, const char *exit_reason)
485 {
486 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
487
488 pcmk__set_result(&result, exit_status, execution_status, exit_reason);
489 cmd->done_cb(0, &result, cmd);
490 pcmk__reset_result(&result);
491 }
492
493 static gboolean
494 stonith_device_execute(stonith_device_t * device)
495 {
496 int exec_rc = 0;
497 const char *action_str = NULL;
498 const char *host_arg = NULL;
499 async_command_t *cmd = NULL;
500 stonith_action_t *action = NULL;
501 int active_cmds = 0;
502 int action_limit = 0;
503 GList *gIter = NULL;
504 GList *gIterNext = NULL;
505
506 CRM_CHECK(device != NULL, return FALSE);
507
508 active_cmds = get_active_cmds(device);
509 action_limit = get_action_limit(device);
510 if (action_limit > -1 && active_cmds >= action_limit) {
511 crm_trace("%s is over its action limit of %d (%u active action%s)",
512 device->id, action_limit, active_cmds,
513 pcmk__plural_s(active_cmds));
514 return TRUE;
515 }
516
517 for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
518 async_command_t *pending_op = gIter->data;
519
520 gIterNext = gIter->next;
521
522 if (pending_op && pending_op->delay_id) {
523 crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
524 "waiting for start delay of %ds",
525 pending_op->action,
526 ((pending_op->target == NULL)? "" : " targeting "),
527 pcmk__s(pending_op->target, ""),
528 device->id, pending_op->start_delay);
529 continue;
530 }
531
532 device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
533 g_list_free_1(gIter);
534
535 cmd = pending_op;
536 break;
537 }
538
539 if (cmd == NULL) {
540 crm_trace("No actions using %s are needed", device->id);
541 return TRUE;
542 }
543
544 if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
545 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
546 if (pcmk__is_fencing_action(cmd->action)) {
547 if (node_does_watchdog_fencing(fenced_get_local_node())) {
548 pcmk__panic("Watchdog self-fencing required");
549 goto done;
550 }
551 } else {
552 crm_info("Faking success for %s watchdog operation", cmd->action);
553 report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
554 goto done;
555 }
556 }
557
558 #if PCMK__ENABLE_CIBSECRETS
559 exec_rc = pcmk__substitute_secrets(device->id, device->params);
560 if (exec_rc != pcmk_rc_ok) {
561 if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
562 crm_info("Proceeding with stop operation for %s "
563 "despite being unable to load CIB secrets (%s)",
564 device->id, pcmk_rc_str(exec_rc));
565 } else {
566 crm_err("Considering %s unconfigured "
567 "because unable to load CIB secrets: %s",
568 device->id, pcmk_rc_str(exec_rc));
569 report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
570 "Failed to get CIB secrets");
571 goto done;
572 }
573 }
574 #endif
575
576 action_str = cmd->action;
577 if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
578 && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
579
580 crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
581 "because agent '%s' does not support reboot",
582 ((cmd->target == NULL)? "" : " targeting "),
583 pcmk__s(cmd->target, ""), device->id, device->agent);
584 action_str = PCMK_ACTION_OFF;
585 }
586
587 if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
588 host_arg = "port";
589
590 } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
591 host_arg = "plug";
592 }
593
594 action = stonith__action_create(device->agent, action_str, cmd->target,
595 cmd->target_nodeid, cmd->timeout,
596 device->params, device->aliases, host_arg);
597
598 /* for async exec, exec_rc is negative for early error exit
599 otherwise handling of success/errors is done via callbacks */
600 cmd->activating_on = device;
601 exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
602 fork_cb);
603 if (exec_rc < 0) {
604 cmd->activating_on = NULL;
605 cmd->done_cb(0, stonith__action_result(action), cmd);
606 stonith__destroy_action(action);
607 }
608
609 done:
610 /* Device might get triggered to work by multiple fencing commands
611 * simultaneously. Trigger the device again to make sure any
612 * remaining concurrent commands get executed. */
613 if (device->pending_ops) {
614 mainloop_set_trigger(device->work);
615 }
616 return TRUE;
617 }
618
619 static gboolean
620 stonith_device_dispatch(gpointer user_data)
621 {
622 return stonith_device_execute(user_data);
623 }
624
625 static gboolean
626 start_delay_helper(gpointer data)
627 {
628 async_command_t *cmd = data;
629 stonith_device_t *device = cmd_device(cmd);
630
631 cmd->delay_id = 0;
632 if (device) {
633 mainloop_set_trigger(device->work);
634 }
635
636 return FALSE;
637 }
638
639 static void
640 schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
641 {
642 int delay_max = 0;
643 int delay_base = 0;
644 int requested_delay = cmd->start_delay;
645
646 CRM_CHECK(cmd != NULL, return);
647 CRM_CHECK(device != NULL, return);
648
649 if (cmd->device) {
650 free(cmd->device);
651 }
652
653 if (device->include_nodeid && (cmd->target != NULL)) {
654 pcmk__node_status_t *node =
655 pcmk__get_node(0, cmd->target, NULL,
656 pcmk__node_search_cluster_member);
657
658 cmd->target_nodeid = node->cluster_layer_id;
659 }
660
661 cmd->device = pcmk__str_copy(device->id);
662 cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
663
664 if (cmd->remote_op_id) {
665 crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
666 "with op id %.8s and timeout %ds",
667 cmd->action,
668 (cmd->target == NULL)? "" : " targeting ",
669 pcmk__s(cmd->target, ""),
670 device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
671 } else {
672 crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
673 cmd->action,
674 (cmd->target == NULL)? "" : " targeting ",
675 pcmk__s(cmd->target, ""),
676 device->id, cmd->client, cmd->timeout);
677 }
678
679 device->pending_ops = g_list_append(device->pending_ops, cmd);
680 mainloop_set_trigger(device->work);
681
682 // Value -1 means disable any static/random fencing delays
683 if (requested_delay < 0) {
684 return;
685 }
686
687 delay_max = get_action_delay_max(device, cmd->action);
688 delay_base = get_action_delay_base(device, cmd->action, cmd->target);
689 if (delay_max == 0) {
690 delay_max = delay_base;
691 }
692 if (delay_max < delay_base) {
693 crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
694 PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
695 "(limiting to maximum delay)",
696 delay_base, delay_max, cmd->action, device->id);
697 delay_base = delay_max;
698 }
699 if (delay_max > 0) {
700 // coverity[dontcall] It doesn't matter here if rand() is predictable
701 cmd->start_delay +=
|
CID (unavailable; MK=a54f586bb4320a1382a6788a6d359b5c) (#1 of 1): Calling risky function (DC.WEAK_CRYPTO): |
|
(1) Event dont_call: |
"rand" should not be used for security-related applications, because linear congruential algorithms are too easy to break. |
|
(2) Event remediation: |
Use a compliant random number generator, such as "/dev/random" or "/dev/urandom" on Unix-like systems, and CNG (Cryptography API: Next Generation) on Windows. |
702 ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
703 + delay_base;
704 }
705
706 if (cmd->start_delay > 0) {
707 crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS
708 " timeout=%ds requested_delay=%ds base=%ds max=%ds",
709 cmd->action,
710 (cmd->target == NULL)? "" : " targeting ",
711 pcmk__s(cmd->target, ""),
712 device->id, cmd->start_delay, cmd->timeout,
713 requested_delay, delay_base, delay_max);
714 cmd->delay_id =
715 pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd);
716 }
717 }
718
719 static void
720 free_device(gpointer data)
721 {
722 GList *gIter = NULL;
723 stonith_device_t *device = data;
724
725 g_hash_table_destroy(device->params);
726 g_hash_table_destroy(device->aliases);
727
728 for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
729 async_command_t *cmd = gIter->data;
730
731 crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
732 report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
733 "Device was removed before action could be executed");
734 }
735 g_list_free(device->pending_ops);
736
737 g_list_free_full(device->targets, free);
738
739 if (device->timer) {
740 mainloop_timer_stop(device->timer);
741 mainloop_timer_del(device->timer);
742 }
743
744 mainloop_destroy_trigger(device->work);
745
746 pcmk__xml_free(device->agent_metadata);
747 free(device->namespace);
748 if (device->on_target_actions != NULL) {
749 g_string_free(device->on_target_actions, TRUE);
750 }
751 free(device->agent);
752 free(device->id);
753 free(device);
754 }
755
756 void free_device_list(void)
757 {
758 if (device_list != NULL) {
759 g_hash_table_destroy(device_list);
760 device_list = NULL;
761 }
762 }
763
764 void
765 init_device_list(void)
766 {
767 if (device_list == NULL) {
768 device_list = pcmk__strkey_table(NULL, free_device);
769 }
770 }
771
772 static GHashTable *
773 build_port_aliases(const char *hostmap, GList ** targets)
774 {
775 char *name = NULL;
776 int last = 0, lpc = 0, max = 0, added = 0;
777 GHashTable *aliases = pcmk__strikey_table(free, free);
778
779 if (hostmap == NULL) {
780 return aliases;
781 }
782
783 max = strlen(hostmap);
784 for (; lpc <= max; lpc++) {
785 switch (hostmap[lpc]) {
786 /* Skip escaped chars */
787 case '\\':
788 lpc++;
789 break;
790
791 /* Assignment chars */
792 case '=':
793 case ':':
794 if (lpc > last) {
795 free(name);
796 name = pcmk__assert_alloc(1, 1 + lpc - last);
797 memcpy(name, hostmap + last, lpc - last);
798 }
799 last = lpc + 1;
800 break;
801
802 /* Delimeter chars */
803 /* case ',': Potentially used to specify multiple ports */
804 case 0:
805 case ';':
806 case ' ':
807 case '\t':
808 if (name) {
809 char *value = NULL;
810 int k = 0;
811
812 value = pcmk__assert_alloc(1, 1 + lpc - last);
813 memcpy(value, hostmap + last, lpc - last);
814
815 for (int i = 0; value[i] != '\0'; i++) {
816 if (value[i] != '\\') {
817 value[k++] = value[i];
818 }
819 }
820 value[k] = '\0';
821
822 crm_debug("Adding alias '%s'='%s'", name, value);
823 g_hash_table_replace(aliases, name, value);
824 if (targets) {
825 *targets = g_list_append(*targets, pcmk__str_copy(value));
826 }
827 value = NULL;
828 name = NULL;
829 added++;
830
831 } else if (lpc > last) {
832 crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
833 }
834
835 last = lpc + 1;
836 break;
837 }
838
839 if (hostmap[lpc] == 0) {
840 break;
841 }
842 }
843
844 if (added == 0) {
845 crm_info("No host mappings detected in '%s'", hostmap);
846 }
847
848 free(name);
849 return aliases;
850 }
851
852 GHashTable *metadata_cache = NULL;
853
854 void
855 free_metadata_cache(void) {
856 if (metadata_cache != NULL) {
857 g_hash_table_destroy(metadata_cache);
858 metadata_cache = NULL;
859 }
860 }
861
862 static void
863 init_metadata_cache(void) {
864 if (metadata_cache == NULL) {
865 metadata_cache = pcmk__strkey_table(free, free);
866 }
867 }
868
869 int
870 get_agent_metadata(const char *agent, xmlNode ** metadata)
871 {
872 char *buffer = NULL;
873
874 if (metadata == NULL) {
875 return EINVAL;
876 }
877 *metadata = NULL;
878 if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
879 return pcmk_rc_ok;
880 }
881 init_metadata_cache();
882 buffer = g_hash_table_lookup(metadata_cache, agent);
883 if (buffer == NULL) {
884 stonith_t *st = stonith_api_new();
885 int rc;
886
887 if (st == NULL) {
888 crm_warn("Could not get agent meta-data: "
889 "API memory allocation failed");
890 return EAGAIN;
891 }
892 rc = st->cmds->metadata(st, st_opt_sync_call, agent,
893 NULL, &buffer, 10);
894 stonith_api_delete(st);
895 if (rc || !buffer) {
896 crm_err("Could not retrieve metadata for fencing agent %s", agent);
897 return EAGAIN;
898 }
899 g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer);
900 }
901
902 *metadata = pcmk__xml_parse(buffer);
903 return pcmk_rc_ok;
904 }
905
906 static gboolean
907 is_nodeid_required(xmlNode * xml)
908 {
909 xmlXPathObjectPtr xpath = NULL;
910
911 if (!xml) {
912 return FALSE;
913 }
914
915 xpath = xpath_search(xml,
916 "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']");
917 if (numXpathResults(xpath) <= 0) {
918 freeXpathObject(xpath);
919 return FALSE;
920 }
921
922 freeXpathObject(xpath);
923 return TRUE;
924 }
925
926 static void
927 read_action_metadata(stonith_device_t *device)
928 {
929 xmlXPathObjectPtr xpath = NULL;
930 int max = 0;
931 int lpc = 0;
932
933 if (device->agent_metadata == NULL) {
934 return;
935 }
936
937 xpath = xpath_search(device->agent_metadata, "//action");
938 max = numXpathResults(xpath);
939
940 if (max <= 0) {
941 freeXpathObject(xpath);
942 return;
943 }
944
945 for (lpc = 0; lpc < max; lpc++) {
946 const char *action = NULL;
947 xmlNode *match = getXpathResult(xpath, lpc);
948
949 CRM_LOG_ASSERT(match != NULL);
950 if(match == NULL) { continue; };
951
952 action = crm_element_value(match, PCMK_XA_NAME);
953
954 if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
955 stonith__set_device_flags(device->flags, device->id,
956 st_device_supports_list);
957 } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
958 stonith__set_device_flags(device->flags, device->id,
959 st_device_supports_status);
960 } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
961 stonith__set_device_flags(device->flags, device->id,
962 st_device_supports_reboot);
963 } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
964 /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
965 * joins.
966 *
967 * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
968 * PCMK_XA_AUTOMATIC.
969 */
970 if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
971 || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
972 device->automatic_unfencing = TRUE;
973 }
974 stonith__set_device_flags(device->flags, device->id,
975 st_device_supports_on);
976 }
977
978 if ((action != NULL)
979 && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {
980
981 pcmk__add_word(&(device->on_target_actions), 64, action);
982 }
983 }
984
985 freeXpathObject(xpath);
986 }
987
988 static const char *
989 target_list_type(stonith_device_t * dev)
990 {
991 const char *check_type = NULL;
992
993 check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
994
995 if (check_type == NULL) {
996
997 if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
998 check_type = PCMK_VALUE_STATIC_LIST;
999 } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
1000 check_type = PCMK_VALUE_STATIC_LIST;
1001 } else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
1002 check_type = PCMK_VALUE_DYNAMIC_LIST;
1003 } else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
1004 check_type = PCMK_VALUE_STATUS;
1005 } else {
1006 check_type = PCMK_VALUE_NONE;
1007 }
1008 }
1009
1010 return check_type;
1011 }
1012
1013 static stonith_device_t *
1014 build_device_from_xml(xmlNode *dev)
1015 {
1016 const char *value;
1017 stonith_device_t *device = NULL;
1018 char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT);
1019
1020 CRM_CHECK(agent != NULL, return device);
1021
1022 device = pcmk__assert_alloc(1, sizeof(stonith_device_t));
1023
1024 device->id = crm_element_value_copy(dev, PCMK_XA_ID);
1025 device->agent = agent;
1026 device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE);
1027 device->params = xml2list(dev);
1028
1029 value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
1030 if (value) {
1031 device->targets = stonith__parse_targets(value);
1032 }
1033
1034 value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
1035 device->aliases = build_port_aliases(value, &(device->targets));
1036
1037 value = target_list_type(device);
1038 if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)
1039 && (device->targets != NULL)) {
1040
1041 // device->targets is necessary only with PCMK_VALUE_STATIC_LIST
1042 g_list_free_full(device->targets, free);
1043 device->targets = NULL;
1044 }
1045 switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
1046 case pcmk_rc_ok:
1047 if (device->agent_metadata) {
1048 read_action_metadata(device);
1049 stonith__device_parameter_flags(&(device->flags), device->id,
1050 device->agent_metadata);
1051 }
1052 break;
1053
1054 case EAGAIN:
1055 if (device->timer == NULL) {
1056 device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
1057 TRUE, get_agent_metadata_cb, device);
1058 }
1059 if (!mainloop_timer_running(device->timer)) {
1060 mainloop_timer_start(device->timer);
1061 }
1062 break;
1063
1064 default:
1065 break;
1066 }
1067
1068 value = g_hash_table_lookup(device->params, "nodeid");
1069 if (!value) {
1070 device->include_nodeid = is_nodeid_required(device->agent_metadata);
1071 }
1072
1073 value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES);
1074 if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
1075 device->automatic_unfencing = TRUE;
1076 }
1077
1078 if (is_action_required(PCMK_ACTION_ON, device)) {
1079 crm_info("Fencing device '%s' requires unfencing", device->id);
1080 }
1081
1082 if (device->on_target_actions != NULL) {
1083 crm_info("Fencing device '%s' requires actions (%s) to be executed "
1084 "on target", device->id,
1085 (const char *) device->on_target_actions->str);
1086 }
1087
1088 device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
1089 /* TODO: Hook up priority */
1090
1091 return device;
1092 }
1093
1094 static void
1095 schedule_internal_command(const char *origin,
1096 stonith_device_t * device,
1097 const char *action,
1098 const char *target,
1099 int timeout,
1100 void *internal_user_data,
1101 void (*done_cb) (int pid,
1102 const pcmk__action_result_t *result,
1103 void *user_data))
1104 {
1105 async_command_t *cmd = NULL;
1106
1107 cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
1108
1109 cmd->id = -1;
1110 cmd->default_timeout = timeout ? timeout : 60;
1111 cmd->timeout = cmd->default_timeout;
1112 cmd->action = pcmk__str_copy(action);
1113 cmd->target = pcmk__str_copy(target);
1114 cmd->device = pcmk__str_copy(device->id);
1115 cmd->origin = pcmk__str_copy(origin);
1116 cmd->client = pcmk__str_copy(crm_system_name);
1117 cmd->client_name = pcmk__str_copy(crm_system_name);
1118
1119 cmd->internal_user_data = internal_user_data;
1120 cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
1121
1122 schedule_stonith_command(cmd, device);
1123 }
1124
/* Fence agent status commands use custom exit status codes.
 *
 * status_search_cb() treats both "active" and "inactive" as meaning the device
 * can fence the target, and "unknown" as meaning it cannot.
 */
enum fence_status_code {
    fence_status_invalid  = -1,
    fence_status_active   = 0,  // counted as "can fence" by status_search_cb()
    fence_status_unknown  = 1,  // counted as "cannot fence"
    fence_status_inactive = 2,  // counted as "can fence"
};
1132
1133 static void
1134 status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
1135 {
1136 async_command_t *cmd = user_data;
1137 struct device_search_s *search = cmd->internal_user_data;
1138 stonith_device_t *dev = cmd_device(cmd);
1139 gboolean can = FALSE;
1140
1141 free_async_command(cmd);
1142
1143 if (!dev) {
1144 search_devices_record_result(search, NULL, FALSE);
1145 return;
1146 }
1147
1148 mainloop_set_trigger(dev->work);
1149
1150 if (result->execution_status != PCMK_EXEC_DONE) {
1151 crm_warn("Assuming %s cannot fence %s "
1152 "because status could not be executed: %s%s%s%s",
1153 dev->id, search->host,
1154 pcmk_exec_status_str(result->execution_status),
1155 ((result->exit_reason == NULL)? "" : " ("),
1156 ((result->exit_reason == NULL)? "" : result->exit_reason),
1157 ((result->exit_reason == NULL)? "" : ")"));
1158 search_devices_record_result(search, dev->id, FALSE);
1159 return;
1160 }
1161
1162 switch (result->exit_status) {
1163 case fence_status_unknown:
1164 crm_trace("%s reported it cannot fence %s", dev->id, search->host);
1165 break;
1166
1167 case fence_status_active:
1168 case fence_status_inactive:
1169 crm_trace("%s reported it can fence %s", dev->id, search->host);
1170 can = TRUE;
1171 break;
1172
1173 default:
1174 crm_warn("Assuming %s cannot fence %s "
1175 "(status returned unknown code %d)",
1176 dev->id, search->host, result->exit_status);
1177 break;
1178 }
1179 search_devices_record_result(search, dev->id, can);
1180 }
1181
1182 static void
1183 dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
1184 void *user_data)
1185 {
1186 async_command_t *cmd = user_data;
1187 struct device_search_s *search = cmd->internal_user_data;
1188 stonith_device_t *dev = cmd_device(cmd);
1189 gboolean can_fence = FALSE;
1190
1191 free_async_command(cmd);
1192
1193 /* Host/alias must be in the list output to be eligible to be fenced
1194 *
1195 * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
1196 * if the guest is still listed despite being moved to another machine
1197 */
1198 if (!dev) {
1199 search_devices_record_result(search, NULL, FALSE);
1200 return;
1201 }
1202
1203 mainloop_set_trigger(dev->work);
1204
1205 if (pcmk__result_ok(result)) {
1206 crm_info("Refreshing target list for %s", dev->id);
1207 g_list_free_full(dev->targets, free);
1208 dev->targets = stonith__parse_targets(result->action_stdout);
1209 dev->targets_age = time(NULL);
1210
1211 } else if (dev->targets != NULL) {
1212 if (result->execution_status == PCMK_EXEC_DONE) {
1213 crm_info("Reusing most recent target list for %s "
1214 "because list returned error code %d",
1215 dev->id, result->exit_status);
1216 } else {
1217 crm_info("Reusing most recent target list for %s "
1218 "because list could not be executed: %s%s%s%s",
1219 dev->id, pcmk_exec_status_str(result->execution_status),
1220 ((result->exit_reason == NULL)? "" : " ("),
1221 ((result->exit_reason == NULL)? "" : result->exit_reason),
1222 ((result->exit_reason == NULL)? "" : ")"));
1223 }
1224
1225 } else { // We have never successfully executed list
1226 if (result->execution_status == PCMK_EXEC_DONE) {
1227 crm_warn("Assuming %s cannot fence %s "
1228 "because list returned error code %d",
1229 dev->id, search->host, result->exit_status);
1230 } else {
1231 crm_warn("Assuming %s cannot fence %s "
1232 "because list could not be executed: %s%s%s%s",
1233 dev->id, search->host,
1234 pcmk_exec_status_str(result->execution_status),
1235 ((result->exit_reason == NULL)? "" : " ("),
1236 ((result->exit_reason == NULL)? "" : result->exit_reason),
1237 ((result->exit_reason == NULL)? "" : ")"));
1238 }
1239
1240 /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't
1241 * explicitly specify PCMK_VALUE_DYNAMIC_LIST
1242 */
1243 if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
1244 crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
1245 pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK,
1246 PCMK_VALUE_STATUS);
1247 }
1248 }
1249
1250 if (dev->targets) {
1251 const char *alias = g_hash_table_lookup(dev->aliases, search->host);
1252
1253 if (!alias) {
1254 alias = search->host;
1255 }
1256 if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
1257 can_fence = TRUE;
1258 }
1259 }
1260 search_devices_record_result(search, dev->id, can_fence);
1261 }
1262
1263 /*!
1264 * \internal
1265 * \brief Returns true if any key in first is not in second or second has a different value for key
1266 */
1267 static int
1268 device_params_diff(GHashTable *first, GHashTable *second) {
1269 char *key = NULL;
1270 char *value = NULL;
1271 GHashTableIter gIter;
1272
1273 g_hash_table_iter_init(&gIter, first);
1274 while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
1275
1276 if(strstr(key, "CRM_meta") == key) {
1277 continue;
1278 } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) {
1279 continue;
1280 } else {
1281 char *other_value = g_hash_table_lookup(second, key);
1282
1283 if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
1284 crm_trace("Different value for %s: %s != %s", key, other_value, value);
1285 return 1;
1286 }
1287 }
1288 }
1289
1290 return 0;
1291 }
1292
1293 /*!
1294 * \internal
1295 * \brief Checks to see if an identical device already exists in the device_list
1296 */
1297 static stonith_device_t *
1298 device_has_duplicate(const stonith_device_t *device)
1299 {
1300 stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
1301
1302 if (!dup) {
1303 crm_trace("No match for %s", device->id);
1304 return NULL;
1305
1306 } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
1307 crm_trace("Different agent: %s != %s", dup->agent, device->agent);
1308 return NULL;
1309 }
1310
1311 // Use pcmk__digest_operation() here?
1312 if (device_params_diff(device->params, dup->params) ||
1313 device_params_diff(dup->params, device->params)) {
1314 return NULL;
1315 }
1316
1317 crm_trace("Match");
1318 return dup;
1319 }
1320
1321 int
1322 stonith_device_register(xmlNode *dev, gboolean from_cib)
1323 {
1324 stonith_device_t *dup = NULL;
1325 stonith_device_t *device = build_device_from_xml(dev);
1326 guint ndevices = 0;
1327 int rv = pcmk_ok;
1328
1329 CRM_CHECK(device != NULL, return -ENOMEM);
1330
1331 /* do we have a watchdog-device? */
1332 if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
1333 pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
1334 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
1335 if (stonith_watchdog_timeout_ms <= 0) {
1336 crm_err("Ignoring watchdog fence device without "
1337 PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set.");
1338 rv = -ENODEV;
1339 /* fall through to cleanup & return */
1340 } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
1341 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
1342 crm_err("Ignoring watchdog fence device with unknown "
1343 "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
1344 device->agent?device->agent:"");
1345 rv = -ENODEV;
1346 /* fall through to cleanup & return */
1347 } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
1348 pcmk__str_none)) {
1349 crm_err("Ignoring watchdog fence device "
1350 "named %s !='"STONITH_WATCHDOG_ID"'.",
1351 device->id?device->id:"");
1352 rv = -ENODEV;
1353 /* fall through to cleanup & return */
1354 } else {
1355 const char *local_node_name = fenced_get_local_node();
1356
1357 if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
1358 pcmk__str_none)) {
1359 /* this either has an empty list or the targets
1360 configured for watchdog-fencing
1361 */
1362 g_list_free_full(stonith_watchdog_targets, free);
1363 stonith_watchdog_targets = device->targets;
1364 device->targets = NULL;
1365 }
1366 if (node_does_watchdog_fencing(local_node_name)) {
1367 g_list_free_full(device->targets, free);
1368 device->targets = stonith__parse_targets(local_node_name);
1369 pcmk__insert_dup(device->params,
1370 PCMK_STONITH_HOST_LIST, local_node_name);
1371 /* proceed as with any other stonith-device */
1372 break;
1373 }
1374
1375 crm_debug("Skip registration of watchdog fence device on node not in host-list.");
1376 /* cleanup and fall through to more cleanup and return */
1377 device->targets = NULL;
1378 stonith_device_remove(device->id, from_cib);
1379 }
1380 free_device(device);
1381 return rv;
1382 } while (0);
1383
1384 dup = device_has_duplicate(device);
1385 if (dup) {
1386 ndevices = g_hash_table_size(device_list);
1387 crm_debug("Device '%s' already in device list (%d active device%s)",
1388 device->id, ndevices, pcmk__plural_s(ndevices));
1389 free_device(device);
1390 device = dup;
1391 dup = g_hash_table_lookup(device_list, device->id);
1392 dup->dirty = FALSE;
1393
1394 } else {
1395 stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
1396
1397 if (from_cib && old && old->api_registered) {
1398 /* If the cib is writing over an entry that is shared with a stonith client,
1399 * copy any pending ops that currently exist on the old entry to the new one.
1400 * Otherwise the pending ops will be reported as failures
1401 */
1402 crm_info("Overwriting existing entry for %s from CIB", device->id);
1403 device->pending_ops = old->pending_ops;
1404 device->api_registered = TRUE;
1405 old->pending_ops = NULL;
1406 if (device->pending_ops) {
1407 mainloop_set_trigger(device->work);
1408 }
1409 }
1410 g_hash_table_replace(device_list, device->id, device);
1411
1412 ndevices = g_hash_table_size(device_list);
1413 crm_notice("Added '%s' to device list (%d active device%s)",
1414 device->id, ndevices, pcmk__plural_s(ndevices));
1415 }
1416
1417 if (from_cib) {
1418 device->cib_registered = TRUE;
1419 } else {
1420 device->api_registered = TRUE;
1421 }
1422
1423 return pcmk_ok;
1424 }
1425
1426 void
1427 stonith_device_remove(const char *id, bool from_cib)
1428 {
1429 stonith_device_t *device = g_hash_table_lookup(device_list, id);
1430 guint ndevices = 0;
1431
1432 if (!device) {
1433 ndevices = g_hash_table_size(device_list);
1434 crm_info("Device '%s' not found (%d active device%s)",
1435 id, ndevices, pcmk__plural_s(ndevices));
1436 return;
1437 }
1438
1439 if (from_cib) {
1440 device->cib_registered = FALSE;
1441 } else {
1442 device->verified = FALSE;
1443 device->api_registered = FALSE;
1444 }
1445
1446 if (!device->cib_registered && !device->api_registered) {
1447 g_hash_table_remove(device_list, id);
1448 ndevices = g_hash_table_size(device_list);
1449 crm_info("Removed '%s' from device list (%d active device%s)",
1450 id, ndevices, pcmk__plural_s(ndevices));
1451 } else {
1452 crm_trace("Not removing '%s' from device list (%d active) because "
1453 "still registered via:%s%s",
1454 id, g_hash_table_size(device_list),
1455 (device->cib_registered? " cib" : ""),
1456 (device->api_registered? " api" : ""));
1457 }
1458 }
1459
1460 /*!
1461 * \internal
1462 * \brief Return the number of stonith levels registered for a node
1463 *
1464 * \param[in] tp Node's topology table entry
1465 *
1466 * \return Number of non-NULL levels in topology entry
1467 * \note This function is used only for log messages.
1468 */
1469 static int
1470 count_active_levels(const stonith_topology_t *tp)
1471 {
1472 int lpc = 0;
1473 int count = 0;
1474
1475 for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) {
1476 if (tp->levels[lpc] != NULL) {
1477 count++;
1478 }
1479 }
1480 return count;
1481 }
1482
1483 static void
1484 free_topology_entry(gpointer data)
1485 {
1486 stonith_topology_t *tp = data;
1487
1488 int lpc = 0;
1489
1490 for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) {
1491 if (tp->levels[lpc] != NULL) {
1492 g_list_free_full(tp->levels[lpc], free);
1493 }
1494 }
1495 free(tp->target);
1496 free(tp->target_value);
1497 free(tp->target_pattern);
1498 free(tp->target_attribute);
1499 free(tp);
1500 }
1501
1502 void
1503 free_topology_list(void)
1504 {
1505 if (topology != NULL) {
1506 g_hash_table_destroy(topology);
1507 topology = NULL;
1508 }
1509 }
1510
1511 void
1512 init_topology_list(void)
1513 {
1514 if (topology == NULL) {
1515 topology = pcmk__strkey_table(NULL, free_topology_entry);
1516 }
1517 }
1518
1519 char *
1520 stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
1521 {
1522 if (mode == fenced_target_by_unknown) {
1523 mode = unpack_level_kind(level);
1524 }
1525 switch (mode) {
1526 case fenced_target_by_name:
1527 return crm_element_value_copy(level, PCMK_XA_TARGET);
1528
1529 case fenced_target_by_pattern:
1530 return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN);
1531
1532 case fenced_target_by_attribute:
1533 return crm_strdup_printf("%s=%s",
1534 crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE),
1535 crm_element_value(level, PCMK_XA_TARGET_VALUE));
1536
1537 default:
1538 return crm_strdup_printf("unknown-%s", pcmk__xe_id(level));
1539 }
1540 }
1541
1542 /*!
1543 * \internal
1544 * \brief Parse target identification from topology level XML
1545 *
1546 * \param[in] level Topology level XML to parse
1547 *
1548 * \return How to identify target of \p level
1549 */
1550 static enum fenced_target_by
1551 unpack_level_kind(const xmlNode *level)
1552 {
1553 if (crm_element_value(level, PCMK_XA_TARGET) != NULL) {
1554 return fenced_target_by_name;
1555 }
1556 if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) {
1557 return fenced_target_by_pattern;
1558 }
1559 if ((crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL)
1560 && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) {
1561 return fenced_target_by_attribute;
1562 }
1563 return fenced_target_by_unknown;
1564 }
1565
1566 static stonith_key_value_t *
1567 parse_device_list(const char *devices)
1568 {
1569 int lpc = 0;
1570 int max = 0;
1571 int last = 0;
1572 stonith_key_value_t *output = NULL;
1573
1574 if (devices == NULL) {
1575 return output;
1576 }
1577
1578 max = strlen(devices);
1579 for (lpc = 0; lpc <= max; lpc++) {
1580 if (devices[lpc] == ',' || devices[lpc] == 0) {
1581 char *line = strndup(devices + last, lpc - last);
1582
1583 output = stonith_key_value_add(output, NULL, line);
1584 free(line);
1585
1586 last = lpc + 1;
1587 }
1588 }
1589
1590 return output;
1591 }
1592
1593 /*!
1594 * \internal
1595 * \brief Unpack essential information from topology request XML
1596 *
1597 * \param[in] xml Request XML to search
1598 * \param[out] mode If not NULL, where to store level kind
1599 * \param[out] target If not NULL, where to store representation of target
1600 * \param[out] id If not NULL, where to store level number
1601 * \param[out] desc If not NULL, where to store log-friendly level description
1602 *
1603 * \return Topology level XML from within \p xml, or NULL if not found
1604 * \note The caller is responsible for freeing \p *target and \p *desc if set.
1605 */
1606 static xmlNode *
1607 unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
1608 int *id, char **desc)
1609 {
1610 enum fenced_target_by local_mode = fenced_target_by_unknown;
1611 char *local_target = NULL;
1612 int local_id = 0;
1613
1614 /* The level element can be the top element or lower. If top level, don't
1615 * search by xpath, because it might give multiple hits if the XML is the
1616 * CIB.
1617 */
1618 if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
1619 xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING);
1620 }
1621
1622 if (xml == NULL) {
1623 if (desc != NULL) {
1624 *desc = crm_strdup_printf("missing");
1625 }
1626 } else {
1627 local_mode = unpack_level_kind(xml);
1628 local_target = stonith_level_key(xml, local_mode);
1629 crm_element_value_int(xml, PCMK_XA_INDEX, &local_id);
1630 if (desc != NULL) {
1631 *desc = crm_strdup_printf("%s[%d]", local_target, local_id);
1632 }
1633 }
1634
1635 if (mode != NULL) {
1636 *mode = local_mode;
1637 }
1638 if (id != NULL) {
1639 *id = local_id;
1640 }
1641
1642 if (target != NULL) {
1643 *target = local_target;
1644 } else {
1645 free(local_target);
1646 }
1647
1648 return xml;
1649 }
1650
1651 /*!
1652 * \internal
1653 * \brief Register a fencing topology level for a target
1654 *
1655 * Given an XML request specifying the target name, level index, and device IDs
1656 * for the level, this will create an entry for the target in the global topology
1657 * table if one does not already exist, then append the specified device IDs to
1658 * the entry's device list for the specified level.
1659 *
1660 * \param[in] msg XML request for STONITH level registration
1661 * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
1662 * \param[out] result Where to set result of registration
1663 */
1664 void
1665 fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
1666 {
1667 int id = 0;
1668 xmlNode *level;
1669 enum fenced_target_by mode;
1670 char *target;
1671
1672 stonith_topology_t *tp;
1673 stonith_key_value_t *dIter = NULL;
1674 stonith_key_value_t *devices = NULL;
1675
1676 CRM_CHECK((msg != NULL) && (result != NULL), return);
1677
1678 level = unpack_level_request(msg, &mode, &target, &id, desc);
1679 if (level == NULL) {
1680 fenced_set_protocol_error(result);
1681 return;
1682 }
1683
1684 // Ensure an ID was given (even the client API adds an ID)
1685 if (pcmk__str_empty(pcmk__xe_id(level))) {
1686 crm_warn("Ignoring registration for topology level without ID");
1687 free(target);
1688 crm_log_xml_trace(level, "Bad level");
1689 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1690 "Topology level is invalid without ID");
1691 return;
1692 }
1693
1694 // Ensure a valid target was specified
1695 if (mode == fenced_target_by_unknown) {
1696 crm_warn("Ignoring registration for topology level '%s' "
1697 "without valid target", pcmk__xe_id(level));
1698 free(target);
1699 crm_log_xml_trace(level, "Bad level");
1700 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1701 "Invalid target for topology level '%s'",
1702 pcmk__xe_id(level));
1703 return;
1704 }
1705
1706 // Ensure level ID is in allowed range
1707 if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
1708 crm_warn("Ignoring topology registration for %s with invalid level %d",
1709 target, id);
1710 free(target);
1711 crm_log_xml_trace(level, "Bad level");
1712 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1713 "Invalid level number '%s' for topology level '%s'",
1714 pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
1715 ""),
1716 pcmk__xe_id(level));
1717 return;
1718 }
1719
1720 /* Find or create topology table entry */
1721 tp = g_hash_table_lookup(topology, target);
1722 if (tp == NULL) {
1723 tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t));
1724
1725 tp->kind = mode;
1726 tp->target = target;
1727 tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE);
1728 tp->target_pattern = crm_element_value_copy(level,
1729 PCMK_XA_TARGET_PATTERN);
1730 tp->target_attribute = crm_element_value_copy(level,
1731 PCMK_XA_TARGET_ATTRIBUTE);
1732
1733 g_hash_table_replace(topology, tp->target, tp);
1734 crm_trace("Added %s (%d) to the topology (%d active entries)",
1735 target, (int) mode, g_hash_table_size(topology));
1736 } else {
1737 free(target);
1738 }
1739
1740 if (tp->levels[id] != NULL) {
1741 crm_info("Adding to the existing %s[%d] topology entry",
1742 tp->target, id);
1743 }
1744
1745 devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES));
1746 for (dIter = devices; dIter; dIter = dIter->next) {
1747 const char *device = dIter->value;
1748
1749 crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
1750 tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device));
1751 }
1752 stonith_key_value_freeall(devices, 1, 1);
1753
1754 {
1755 int nlevels = count_active_levels(tp);
1756
1757 crm_info("Target %s has %d active fencing level%s",
1758 tp->target, nlevels, pcmk__plural_s(nlevels));
1759 }
1760
1761 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1762 }
1763
1764 /*!
1765 * \internal
1766 * \brief Unregister a fencing topology level for a target
1767 *
1768 * Given an XML request specifying the target name and level index (or 0 for all
1769 * levels), this will remove any corresponding entry for the target from the
1770 * global topology table.
1771 *
1772 * \param[in] msg XML request for STONITH level registration
1773 * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
1774 * \param[out] result Where to set result of unregistration
1775 */
1776 void
1777 fenced_unregister_level(xmlNode *msg, char **desc,
1778 pcmk__action_result_t *result)
1779 {
1780 int id = -1;
1781 stonith_topology_t *tp;
1782 char *target;
1783 xmlNode *level = NULL;
1784
1785 CRM_CHECK(result != NULL, return);
1786
1787 level = unpack_level_request(msg, NULL, &target, &id, desc);
1788 if (level == NULL) {
1789 fenced_set_protocol_error(result);
1790 return;
1791 }
1792
1793 // Ensure level ID is in allowed range
1794 if ((id < 0) || (id >= ST__LEVEL_COUNT)) {
1795 crm_warn("Ignoring topology unregistration for %s with invalid level %d",
1796 target, id);
1797 free(target);
1798 crm_log_xml_trace(level, "Bad level");
1799 pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
1800 "Invalid level number '%s' for topology level %s",
1801 pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
1802 "<null>"),
1803
1804 // Client API doesn't add ID to unregistration XML
1805 pcmk__s(pcmk__xe_id(level), ""));
1806 return;
1807 }
1808
1809 tp = g_hash_table_lookup(topology, target);
1810 if (tp == NULL) {
1811 guint nentries = g_hash_table_size(topology);
1812
1813 crm_info("No fencing topology found for %s (%d active %s)",
1814 target, nentries,
1815 pcmk__plural_alt(nentries, "entry", "entries"));
1816
1817 } else if (id == 0 && g_hash_table_remove(topology, target)) {
1818 guint nentries = g_hash_table_size(topology);
1819
1820 crm_info("Removed all fencing topology entries related to %s "
1821 "(%d active %s remaining)", target, nentries,
1822 pcmk__plural_alt(nentries, "entry", "entries"));
1823
1824 } else if (tp->levels[id] != NULL) {
1825 guint nlevels;
1826
1827 g_list_free_full(tp->levels[id], free);
1828 tp->levels[id] = NULL;
1829
1830 nlevels = count_active_levels(tp);
1831 crm_info("Removed level %d from fencing topology for %s "
1832 "(%d active level%s remaining)",
1833 id, target, nlevels, pcmk__plural_s(nlevels));
1834 }
1835
1836 free(target);
1837 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1838 }
1839
1840 static char *
1841 list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
1842 {
1843 int max = g_list_length(list);
1844 size_t delim_len = delim?strlen(delim):0;
1845 size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
1846 char *rv;
1847 GList *gIter;
1848
1849 char *pos = NULL;
1850 const char *lead_delim = "";
1851
1852 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1853 const char *value = (const char *) gIter->data;
1854
1855 alloc_size += strlen(value);
1856 }
1857
1858 rv = pcmk__assert_alloc(alloc_size, sizeof(char));
1859 pos = rv;
1860
1861 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1862 const char *value = (const char *) gIter->data;
1863
1864 pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
1865 lead_delim = delim;
1866 }
1867
1868 if (max && terminate_with_delim) {
1869 sprintf(pos, "%s", delim);
1870 }
1871
1872 return rv;
1873 }
1874
1875 /*!
1876 * \internal
1877 * \brief Execute a fence agent action directly (and asynchronously)
1878 *
1879 * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
1880 * directly on a specified device. Only list, monitor, and status actions are
1881 * expected to use this call, though it should work with any agent command.
1882 *
1883 * \param[in] msg Request XML specifying action
1884 * \param[out] result Where to store result of action
1885 *
1886 * \note If the action is monitor, the device must be registered via the API
1887 * (CIB registration is not sufficient), because monitor should not be
1888 * possible unless the device is "started" (API registered).
1889 */
1890 static void
1891 execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
1892 {
1893 xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR);
1894 xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg,
1895 LOG_ERR);
1896 const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
1897 const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION);
1898 async_command_t *cmd = NULL;
1899 stonith_device_t *device = NULL;
1900
1901 if ((id == NULL) || (action == NULL)) {
1902 crm_info("Malformed API action request: device %s, action %s",
1903 (id? id : "not specified"),
1904 (action? action : "not specified"));
1905 fenced_set_protocol_error(result);
1906 return;
1907 }
1908
1909 if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
1910 // Watchdog agent actions are implemented internally
1911 if (stonith_watchdog_timeout_ms <= 0) {
1912 pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
1913 "Watchdog fence device not configured");
1914 return;
1915
1916 } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
1917 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1918 pcmk__set_result_output(result,
1919 list_to_string(stonith_watchdog_targets,
1920 "\n", TRUE),
1921 NULL);
1922 return;
1923
1924 } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
1925 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1926 return;
1927 }
1928 }
1929
1930 device = g_hash_table_lookup(device_list, id);
1931 if (device == NULL) {
1932 crm_info("Ignoring API '%s' action request because device %s not found",
1933 action, id);
1934 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
1935 "'%s' not found", id);
1936 return;
1937
1938 } else if (!device->api_registered
1939 && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
1940 // Monitors may run only on "started" (API-registered) devices
1941 crm_info("Ignoring API '%s' action request because device %s not active",
1942 action, id);
1943 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
1944 "'%s' not active", id);
1945 return;
1946 }
1947
1948 cmd = create_async_command(msg);
1949 if (cmd == NULL) {
1950 crm_log_xml_warn(msg, "invalid");
1951 fenced_set_protocol_error(result);
1952 return;
1953 }
1954
1955 schedule_stonith_command(cmd, device);
1956 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
1957 }
1958
1959 static void
1960 search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
1961 {
1962 search->replies_received++;
1963 if (can_fence && device) {
1964 if (search->support_action_only != st_device_supports_none) {
1965 stonith_device_t *dev = g_hash_table_lookup(device_list, device);
1966 if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) {
1967 return;
1968 }
1969 }
1970 search->capable = g_list_append(search->capable,
1971 pcmk__str_copy(device));
1972 }
1973
1974 if (search->replies_needed == search->replies_received) {
1975
1976 guint ndevices = g_list_length(search->capable);
1977
1978 crm_debug("Search found %d device%s that can perform '%s' targeting %s",
1979 ndevices, pcmk__plural_s(ndevices),
1980 (search->action? search->action : "unknown action"),
1981 (search->host? search->host : "any node"));
1982
1983 search->callback(search->capable, search->user_data);
1984 free(search->host);
1985 free(search->action);
1986 free(search);
1987 }
1988 }
1989
1990 /*!
1991 * \internal
1992 * \brief Check whether the local host is allowed to execute a fencing action
1993 *
1994 * \param[in] device Fence device to check
1995 * \param[in] action Fence action to check
1996 * \param[in] target Hostname of fence target
1997 * \param[in] allow_self Whether self-fencing is allowed for this operation
1998 *
1999 * \return TRUE if local host is allowed to execute action, FALSE otherwise
2000 */
2001 static gboolean
2002 localhost_is_eligible(const stonith_device_t *device, const char *action,
2003 const char *target, gboolean allow_self)
2004 {
2005 gboolean localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(),
2006 pcmk__str_casei);
2007
2008 if ((device != NULL) && (action != NULL)
2009 && (device->on_target_actions != NULL)
2010 && (strstr((const char*) device->on_target_actions->str,
2011 action) != NULL)) {
2012
2013 if (!localhost_is_target) {
2014 crm_trace("Operation '%s' using %s can only be executed for local "
2015 "host, not %s", action, device->id, target);
2016 return FALSE;
2017 }
2018
2019 } else if (localhost_is_target && !allow_self) {
2020 crm_trace("'%s' operation does not support self-fencing", action);
2021 return FALSE;
2022 }
2023 return TRUE;
2024 }
2025
2026 /*!
2027 * \internal
2028 * \brief Check if local node is allowed to execute (possibly remapped) action
2029 *
2030 * \param[in] device Fence device to check
2031 * \param[in] action Fence action to check
2032 * \param[in] target Node name of fence target
2033 * \param[in] allow_self Whether self-fencing is allowed for this operation
2034 *
2035 * \return true if local node is allowed to execute \p action or any actions it
2036 * might be remapped to, otherwise false
2037 */
2038 static bool
2039 localhost_is_eligible_with_remap(const stonith_device_t *device,
2040 const char *action, const char *target,
2041 gboolean allow_self)
2042 {
2043 // Check exact action
2044 if (localhost_is_eligible(device, action, target, allow_self)) {
2045 return true;
2046 }
2047
2048 // Check potential remaps
2049
2050 if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
2051 /* "reboot" might get remapped to "off" then "on", so even if reboot is
2052 * disallowed, return true if either of those is allowed. We'll report
2053 * the disallowed actions with the results. We never allow self-fencing
2054 * for remapped "on" actions because the target is off at that point.
2055 */
2056 if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
2057 || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
2058 return true;
2059 }
2060 }
2061
2062 return false;
2063 }
2064
2065 /*!
2066 * \internal
2067 * \brief Check whether we can use a device's cached target list
2068 *
2069 * \param[in] dev Fencing device to check
2070 *
2071 * \return \c true if \p dev cached its targets less than a minute ago,
2072 * otherwise \c false
2073 */
2074 static inline bool
2075 can_use_target_cache(const stonith_device_t *dev)
2076 {
2077 return (dev->targets != NULL) && (time(NULL) < (dev->targets_age + 60));
2078 }
2079
2080 static void
2081 can_fence_host_with_device(stonith_device_t *dev,
2082 struct device_search_s *search)
2083 {
2084 gboolean can = FALSE;
2085 const char *check_type = "Internal bug";
2086 const char *target = NULL;
2087 const char *alias = NULL;
2088 const char *dev_id = "Unspecified device";
2089 const char *action = (search == NULL)? NULL : search->action;
2090
2091 CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);
2092
2093 if (dev->id != NULL) {
2094 dev_id = dev->id;
2095 }
2096
2097 target = search->host;
2098 if (target == NULL) {
2099 can = TRUE;
2100 check_type = "No target";
2101 goto search_report_results;
2102 }
2103
2104 /* Answer immediately if the device does not support the action
2105 * or the local node is not allowed to perform it
2106 */
2107 if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
2108 && !pcmk_is_set(dev->flags, st_device_supports_on)) {
2109 check_type = "Agent does not support 'on'";
2110 goto search_report_results;
2111
2112 } else if (!localhost_is_eligible_with_remap(dev, action, target,
2113 search->allow_self)) {
2114 check_type = "This node is not allowed to execute action";
2115 goto search_report_results;
2116 }
2117
2118 // Check eligibility as specified by pcmk_host_check
2119 check_type = target_list_type(dev);
2120 alias = g_hash_table_lookup(dev->aliases, target);
2121 if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
2122 can = TRUE;
2123
2124 } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
2125 pcmk__str_casei)) {
2126
2127 if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
2128 can = TRUE;
2129 } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
2130 && g_hash_table_lookup(dev->aliases, target)) {
2131 can = TRUE;
2132 }
2133
2134 } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST,
2135 pcmk__str_casei)) {
2136 if (!can_use_target_cache(dev)) {
2137 int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
2138 search->per_device_timeout);
2139
2140 if (device_timeout > search->per_device_timeout) {
2141 crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s "
2142 "is larger than " PCMK_OPT_STONITH_TIMEOUT
2143 " (%ds), timeout may occur",
2144 device_timeout, dev_id, search->per_device_timeout);
2145 }
2146
2147 crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
2148 check_type, dev_id, target, action);
2149
2150 schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
2151 search->per_device_timeout, search, dynamic_list_search_cb);
2152
2153 /* we'll respond to this search request async in the cb */
2154 return;
2155 }
2156
2157 if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
2158 pcmk__str_casei)) {
2159 can = TRUE;
2160 }
2161
2162 } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) {
2163 int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
2164
2165 if (device_timeout > search->per_device_timeout) {
2166 crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is "
2167 "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), "
2168 "timeout may occur",
2169 device_timeout, dev_id, search->per_device_timeout);
2170 }
2171
2172 crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
2173 check_type, dev_id, target, action);
2174 schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
2175 search->per_device_timeout, search, status_search_cb);
2176 /* we'll respond to this search request async in the cb */
2177 return;
2178 } else {
2179 crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
2180 check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
2181 }
2182
2183 search_report_results:
2184 crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
2185 dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
2186 pcmk__s(target, "unspecified target"),
2187 (alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
2188 (alias == NULL)? "" : "')", check_type);
2189 search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
2190 }
2191
2192 static void
2193 search_devices(gpointer key, gpointer value, gpointer user_data)
2194 {
2195 stonith_device_t *dev = value;
2196 struct device_search_s *search = user_data;
2197
2198 can_fence_host_with_device(dev, search);
2199 }
2200
2201 #define DEFAULT_QUERY_TIMEOUT 20
2202 static void
2203 get_capable_devices(const char *host, const char *action, int timeout,
2204 bool allow_self, void *user_data,
2205 void (*callback) (GList * devices, void *user_data),
2206 uint32_t support_action_only)
2207 {
2208 struct device_search_s *search;
2209 guint ndevices = g_hash_table_size(device_list);
2210
2211 if (ndevices == 0) {
2212 callback(NULL, user_data);
2213 return;
2214 }
2215
2216 search = pcmk__assert_alloc(1, sizeof(struct device_search_s));
2217
2218 search->host = pcmk__str_copy(host);
2219 search->action = pcmk__str_copy(action);
2220 search->per_device_timeout = timeout;
2221 search->allow_self = allow_self;
2222 search->callback = callback;
2223 search->user_data = user_data;
2224 search->support_action_only = support_action_only;
2225
2226 /* We are guaranteed this many replies, even if a device is
2227 * unregistered while the search is in progress.
2228 */
2229 search->replies_needed = ndevices;
2230
2231 crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
2232 ndevices, pcmk__plural_s(ndevices),
2233 (search->action? search->action : "unknown action"),
2234 (search->host? search->host : "any node"));
2235 g_hash_table_foreach(device_list, search_devices, search);
2236 }
2237
2238 struct st_query_data {
2239 xmlNode *reply;
2240 char *remote_peer;
2241 char *client_id;
2242 char *target;
2243 char *action;
2244 int call_options;
2245 };
2246
2247 /*!
2248 * \internal
2249 * \brief Add action-specific attributes to query reply XML
2250 *
2251 * \param[in,out] xml XML to add attributes to
2252 * \param[in] action Fence action
2253 * \param[in] device Fence device
2254 * \param[in] target Fence target
2255 */
2256 static void
2257 add_action_specific_attributes(xmlNode *xml, const char *action,
2258 const stonith_device_t *device,
2259 const char *target)
2260 {
2261 int action_specific_timeout;
2262 int delay_max;
2263 int delay_base;
2264
2265 CRM_CHECK(xml && action && device, return);
2266
2267 // PCMK__XA_ST_REQUIRED is currently used only for unfencing
2268 if (is_action_required(action, device)) {
2269 crm_trace("Action '%s' is required using %s", action, device->id);
2270 crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1);
2271 }
2272
2273 // pcmk_<action>_timeout if configured
2274 action_specific_timeout = get_action_timeout(device, action, 0);
2275 if (action_specific_timeout) {
2276 crm_trace("Action '%s' has timeout %ds using %s",
2277 action, action_specific_timeout, device->id);
2278 crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
2279 action_specific_timeout);
2280 }
2281
2282 delay_max = get_action_delay_max(device, action);
2283 if (delay_max > 0) {
2284 crm_trace("Action '%s' has maximum random delay %ds using %s",
2285 action, delay_max, device->id);
2286 crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
2287 }
2288
2289 delay_base = get_action_delay_base(device, action, target);
2290 if (delay_base > 0) {
2291 crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
2292 }
2293
2294 if ((delay_max > 0) && (delay_base == 0)) {
2295 crm_trace("Action '%s' has maximum random delay %ds using %s",
2296 action, delay_max, device->id);
2297 } else if ((delay_max == 0) && (delay_base > 0)) {
2298 crm_trace("Action '%s' has a static delay of %ds using %s",
2299 action, delay_base, device->id);
2300 } else if ((delay_max > 0) && (delay_base > 0)) {
2301 crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
2302 "maximum delay of %ds using %s",
2303 action, delay_base, delay_max, device->id);
2304 }
2305 }
2306
2307 /*!
2308 * \internal
2309 * \brief Add "disallowed" attribute to query reply XML if appropriate
2310 *
2311 * \param[in,out] xml XML to add attribute to
2312 * \param[in] action Fence action
2313 * \param[in] device Fence device
2314 * \param[in] target Fence target
2315 * \param[in] allow_self Whether self-fencing is allowed
2316 */
2317 static void
2318 add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
2319 const char *target, gboolean allow_self)
2320 {
2321 if (!localhost_is_eligible(device, action, target, allow_self)) {
2322 crm_trace("Action '%s' using %s is disallowed for local host",
2323 action, device->id);
2324 pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
2325 }
2326 }
2327
2328 /*!
2329 * \internal
2330 * \brief Add child element with action-specific values to query reply XML
2331 *
2332 * \param[in,out] xml XML to add attribute to
2333 * \param[in] action Fence action
2334 * \param[in] device Fence device
2335 * \param[in] target Fence target
2336 * \param[in] allow_self Whether self-fencing is allowed
2337 */
2338 static void
2339 add_action_reply(xmlNode *xml, const char *action,
2340 const stonith_device_t *device, const char *target,
2341 gboolean allow_self)
2342 {
2343 xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION);
2344
2345 crm_xml_add(child, PCMK_XA_ID, action);
2346 add_action_specific_attributes(child, action, device, target);
2347 add_disallowed(child, action, device, target, allow_self);
2348 }
2349
2350 /*!
2351 * \internal
2352 * \brief Send a reply to a CPG peer or IPC client
2353 *
2354 * \param[in] reply XML reply to send
2355 * \param[in] call_options Send synchronously if st_opt_sync_call is set
2356 * \param[in] remote_peer If not NULL, name of peer node to send CPG reply
2357 * \param[in,out] client If not NULL, client to send IPC reply
2358 */
2359 static void
2360 stonith_send_reply(const xmlNode *reply, int call_options,
2361 const char *remote_peer, pcmk__client_t *client)
2362 {
2363 CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
2364 return);
2365
2366 if (remote_peer == NULL) {
2367 do_local_reply(reply, client, call_options);
2368 } else {
2369 const pcmk__node_status_t *node =
2370 pcmk__get_node(0, remote_peer, NULL,
2371 pcmk__node_search_cluster_member);
2372
2373 pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply);
2374 }
2375 }
2376
2377 static void
2378 stonith_query_capable_device_cb(GList * devices, void *user_data)
2379 {
2380 struct st_query_data *query = user_data;
2381 int available_devices = 0;
2382 xmlNode *wrapper = NULL;
2383 xmlNode *list = NULL;
2384 GList *lpc = NULL;
2385 pcmk__client_t *client = NULL;
2386
2387 if (query->client_id != NULL) {
2388 client = pcmk__find_client_by_id(query->client_id);
2389 if ((client == NULL) && (query->remote_peer == NULL)) {
2390 crm_trace("Skipping reply to %s: no longer a client",
2391 query->client_id);
2392 goto done;
2393 }
2394 }
2395
2396 // Pack the results into XML
2397 wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
2398 list = pcmk__xe_create(wrapper, __func__);
2399 crm_xml_add(list, PCMK__XA_ST_TARGET, query->target);
2400
2401 for (lpc = devices; lpc != NULL; lpc = lpc->next) {
2402 stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
2403 const char *action = query->action;
2404 xmlNode *dev = NULL;
2405
2406 if (!device) {
2407 /* It is possible the device got unregistered while
2408 * determining who can fence the target */
2409 continue;
2410 }
2411
2412 available_devices++;
2413
2414 dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
2415 crm_xml_add(dev, PCMK_XA_ID, device->id);
2416 crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace);
2417 crm_xml_add(dev, PCMK_XA_AGENT, device->agent);
2418
2419 // Has had successful monitor, list, or status on this node
2420 crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified);
2421
2422 crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);
2423
2424 /* If the originating fencer wants to reboot the node, and we have a
2425 * capable device that doesn't support "reboot", remap to "off" instead.
2426 */
2427 if (!pcmk_is_set(device->flags, st_device_supports_reboot)
2428 && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
2429 pcmk__str_none)) {
2430 crm_trace("%s doesn't support reboot, using values for off instead",
2431 device->id);
2432 action = PCMK_ACTION_OFF;
2433 }
2434
2435 /* Add action-specific values if available */
2436 add_action_specific_attributes(dev, action, device, query->target);
2437 if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
2438 /* A "reboot" *might* get remapped to "off" then "on", so after
2439 * sending the "reboot"-specific values in the main element, we add
2440 * sub-elements for "off" and "on" values.
2441 *
2442 * We short-circuited earlier if "reboot", "off" and "on" are all
2443 * disallowed for the local host. However if only one or two are
2444 * disallowed, we send back the results and mark which ones are
2445 * disallowed. If "reboot" is disallowed, this might cause problems
2446 * with older fencer versions, which won't check for it. Older
2447 * versions will ignore "off" and "on", so they are not a problem.
2448 */
2449 add_disallowed(dev, action, device, query->target,
2450 pcmk_is_set(query->call_options,
2451 st_opt_allow_self_fencing));
2452 add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
2453 pcmk_is_set(query->call_options,
2454 st_opt_allow_self_fencing));
2455 add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
2456 }
2457
2458 /* A query without a target wants device parameters */
2459 if (query->target == NULL) {
2460 xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);
2461
2462 g_hash_table_foreach(device->params, hash2field, attrs);
2463 }
2464 }
2465
2466 crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
2467 if (query->target) {
2468 crm_debug("Found %d matching device%s for target '%s'",
2469 available_devices, pcmk__plural_s(available_devices),
2470 query->target);
2471 } else {
2472 crm_debug("%d device%s installed",
2473 available_devices, pcmk__plural_s(available_devices));
2474 }
2475
2476 crm_log_xml_trace(list, "query-result");
2477
2478 stonith_send_reply(query->reply, query->call_options, query->remote_peer,
2479 client);
2480
2481 done:
2482 pcmk__xml_free(query->reply);
2483 free(query->remote_peer);
2484 free(query->client_id);
2485 free(query->target);
2486 free(query->action);
2487 free(query);
2488 g_list_free_full(devices, free);
2489 }
2490
2491 /*!
2492 * \internal
2493 * \brief Log the result of an asynchronous command
2494 *
2495 * \param[in] cmd Command the result is for
2496 * \param[in] result Result of command
2497 * \param[in] pid Process ID of command, if available
2498 * \param[in] next Alternate device that will be tried if command failed
2499 * \param[in] op_merged Whether this command was merged with an earlier one
2500 */
2501 static void
2502 log_async_result(const async_command_t *cmd,
2503 const pcmk__action_result_t *result,
2504 int pid, const char *next, bool op_merged)
2505 {
2506 int log_level = LOG_ERR;
2507 int output_log_level = LOG_NEVER;
2508 guint devices_remaining = g_list_length(cmd->next_device_iter);
2509
2510 GString *msg = g_string_sized_new(80); // Reasonable starting size
2511
2512 // Choose log levels appropriately if we have a result
2513 if (pcmk__result_ok(result)) {
2514 log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
2515 if ((result->action_stdout != NULL)
2516 && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
2517 pcmk__str_none)) {
2518 output_log_level = LOG_DEBUG;
2519 }
2520 next = NULL;
2521 } else {
2522 log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
2523 if ((result->action_stdout != NULL)
2524 && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
2525 pcmk__str_none)) {
2526 output_log_level = LOG_WARNING;
2527 }
2528 }
2529
2530 // Build the log message piece by piece
2531 pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
2532 if (pid != 0) {
2533 g_string_append_printf(msg, "[%d] ", pid);
2534 }
2535 if (cmd->target != NULL) {
2536 pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
2537 }
2538 if (cmd->device != NULL) {
2539 pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
2540 }
2541
2542 // Add exit status or execution status as appropriate
2543 if (result->execution_status == PCMK_EXEC_DONE) {
2544 g_string_append_printf(msg, "returned %d", result->exit_status);
2545 } else {
2546 pcmk__g_strcat(msg, "could not be executed: ",
2547 pcmk_exec_status_str(result->execution_status), NULL);
2548 }
2549
2550 // Add exit reason and next device if appropriate
2551 if (result->exit_reason != NULL) {
2552 pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
2553 }
2554 if (next != NULL) {
2555 pcmk__g_strcat(msg, ", retrying with ", next, NULL);
2556 }
2557 if (devices_remaining > 0) {
2558 g_string_append_printf(msg, " (%u device%s remaining)",
2559 (unsigned int) devices_remaining,
2560 pcmk__plural_s(devices_remaining));
2561 }
2562 g_string_append_printf(msg, " " QB_XS " %scall %d from %s",
2563 (op_merged? "merged " : ""), cmd->id,
2564 cmd->client_name);
2565
2566 // Log the result
2567 do_crm_log(log_level, "%s", msg->str);
2568 g_string_free(msg, TRUE);
2569
2570 // Log the output (which may have multiple lines), if appropriate
2571 if (output_log_level != LOG_NEVER) {
2572 char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
2573
2574 crm_log_output(output_log_level, prefix, result->action_stdout);
2575 free(prefix);
2576 }
2577 }
2578
2579 /*!
2580 * \internal
2581 * \brief Reply to requester after asynchronous command completion
2582 *
2583 * \param[in] cmd Command that completed
2584 * \param[in] result Result of command
2585 * \param[in] pid Process ID of command, if available
2586 * \param[in] merged If true, command was merged with another, not executed
2587 */
2588 static void
2589 send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
2590 int pid, bool merged)
2591 {
2592 xmlNode *reply = NULL;
2593 pcmk__client_t *client = NULL;
2594
2595 CRM_CHECK((cmd != NULL) && (result != NULL), return);
2596
2597 log_async_result(cmd, result, pid, NULL, merged);
2598
2599 if (cmd->client != NULL) {
2600 client = pcmk__find_client_by_id(cmd->client);
2601 if ((client == NULL) && (cmd->origin == NULL)) {
2602 crm_trace("Skipping reply to %s: no longer a client", cmd->client);
2603 return;
2604 }
2605 }
2606
2607 reply = construct_async_reply(cmd, result);
2608 if (merged) {
2609 pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true);
2610 }
2611
2612 if (pcmk__is_fencing_action(cmd->action)
2613 && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
2614 /* The target was also the originator, so broadcast the result on its
2615 * behalf (since it will be unable to).
2616 */
2617 crm_trace("Broadcast '%s' result for %s (target was also originator)",
2618 cmd->action, cmd->target);
2619 crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
2620 crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
2621 pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply);
2622 } else {
2623 // Reply only to the originator
2624 stonith_send_reply(reply, cmd->options, cmd->origin, client);
2625 }
2626
2627 crm_log_xml_trace(reply, "Reply");
2628 pcmk__xml_free(reply);
2629 }
2630
2631 static void
2632 cancel_stonith_command(async_command_t * cmd)
2633 {
2634 stonith_device_t *device = cmd_device(cmd);
2635
2636 if (device) {
2637 crm_trace("Cancel scheduled '%s' action using %s",
2638 cmd->action, device->id);
2639 device->pending_ops = g_list_remove(device->pending_ops, cmd);
2640 }
2641 }
2642
2643 /*!
2644 * \internal
2645 * \brief Cancel and reply to any duplicates of a just-completed operation
2646 *
2647 * Check whether any fencing operations are scheduled to do the same thing as
2648 * one that just succeeded. If so, rather than performing the same operation
2649 * twice, return the result of this operation for all matching pending commands.
2650 *
2651 * \param[in,out] cmd Fencing operation that just succeeded
2652 * \param[in] result Result of \p cmd
2653 * \param[in] pid If nonzero, process ID of agent invocation (for logs)
2654 *
2655 * \note Duplicate merging will do the right thing for either type of remapped
2656 * reboot. If the executing fencer remapped an unsupported reboot to off,
2657 * then cmd->action will be "reboot" and will be merged with any other
2658 * reboot requests. If the originating fencer remapped a topology reboot
2659 * to off then on, we will get here once with cmd->action "off" and once
2660 * with "on", and they will be merged separately with similar requests.
2661 */
2662 static void
2663 reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
2664 int pid)
2665 {
2666 GList *next = NULL;
2667
2668 for (GList *iter = cmd_list; iter != NULL; iter = next) {
2669 async_command_t *cmd_other = iter->data;
2670
2671 next = iter->next; // We might delete this entry, so grab next now
2672
2673 if (cmd == cmd_other) {
2674 continue;
2675 }
2676
2677 /* A pending operation matches if:
2678 * 1. The client connections are different.
2679 * 2. The target is the same.
2680 * 3. The fencing action is the same.
2681 * 4. The device scheduled to execute the action is the same.
2682 */
2683 if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
2684 !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
2685 !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
2686 !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
2687
2688 continue;
2689 }
2690
2691 crm_notice("Merging fencing action '%s'%s%s originating from "
2692 "client %s with identical fencing request from client %s",
2693 cmd_other->action,
2694 (cmd_other->target == NULL)? "" : " targeting ",
2695 pcmk__s(cmd_other->target, ""), cmd_other->client_name,
2696 cmd->client_name);
2697
2698 // Stop tracking the duplicate, send its result, and cancel it
2699 cmd_list = g_list_remove_link(cmd_list, iter);
2700 send_async_reply(cmd_other, result, pid, true);
2701 cancel_stonith_command(cmd_other);
2702
2703 free_async_command(cmd_other);
2704 g_list_free_1(iter);
2705 }
2706 }
2707
2708 /*!
2709 * \internal
2710 * \brief Return the next required device (if any) for an operation
2711 *
2712 * \param[in,out] cmd Fencing operation that just succeeded
2713 *
2714 * \return Next device required for action if any, otherwise NULL
2715 */
2716 static stonith_device_t *
2717 next_required_device(async_command_t *cmd)
2718 {
2719 for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) {
2720 stonith_device_t *next_device = g_hash_table_lookup(device_list,
2721 iter->data);
2722
2723 if (is_action_required(cmd->action, next_device)) {
2724 /* This is only called for successful actions, so it's OK to skip
2725 * non-required devices.
2726 */
2727 cmd->next_device_iter = iter->next;
2728 return next_device;
2729 }
2730 }
2731 return NULL;
2732 }
2733
2734 static void
2735 st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
2736 {
2737 async_command_t *cmd = user_data;
2738
2739 stonith_device_t *device = NULL;
2740 stonith_device_t *next_device = NULL;
2741
2742 CRM_CHECK(cmd != NULL, return);
2743
2744 device = cmd_device(cmd);
2745 cmd->active_on = NULL;
2746
2747 /* The device is ready to do something else now */
2748 if (device) {
2749 if (!device->verified && pcmk__result_ok(result)
2750 && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
2751 PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
2752 NULL)) {
2753
2754 device->verified = TRUE;
2755 }
2756
2757 mainloop_set_trigger(device->work);
2758 }
2759
2760 if (pcmk__result_ok(result)) {
2761 next_device = next_required_device(cmd);
2762
2763 } else if ((cmd->next_device_iter != NULL)
2764 && !is_action_required(cmd->action, device)) {
2765 /* if this device didn't work out, see if there are any others we can try.
2766 * if the failed device was 'required', we can't pick another device. */
2767 next_device = g_hash_table_lookup(device_list,
2768 cmd->next_device_iter->data);
2769 cmd->next_device_iter = cmd->next_device_iter->next;
2770 }
2771
2772 if (next_device == NULL) {
2773 send_async_reply(cmd, result, pid, false);
2774 if (pcmk__result_ok(result)) {
2775 reply_to_duplicates(cmd, result, pid);
2776 }
2777 free_async_command(cmd);
2778
2779 } else { // This operation requires more fencing
2780 log_async_result(cmd, result, pid, next_device->id, false);
2781 schedule_stonith_command(cmd, next_device);
2782 }
2783 }
2784
2785 static gint
2786 sort_device_priority(gconstpointer a, gconstpointer b)
2787 {
2788 const stonith_device_t *dev_a = a;
2789 const stonith_device_t *dev_b = b;
2790
2791 if (dev_a->priority > dev_b->priority) {
2792 return -1;
2793 } else if (dev_a->priority < dev_b->priority) {
2794 return 1;
2795 }
2796 return 0;
2797 }
2798
2799 static void
2800 stonith_fence_get_devices_cb(GList * devices, void *user_data)
2801 {
2802 async_command_t *cmd = user_data;
2803 stonith_device_t *device = NULL;
2804 guint ndevices = g_list_length(devices);
2805
2806 crm_info("Found %d matching device%s for target '%s'",
2807 ndevices, pcmk__plural_s(ndevices), cmd->target);
2808
2809 if (devices != NULL) {
2810 /* Order based on priority */
2811 devices = g_list_sort(devices, sort_device_priority);
2812 device = g_hash_table_lookup(device_list, devices->data);
2813 }
2814
2815 if (device == NULL) { // No device found
2816 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2817
2818 pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2819 "No device configured for target '%s'",
2820 cmd->target);
2821 send_async_reply(cmd, &result, 0, false);
2822 pcmk__reset_result(&result);
2823 free_async_command(cmd);
2824 g_list_free_full(devices, free);
2825
2826 } else { // Device found, schedule it for fencing
2827 cmd->device_list = devices;
2828 cmd->next_device_iter = devices->next;
2829 schedule_stonith_command(cmd, device);
2830 }
2831 }
2832
2833 /*!
2834 * \internal
2835 * \brief Execute a fence action via the local node
2836 *
2837 * \param[in] msg Fencing request
2838 * \param[out] result Where to store result of fence action
2839 */
2840 static void
2841 fence_locally(xmlNode *msg, pcmk__action_result_t *result)
2842 {
2843 const char *device_id = NULL;
2844 stonith_device_t *device = NULL;
2845 async_command_t *cmd = NULL;
2846 xmlNode *dev = NULL;
2847
2848 CRM_CHECK((msg != NULL) && (result != NULL), return);
2849
2850 dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);
2851
2852 cmd = create_async_command(msg);
2853 if (cmd == NULL) {
2854 crm_log_xml_warn(msg, "invalid");
2855 fenced_set_protocol_error(result);
2856 return;
2857 }
2858
2859 device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
2860 if (device_id != NULL) {
2861 device = g_hash_table_lookup(device_list, device_id);
2862 if (device == NULL) {
2863 crm_err("Requested device '%s' is not available", device_id);
2864 pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
2865 "Requested device '%s' not found", device_id);
2866 return;
2867 }
2868 schedule_stonith_command(cmd, device);
2869
2870 } else {
2871 const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET);
2872
2873 if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
2874 int nodeid = 0;
2875 pcmk__node_status_t *node = NULL;
2876
2877 pcmk__scan_min_int(host, &nodeid, 0);
2878 node = pcmk__search_node_caches(nodeid, NULL, NULL,
2879 pcmk__node_search_any
2880 |pcmk__node_search_cluster_cib);
2881 if (node != NULL) {
2882 host = node->name;
2883 }
2884 }
2885
2886 /* If we get to here, then self-fencing is implicitly allowed */
2887 get_capable_devices(host, cmd->action, cmd->default_timeout,
2888 TRUE, cmd, stonith_fence_get_devices_cb,
2889 fenced_support_flag(cmd->action));
2890 }
2891
2892 pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
2893 }
2894
2895 /*!
2896 * \internal
2897 * \brief Build an XML reply for a fencing operation
2898 *
2899 * \param[in] request Request that reply is for
2900 * \param[in] data If not NULL, add to reply as call data
2901 * \param[in] result Full result of fencing operation
2902 *
2903 * \return Newly created XML reply
2904 * \note The caller is responsible for freeing the result.
2905 * \note This has some overlap with construct_async_reply(), but that copies
2906 * values from an async_command_t, whereas this one copies them from the
2907 * request.
2908 */
2909 xmlNode *
2910 fenced_construct_reply(const xmlNode *request, xmlNode *data,
2911 const pcmk__action_result_t *result)
2912 {
2913 xmlNode *reply = NULL;
2914
2915 reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
2916
2917 crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
2918 crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
2919 stonith__xe_set_result(reply, result);
2920
2921 if (request == NULL) {
2922 /* Most likely, this is the result of a stonith operation that was
2923 * initiated before we came up. Unfortunately that means we lack enough
2924 * information to provide clients with a full result.
2925 *
2926 * @TODO Maybe synchronize this information at start-up?
2927 */
2928 crm_warn("Missing request information for client notifications for "
2929 "operation with result '%s' (initiated before we came up?)",
2930 pcmk_exec_status_str(result->execution_status));
2931
2932 } else {
2933 const char *name = NULL;
2934 const char *value = NULL;
2935
2936 // Attributes to copy from request to reply
2937 const char *names[] = {
2938 PCMK__XA_ST_OP,
2939 PCMK__XA_ST_CALLID,
2940 PCMK__XA_ST_CLIENTID,
2941 PCMK__XA_ST_CLIENTNAME,
2942 PCMK__XA_ST_REMOTE_OP,
2943 PCMK__XA_ST_CALLOPT,
2944 };
2945
2946 for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
2947 name = names[lpc];
2948 value = crm_element_value(request, name);
2949 crm_xml_add(reply, name, value);
2950 }
2951 if (data != NULL) {
2952 xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA);
2953
2954 pcmk__xml_copy(wrapper, data);
2955 }
2956 }
2957 return reply;
2958 }
2959
2960 /*!
2961 * \internal
2962 * \brief Build an XML reply to an asynchronous fencing command
2963 *
2964 * \param[in] cmd Fencing command that reply is for
2965 * \param[in] result Command result
2966 */
2967 static xmlNode *
2968 construct_async_reply(const async_command_t *cmd,
2969 const pcmk__action_result_t *result)
2970 {
2971 xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
2972
2973 crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
2974 crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
2975 crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op);
2976 crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device);
2977 crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
2978 crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client);
2979 crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name);
2980 crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target);
2981 crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op);
2982 crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin);
2983 crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id);
2984 crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);
2985
2986 stonith__xe_set_result(reply, result);
2987 return reply;
2988 }
2989
2990 bool
2991 fencing_peer_active(pcmk__node_status_t *peer)
2992 {
2993 return (peer != NULL) && (peer->name != NULL)
2994 && pcmk_is_set(peer->processes, crm_get_cluster_proc());
2995 }
2996
2997 void
2998 set_fencing_completed(remote_fencing_op_t *op)
2999 {
3000 struct timespec tv;
3001
3002 qb_util_timespec_from_epoch_get(&tv);
3003 op->completed = tv.tv_sec;
3004 op->completed_nsec = tv.tv_nsec;
3005 }
3006
3007 /*!
3008 * \internal
3009 * \brief Look for alternate node needed if local node shouldn't fence target
3010 *
3011 * \param[in] target Node that must be fenced
3012 *
3013 * \return Name of an alternate node that should fence \p target if any,
3014 * or NULL otherwise
3015 */
3016 static const char *
3017 check_alternate_host(const char *target)
3018 {
3019 if (pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
3020 GHashTableIter gIter;
3021 pcmk__node_status_t *entry = NULL;
3022
3023 g_hash_table_iter_init(&gIter, pcmk__peer_cache);
3024 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
3025 if (fencing_peer_active(entry)
3026 && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) {
3027 crm_notice("Forwarding self-fencing request to %s",
3028 entry->name);
3029 return entry->name;
3030 }
3031 }
3032 crm_warn("Will handle own fencing because no peer can");
3033 }
3034 return NULL;
3035 }
3036
3037 static void
3038 remove_relay_op(xmlNode * request)
3039 {
3040 xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request,
3041 LOG_TRACE);
3042 const char *relay_op_id = NULL;
3043 const char *op_id = NULL;
3044 const char *client_name = NULL;
3045 const char *target = NULL;
3046 remote_fencing_op_t *relay_op = NULL;
3047
3048 if (dev) {
3049 target = crm_element_value(dev, PCMK__XA_ST_TARGET);
3050 }
3051
3052 relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY);
3053 op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
3054 client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);
3055
3056 /* Delete RELAY operation. */
3057 if ((relay_op_id != NULL) && (target != NULL)
3058 && pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
3059 relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
3060
3061 if (relay_op) {
3062 GHashTableIter iter;
3063 remote_fencing_op_t *list_op = NULL;
3064 g_hash_table_iter_init(&iter, stonith_remote_op_list);
3065
3066 /* If the operation to be deleted is registered as a duplicate, delete the registration. */
3067 while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
3068 GList *dup_iter = NULL;
3069 if (list_op != relay_op) {
3070 for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
3071 remote_fencing_op_t *other = dup_iter->data;
3072 if (other == relay_op) {
3073 other->duplicates = g_list_remove(other->duplicates, relay_op);
3074 break;
3075 }
3076 }
3077 }
3078 }
3079 crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
3080 "replaced by op %s ('%s'%s%s for %s)",
3081 relay_op->id, relay_op->action,
3082 (relay_op->target == NULL)? "" : " targeting ",
3083 pcmk__s(relay_op->target, ""),
3084 relay_op->client_name, op_id, relay_op->action,
3085 (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
3086 client_name);
3087
3088 g_hash_table_remove(stonith_remote_op_list, relay_op_id);
3089 }
3090 }
3091 }
3092
3093 /*!
3094 * \internal
3095 * \brief Check whether an API request was sent by a privileged user
3096 *
3097 * API commands related to fencing configuration may be done only by privileged
3098 * IPC users (i.e. root or hacluster), because all other users should go through
3099 * the CIB to have ACLs applied. If no client was given, this is a peer request,
3100 * which is always allowed.
3101 *
3102 * \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
3103 * \param[in] op Requested API operation (for logging only)
3104 *
3105 * \return true if sender is peer or privileged client, otherwise false
3106 */
3107 static inline bool
3108 is_privileged(const pcmk__client_t *c, const char *op)
3109 {
3110 if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
3111 return true;
3112 } else {
3113 crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
3114 pcmk__s(op, ""), pcmk__client_name(c));
3115 return false;
3116 }
3117 }
3118
3119 // CRM_OP_REGISTER
3120 static xmlNode *
3121 handle_register_request(pcmk__request_t *request)
3122 {
3123 xmlNode *reply = pcmk__xe_create(NULL, "reply");
3124
3125 pcmk__assert(request->ipc_client != NULL);
3126 crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER);
3127 crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id);
3128 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3129 pcmk__set_request_flags(request, pcmk__request_reuse_options);
3130 return reply;
3131 }
3132
3133 // STONITH_OP_EXEC
3134 static xmlNode *
3135 handle_agent_request(pcmk__request_t *request)
3136 {
3137 execute_agent_action(request->xml, &request->result);
3138 if (request->result.execution_status == PCMK_EXEC_PENDING) {
3139 return NULL;
3140 }
3141 return fenced_construct_reply(request->xml, NULL, &request->result);
3142 }
3143
3144 // STONITH_OP_TIMEOUT_UPDATE
3145 static xmlNode *
3146 handle_update_timeout_request(pcmk__request_t *request)
3147 {
3148 const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID);
3149 const char *client_id = crm_element_value(request->xml,
3150 PCMK__XA_ST_CLIENTID);
3151 int op_timeout = 0;
3152
3153 crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout);
3154 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
3155 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3156 return NULL;
3157 }
3158
3159 // STONITH_OP_QUERY
3160 static xmlNode *
3161 handle_query_request(pcmk__request_t *request)
3162 {
3163 int timeout = 0;
3164 xmlNode *dev = NULL;
3165 const char *action = NULL;
3166 const char *target = NULL;
3167 const char *client_id = crm_element_value(request->xml,
3168 PCMK__XA_ST_CLIENTID);
3169 struct st_query_data *query = NULL;
3170
3171 if (request->peer != NULL) {
3172 // Record it for the future notification
3173 create_remote_stonith_op(client_id, request->xml, TRUE);
3174 }
3175
3176 /* Delete the DC node RELAY operation. */
3177 remove_relay_op(request->xml);
3178
3179 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3180
3181 dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml,
3182 LOG_NEVER);
3183 if (dev != NULL) {
3184 const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
3185
3186 if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
3187 return NULL; // No query or reply necessary
3188 }
3189 target = crm_element_value(dev, PCMK__XA_ST_TARGET);
3190 action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
3191 }
3192
3193 crm_log_xml_trace(request->xml, "Query");
3194
3195 query = pcmk__assert_alloc(1, sizeof(struct st_query_data));
3196
3197 query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
3198 query->remote_peer = pcmk__str_copy(request->peer);
3199 query->client_id = pcmk__str_copy(client_id);
3200 query->target = pcmk__str_copy(target);
3201 query->action = pcmk__str_copy(action);
3202 query->call_options = request->call_options;
3203
3204 crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
3205 get_capable_devices(target, action, timeout,
3206 pcmk_is_set(query->call_options,
3207 st_opt_allow_self_fencing),
3208 query, stonith_query_capable_device_cb, st_device_supports_none);
3209 return NULL;
3210 }
3211
3212 // STONITH_OP_NOTIFY
3213 static xmlNode *
3214 handle_notify_request(pcmk__request_t *request)
3215 {
3216 const char *flag_name = NULL;
3217
3218 pcmk__assert(request->ipc_client != NULL);
3219 flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
3220 if (flag_name != NULL) {
3221 crm_debug("Enabling %s callbacks for client %s",
3222 flag_name, pcmk__request_origin(request));
3223 pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
3224 }
3225
3226 flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
3227 if (flag_name != NULL) {
3228 crm_debug("Disabling %s callbacks for client %s",
3229 flag_name, pcmk__request_origin(request));
3230 pcmk__clear_client_flags(request->ipc_client,
3231 get_stonith_flag(flag_name));
3232 }
3233
3234 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3235 pcmk__set_request_flags(request, pcmk__request_reuse_options);
3236
3237 return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL,
3238 CRM_EX_OK);
3239 }
3240
3241 // STONITH_OP_RELAY
3242 static xmlNode *
3243 handle_relay_request(pcmk__request_t *request)
3244 {
3245 xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
3246 LOG_TRACE);
3247
3248 crm_notice("Received forwarded fencing request from "
3249 "%s %s to fence (%s) peer %s",
3250 pcmk__request_origin_type(request),
3251 pcmk__request_origin(request),
3252 crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION),
3253 crm_element_value(dev, PCMK__XA_ST_TARGET));
3254
3255 if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
3256 fenced_set_protocol_error(&request->result);
3257 return fenced_construct_reply(request->xml, NULL, &request->result);
3258 }
3259
3260 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
3261 return NULL;
3262 }
3263
3264 // STONITH_OP_FENCE
3265 static xmlNode *
3266 handle_fence_request(pcmk__request_t *request)
3267 {
3268 if (request->peer != NULL) {
3269 fence_locally(request->xml, &request->result);
3270
3271 } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
3272 switch (fenced_handle_manual_confirmation(request->ipc_client,
3273 request->xml)) {
3274 case pcmk_rc_ok:
3275 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
3276 NULL);
3277 break;
3278 case EINPROGRESS:
3279 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
3280 NULL);
3281 break;
3282 default:
3283 fenced_set_protocol_error(&request->result);
3284 break;
3285 }
3286
3287 } else {
3288 const char *alternate_host = NULL;
3289 xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
3290 LOG_TRACE);
3291 const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET);
3292 const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
3293 const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
3294
3295 if (request->ipc_client != NULL) {
3296 int tolerance = 0;
3297
3298 crm_notice("Client %s wants to fence (%s) %s using %s",
3299 pcmk__request_origin(request), action,
3300 target, (device? device : "any device"));
3301 crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
3302 if (stonith_check_fence_tolerance(tolerance, target, action)) {
3303 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
3304 NULL);
3305 return fenced_construct_reply(request->xml, NULL,
3306 &request->result);
3307 }
3308 alternate_host = check_alternate_host(target);
3309
3310 } else {
3311 crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
3312 request->peer, action, target,
3313 (device == NULL)? "(any)" : device);
3314 }
3315
3316 if (alternate_host != NULL) {
3317 const char *client_id = NULL;
3318 remote_fencing_op_t *op = NULL;
3319 pcmk__node_status_t *node =
3320 pcmk__get_node(0, alternate_host, NULL,
3321 pcmk__node_search_cluster_member);
3322
3323 if (request->ipc_client->id == 0) {
3324 client_id = crm_element_value(request->xml,
3325 PCMK__XA_ST_CLIENTID);
3326 } else {
3327 client_id = request->ipc_client->id;
3328 }
3329
3330 /* Create a duplicate fencing operation to relay with the client ID.
3331 * When a query response is received, this operation should be
3332 * deleted to avoid keeping the duplicate around.
3333 */
3334 op = create_remote_stonith_op(client_id, request->xml, FALSE);
3335
3336 crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
3337 crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID,
3338 request->ipc_client->id);
3339 crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
3340
3341 // @TODO On failure, fail request immediately, or maybe panic
3342 pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml);
3343
3344 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
3345 NULL);
3346
3347 } else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
3348 FALSE) == NULL) {
3349 fenced_set_protocol_error(&request->result);
3350
3351 } else {
3352 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
3353 NULL);
3354 }
3355 }
3356
3357 if (request->result.execution_status == PCMK_EXEC_PENDING) {
3358 return NULL;
3359 }
3360 return fenced_construct_reply(request->xml, NULL, &request->result);
3361 }
3362
3363 // STONITH_OP_FENCE_HISTORY
3364 static xmlNode *
3365 handle_history_request(pcmk__request_t *request)
3366 {
3367 xmlNode *reply = NULL;
3368 xmlNode *data = NULL;
3369
3370 stonith_fence_history(request->xml, &data, request->peer,
3371 request->call_options);
3372 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3373 if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) {
3374 /* When the local node broadcasts its history, it sets
3375 * st_opt_discard_reply and doesn't need a reply.
3376 */
3377 reply = fenced_construct_reply(request->xml, data, &request->result);
3378 }
3379 pcmk__xml_free(data);
3380 return reply;
3381 }
3382
3383 // STONITH_OP_DEVICE_ADD
3384 static xmlNode *
3385 handle_device_add_request(pcmk__request_t *request)
3386 {
3387 const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
3388 xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
3389 LOG_ERR);
3390
3391 if (is_privileged(request->ipc_client, op)) {
3392 int rc = stonith_device_register(dev, FALSE);
3393
3394 pcmk__set_result(&request->result,
3395 ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
3396 stonith__legacy2status(rc),
3397 ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
3398 } else {
3399 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3400 PCMK_EXEC_INVALID,
3401 "Unprivileged users must register device via CIB");
3402 }
3403 fenced_send_config_notification(op, &request->result,
3404 (dev == NULL)? NULL : pcmk__xe_id(dev));
3405 return fenced_construct_reply(request->xml, NULL, &request->result);
3406 }
3407
3408 // STONITH_OP_DEVICE_DEL
3409 static xmlNode *
3410 handle_device_delete_request(pcmk__request_t *request)
3411 {
3412 xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
3413 LOG_ERR);
3414 const char *device_id = crm_element_value(dev, PCMK_XA_ID);
3415 const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
3416
3417 if (is_privileged(request->ipc_client, op)) {
3418 stonith_device_remove(device_id, false);
3419 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3420 } else {
3421 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3422 PCMK_EXEC_INVALID,
3423 "Unprivileged users must delete device via CIB");
3424 }
3425 fenced_send_config_notification(op, &request->result, device_id);
3426 return fenced_construct_reply(request->xml, NULL, &request->result);
3427 }
3428
3429 // STONITH_OP_LEVEL_ADD
3430 static xmlNode *
3431 handle_level_add_request(pcmk__request_t *request)
3432 {
3433 char *desc = NULL;
3434 const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
3435
3436 if (is_privileged(request->ipc_client, op)) {
3437 fenced_register_level(request->xml, &desc, &request->result);
3438 } else {
3439 unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
3440 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3441 PCMK_EXEC_INVALID,
3442 "Unprivileged users must add level via CIB");
3443 }
3444 fenced_send_config_notification(op, &request->result, desc);
3445 free(desc);
3446 return fenced_construct_reply(request->xml, NULL, &request->result);
3447 }
3448
3449 // STONITH_OP_LEVEL_DEL
3450 static xmlNode *
3451 handle_level_delete_request(pcmk__request_t *request)
3452 {
3453 char *desc = NULL;
3454 const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
3455
3456 if (is_privileged(request->ipc_client, op)) {
3457 fenced_unregister_level(request->xml, &desc, &request->result);
3458 } else {
3459 unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
3460 pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
3461 PCMK_EXEC_INVALID,
3462 "Unprivileged users must delete level via CIB");
3463 }
3464 fenced_send_config_notification(op, &request->result, desc);
3465 free(desc);
3466 return fenced_construct_reply(request->xml, NULL, &request->result);
3467 }
3468
3469 // CRM_OP_RM_NODE_CACHE
3470 static xmlNode *
3471 handle_cache_request(pcmk__request_t *request)
3472 {
3473 int node_id = 0;
3474 const char *name = NULL;
3475
3476 crm_element_value_int(request->xml, PCMK_XA_ID, &node_id);
3477 name = crm_element_value(request->xml, PCMK_XA_UNAME);
3478 pcmk__cluster_forget_cluster_node(node_id, name);
3479 pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
3480 return NULL;
3481 }
3482
3483 static xmlNode *
3484 handle_unknown_request(pcmk__request_t *request)
3485 {
3486 crm_err("Unknown IPC request %s from %s %s",
3487 request->op, pcmk__request_origin_type(request),
3488 pcmk__request_origin(request));
3489 pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
3490 "Unknown IPC request type '%s' (bug?)", request->op);
3491 return fenced_construct_reply(request->xml, NULL, &request->result);
3492 }
3493
3494 static void
3495 fenced_register_handlers(void)
3496 {
3497 pcmk__server_command_t handlers[] = {
3498 { CRM_OP_REGISTER, handle_register_request },
3499 { STONITH_OP_EXEC, handle_agent_request },
3500 { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
3501 { STONITH_OP_QUERY, handle_query_request },
3502 { STONITH_OP_NOTIFY, handle_notify_request },
3503 { STONITH_OP_RELAY, handle_relay_request },
3504 { STONITH_OP_FENCE, handle_fence_request },
3505 { STONITH_OP_FENCE_HISTORY, handle_history_request },
3506 { STONITH_OP_DEVICE_ADD, handle_device_add_request },
3507 { STONITH_OP_DEVICE_DEL, handle_device_delete_request },
3508 { STONITH_OP_LEVEL_ADD, handle_level_add_request },
3509 { STONITH_OP_LEVEL_DEL, handle_level_delete_request },
3510 { CRM_OP_RM_NODE_CACHE, handle_cache_request },
3511 { NULL, handle_unknown_request },
3512 };
3513
3514 fenced_handlers = pcmk__register_handlers(handlers);
3515 }
3516
3517 void
3518 fenced_unregister_handlers(void)
3519 {
3520 if (fenced_handlers != NULL) {
3521 g_hash_table_destroy(fenced_handlers);
3522 fenced_handlers = NULL;
3523 }
3524 }
3525
3526 static void
3527 handle_request(pcmk__request_t *request)
3528 {
3529 xmlNode *reply = NULL;
3530 const char *reason = NULL;
3531
3532 if (fenced_handlers == NULL) {
3533 fenced_register_handlers();
3534 }
3535 reply = pcmk__process_request(request, fenced_handlers);
3536 if (reply != NULL) {
3537 if (pcmk_is_set(request->flags, pcmk__request_reuse_options)
3538 && (request->ipc_client != NULL)) {
3539 /* Certain IPC-only commands must reuse the call options from the
3540 * original request rather than the ones set by stonith_send_reply()
3541 * -> do_local_reply().
3542 */
3543 pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
3544 request->ipc_flags);
3545 request->ipc_client->request_id = 0;
3546 } else {
3547 stonith_send_reply(reply, request->call_options,
3548 request->peer, request->ipc_client);
3549 }
3550 pcmk__xml_free(reply);
3551 }
3552
3553 reason = request->result.exit_reason;
3554 crm_debug("Processed %s request from %s %s: %s%s%s%s",
3555 request->op, pcmk__request_origin_type(request),
3556 pcmk__request_origin(request),
3557 pcmk_exec_status_str(request->result.execution_status),
3558 (reason == NULL)? "" : " (",
3559 (reason == NULL)? "" : reason,
3560 (reason == NULL)? "" : ")");
3561 }
3562
3563 static void
3564 handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
3565 {
3566 // Copy, because request might be freed before we want to log this
3567 char *op = crm_element_value_copy(request, PCMK__XA_ST_OP);
3568
3569 if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
3570 process_remote_stonith_query(request);
3571
3572 } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE,
3573 NULL)) {
3574 fenced_process_fencing_reply(request);
3575
3576 } else {
3577 crm_err("Ignoring unknown %s reply from %s %s",
3578 pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
3579 ((client == NULL)? remote_peer : pcmk__client_name(client)));
3580 crm_log_xml_warn(request, "UnknownOp");
3581 free(op);
3582 return;
3583 }
3584 crm_debug("Processed %s reply from %s %s",
3585 op, ((client == NULL)? "peer" : "client"),
3586 ((client == NULL)? remote_peer : pcmk__client_name(client)));
3587 free(op);
3588 }
3589
3590 /*!
3591 * \internal
3592 * \brief Handle a message from an IPC client or CPG peer
3593 *
3594 * \param[in,out] client If not NULL, IPC client that sent message
3595 * \param[in] id If from IPC client, IPC message ID
3596 * \param[in] flags Message flags
3597 * \param[in,out] message Message XML
3598 * \param[in] remote_peer If not NULL, CPG peer that sent message
3599 */
3600 void
3601 stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
3602 xmlNode *message, const char *remote_peer)
3603 {
3604 uint32_t call_options = st_opt_none;
3605 int rc = pcmk_rc_ok;
3606 bool is_reply = false;
3607
3608 CRM_CHECK(message != NULL, return);
3609
3610 if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) {
3611 is_reply = true;
3612 }
3613
3614 rc = pcmk__xe_get_flags(message, PCMK__XA_ST_CALLOPT, &call_options,
3615 st_opt_none);
3616 if (rc != pcmk_rc_ok) {
3617 crm_warn("Couldn't parse options from message: %s", pcmk_rc_str(rc));
3618 }
3619
3620 crm_debug("Processing %ssynchronous %s %s %u from %s %s",
3621 pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
3622 crm_element_value(message, PCMK__XA_ST_OP),
3623 (is_reply? "reply" : "request"), id,
3624 ((client == NULL)? "peer" : "client"),
3625 ((client == NULL)? remote_peer : pcmk__client_name(client)));
3626
3627 if (pcmk_is_set(call_options, st_opt_sync_call)) {
3628 pcmk__assert((client == NULL) || (client->request_id == id));
3629 }
3630
3631 if (is_reply) {
3632 handle_reply(client, message, remote_peer);
3633 } else {
3634 pcmk__request_t request = {
3635 .ipc_client = client,
3636 .ipc_id = id,
3637 .ipc_flags = flags,
3638 .peer = remote_peer,
3639 .xml = message,
3640 .call_options = call_options,
3641 .result = PCMK__UNKNOWN_RESULT,
3642 };
3643
3644 request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP);
3645 CRM_CHECK(request.op != NULL, return);
3646
3647 if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
3648 pcmk__set_request_flags(&request, pcmk__request_sync);
3649 }
3650
3651 handle_request(&request);
3652 pcmk__reset_request(&request);
3653 }
3654 }
3655