1 /*
2 * Copyright 2012-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
#include <crm_internal.h>

#include <limits.h>                 // INT_MAX, INT_MIN
#include <stdbool.h>

#include <crm/fencing/internal.h>

#include <glib.h>
#include <libxml/tree.h>            // xmlNode

// Check whether we have a high-resolution monotonic clock
#undef PCMK__TIME_USE_CGT
#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
#  define PCMK__TIME_USE_CGT
#  include <time.h>   /* clock_gettime */
#endif

#include <unistd.h>

#include <crm/crm.h>
#include <crm/fencing/internal.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>

#include "pacemaker-execd.h"
37
38 GHashTable *rsc_list = NULL;
39
40 typedef struct {
41 int timeout;
42 guint interval_ms;
43 int start_delay;
44 int timeout_orig;
45
46 int call_id;
47
48 int call_opts;
49 /* Timer ids, must be removed on cmd destruction. */
50 int delay_id;
51 int stonith_recurring_id;
52
53 int rsc_deleted;
54
55 int service_flags;
56
57 char *client_id;
58 char *origin;
59 char *rsc_id;
60 char *action;
61 char *real_action;
62 char *userdata_str;
63
64 pcmk__action_result_t result;
65
66 /* We can track operation queue time and run time, to be saved with the CIB
67 * resource history (and displayed in cluster status). We need
68 * high-resolution monotonic time for this purpose, so we use
69 * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
70 * is disabled).
71 *
72 * However, we also need epoch timestamps for recording the time the command
73 * last ran and the time its return value last changed, for use in time
74 * displays (as opposed to interval calculations). We keep time_t values for
75 * this purpose.
76 *
77 * The last run time is used for both purposes, so we keep redundant
78 * monotonic and epoch values for this. Technically the two could represent
79 * different times, but since time_t has only second resolution and the
80 * values are used for distinct purposes, that is not significant.
81 */
82 #ifdef PCMK__TIME_USE_CGT
83 /* Recurring and systemd operations may involve more than one executor
84 * command per operation, so they need info about the original and the most
85 * recent.
86 */
87 struct timespec t_first_run; // When op first ran
88 struct timespec t_run; // When op most recently ran
89 struct timespec t_first_queue; // When op was first queued
90 struct timespec t_queue; // When op was most recently queued
91 #endif
92 time_t epoch_last_run; // Epoch timestamp of when op last ran
93 time_t epoch_rcchange; // Epoch timestamp of when rc last changed
94
95 bool first_notify_sent;
96 int last_notify_rc;
97 int last_notify_op_status;
98 int last_pid;
99
100 GHashTable *params;
101 } lrmd_cmd_t;
102
103 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
104 static gboolean execute_resource_action(gpointer user_data);
105 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
106
107 #ifdef PCMK__TIME_USE_CGT
108
/*!
 * \internal
 * \brief Check whether a struct timespec has been set
 *
 * \param[in] timespec  Time to check (may be NULL)
 *
 * \return true if \p timespec is non-NULL and nonzero, otherwise false
 */
static inline bool
time_is_set(const struct timespec *timespec)
{
    if (timespec == NULL) {
        return false;
    }
    return (timespec->tv_sec != 0) || (timespec->tv_nsec != 0);
}

/*!
 * \internal
 * \brief Set a timespec (and its original if unset) to the current time
 *
 * \param[out]    t_current  Where to store current monotonic time
 * \param[in,out] t_orig     If not NULL and currently unset, receives a copy
 *                           of \p t_current
 *
 * \note If the clock cannot be read, \p t_current is zeroed (so that
 *       time_is_set() reports it as unset) and \p t_orig is left untouched,
 *       rather than copying a stale or indeterminate value into it.
 */
static void
get_current_time(struct timespec *t_current, struct timespec *t_orig)
{
    if (clock_gettime(CLOCK_MONOTONIC, t_current) != 0) {
        t_current->tv_sec = 0;
        t_current->tv_nsec = 0;
        return;
    }
    if ((t_orig != NULL) && !time_is_set(t_orig)) {
        *t_orig = *t_current;
    }
}

/*!
 * \internal
 * \brief Return difference between two times in milliseconds
 *
 * \param[in] now  More recent time (or NULL to use current time)
 * \param[in] old  Earlier time
 *
 * \return Milliseconds from \p old to \p now (0 if \p old is NULL or unset,
 *         or if \p now is NULL and the current time cannot be read)
 *
 * \note The difference is computed in a wide integer type and saturates at
 *       INT_MAX / INT_MIN, so intervals of roughly 24.8 days or more no longer
 *       wrap around (or overflow a 32-bit time_t multiplication).
 */
static int
time_diff_ms(const struct timespec *now, const struct timespec *old)
{
    struct timespec local_now = { 0, };
    long long diff_ms = 0;

    if (!time_is_set(old)) {
        return 0;
    }

    if (now == NULL) {
        if (clock_gettime(CLOCK_MONOTONIC, &local_now) != 0) {
            return 0;   // Current time unknown, so no meaningful difference
        }
        now = &local_now;
    }

    /* Seconds and nanoseconds are converted separately (each truncating
     * toward zero), exactly as before; only the arithmetic width changed.
     */
    diff_ms = ((long long) now->tv_sec - (long long) old->tv_sec) * 1000LL
              + ((long long) now->tv_nsec - (long long) old->tv_nsec)
                / 1000000LL;

    if (diff_ms > INT_MAX) {
        return INT_MAX;
    }
    if (diff_ms < INT_MIN) {
        return INT_MIN;
    }
    return (int) diff_ms;
}
169
170 /*!
171 * \internal
172 * \brief Reset a command's operation times to their original values.
173 *
174 * Reset a command's run and queued timestamps to the timestamps of the original
175 * command, so we report the entire time since then and not just the time since
176 * the most recent command (for recurring and systemd operations).
177 *
178 * \param[in,out] cmd Executor command object to reset
179 *
180 * \note It's not obvious what the queued time should be for a systemd
181 * start/stop operation, which might go like this:
182 * initial command queued 5ms, runs 3s
183 * monitor command queued 10ms, runs 10s
184 * monitor command queued 10ms, runs 10s
185 * Is the queued time for that operation 5ms, 10ms or 25ms? The current
186 * implementation will report 5ms. If it's 25ms, then we need to
187 * subtract 20ms from the total exec time so as not to count it twice.
188 * We can implement that later if it matters to anyone ...
189 */
190 static void
191 cmd_original_times(lrmd_cmd_t * cmd)
192 {
193 cmd->t_run = cmd->t_first_run;
194 cmd->t_queue = cmd->t_first_queue;
195 }
196 #endif
197
198 static inline bool
199 action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
200 {
201 return (cmd->interval_ms == interval_ms)
202 && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
203 }
204
205 /*!
206 * \internal
207 * \brief Log the result of an asynchronous command
208 *
209 * \param[in] cmd Command to log result for
210 * \param[in] exec_time_ms Execution time in milliseconds, if known
211 * \param[in] queue_time_ms Queue time in milliseconds, if known
212 */
213 static void
214 log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
215 {
216 int log_level = LOG_INFO;
217 GString *str = g_string_sized_new(100); // reasonable starting size
218
219 if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
220 log_level = LOG_DEBUG;
221 }
222
223 g_string_append_printf(str, "%s %s (call %d",
224 cmd->rsc_id, cmd->action, cmd->call_id);
225 if (cmd->last_pid != 0) {
226 g_string_append_printf(str, ", PID %d", cmd->last_pid);
227 }
228 switch (cmd->result.execution_status) {
229 case PCMK_EXEC_DONE:
230 g_string_append_printf(str, ") exited with status %d",
231 cmd->result.exit_status);
232 break;
233 case PCMK_EXEC_CANCELLED:
234 g_string_append_printf(str, ") cancelled");
235 break;
236 default:
237 pcmk__g_strcat(str, ") could not be executed: ",
238 pcmk_exec_status_str(cmd->result.execution_status),
239 NULL);
240 break;
241 }
242 if (cmd->result.exit_reason != NULL) {
243 pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
244 }
245
246 #ifdef PCMK__TIME_USE_CGT
247 pcmk__g_strcat(str, " (execution time ",
248 pcmk__readable_interval(exec_time_ms), NULL);
249 if (queue_time_ms > 0) {
250 pcmk__g_strcat(str, " after being queued ",
251 pcmk__readable_interval(queue_time_ms), NULL);
252 }
253 g_string_append_c(str, ')');
254 #endif
255
256 do_crm_log(log_level, "%s", str->str);
257 g_string_free(str, TRUE);
258 }
259
260 static void
261 log_execute(lrmd_cmd_t * cmd)
262 {
263 int log_level = LOG_INFO;
264
265 if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
266 log_level = LOG_DEBUG;
267 }
268
269 do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
270 cmd->rsc_id, cmd->action, cmd->call_id);
271 }
272
273 static const char *
274 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
275 {
276 if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) &&
277 pcmk__is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
278 return PCMK_ACTION_STATUS;
279 }
280 return action;
281 }
282
283 static lrmd_rsc_t *
284 build_rsc_from_xml(xmlNode * msg)
285 {
286 xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
287 LOG_ERR);
288 lrmd_rsc_t *rsc = NULL;
289
290 rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t));
291
292 pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts);
293
294 rsc->rsc_id = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
295 rsc->class = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_CLASS);
296 rsc->provider = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER);
297 rsc->type = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_TYPE);
298 rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
299 rsc);
300
301 // Initialize fence device probes (to return "not running")
302 pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
303 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
304 return rsc;
305 }
306
307 static lrmd_cmd_t *
308 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
309 {
310 int call_options = 0;
311 xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
312 LOG_ERR);
313 lrmd_cmd_t *cmd = NULL;
314
315 cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t));
316
317 pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options);
318 cmd->call_opts = call_options;
319 cmd->client_id = pcmk__str_copy(client->id);
320
321 pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id);
322 pcmk__xe_get_guint(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &cmd->interval_ms);
323 pcmk__xe_get_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout);
324 pcmk__xe_get_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY, &cmd->start_delay);
325 cmd->timeout_orig = cmd->timeout;
326
327 cmd->origin = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN);
328 cmd->action = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
329 cmd->userdata_str = pcmk__xe_get_copy(rsc_xml,
330 PCMK__XA_LRMD_RSC_USERDATA_STR);
331 cmd->rsc_id = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
332
333 cmd->params = xml2list(rsc_xml);
334
335 if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"),
336 PCMK_VALUE_BLOCK, pcmk__str_casei)) {
337 pcmk__debug("Setting flag to leave pid group on timeout and only kill "
338 "action pid for " PCMK__OP_FMT,
339 cmd->rsc_id, cmd->action, cmd->interval_ms);
340 cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
341 LOG_TRACE, "Action",
342 cmd->action, 0,
343 SVC_ACTION_LEAVE_GROUP,
344 "SVC_ACTION_LEAVE_GROUP");
345 }
346 return cmd;
347 }
348
349 static void
350 stop_recurring_timer(lrmd_cmd_t *cmd)
351 {
352 if (cmd) {
353 if (cmd->stonith_recurring_id) {
354 g_source_remove(cmd->stonith_recurring_id);
355 }
356 cmd->stonith_recurring_id = 0;
357 }
358 }
359
360 static void
361 free_lrmd_cmd(lrmd_cmd_t * cmd)
362 {
363 stop_recurring_timer(cmd);
364 if (cmd->delay_id) {
365 g_source_remove(cmd->delay_id);
366 }
367
368 g_clear_pointer(&cmd->params, g_hash_table_destroy);
369
370 pcmk__reset_result(&(cmd->result));
371 free(cmd->origin);
372 free(cmd->action);
373 free(cmd->real_action);
374 free(cmd->userdata_str);
375 free(cmd->rsc_id);
376 free(cmd->client_id);
377 free(cmd);
378 }
379
380 static gboolean
381 stonith_recurring_op_helper(gpointer data)
382 {
383 lrmd_cmd_t *cmd = data;
384 lrmd_rsc_t *rsc;
385
386 cmd->stonith_recurring_id = 0;
387
388 if (!cmd->rsc_id) {
389 return FALSE;
390 }
391
392 rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
393
394 pcmk__assert(rsc != NULL);
395 /* take it out of recurring_ops list, and put it in the pending ops
396 * to be executed */
397 rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
398 rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
399 #ifdef PCMK__TIME_USE_CGT
400 get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
401 #endif
402 mainloop_set_trigger(rsc->work);
403
404 return FALSE;
405 }
406
407 static inline void
408 start_recurring_timer(lrmd_cmd_t *cmd)
409 {
410 if (!cmd || (cmd->interval_ms <= 0)) {
411 return;
412 }
413
414 cmd->stonith_recurring_id = pcmk__create_timer(cmd->interval_ms,
415 stonith_recurring_op_helper,
416 cmd);
417 }
418
419 static gboolean
420 start_delay_helper(gpointer data)
421 {
422 lrmd_cmd_t *cmd = data;
423 lrmd_rsc_t *rsc = NULL;
424
425 cmd->delay_id = 0;
426 rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
427
428 if (rsc) {
429 mainloop_set_trigger(rsc->work);
430 }
431
432 return FALSE;
433 }
434
435 /*!
436 * \internal
437 * \brief Check whether a list already contains the equivalent of a given action
438 *
439 * \param[in] action_list List to search
440 * \param[in] cmd Action to search for
441 */
442 static lrmd_cmd_t *
443 find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
444 {
445 for (const GList *item = action_list; item != NULL; item = item->next) {
446 lrmd_cmd_t *dup = item->data;
447
448 if (action_matches(cmd, dup->action, dup->interval_ms)) {
449 return dup;
450 }
451 }
452 return NULL;
453 }
454
455 static bool
456 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
457 {
458 lrmd_cmd_t * dup = NULL;
459 bool dup_pending = true;
460
|
(1) Event path: |
Condition "cmd->interval_ms == 0", taking false branch. |
461 if (cmd->interval_ms == 0) {
462 return false;
463 }
464
465 // Search for a duplicate of this action (in-flight or not)
466 dup = find_duplicate_action(rsc->pending_ops, cmd);
|
(2) Event path: |
Condition "dup == NULL", taking true branch. |
467 if (dup == NULL) {
468 dup_pending = false;
469 dup = find_duplicate_action(rsc->recurring_ops, cmd);
|
(3) Event path: |
Condition "dup == NULL", taking false branch. |
470 if (dup == NULL) {
471 return false;
472 }
473 }
474
475 /* Do not merge fencing monitors marked for cancellation, so we can reply to
476 * the cancellation separately.
477 */
|
(4) Event path: |
Condition "pcmk__str_eq(rsc->class, "stonith", pcmk__str_casei)", taking true branch. |
|
(5) Event path: |
Condition "dup->result.execution_status == PCMK_EXEC_CANCELLED", taking false branch. |
478 if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
479 pcmk__str_casei)
480 && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
481 return false;
482 }
483
484 /* This should not occur. If it does, we need to investigate how something
485 * like this is possible in the controller.
486 */
487 pcmk__warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), "
488 "merging with previous op entry",
489 rsc->rsc_id, normalize_action_name(rsc, dup->action),
490 dup->interval_ms);
491
492 // Merge new action's call ID and user data into existing action
493 dup->first_notify_sent = false;
494 free(dup->userdata_str);
495 dup->userdata_str = cmd->userdata_str;
496 cmd->userdata_str = NULL;
497 dup->call_id = cmd->call_id;
|
CID (unavailable; MK=2171ab3422718d6cb1ce46fa97ba7706) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(6) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(7) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
498 g_clear_pointer(&cmd, free_lrmd_cmd);
499
500 /* If dup is not pending, that means it has already executed at least once
501 * and is waiting in the interval. In that case, stop waiting and initiate
502 * a new instance now.
503 */
504 if (!dup_pending) {
505 if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
506 pcmk__str_casei)) {
507 stop_recurring_timer(dup);
508 stonith_recurring_op_helper(dup);
509 } else {
510 services_action_kick(rsc->rsc_id,
511 normalize_action_name(rsc, dup->action),
512 dup->interval_ms);
513 }
514 }
515 return true;
516 }
517
518 static void
519 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
520 {
521 CRM_CHECK(cmd != NULL, return);
522 CRM_CHECK(rsc != NULL, return);
523
524 pcmk__trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
525
526 if (merge_recurring_duplicate(rsc, cmd)) {
527 // Equivalent of cmd has already been scheduled
528 return;
529 }
530
531 /* The controller expects the executor to automatically cancel
532 * recurring operations before a resource stops.
533 */
534 if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
535 cancel_all_recurring(rsc, NULL);
536 }
537
538 rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
539 #ifdef PCMK__TIME_USE_CGT
540 get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
541 #endif
542 mainloop_set_trigger(rsc->work);
543
544 if (cmd->start_delay) {
545 cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd);
546 }
547 }
548
549 xmlNode *
550 execd_create_reply_as(const char *origin, int rc, int call_id)
551 {
552 xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY);
553
554 pcmk__xe_set(reply, PCMK__XA_LRMD_ORIGIN, origin);
555 pcmk__xe_set_int(reply, PCMK__XA_LRMD_RC, rc);
556 pcmk__xe_set_int(reply, PCMK__XA_LRMD_CALLID, call_id);
557 return reply;
558 }
559
560 static void
561 send_client_notify(gpointer key, gpointer value, gpointer user_data)
562 {
563 xmlNode *update_msg = user_data;
564 pcmk__client_t *client = value;
565 int rc;
566 int log_level = LOG_WARNING;
567 const char *msg = NULL;
568
569 CRM_CHECK(client != NULL, return);
570 if (client->name == NULL) {
571 pcmk__trace("Skipping notification to client without name");
572 return;
573 }
574 if (pcmk__is_set(client->flags, pcmk__client_to_proxy)) {
575 /* We only want to notify clients of the executor IPC API. If we are
576 * running as Pacemaker Remote, we may have clients proxied to other
577 * IPC services in the cluster, so skip those.
578 */
579 pcmk__trace("Skipping executor API notification to client %s",
580 pcmk__client_name(client));
581 return;
582 }
583
584 rc = lrmd_server_send_notify(client, update_msg);
585 if (rc == pcmk_rc_ok) {
586 return;
587 }
588
589 switch (rc) {
590 case ENOTCONN:
591 case EPIPE: // Client exited without waiting for notification
592 log_level = LOG_INFO;
593 msg = "Disconnected";
594 break;
595
596 default:
597 msg = pcmk_rc_str(rc);
598 break;
599 }
600 do_crm_log(log_level, "Could not notify client %s: %s " QB_XS " rc=%d",
601 pcmk__client_name(client), msg, rc);
602 }
603
604 static void
605 send_cmd_complete_notify(lrmd_cmd_t * cmd)
606 {
607 xmlNode *notify = NULL;
608 int exec_time = 0;
609 int queue_time = 0;
610
611 #ifdef PCMK__TIME_USE_CGT
612 exec_time = time_diff_ms(NULL, &(cmd->t_run));
613 queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
614 #endif
615 log_finished(cmd, exec_time, queue_time);
616
617 /* If the originator requested to be notified only for changes in recurring
618 * operation results, skip the notification if the result hasn't changed.
619 */
620 if (cmd->first_notify_sent
621 && pcmk__is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
622 && (cmd->last_notify_rc == cmd->result.exit_status)
623 && (cmd->last_notify_op_status == cmd->result.execution_status)) {
624 return;
625 }
626
627 cmd->first_notify_sent = true;
628 cmd->last_notify_rc = cmd->result.exit_status;
629 cmd->last_notify_op_status = cmd->result.execution_status;
630
631 notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
632
633 pcmk__xe_set(notify, PCMK__XA_LRMD_ORIGIN, __func__);
634 pcmk__xe_set_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout);
635 pcmk__xe_set_guint(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
636 pcmk__xe_set_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay);
637 pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status);
638 pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS,
639 cmd->result.execution_status);
640 pcmk__xe_set_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id);
641 pcmk__xe_set_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted);
642
643 pcmk__xe_set_time(notify, PCMK__XA_LRMD_RUN_TIME, cmd->epoch_last_run);
644 pcmk__xe_set_time(notify, PCMK__XA_LRMD_RCCHANGE_TIME, cmd->epoch_rcchange);
645 #ifdef PCMK__TIME_USE_CGT
646 pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time);
647 pcmk__xe_set_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time);
648 #endif
649
650 pcmk__xe_set(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC);
651 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id);
652 if(cmd->real_action) {
653 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action);
654 } else {
655 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action);
656 }
657 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
658 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
659
660 if (cmd->result.action_stderr != NULL) {
661 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_OUTPUT,
662 cmd->result.action_stderr);
663
664 } else if (cmd->result.action_stdout != NULL) {
665 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_OUTPUT,
666 cmd->result.action_stdout);
667 }
668
669 if (cmd->params) {
670 char *key = NULL;
671 char *value = NULL;
672 GHashTableIter iter;
673
674 xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES);
675
676 g_hash_table_iter_init(&iter, cmd->params);
677 while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
678 hash2smartfield((gpointer) key, (gpointer) value, args);
679 }
680 }
681 if ((cmd->client_id != NULL)
682 && pcmk__is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
683
684 pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
685
686 if (client != NULL) {
687 send_client_notify(client->id, client, notify);
688 }
689 } else {
690 pcmk__foreach_ipc_client(send_client_notify, notify);
691 }
692
693 pcmk__xml_free(notify);
694 }
695
696 void
697 execd_send_generic_notify(int rc, xmlNode *request)
698 {
699 if (pcmk__ipc_client_count() != 0) {
700 int call_id = 0;
701 xmlNode *notify = NULL;
702 xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
703 "//" PCMK__XE_LRMD_RSC,
704 LOG_ERR);
705 const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
706 const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP);
707
708 pcmk__xe_get_int(request, PCMK__XA_LRMD_CALLID, &call_id);
709
710 notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
711 pcmk__xe_set(notify, PCMK__XA_LRMD_ORIGIN, __func__);
712 pcmk__xe_set_int(notify, PCMK__XA_LRMD_RC, rc);
713 pcmk__xe_set_int(notify, PCMK__XA_LRMD_CALLID, call_id);
714 pcmk__xe_set(notify, PCMK__XA_LRMD_OP, op);
715 pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ID, rsc_id);
716
717 pcmk__foreach_ipc_client(send_client_notify, notify);
718
719 pcmk__xml_free(notify);
720 }
721 }
722
723 static void
724 cmd_reset(lrmd_cmd_t * cmd)
725 {
726 cmd->last_pid = 0;
727 #ifdef PCMK__TIME_USE_CGT
728 memset(&cmd->t_run, 0, sizeof(cmd->t_run));
729 memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
730 #endif
731 cmd->epoch_last_run = 0;
732
733 pcmk__reset_result(&(cmd->result));
734 cmd->result.execution_status = PCMK_EXEC_DONE;
735 }
736
737 static void
738 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
739 {
740 pcmk__trace("Resource operation rsc:%s action:%s completed (%p %p)",
741 cmd->rsc_id, cmd->action, ((rsc != NULL)? rsc->active : NULL),
742 cmd);
743
744 if (rsc && (rsc->active == cmd)) {
745 rsc->active = NULL;
746 mainloop_set_trigger(rsc->work);
747 }
748
749 if (!rsc) {
750 cmd->rsc_deleted = 1;
751 }
752
753 /* reset original timeout so client notification has correct information */
754 cmd->timeout = cmd->timeout_orig;
755
756 send_cmd_complete_notify(cmd);
757
758 if ((cmd->interval_ms != 0)
759 && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
760
761 if (rsc) {
762 rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
763 rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
764 }
765 free_lrmd_cmd(cmd);
766 } else if (cmd->interval_ms == 0) {
767 if (rsc) {
768 rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
769 }
770 free_lrmd_cmd(cmd);
771 } else {
772 /* Clear all the values pertaining just to the last iteration of a recurring op. */
773 cmd_reset(cmd);
774 }
775 }
776
777 struct notify_new_client_data {
778 xmlNode *notify;
779 pcmk__client_t *new_client;
780 };
781
782 static void
783 notify_one_client(gpointer key, gpointer value, gpointer user_data)
784 {
785 pcmk__client_t *client = value;
786 struct notify_new_client_data *data = user_data;
787
788 if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
789 send_client_notify(key, (gpointer) client, (gpointer) data->notify);
790 }
791 }
792
793 void
794 notify_of_new_client(pcmk__client_t *new_client)
795 {
796 struct notify_new_client_data data;
797
798 data.new_client = new_client;
799 data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
800 pcmk__xe_set(data.notify, PCMK__XA_LRMD_ORIGIN, __func__);
801 pcmk__xe_set(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT);
802 pcmk__foreach_ipc_client(notify_one_client, &data);
803 pcmk__xml_free(data.notify);
804 }
805
806 void
807 client_disconnect_cleanup(const char *client_id)
808 {
809 GHashTableIter iter;
810 lrmd_rsc_t *rsc = NULL;
811 char *key = NULL;
812
813 g_hash_table_iter_init(&iter, rsc_list);
814 while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
815 if (pcmk__is_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
816 /* This client is disconnecting, drop any recurring operations
817 * it may have initiated on the resource */
818 cancel_all_recurring(rsc, client_id);
819 }
820 }
821 }
822
823 static void
824 action_complete(svc_action_t * action)
825 {
826 lrmd_rsc_t *rsc;
827 lrmd_cmd_t *cmd = action->cb_data;
828 enum ocf_exitcode code;
829
830 #ifdef PCMK__TIME_USE_CGT
831 const char *rclass = NULL;
832 bool goagain = false;
833 int time_sum = 0;
834 int timeout_left = 0;
835 int delay = 0;
836 #endif
837
838 if (!cmd) {
839 pcmk__err("Completed executor action (%s) does not match any known "
840 "operations",
841 action->id);
842 return;
843 }
844
845 #ifdef PCMK__TIME_USE_CGT
846 if (cmd->result.exit_status != action->rc) {
847 cmd->epoch_rcchange = time(NULL);
848 }
849 #endif
850
851 cmd->last_pid = action->pid;
852
853 // Cast variable instead of function return to keep compilers happy
854 code = services_result2ocf(action->standard, cmd->action, action->rc);
855 pcmk__set_result(&(cmd->result), (int) code,
856 action->status, services__exit_reason(action));
857
858 rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
859
860 #ifdef PCMK__TIME_USE_CGT
861 if (rsc != NULL) {
862 rclass = rsc->class;
863 #if PCMK__ENABLE_SERVICE
864 if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE,
865 pcmk__str_casei)) {
866 rclass = resources_find_service_class(rsc->type);
867 }
868 #endif
869 }
870
871 if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
872 goto finalize;
873 }
874
875 if (pcmk__result_ok(&(cmd->result))
876 && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
877 PCMK_ACTION_STOP, NULL)) {
878 /* Getting results for when a start or stop action completes is now
879 * handled by watching for JobRemoved() signals from systemd and
880 * reacting to them. So, we can bypass the rest of the code in this
881 * function for those actions, and simply finalize cmd.
882 *
883 * @TODO When monitors are handled in the same way, this function
884 * can either be drastically simplified or done away with entirely.
885 */
886 services__copy_result(action, &(cmd->result));
887 goto finalize;
888
889 } else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
890 pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
891 cmd->interval_ms == 0 &&
892 cmd->real_action == NULL) {
893 /* If the state is Pending at the time of probe, execute follow-up monitor. */
894 goagain = true;
895 cmd->real_action = cmd->action;
896 cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
897 } else if (cmd->real_action != NULL) {
898 // This is follow-up monitor to check whether start/stop/probe(monitor) completed
899 if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
900 goagain = true;
901
902 } else if (pcmk__result_ok(&(cmd->result))
903 && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
904 pcmk__str_casei)) {
905 goagain = true;
906
907 } else {
908 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
909 int timeout_left = cmd->timeout_orig - time_sum;
910
911 pcmk__debug("%s systemd %s is now complete (elapsed=%dms, "
912 "remaining=%dms): %s (%d)",
913 cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
914 crm_exit_str(cmd->result.exit_status),
915 cmd->result.exit_status);
916 cmd_original_times(cmd);
917
918 // Monitors may return "not running", but start/stop shouldn't
919 if ((cmd->result.execution_status == PCMK_EXEC_DONE)
920 && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
921
922 if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
923 pcmk__str_casei)) {
924 cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
925 } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
926 pcmk__str_casei)) {
927 cmd->result.exit_status = PCMK_OCF_OK;
928 }
929 }
930 }
931 } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)
932 && (cmd->interval_ms > 0)) {
933 /* For monitors, excluding follow-up monitors, */
934 /* if the pending state persists from the first notification until its timeout, */
935 /* it will be treated as a timeout. */
936
937 if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
938 (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {
939 int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));
940
941 if (time_left >= 0) {
942 pcmk__notice("Giving up on %s %s (rc=%d): monitor pending "
943 "timeout (first pending notification=%s "
944 "timeout=%dms)",
945 cmd->rsc_id, cmd->action, cmd->result.exit_status,
946 g_strchomp(ctime(&cmd->epoch_rcchange)),
947 cmd->timeout_orig);
948 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
949 PCMK_EXEC_TIMEOUT,
950 "Investigate reason for timeout, and adjust "
951 "configured operation timeout if necessary");
952 cmd_original_times(cmd);
953 }
954 }
955 }
956
957 if (!goagain) {
958 goto finalize;
959 }
960
961 time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
962 timeout_left = cmd->timeout_orig - time_sum;
963 delay = cmd->timeout_orig / 10;
964
965 if (delay >= timeout_left && timeout_left > 20) {
966 delay = timeout_left/2;
967 }
968
969 delay = QB_MIN(2000, delay);
970 if (delay < timeout_left) {
971 cmd->start_delay = delay;
972 cmd->timeout = timeout_left;
973
974 if (pcmk__result_ok(&(cmd->result))) {
975 pcmk__debug("%s %s may still be in progress: re-scheduling "
976 "(elapsed=%dms, remaining=%dms, start_delay=%dms)",
977 cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
978 delay);
979
980 } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
981 pcmk__info("%s %s is still in progress: re-scheduling "
982 "(elapsed=%dms, remaining=%dms, start_delay=%dms)",
983 cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
984
985 } else {
986 pcmk__notice("%s %s failed: %s: Re-scheduling (remaining timeout "
987 "%s) "
988 QB_XS " exitstatus=%d elapsed=%dms start_delay=%dms)",
989 cmd->rsc_id, cmd->action,
990 crm_exit_str(cmd->result.exit_status),
991 pcmk__readable_interval(timeout_left),
992 cmd->result.exit_status, time_sum, delay);
993 }
994
995 cmd_reset(cmd);
996 if (rsc) {
997 rsc->active = NULL;
998 }
999 schedule_lrmd_cmd(rsc, cmd);
1000
1001 /* Don't finalize cmd, we're not done with it yet */
1002 return;
1003
1004 } else {
1005 pcmk__notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, "
1006 "remaining=%dms)",
1007 cmd->rsc_id, pcmk__s(cmd->real_action, cmd->action),
1008 cmd->result.exit_status, time_sum, timeout_left);
1009 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1010 PCMK_EXEC_TIMEOUT,
1011 "Investigate reason for timeout, and adjust "
1012 "configured operation timeout if necessary");
1013 cmd_original_times(cmd);
1014 }
1015 #endif
1016
1017 finalize:
1018 pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
1019 services__grab_stderr(action));
1020 cmd_finalize(cmd, rsc);
1021 }
1022
1023 /*!
1024 * \internal
1025 * \brief Process the result of a fence device action (start, stop, or monitor)
1026 *
1027 * \param[in,out] cmd Fence device action that completed
1028 * \param[in] exit_status Fencer API exit status for action
1029 * \param[in] execution_status Fencer API execution status for action
1030 * \param[in] exit_reason Human-friendly detail, if action failed
1031 */
1032 static void
1033 fencing_rsc_action_complete(lrmd_cmd_t *cmd, int exit_status,
1034 enum pcmk_exec_status execution_status,
1035 const char *exit_reason)
1036 {
1037 // This can be NULL if resource was removed before command completed
1038 lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
1039
1040 // Simplify fencer exit status to uniform exit status
1041 if (exit_status != CRM_EX_OK) {
1042 exit_status = PCMK_OCF_UNKNOWN_ERROR;
1043 }
1044
1045 if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
1046 /* An in-flight fence action was cancelled. The execution status is
1047 * already correct, so don't overwrite it.
1048 */
1049 execution_status = PCMK_EXEC_CANCELLED;
1050
1051 } else {
1052 /* Some execution status codes have specific meanings for the fencer
1053 * that executor clients may not expect, so map them to a simple error
1054 * status.
1055 */
1056 switch (execution_status) {
1057 case PCMK_EXEC_NOT_CONNECTED:
1058 case PCMK_EXEC_INVALID:
1059 execution_status = PCMK_EXEC_ERROR;
1060 break;
1061
1062 case PCMK_EXEC_NO_FENCE_DEVICE:
1063 /* This should be possible only for probes in practice, but
1064 * interpret for all actions to be safe.
1065 */
1066 if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1067 pcmk__str_none)) {
1068 exit_status = PCMK_OCF_NOT_RUNNING;
1069
1070 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1071 pcmk__str_none)) {
1072 exit_status = PCMK_OCF_OK;
1073
1074 } else {
1075 exit_status = PCMK_OCF_NOT_INSTALLED;
1076 }
1077 execution_status = PCMK_EXEC_ERROR;
1078 break;
1079
1080 case PCMK_EXEC_NOT_SUPPORTED:
1081 exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
1082 break;
1083
1084 default:
1085 break;
1086 }
1087 }
1088
1089 pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
1090
1091 // Certain successful actions change the known state of the resource
1092 if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
1093
1094 if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1095 pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
1096 PCMK_EXEC_DONE, NULL); // "running"
1097
1098 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1099 pcmk__str_casei)) {
1100 pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1101 PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
1102 }
1103 }
1104
1105 /* The recurring timer should not be running at this point in any case, but
1106 * as a failsafe, stop it if it is.
1107 */
1108 stop_recurring_timer(cmd);
1109
1110 /* Reschedule this command if appropriate. If a recurring command is *not*
1111 * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1112 * not be removed from recurring_ops by cmd_finalize().
1113 */
1114 if (rsc && (cmd->interval_ms > 0)
1115 && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1116 start_recurring_timer(cmd);
1117 }
1118
1119 cmd_finalize(cmd, rsc);
1120 }
1121
1122 void
1123 execd_fencer_connection_failed(void)
1124 {
1125 GHashTableIter iter;
1126 lrmd_rsc_t *rsc = NULL;
1127
1128 pcmk__warn("Connection to fencer lost (any pending operations for fence "
1129 "devices will be considered failed)");
1130
1131 g_hash_table_iter_init(&iter, rsc_list);
1132 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
1133 if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1134 pcmk__str_none)) {
1135 continue;
1136 }
1137
1138 /* If we registered this fence device, we don't know whether the
1139 * fencer still has the registration or not. Cause future probes to
1140 * return an error until the resource is stopped or started
1141 * successfully. This is especially important if the controller also
1142 * went away (possibly due to a cluster layer restart) and won't
1143 * receive our client notification of any monitors finalized below.
1144 */
1145 if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
1146 pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1147 PCMK_EXEC_NOT_CONNECTED,
1148 "Lost connection to fencer");
1149 }
1150
1151 // Consider any active, pending, or recurring operations as failed
1152
1153 for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
1154 lrmd_cmd_t *cmd = op->data;
1155
1156 /* This won't free a recurring op but instead restart its timer.
1157 * If cmd is rsc->active, this will set rsc->active to NULL, so we
1158 * don't have to worry about finalizing it a second time below.
1159 */
1160 fencing_rsc_action_complete(cmd, CRM_EX_ERROR,
1161 PCMK_EXEC_NOT_CONNECTED,
1162 "Lost connection to fencer");
1163 }
1164
1165 if (rsc->active != NULL) {
1166 rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
1167 }
1168 while (rsc->pending_ops != NULL) {
1169 // This will free the op and remove it from rsc->pending_ops
1170 fencing_rsc_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
1171 CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1172 "Lost connection to fencer");
1173 }
1174 }
1175 }
1176
1177 /*!
1178 * \internal
1179 * \brief Execute a fencing resource "start" action
1180 *
1181 * Start a fencing resource by registering it with the fencer. (Fencing agents
1182 * don't have a start command.)
1183 *
1184 * \param[in,out] fencer_api Connection to fencer
1185 * \param[in] rsc Fencing resource to start
1186 * \param[in] cmd Start command to execute
1187 *
1188 * \return pcmk_ok on success, -errno otherwise
1189 */
1190 static int
1191 start_fencing_rsc(stonith_t *fencer_api, const lrmd_rsc_t *rsc,
1192 const lrmd_cmd_t *cmd)
1193 {
1194 char *key = NULL;
1195 char *value = NULL;
1196 stonith_key_value_t *device_params = NULL;
1197 int rc = pcmk_ok;
1198
1199 // Convert command parameters to fencer API key/values
1200 if (cmd->params) {
1201 GHashTableIter iter;
1202
1203 g_hash_table_iter_init(&iter, cmd->params);
1204 while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1205 device_params = stonith__key_value_add(device_params, key, value);
1206 }
1207 }
1208
1209 /* The fencer will automatically register devices via CIB notifications
1210 * when the CIB changes, but to avoid a possible race condition between
1211 * the fencer receiving the notification and the executor requesting that
1212 * resource, the executor registers the device as well. The fencer knows how
1213 * to handle duplicate registrations.
1214 */
1215 rc = fencer_api->cmds->register_device(fencer_api, st_opt_sync_call,
1216 cmd->rsc_id, rsc->provider,
1217 rsc->type, device_params);
1218
1219 stonith__key_value_freeall(device_params, true, true);
1220 return rc;
1221 }
1222
1223 /*!
1224 * \internal
1225 * \brief Execute a fencing resource "stop" action
1226 *
1227 * Stop a fencing resource by unregistering it with the fencer. (Fencing agents
1228 * don't have a stop command.)
1229 *
1230 * \param[in,out] fencer_api Connection to fencer
1231 * \param[in] rsc Fencing resource to stop
1232 *
1233 * \return pcmk_ok on success, -errno otherwise
1234 */
1235 static inline int
1236 stop_fencing_rsc(stonith_t *fencer_api, const lrmd_rsc_t *rsc)
1237 {
1238 /* @TODO Failure would indicate a problem communicating with fencer;
1239 * perhaps we should try reconnecting and retrying a few times?
1240 */
1241 return fencer_api->cmds->remove_device(fencer_api, st_opt_sync_call,
1242 rsc->rsc_id);
1243 }
1244
1245 static void
1246 fencing_rsc_monitor_cb(stonith_t *stonith, stonith_callback_data_t *data)
1247 {
1248 if ((data == NULL) || (data->userdata == NULL)) {
1249 pcmk__err("Ignoring fencing resource monitor result: "
1250 "Invalid callback arguments (bug?)");
1251 } else {
1252 fencing_rsc_action_complete((lrmd_cmd_t *) data->userdata,
1253 stonith__exit_status(data),
1254 stonith__execution_status(data),
1255 stonith__exit_reason(data));
1256 }
1257 }
1258
1259 /*!
1260 * \internal
1261 * \brief Initiate a fencing resource recurring "monitor" action
1262 *
1263 * \param[in,out] fencer_api Connection to fencer
1264 * \param[in,out] rsc Fencing resource to monitor
1265 * \param[in] cmd Monitor command being executed
1266 *
1267 * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1268 */
1269 static inline int
1270 monitor_fencing_rsc(stonith_t *fencer_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1271 {
1272 int rc = fencer_api->cmds->monitor(fencer_api, 0, cmd->rsc_id,
1273 pcmk__timeout_ms2s(cmd->timeout));
1274
1275 rc = fencer_api->cmds->register_callback(fencer_api, rc, 0, 0, cmd,
1276 "fencing_rsc_monitor_cb",
1277 fencing_rsc_monitor_cb);
1278 if (rc == TRUE) {
1279 rsc->active = cmd;
1280 rc = pcmk_ok;
1281 } else {
1282 rc = -pcmk_err_generic;
1283 }
1284 return rc;
1285 }
1286
1287 static void
1288 execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1289 {
1290 int rc = pcmk_ok;
1291 const char *rc_s = NULL;
1292 bool do_monitor = false;
1293
1294 // Don't free; belongs to pacemaker-execd.c
1295 stonith_t *fencer_api = execd_get_fencer_connection();
1296
1297 if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)
1298 && (cmd->interval_ms == 0)) {
1299 // Probes don't require a fencer connection
1300 fencing_rsc_action_complete(cmd, rsc->fence_probe_result.exit_status,
1301 rsc->fence_probe_result.execution_status,
1302 rsc->fence_probe_result.exit_reason);
1303 return;
1304 }
1305
1306 if (fencer_api == NULL) {
1307 fencing_rsc_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
1308 PCMK_EXEC_NOT_CONNECTED,
1309 "No connection to fencer");
1310 return;
1311 }
1312
1313 if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1314 rc = start_fencing_rsc(fencer_api, rsc, cmd);
1315 if (rc == pcmk_ok) {
1316 do_monitor = true;
1317 }
1318
1319 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
1320 rc = stop_fencing_rsc(fencer_api, rsc);
1321
1322 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1323 pcmk__str_casei)) {
1324 do_monitor = true;
1325
1326 } else {
1327 fencing_rsc_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
1328 PCMK_EXEC_ERROR,
1329 "Invalid fence device action (bug?)");
1330 return;
1331 }
1332
1333 if (do_monitor) {
1334 rc = monitor_fencing_rsc(fencer_api, rsc, cmd);
1335 if (rc == pcmk_ok) {
1336 // Don't clean up yet. We will get the result of the monitor later.
1337 return;
1338 }
1339 }
1340
1341 if (rc != -pcmk_err_generic) {
1342 rc_s = pcmk_strerror(rc);
1343 }
1344 fencing_rsc_action_complete(cmd,
1345 ((rc == pcmk_rc_ok)? CRM_EX_OK : CRM_EX_ERROR),
1346 stonith__legacy2status(rc), rc_s);
1347 }
1348
1349 static void
1350 execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1351 {
1352 svc_action_t *action = NULL;
1353 GHashTable *params_copy = NULL;
1354
1355 pcmk__assert((rsc != NULL) && (cmd != NULL));
1356
1357 pcmk__trace("Creating action, resource:%s action:%s class:%s provider:%s "
1358 "agent:%s",
1359 rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1360
1361 params_copy = pcmk__str_table_dup(cmd->params);
1362
1363 action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1364 rsc->type,
1365 normalize_action_name(rsc, cmd->action),
1366 cmd->interval_ms, cmd->timeout,
1367 params_copy, cmd->service_flags);
1368
1369 if (action == NULL) {
1370 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1371 PCMK_EXEC_ERROR, strerror(ENOMEM));
1372 cmd_finalize(cmd, rsc);
1373 return;
1374 }
1375
1376 if (action->rc != PCMK_OCF_UNKNOWN) {
1377 services__copy_result(action, &(cmd->result));
1378 services_action_free(action);
1379 cmd_finalize(cmd, rsc);
1380 return;
1381 }
1382
1383 action->cb_data = cmd;
1384
1385 if (services_action_async(action, action_complete)) {
1386 /* The services library has taken responsibility for the action. It
1387 * could be pending, blocked, or merged into a duplicate recurring
1388 * action, in which case the action callback (action_complete())
1389 * will be called when the action completes, otherwise the callback has
1390 * already been called.
1391 *
1392 * action_complete() calls cmd_finalize() which can free cmd, so cmd
1393 * cannot be used here.
1394 */
1395 } else {
1396 /* This is a recurring action that is not being cancelled and could not
1397 * be initiated. It has been rescheduled, and the action callback
1398 * (action_complete()) has been called, which in this case has already
1399 * called cmd_finalize(), which in this case should only reset (not
1400 * free) cmd.
1401 */
1402 services__copy_result(action, &(cmd->result));
1403 services_action_free(action);
1404 }
1405 }
1406
1407 static gboolean
1408 execute_resource_action(gpointer user_data)
1409 {
1410 lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
1411 lrmd_cmd_t *cmd = NULL;
1412
1413 CRM_CHECK(rsc != NULL, return FALSE);
1414
1415 if (rsc->active) {
1416 pcmk__trace("%s is still active", rsc->rsc_id);
1417 return TRUE;
1418 }
1419
1420 if (rsc->pending_ops) {
1421 GList *first = rsc->pending_ops;
1422
1423 cmd = first->data;
1424 if (cmd->delay_id) {
1425 pcmk__trace("Command %s %s was asked to run too early, waiting for "
1426 "start_delay timeout of %dms",
1427 cmd->rsc_id, cmd->action, cmd->start_delay);
1428 return TRUE;
1429 }
1430 rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1431 g_list_free_1(first);
1432
1433 #ifdef PCMK__TIME_USE_CGT
1434 get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1435 #endif
1436 cmd->epoch_last_run = time(NULL);
1437 }
1438
1439 if (!cmd) {
1440 pcmk__trace("Nothing further to do for %s", rsc->rsc_id);
1441 return TRUE;
1442 }
1443
1444 rsc->active = cmd; /* only one op at a time for a rsc */
1445 if (cmd->interval_ms) {
1446 rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1447 }
1448
1449 log_execute(cmd);
1450
1451 if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1452 execute_stonith_action(rsc, cmd);
1453 } else {
1454 execute_nonstonith_action(rsc, cmd);
1455 }
1456
1457 return TRUE;
1458 }
1459
1460 void
1461 execd_free_rsc(gpointer data)
1462 {
1463 GList *gIter = NULL;
1464 lrmd_rsc_t *rsc = data;
1465 bool is_fencing_rsc = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1466 pcmk__str_casei);
1467
1468 gIter = rsc->pending_ops;
1469 while (gIter != NULL) {
1470 GList *next = gIter->next;
1471 lrmd_cmd_t *cmd = gIter->data;
1472
1473 /* command was never executed */
1474 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1475 cmd_finalize(cmd, NULL);
1476
1477 gIter = next;
1478 }
1479 /* frees list, but not list elements. */
1480 g_list_free(rsc->pending_ops);
1481
1482 gIter = rsc->recurring_ops;
1483 while (gIter != NULL) {
1484 GList *next = gIter->next;
1485 lrmd_cmd_t *cmd = gIter->data;
1486
1487 if (is_fencing_rsc) {
1488 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1489 /* If a fencing resource's recurring operation is in-flight, just
1490 * mark it as cancelled. It is not safe to finalize/free the cmd
1491 * until the fencer API says it has either completed or timed out.
1492 */
1493 if (rsc->active != cmd) {
1494 cmd_finalize(cmd, NULL);
1495 }
1496 } else {
1497 /* This command is already handed off to service library,
1498 * let service library cancel it and tell us via the callback
1499 * when it is cancelled. The rsc can be safely destroyed
1500 * even if we are waiting for the cancel result */
1501 services_action_cancel(rsc->rsc_id,
1502 normalize_action_name(rsc, cmd->action),
1503 cmd->interval_ms);
1504 }
1505
1506 gIter = next;
1507 }
1508 /* frees list, but not list elements. */
1509 g_list_free(rsc->recurring_ops);
1510
1511 free(rsc->rsc_id);
1512 free(rsc->class);
1513 free(rsc->provider);
1514 free(rsc->type);
1515 mainloop_destroy_trigger(rsc->work);
1516
1517 free(rsc);
1518 }
1519
1520 int
1521 execd_process_signon(pcmk__client_t *client, xmlNode *request, int call_id,
1522 xmlNode **reply)
1523 {
1524 int rc = pcmk_rc_ok;
1525 time_t now = time(NULL);
1526 const char *protocol_version = pcmk__xe_get(request,
1527 PCMK__XA_LRMD_PROTOCOL_VERSION);
1528 const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
1529
1530 if (pcmk__compare_versions(protocol_version,
1531 LRMD_COMPATIBLE_PROTOCOL) < 0) {
1532 pcmk__err("Cluster API version must be greater than or equal to "
1533 LRMD_COMPATIBLE_PROTOCOL " , not %s",
1534 protocol_version);
1535 rc = EPROTO;
1536 }
1537
1538 if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) {
1539 #ifdef PCMK__COMPILE_REMOTE
1540 if ((client->remote != NULL)
1541 && pcmk__is_set(client->flags,
1542 pcmk__client_tls_handshake_complete)) {
1543 const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP);
1544
1545 // This is a remote connection from a cluster node's controller
1546 ipc_proxy_add_provider(client);
1547
1548 /* @TODO Allowing multiple proxies makes no sense given that clients
1549 * have no way to choose between them. Maybe always use the most
1550 * recent one and switch any existing IPC connections to use it,
1551 * by iterating over ipc_clients here, and if client->id doesn't
1552 * match the client's userdata, replace the userdata with the new
1553 * ID. After the iteration, call lrmd_remote_client_destroy() on any
1554 * of the replaced values in ipc_providers.
1555 */
1556
1557 /* If this was a register operation, also ask for new schema files but
1558 * only if it's supported by the protocol version.
1559 */
1560 if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) &&
1561 LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) {
1562 remoted_request_cib_schema_files();
1563 }
1564 } else {
1565 rc = EACCES;
1566 }
1567 #else
1568 rc = EPROTONOSUPPORT;
1569 #endif
1570 }
1571
1572 pcmk__assert(reply != NULL);
1573
1574 *reply = execd_create_reply(pcmk_rc2legacy(rc), call_id);
1575 pcmk__xe_set(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
1576 pcmk__xe_set(*reply, PCMK__XA_LRMD_CLIENTID, client->id);
1577 pcmk__xe_set(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1578 pcmk__xe_set_time(*reply, PCMK__XA_UPTIME, now - start_time);
1579
1580 if (start_state) {
1581 pcmk__xe_set(*reply, PCMK__XA_NODE_START_STATE, start_state);
1582 }
1583
1584 return rc;
1585 }
1586
1587 void
1588 execd_process_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
1589 {
1590 lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1591 lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1592
1593 if (dup &&
1594 pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1595 pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) &&
1596 pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1597
1598 pcmk__notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1599 execd_free_rsc(rsc);
1600 return;
1601 }
1602
1603 g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1604 pcmk__info("Cached agent information for '%s'", rsc->rsc_id);
1605 }
1606
1607 int
1608 execd_process_get_rsc_info(xmlNode *request, int call_id, xmlNode **reply)
1609 {
1610 int rc = pcmk_rc_ok;
1611 xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1612 "//" PCMK__XE_LRMD_RSC,
1613 LOG_ERR);
1614 const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1615 lrmd_rsc_t *rsc = NULL;
1616
1617 if (rsc_id == NULL) {
1618 rc = ENODEV;
1619 } else {
1620 rsc = g_hash_table_lookup(rsc_list, rsc_id);
1621 if (rsc == NULL) {
1622 pcmk__info("Agent information for '%s' not in cache", rsc_id);
1623 rc = ENODEV;
1624 }
1625 }
1626
1627 CRM_LOG_ASSERT(reply != NULL);
1628
1629 *reply = execd_create_reply(pcmk_rc2legacy(rc), call_id);
1630 if (rsc) {
1631 pcmk__xe_set(*reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1632 pcmk__xe_set(*reply, PCMK__XA_LRMD_CLASS, rsc->class);
1633 pcmk__xe_set(*reply, PCMK__XA_LRMD_PROVIDER, rsc->provider);
1634 pcmk__xe_set(*reply, PCMK__XA_LRMD_TYPE, rsc->type);
1635 }
1636
1637 return rc;
1638 }
1639
1640 int
1641 execd_process_rsc_unregister(pcmk__client_t *client, xmlNode *request)
1642 {
1643 int rc = pcmk_rc_ok;
1644 lrmd_rsc_t *rsc = NULL;
1645 xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1646 "//" PCMK__XE_LRMD_RSC,
1647 LOG_ERR);
1648 const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1649
1650 if (!rsc_id) {
1651 return ENODEV;
1652 }
1653
1654 rsc = g_hash_table_lookup(rsc_list, rsc_id);
1655 if (rsc == NULL) {
1656 pcmk__info("Ignoring unregistration of resource '%s', which is not "
1657 "registered", rsc_id);
1658 return pcmk_rc_ok;
1659 }
1660
1661 if (rsc->active) {
1662 /* let the caller know there are still active ops on this rsc to watch for */
1663 pcmk__trace("Operation (%p) still in progress for unregistered "
1664 "resource %s", rsc->active, rsc_id);
1665 rc = EINPROGRESS;
1666 }
1667
1668 g_hash_table_remove(rsc_list, rsc_id);
1669
1670 return rc;
1671 }
1672
1673 int
1674 execd_process_rsc_exec(pcmk__client_t *client, xmlNode *request)
1675 {
1676 lrmd_rsc_t *rsc = NULL;
1677 lrmd_cmd_t *cmd = NULL;
1678 xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1679 "//" PCMK__XE_LRMD_RSC,
1680 LOG_ERR);
1681 const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1682
1683 if (!rsc_id) {
1684 return EINVAL;
1685 }
1686
1687 if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1688 pcmk__info("Resource '%s' not found (%d active resources)", rsc_id,
1689 g_hash_table_size(rsc_list));
1690 return ENODEV;
1691 }
1692
1693 cmd = create_lrmd_cmd(request, client);
1694
1695 /* Don't reference cmd after handing it off to be scheduled.
1696 * The cmd could get merged and freed. */
1697 schedule_lrmd_cmd(rsc, cmd);
1698
1699 return pcmk_rc_ok;
1700 }
1701
1702 static int
1703 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
1704 {
1705 GList *gIter = NULL;
1706 lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1707
1708 /* How to cancel an action.
1709 * 1. Check pending ops list, if it hasn't been handed off
1710 * to the service library or stonith recurring list remove
1711 * it there and that will stop it.
1712 * 2. If it isn't in the pending ops list, then it's either a
1713 * recurring op in the stonith recurring list, or the service
1714 * library's recurring list. Stop it there
1715 * 3. If not found in any lists, then this operation has either
1716 * been executed already and is not a recurring operation, or
1717 * never existed.
1718 */
1719 if (!rsc) {
1720 return ENODEV;
1721 }
1722
1723 for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1724 lrmd_cmd_t *cmd = gIter->data;
1725
1726 if (action_matches(cmd, action, interval_ms)) {
1727 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1728 cmd_finalize(cmd, rsc);
1729 return pcmk_rc_ok;
1730 }
1731 }
1732
1733 if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1734 /* The service library does not handle stonith operations.
1735 * We have to handle recurring stonith operations ourselves. */
1736 for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1737 lrmd_cmd_t *cmd = gIter->data;
1738
1739 if (action_matches(cmd, action, interval_ms)) {
1740 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1741 if (rsc->active != cmd) {
1742 cmd_finalize(cmd, rsc);
1743 }
1744 return pcmk_rc_ok;
1745 }
1746 }
1747 } else if (services_action_cancel(rsc_id,
1748 normalize_action_name(rsc, action),
1749 interval_ms) == TRUE) {
1750 /* The service library will tell the action_complete callback function
1751 * this action was cancelled, which will destroy the cmd and remove
1752 * it from the recurring_op list. Do not do that in this function
1753 * if the service library says it cancelled it. */
1754 return pcmk_rc_ok;
1755 }
1756
1757 return EOPNOTSUPP;
1758 }
1759
1760 static void
1761 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
1762 {
1763 GList *cmd_list = NULL;
1764 GList *cmd_iter = NULL;
1765
1766 /* Notice a copy of each list is created when concat is called.
1767 * This prevents odd behavior from occurring when the cmd_list
1768 * is iterated through later on. It is possible the cancel_op
1769 * function may end up modifying the recurring_ops and pending_ops
1770 * lists. If we did not copy those lists, our cmd_list iteration
1771 * could get messed up.*/
1772 if (rsc->recurring_ops) {
1773 cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1774 }
1775 if (rsc->pending_ops) {
1776 cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1777 }
1778 if (!cmd_list) {
1779 return;
1780 }
1781
1782 for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1783 lrmd_cmd_t *cmd = cmd_iter->data;
1784
1785 if (cmd->interval_ms == 0) {
1786 continue;
1787 }
1788
1789 if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1790 continue;
1791 }
1792
1793 cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1794 }
1795 /* frees only the copied list data, not the cmds */
1796 g_list_free(cmd_list);
1797 }
1798
1799 int
1800 execd_process_rsc_cancel(pcmk__client_t *client, xmlNode *request)
1801 {
1802 xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1803 "//" PCMK__XE_LRMD_RSC,
1804 LOG_ERR);
1805 const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1806 const char *action = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
1807 guint interval_ms = 0;
1808
1809 pcmk__xe_get_guint(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms);
1810
1811 if (!rsc_id || !action) {
1812 return EINVAL;
1813 }
1814
1815 return cancel_op(rsc_id, action, interval_ms);
1816 }
1817
1818 static void
1819 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
1820 {
1821 xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC);
1822
1823 pcmk__xe_set(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1824 for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1825 lrmd_cmd_t *cmd = item->data;
1826 xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP);
1827
1828 pcmk__xe_set(op_xml, PCMK__XA_LRMD_RSC_ACTION,
1829 pcmk__s(cmd->real_action, cmd->action));
1830 pcmk__xe_set_guint(op_xml, PCMK__XA_LRMD_RSC_INTERVAL,
1831 cmd->interval_ms);
1832 pcmk__xe_set_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig);
1833 }
1834 }
1835
1836 int
1837 execd_process_get_recurring(xmlNode *request, int call_id, xmlNode **reply)
1838 {
1839 int rc = pcmk_rc_ok;
1840 const char *rsc_id = NULL;
1841 lrmd_rsc_t *rsc = NULL;
1842 xmlNode *rsc_xml = NULL;
1843
1844 // Resource ID is optional
1845 rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL);
1846 if (rsc_xml) {
1847 rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL);
1848 }
1849 if (rsc_xml) {
1850 rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1851 }
1852
1853 // If resource ID is specified, resource must exist
1854 if (rsc_id != NULL) {
1855 rsc = g_hash_table_lookup(rsc_list, rsc_id);
1856 if (rsc == NULL) {
1857 pcmk__info("Resource '%s' not found (%d active resources)", rsc_id,
1858 g_hash_table_size(rsc_list));
1859 rc = ENODEV;
1860 }
1861 }
1862
1863 CRM_LOG_ASSERT(reply != NULL);
1864
1865 *reply = execd_create_reply(pcmk_rc2legacy(rc), call_id);
1866
1867 // If resource ID is not specified, check all resources
1868 if (rsc_id == NULL) {
1869 GHashTableIter iter;
1870 char *key = NULL;
1871
1872 g_hash_table_iter_init(&iter, rsc_list);
1873 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1874 (gpointer *) &rsc)) {
1875 add_recurring_op_xml(*reply, rsc);
1876 }
1877 } else if (rsc) {
1878 add_recurring_op_xml(*reply, rsc);
1879 }
1880
1881 return rc;
1882 }
1883