1    	/*
2    	 * Copyright 2012-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU Lesser General Public License
7    	 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <stdbool.h>
13   	
14   	#include <crm/fencing/internal.h>
15   	
16   	#include <glib.h>
17   	#include <libxml/tree.h>                // xmlNode
18   	
19   	// Check whether we have a high-resolution monotonic clock
20   	#undef PCMK__TIME_USE_CGT
21   	#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
22   	#  define PCMK__TIME_USE_CGT
23   	#  include <time.h>  /* clock_gettime */
24   	#endif
25   	
26   	#include <unistd.h>
27   	
28   	#include <crm/crm.h>
29   	#include <crm/fencing/internal.h>
30   	#include <crm/services.h>
31   	#include <crm/services_internal.h>
32   	#include <crm/common/mainloop.h>
33   	#include <crm/common/ipc.h>
34   	#include <crm/common/xml.h>
35   	
36   	#include "pacemaker-execd.h"
37   	
// Table of known resources; looked up by resource ID to obtain an lrmd_rsc_t
GHashTable *rsc_list = NULL;
39   	
/* State for one executor command (a resource action requested by a client).
 *
 * Objects are created by create_lrmd_cmd() from a request's XML and released
 * by free_lrmd_cmd().
 */
typedef struct {
    int timeout;            // Timeout from request (restored from timeout_orig
                            // before the completion notification is sent)
    guint interval_ms;      // Recurring interval (0 if action is not recurring)
    int start_delay;        // Start delay from request (if nonzero, a timer is
                            // created when the command is scheduled)
    int timeout_orig;       // Copy of timeout as originally requested

    int call_id;            // Call ID from request

    int call_opts;          // Call options from request (lrmd_opt_* flags)
    /* Timer ids, must be removed on cmd destruction. */
    int delay_id;               // Start-delay timer, or 0 if none
    int stonith_recurring_id;   // Recurring-interval timer, or 0 if none

    int rsc_deleted;        // Set to 1 at finalization if resource is not found

    int service_flags;      // SVC_ACTION_* flags (see create_lrmd_cmd())

    char *client_id;        // Copy of ID of client that requested the command
    char *origin;           // Origin from request
    char *rsc_id;           // ID of resource that the command is for
    char *action;           // Name of requested action
    char *real_action;      // If set, reported in notifications in place of action
    char *userdata_str;     // User data string from request (echoed in notifications)

    pcmk__action_result_t result;   // Most recent result of the command

    /* We can track operation queue time and run time, to be saved with the CIB
     * resource history (and displayed in cluster status). We need
     * high-resolution monotonic time for this purpose, so we use
     * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
     * is disabled).
     *
     * However, we also need epoch timestamps for recording the time the command
     * last ran and the time its return value last changed, for use in time
     * displays (as opposed to interval calculations). We keep time_t values for
     * this purpose.
     *
     * The last run time is used for both purposes, so we keep redundant
     * monotonic and epoch values for this. Technically the two could represent
     * different times, but since time_t has only second resolution and the
     * values are used for distinct purposes, that is not significant.
     */
#ifdef PCMK__TIME_USE_CGT
    /* Recurring and systemd operations may involve more than one executor
     * command per operation, so they need info about the original and the most
     * recent.
     */
    struct timespec t_first_run;    // When op first ran
    struct timespec t_run;          // When op most recently ran
    struct timespec t_first_queue;  // When op was first queued
    struct timespec t_queue;        // When op was most recently queued
#endif
    time_t epoch_last_run;          // Epoch timestamp of when op last ran
    time_t epoch_rcchange;          // Epoch timestamp of when rc last changed

    bool first_notify_sent;     // Whether any completion notification was sent
    int last_notify_rc;         // Exit status in most recent notification
    int last_notify_op_status;  // Execution status in most recent notification
    int last_pid;               // PID of most recent execution (0 if none)

    GHashTable *params;     // Parameters parsed from request XML (xml2list())
} lrmd_cmd_t;
102  	
// Forward declarations for static functions in this file
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean execute_resource_action(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
106  	
107  	#ifdef PCMK__TIME_USE_CGT
108  	
/*!
 * \internal
 * \brief Check whether a struct timespec holds a nonzero time
 *
 * \param[in] timespec  Time to check (may be NULL)
 *
 * \return true if \p timespec is non-NULL and at least one of its fields is
 *         nonzero, otherwise false
 */
static inline bool
time_is_set(const struct timespec *timespec)
{
    if (timespec == NULL) {
        return false;
    }
    if (timespec->tv_sec != 0) {
        return true;
    }
    return timespec->tv_nsec != 0;
}
123  	
124  	/*
125  	 * \internal
126  	 * \brief Set a timespec (and its original if unset) to the current time
127  	 *
128  	 * \param[out] t_current  Where to store current time
129  	 * \param[out] t_orig     Where to copy t_current if unset
130  	 */
131  	static void
132  	get_current_time(struct timespec *t_current, struct timespec *t_orig)
133  	{
134  	    clock_gettime(CLOCK_MONOTONIC, t_current);
135  	    if ((t_orig != NULL) && !time_is_set(t_orig)) {
136  	        *t_orig = *t_current;
137  	    }
138  	}
139  	
140  	/*!
141  	 * \internal
142  	 * \brief Return difference between two times in milliseconds
143  	 *
144  	 * \param[in] now  More recent time (or NULL to use current time)
145  	 * \param[in] old  Earlier time
146  	 *
147  	 * \return milliseconds difference (or 0 if old is NULL or unset)
148  	 *
149  	 * \note Can overflow on 32bit machines when the differences is around
150  	 *       24 days or more.
151  	 */
152  	static int
153  	time_diff_ms(const struct timespec *now, const struct timespec *old)
154  	{
155  	    int diff_ms = 0;
156  	
157  	    if (time_is_set(old)) {
158  	        struct timespec local_now = { 0, };
159  	
160  	        if (now == NULL) {
161  	            clock_gettime(CLOCK_MONOTONIC, &local_now);
162  	            now = &local_now;
163  	        }
164  	        diff_ms = (now->tv_sec - old->tv_sec) * 1000
165  	                  + (now->tv_nsec - old->tv_nsec) / 1000000;
166  	    }
167  	    return diff_ms;
168  	}
169  	
170  	/*!
171  	 * \internal
172  	 * \brief Reset a command's operation times to their original values.
173  	 *
174  	 * Reset a command's run and queued timestamps to the timestamps of the original
175  	 * command, so we report the entire time since then and not just the time since
176  	 * the most recent command (for recurring and systemd operations).
177  	 *
178  	 * \param[in,out] cmd  Executor command object to reset
179  	 *
180  	 * \note It's not obvious what the queued time should be for a systemd
181  	 *       start/stop operation, which might go like this:
182  	 *         initial command queued 5ms, runs 3s
183  	 *         monitor command queued 10ms, runs 10s
184  	 *         monitor command queued 10ms, runs 10s
185  	 *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
186  	 *       implementation will report 5ms. If it's 25ms, then we need to
187  	 *       subtract 20ms from the total exec time so as not to count it twice.
188  	 *       We can implement that later if it matters to anyone ...
189  	 */
190  	static void
191  	cmd_original_times(lrmd_cmd_t * cmd)
192  	{
193  	    cmd->t_run = cmd->t_first_run;
194  	    cmd->t_queue = cmd->t_first_queue;
195  	}
196  	#endif
197  	
198  	static inline bool
199  	action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
200  	{
201  	    return (cmd->interval_ms == interval_ms)
202  	           && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
203  	}
204  	
205  	/*!
206  	 * \internal
207  	 * \brief Log the result of an asynchronous command
208  	 *
209  	 * \param[in] cmd            Command to log result for
210  	 * \param[in] exec_time_ms   Execution time in milliseconds, if known
211  	 * \param[in] queue_time_ms  Queue time in milliseconds, if known
212  	 */
213  	static void
214  	log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
215  	{
216  	    int log_level = LOG_INFO;
217  	    GString *str = g_string_sized_new(100); // reasonable starting size
218  	
219  	    if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
220  	        log_level = LOG_DEBUG;
221  	    }
222  	
223  	    g_string_append_printf(str, "%s %s (call %d",
224  	                           cmd->rsc_id, cmd->action, cmd->call_id);
225  	    if (cmd->last_pid != 0) {
226  	        g_string_append_printf(str, ", PID %d", cmd->last_pid);
227  	    }
228  	    switch (cmd->result.execution_status) {
229  	        case PCMK_EXEC_DONE:
230  	            g_string_append_printf(str, ") exited with status %d",
231  	                                   cmd->result.exit_status);
232  	            break;
233  	        case PCMK_EXEC_CANCELLED:
234  	            g_string_append_printf(str, ") cancelled");
235  	            break;
236  	        default:
237  	            pcmk__g_strcat(str, ") could not be executed: ",
238  	                           pcmk_exec_status_str(cmd->result.execution_status),
239  	                           NULL);
240  	            break;
241  	    }
242  	    if (cmd->result.exit_reason != NULL) {
243  	        pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
244  	    }
245  	
246  	#ifdef PCMK__TIME_USE_CGT
247  	    pcmk__g_strcat(str, " (execution time ",
248  	                   pcmk__readable_interval(exec_time_ms), NULL);
249  	    if (queue_time_ms > 0) {
250  	        pcmk__g_strcat(str, " after being queued ",
251  	                       pcmk__readable_interval(queue_time_ms), NULL);
252  	    }
253  	    g_string_append_c(str, ')');
254  	#endif
255  	
256  	    do_crm_log(log_level, "%s", str->str);
257  	    g_string_free(str, TRUE);
258  	}
259  	
260  	static void
261  	log_execute(lrmd_cmd_t * cmd)
262  	{
263  	    int log_level = LOG_INFO;
264  	
265  	    if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
266  	        log_level = LOG_DEBUG;
267  	    }
268  	
269  	    do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
270  	               cmd->rsc_id, cmd->action, cmd->call_id);
271  	}
272  	
273  	static const char *
274  	normalize_action_name(lrmd_rsc_t * rsc, const char *action)
275  	{
276  	    if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) &&
277  	        pcmk__is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
278  	        return PCMK_ACTION_STATUS;
279  	    }
280  	    return action;
281  	}
282  	
283  	static lrmd_rsc_t *
284  	build_rsc_from_xml(xmlNode * msg)
285  	{
286  	    xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
287  	                                            LOG_ERR);
288  	    lrmd_rsc_t *rsc = NULL;
289  	
290  	    rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t));
291  	
292  	    pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts);
293  	
294  	    rsc->rsc_id = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
295  	    rsc->class = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_CLASS);
296  	    rsc->provider = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER);
297  	    rsc->type = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_TYPE);
298  	    rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
299  	                                     rsc);
300  	
301  	    // Initialize fence device probes (to return "not running")
302  	    pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
303  	                     PCMK_EXEC_NO_FENCE_DEVICE, NULL);
304  	    return rsc;
305  	}
306  	
307  	static lrmd_cmd_t *
308  	create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
309  	{
310  	    int call_options = 0;
311  	    xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
312  	                                            LOG_ERR);
313  	    lrmd_cmd_t *cmd = NULL;
314  	
315  	    cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t));
316  	
317  	    pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options);
318  	    cmd->call_opts = call_options;
319  	    cmd->client_id = pcmk__str_copy(client->id);
320  	
321  	    pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id);
322  	    pcmk__xe_get_guint(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &cmd->interval_ms);
323  	    pcmk__xe_get_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout);
324  	    pcmk__xe_get_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY, &cmd->start_delay);
325  	    cmd->timeout_orig = cmd->timeout;
326  	
327  	    cmd->origin = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN);
328  	    cmd->action = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
329  	    cmd->userdata_str = pcmk__xe_get_copy(rsc_xml,
330  	                                          PCMK__XA_LRMD_RSC_USERDATA_STR);
331  	    cmd->rsc_id = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
332  	
333  	    cmd->params = xml2list(rsc_xml);
334  	
335  	    if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"),
336  	                     PCMK_VALUE_BLOCK, pcmk__str_casei)) {
337  	        pcmk__debug("Setting flag to leave pid group on timeout and only kill "
338  	                    "action pid for " PCMK__OP_FMT,
339  	                    cmd->rsc_id, cmd->action, cmd->interval_ms);
340  	        cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
341  	                                                LOG_TRACE, "Action",
342  	                                                cmd->action, 0,
343  	                                                SVC_ACTION_LEAVE_GROUP,
344  	                                                "SVC_ACTION_LEAVE_GROUP");
345  	    }
346  	    return cmd;
347  	}
348  	
349  	static void
350  	stop_recurring_timer(lrmd_cmd_t *cmd)
351  	{
352  	    if (cmd) {
353  	        if (cmd->stonith_recurring_id) {
354  	            g_source_remove(cmd->stonith_recurring_id);
355  	        }
356  	        cmd->stonith_recurring_id = 0;
357  	    }
358  	}
359  	
360  	static void
361  	free_lrmd_cmd(lrmd_cmd_t * cmd)
362  	{
363  	    stop_recurring_timer(cmd);
364  	    if (cmd->delay_id) {
365  	        g_source_remove(cmd->delay_id);
366  	    }
367  	
368  	    g_clear_pointer(&cmd->params, g_hash_table_destroy);
369  	
370  	    pcmk__reset_result(&(cmd->result));
371  	    free(cmd->origin);
372  	    free(cmd->action);
373  	    free(cmd->real_action);
374  	    free(cmd->userdata_str);
375  	    free(cmd->rsc_id);
376  	    free(cmd->client_id);
377  	    free(cmd);
378  	}
379  	
380  	static gboolean
381  	stonith_recurring_op_helper(gpointer data)
382  	{
383  	    lrmd_cmd_t *cmd = data;
384  	    lrmd_rsc_t *rsc;
385  	
386  	    cmd->stonith_recurring_id = 0;
387  	
388  	    if (!cmd->rsc_id) {
389  	        return FALSE;
390  	    }
391  	
392  	    rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
393  	
394  	    pcmk__assert(rsc != NULL);
395  	    /* take it out of recurring_ops list, and put it in the pending ops
396  	     * to be executed */
397  	    rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
398  	    rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
399  	#ifdef PCMK__TIME_USE_CGT
400  	    get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
401  	#endif
402  	    mainloop_set_trigger(rsc->work);
403  	
404  	    return FALSE;
405  	}
406  	
407  	static inline void
408  	start_recurring_timer(lrmd_cmd_t *cmd)
409  	{
410  	    if (!cmd || (cmd->interval_ms <= 0)) {
411  	        return;
412  	    }
413  	
414  	    cmd->stonith_recurring_id = pcmk__create_timer(cmd->interval_ms,
415  	                                                   stonith_recurring_op_helper,
416  	                                                   cmd);
417  	}
418  	
419  	static gboolean
420  	start_delay_helper(gpointer data)
421  	{
422  	    lrmd_cmd_t *cmd = data;
423  	    lrmd_rsc_t *rsc = NULL;
424  	
425  	    cmd->delay_id = 0;
426  	    rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
427  	
428  	    if (rsc) {
429  	        mainloop_set_trigger(rsc->work);
430  	    }
431  	
432  	    return FALSE;
433  	}
434  	
435  	/*!
436  	 * \internal
437  	 * \brief Check whether a list already contains the equivalent of a given action
438  	 *
439  	 * \param[in] action_list  List to search
440  	 * \param[in] cmd          Action to search for
441  	 */
442  	static lrmd_cmd_t *
443  	find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
444  	{
445  	    for (const GList *item = action_list; item != NULL; item = item->next) {
446  	        lrmd_cmd_t *dup = item->data;
447  	
448  	        if (action_matches(cmd, dup->action, dup->interval_ms)) {
449  	            return dup;
450  	        }
451  	    }
452  	    return NULL;
453  	}
454  	
455  	static bool
456  	merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
457  	{
458  	    lrmd_cmd_t * dup = NULL;
459  	    bool dup_pending = true;
460  	
(1) Event path: Condition "cmd->interval_ms == 0", taking false branch.
461  	    if (cmd->interval_ms == 0) {
462  	        return false;
463  	    }
464  	
465  	    // Search for a duplicate of this action (in-flight or not)
466  	    dup = find_duplicate_action(rsc->pending_ops, cmd);
(2) Event path: Condition "dup == NULL", taking true branch.
467  	    if (dup == NULL) {
468  	        dup_pending = false;
469  	        dup = find_duplicate_action(rsc->recurring_ops, cmd);
(3) Event path: Condition "dup == NULL", taking false branch.
470  	        if (dup == NULL) {
471  	            return false;
472  	        }
473  	    }
474  	
475  	    /* Do not merge fencing monitors marked for cancellation, so we can reply to
476  	     * the cancellation separately.
477  	     */
(4) Event path: Condition "pcmk__str_eq(rsc->class, "stonith", pcmk__str_casei)", taking true branch.
(5) Event path: Condition "dup->result.execution_status == PCMK_EXEC_CANCELLED", taking false branch.
478  	    if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
479  	                     pcmk__str_casei)
480  	        && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
481  	        return false;
482  	    }
483  	
484  	    /* This should not occur. If it does, we need to investigate how something
485  	     * like this is possible in the controller.
486  	     */
487  	    pcmk__warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), "
488  	               "merging with previous op entry",
489  	               rsc->rsc_id, normalize_action_name(rsc, dup->action),
490  	               dup->interval_ms);
491  	
492  	    // Merge new action's call ID and user data into existing action
493  	    dup->first_notify_sent = false;
494  	    free(dup->userdata_str);
495  	    dup->userdata_str = cmd->userdata_str;
496  	    cmd->userdata_str = NULL;
497  	    dup->call_id = cmd->call_id;
CID (unavailable; MK=2171ab3422718d6cb1ce46fa97ba7706) (#1 of 1): Inconsistent C union access (INCONSISTENT_UNION_ACCESS):
(6) Event assign_union_field: The union field "in" of "_pp" is written.
(7) Event inconsistent_union_field_access: In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in".
498  	    g_clear_pointer(&cmd, free_lrmd_cmd);
499  	
500  	    /* If dup is not pending, that means it has already executed at least once
501  	     * and is waiting in the interval. In that case, stop waiting and initiate
502  	     * a new instance now.
503  	     */
504  	    if (!dup_pending) {
505  	        if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
506  	                         pcmk__str_casei)) {
507  	            stop_recurring_timer(dup);
508  	            stonith_recurring_op_helper(dup);
509  	        } else {
510  	            services_action_kick(rsc->rsc_id,
511  	                                 normalize_action_name(rsc, dup->action),
512  	                                 dup->interval_ms);
513  	        }
514  	    }
515  	    return true;
516  	}
517  	
518  	static void
519  	schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
520  	{
521  	    CRM_CHECK(cmd != NULL, return);
522  	    CRM_CHECK(rsc != NULL, return);
523  	
524  	    pcmk__trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
525  	
526  	    if (merge_recurring_duplicate(rsc, cmd)) {
527  	        // Equivalent of cmd has already been scheduled
528  	        return;
529  	    }
530  	
531  	    /* The controller expects the executor to automatically cancel
532  	     * recurring operations before a resource stops.
533  	     */
534  	    if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
535  	        cancel_all_recurring(rsc, NULL);
536  	    }
537  	
538  	    rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
539  	#ifdef PCMK__TIME_USE_CGT
540  	    get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
541  	#endif
542  	    mainloop_set_trigger(rsc->work);
543  	
544  	    if (cmd->start_delay) {
545  	        cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd);
546  	    }
547  	}
548  	
549  	xmlNode *
550  	execd_create_reply_as(const char *origin, int rc, int call_id)
551  	{
552  	    xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY);
553  	
554  	    pcmk__xe_set(reply, PCMK__XA_LRMD_ORIGIN, origin);
555  	    pcmk__xe_set_int(reply, PCMK__XA_LRMD_RC, rc);
556  	    pcmk__xe_set_int(reply, PCMK__XA_LRMD_CALLID, call_id);
557  	    return reply;
558  	}
559  	
560  	static void
561  	send_client_notify(gpointer key, gpointer value, gpointer user_data)
562  	{
563  	    xmlNode *update_msg = user_data;
564  	    pcmk__client_t *client = value;
565  	    int rc;
566  	    int log_level = LOG_WARNING;
567  	    const char *msg = NULL;
568  	
569  	    CRM_CHECK(client != NULL, return);
570  	    if (client->name == NULL) {
571  	        pcmk__trace("Skipping notification to client without name");
572  	        return;
573  	    }
574  	    if (pcmk__is_set(client->flags, pcmk__client_to_proxy)) {
575  	        /* We only want to notify clients of the executor IPC API. If we are
576  	         * running as Pacemaker Remote, we may have clients proxied to other
577  	         * IPC services in the cluster, so skip those.
578  	         */
579  	        pcmk__trace("Skipping executor API notification to client %s",
580  	                    pcmk__client_name(client));
581  	        return;
582  	    }
583  	
584  	    rc = lrmd_server_send_notify(client, update_msg);
585  	    if (rc == pcmk_rc_ok) {
586  	        return;
587  	    }
588  	
589  	    switch (rc) {
590  	        case ENOTCONN:
591  	        case EPIPE: // Client exited without waiting for notification
592  	            log_level = LOG_INFO;
593  	            msg = "Disconnected";
594  	            break;
595  	
596  	        default:
597  	            msg = pcmk_rc_str(rc);
598  	            break;
599  	    }
600  	    do_crm_log(log_level, "Could not notify client %s: %s " QB_XS " rc=%d",
601  	               pcmk__client_name(client), msg, rc);
602  	}
603  	
604  	static void
605  	send_cmd_complete_notify(lrmd_cmd_t * cmd)
606  	{
607  	    xmlNode *notify = NULL;
608  	    int exec_time = 0;
609  	    int queue_time = 0;
610  	
611  	#ifdef PCMK__TIME_USE_CGT
612  	    exec_time = time_diff_ms(NULL, &(cmd->t_run));
613  	    queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
614  	#endif
615  	    log_finished(cmd, exec_time, queue_time);
616  	
617  	    /* If the originator requested to be notified only for changes in recurring
618  	     * operation results, skip the notification if the result hasn't changed.
619  	     */
620  	    if (cmd->first_notify_sent
621  	        && pcmk__is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
622  	        && (cmd->last_notify_rc == cmd->result.exit_status)
623  	        && (cmd->last_notify_op_status == cmd->result.execution_status)) {
624  	        return;
625  	    }
626  	
627  	    cmd->first_notify_sent = true;
628  	    cmd->last_notify_rc = cmd->result.exit_status;
629  	    cmd->last_notify_op_status = cmd->result.execution_status;
630  	
631  	    notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
632  	
633  	    pcmk__xe_set(notify, PCMK__XA_LRMD_ORIGIN, __func__);
634  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout);
635  	    pcmk__xe_set_guint(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
636  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay);
637  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status);
638  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS,
639  	                     cmd->result.execution_status);
640  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id);
641  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted);
642  	
643  	    pcmk__xe_set_time(notify, PCMK__XA_LRMD_RUN_TIME, cmd->epoch_last_run);
644  	    pcmk__xe_set_time(notify, PCMK__XA_LRMD_RCCHANGE_TIME, cmd->epoch_rcchange);
645  	#ifdef PCMK__TIME_USE_CGT
646  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time);
647  	    pcmk__xe_set_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time);
648  	#endif
649  	
650  	    pcmk__xe_set(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC);
651  	    pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id);
652  	    if(cmd->real_action) {
653  	        pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action);
654  	    } else {
655  	        pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action);
656  	    }
657  	    pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
658  	    pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
659  	
660  	    if (cmd->result.action_stderr != NULL) {
661  	        pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_OUTPUT,
662  	                     cmd->result.action_stderr);
663  	
664  	    } else if (cmd->result.action_stdout != NULL) {
665  	        pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_OUTPUT,
666  	                     cmd->result.action_stdout);
667  	    }
668  	
669  	    if (cmd->params) {
670  	        char *key = NULL;
671  	        char *value = NULL;
672  	        GHashTableIter iter;
673  	
674  	        xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES);
675  	
676  	        g_hash_table_iter_init(&iter, cmd->params);
677  	        while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
678  	            hash2smartfield((gpointer) key, (gpointer) value, args);
679  	        }
680  	    }
681  	    if ((cmd->client_id != NULL)
682  	        && pcmk__is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
683  	
684  	        pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
685  	
686  	        if (client != NULL) {
687  	            send_client_notify(client->id, client, notify);
688  	        }
689  	    } else {
690  	        pcmk__foreach_ipc_client(send_client_notify, notify);
691  	    }
692  	
693  	    pcmk__xml_free(notify);
694  	}
695  	
696  	void
697  	execd_send_generic_notify(int rc, xmlNode *request)
698  	{
699  	    if (pcmk__ipc_client_count() != 0) {
700  	        int call_id = 0;
701  	        xmlNode *notify = NULL;
702  	        xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
703  	                                                "//" PCMK__XE_LRMD_RSC,
704  	                                                LOG_ERR);
705  	        const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
706  	        const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP);
707  	
708  	        pcmk__xe_get_int(request, PCMK__XA_LRMD_CALLID, &call_id);
709  	
710  	        notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
711  	        pcmk__xe_set(notify, PCMK__XA_LRMD_ORIGIN, __func__);
712  	        pcmk__xe_set_int(notify, PCMK__XA_LRMD_RC, rc);
713  	        pcmk__xe_set_int(notify, PCMK__XA_LRMD_CALLID, call_id);
714  	        pcmk__xe_set(notify, PCMK__XA_LRMD_OP, op);
715  	        pcmk__xe_set(notify, PCMK__XA_LRMD_RSC_ID, rsc_id);
716  	
717  	        pcmk__foreach_ipc_client(send_client_notify, notify);
718  	
719  	        pcmk__xml_free(notify);
720  	    }
721  	}
722  	
723  	static void
724  	cmd_reset(lrmd_cmd_t * cmd)
725  	{
726  	    cmd->last_pid = 0;
727  	#ifdef PCMK__TIME_USE_CGT
728  	    memset(&cmd->t_run, 0, sizeof(cmd->t_run));
729  	    memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
730  	#endif
731  	    cmd->epoch_last_run = 0;
732  	
733  	    pcmk__reset_result(&(cmd->result));
734  	    cmd->result.execution_status = PCMK_EXEC_DONE;
735  	}
736  	
737  	static void
738  	cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
739  	{
740  	    pcmk__trace("Resource operation rsc:%s action:%s completed (%p %p)",
741  	                cmd->rsc_id, cmd->action, ((rsc != NULL)? rsc->active : NULL),
742  	                cmd);
743  	
744  	    if (rsc && (rsc->active == cmd)) {
745  	        rsc->active = NULL;
746  	        mainloop_set_trigger(rsc->work);
747  	    }
748  	
749  	    if (!rsc) {
750  	        cmd->rsc_deleted = 1;
751  	    }
752  	
753  	    /* reset original timeout so client notification has correct information */
754  	    cmd->timeout = cmd->timeout_orig;
755  	
756  	    send_cmd_complete_notify(cmd);
757  	
758  	    if ((cmd->interval_ms != 0)
759  	        && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
760  	
761  	        if (rsc) {
762  	            rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
763  	            rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
764  	        }
765  	        free_lrmd_cmd(cmd);
766  	    } else if (cmd->interval_ms == 0) {
767  	        if (rsc) {
768  	            rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
769  	        }
770  	        free_lrmd_cmd(cmd);
771  	    } else {
772  	        /* Clear all the values pertaining just to the last iteration of a recurring op. */
773  	        cmd_reset(cmd);
774  	    }
775  	}
776  	
// User data passed to notify_one_client() when announcing a new client
struct notify_new_client_data {
    xmlNode *notify;                // Notification XML to send
    pcmk__client_t *new_client;     // New client (which is not itself notified)
};
781  	
782  	static void
783  	notify_one_client(gpointer key, gpointer value, gpointer user_data)
784  	{
785  	    pcmk__client_t *client = value;
786  	    struct notify_new_client_data *data = user_data;
787  	
788  	    if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
789  	        send_client_notify(key, (gpointer) client, (gpointer) data->notify);
790  	    }
791  	}
792  	
793  	void
794  	notify_of_new_client(pcmk__client_t *new_client)
795  	{
796  	    struct notify_new_client_data data;
797  	
798  	    data.new_client = new_client;
799  	    data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
800  	    pcmk__xe_set(data.notify, PCMK__XA_LRMD_ORIGIN, __func__);
801  	    pcmk__xe_set(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT);
802  	    pcmk__foreach_ipc_client(notify_one_client, &data);
803  	    pcmk__xml_free(data.notify);
804  	}
805  	
806  	void
807  	client_disconnect_cleanup(const char *client_id)
808  	{
809  	    GHashTableIter iter;
810  	    lrmd_rsc_t *rsc = NULL;
811  	    char *key = NULL;
812  	
813  	    g_hash_table_iter_init(&iter, rsc_list);
814  	    while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
815  	        if (pcmk__is_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
816  	            /* This client is disconnecting, drop any recurring operations
817  	             * it may have initiated on the resource */
818  	            cancel_all_recurring(rsc, client_id);
819  	        }
820  	    }
821  	}
822  	
823  	static void
824  	action_complete(svc_action_t * action)
825  	{
826  	    lrmd_rsc_t *rsc;
827  	    lrmd_cmd_t *cmd = action->cb_data;
828  	    enum ocf_exitcode code;
829  	
830  	#ifdef PCMK__TIME_USE_CGT
831  	    const char *rclass = NULL;
832  	    bool goagain = false;
833  	    int time_sum = 0;
834  	    int timeout_left = 0;
835  	    int delay = 0;
836  	#endif
837  	
838  	    if (!cmd) {
839  	        pcmk__err("Completed executor action (%s) does not match any known "
840  	                  "operations",
841  	                  action->id);
842  	        return;
843  	    }
844  	
845  	#ifdef PCMK__TIME_USE_CGT
846  	    if (cmd->result.exit_status != action->rc) {
847  	        cmd->epoch_rcchange = time(NULL);
848  	    }
849  	#endif
850  	
851  	    cmd->last_pid = action->pid;
852  	
853  	    // Cast variable instead of function return to keep compilers happy
854  	    code = services_result2ocf(action->standard, cmd->action, action->rc);
855  	    pcmk__set_result(&(cmd->result), (int) code,
856  	                     action->status, services__exit_reason(action));
857  	
858  	    rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
859  	
860  	#ifdef PCMK__TIME_USE_CGT
861  	    if (rsc != NULL) {
862  	        rclass = rsc->class;
863  	#if PCMK__ENABLE_SERVICE
864  	        if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE,
865  	                         pcmk__str_casei)) {
866  	            rclass = resources_find_service_class(rsc->type);
867  	        }
868  	#endif
869  	    }
870  	
871  	    if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
872  	        goto finalize;
873  	    }
874  	
875  	    if (pcmk__result_ok(&(cmd->result))
876  	        && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
877  	                                PCMK_ACTION_STOP, NULL)) {
878  	        /* Getting results for when a start or stop action completes is now
879  	         * handled by watching for JobRemoved() signals from systemd and
880  	         * reacting to them. So, we can bypass the rest of the code in this
881  	         * function for those actions, and simply finalize cmd.
882  	         *
883  	         * @TODO When monitors are handled in the same way, this function
884  	         * can either be drastically simplified or done away with entirely.
885  	         */
886  	        services__copy_result(action, &(cmd->result));
887  	        goto finalize;
888  	
889  	    } else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
890  	               pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
891  	               cmd->interval_ms == 0 &&
892  	               cmd->real_action == NULL) {
893  	        /* If the state is Pending at the time of probe, execute follow-up monitor. */
894  	        goagain = true;
895  	        cmd->real_action = cmd->action;
896  	        cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
897  	    } else if (cmd->real_action != NULL) {
898  	        // This is follow-up monitor to check whether start/stop/probe(monitor) completed
899  	        if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
900  	            goagain = true;
901  	
902  	        } else if (pcmk__result_ok(&(cmd->result))
903  	                   && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
904  	                                   pcmk__str_casei)) {
905  	            goagain = true;
906  	
907  	        } else {
908  	            int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
909  	            int timeout_left = cmd->timeout_orig - time_sum;
910  	
911  	            pcmk__debug("%s systemd %s is now complete (elapsed=%dms, "
912  	                        "remaining=%dms): %s (%d)",
913  	                        cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
914  	                        crm_exit_str(cmd->result.exit_status),
915  	                        cmd->result.exit_status);
916  	            cmd_original_times(cmd);
917  	
918  	            // Monitors may return "not running", but start/stop shouldn't
919  	            if ((cmd->result.execution_status == PCMK_EXEC_DONE)
920  	                && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
921  	
922  	                if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
923  	                                 pcmk__str_casei)) {
924  	                    cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
925  	                } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
926  	                                        pcmk__str_casei)) {
927  	                    cmd->result.exit_status = PCMK_OCF_OK;
928  	                }
929  	            }
930  	        }
931  	    } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)
932  	               && (cmd->interval_ms > 0)) {
933  	        /* For monitors, excluding follow-up monitors,                                  */
934  	        /* if the pending state persists from the first notification until its timeout, */
935  	        /* it will be treated as a timeout.                                             */
936  	
937  	        if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
938  	            (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {
939  	            int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));
940  	
941  	            if (time_left >= 0) {
942  	                pcmk__notice("Giving up on %s %s (rc=%d): monitor pending "
943  	                             "timeout (first pending notification=%s "
944  	                             "timeout=%dms)",
945  	                             cmd->rsc_id, cmd->action, cmd->result.exit_status,
946  	                             g_strchomp(ctime(&cmd->epoch_rcchange)),
947  	                             cmd->timeout_orig);
948  	                pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
949  	                                 PCMK_EXEC_TIMEOUT,
950  	                                 "Investigate reason for timeout, and adjust "
951  	                                 "configured operation timeout if necessary");
952  	                cmd_original_times(cmd);
953  	            }
954  	        }
955  	    }
956  	
957  	    if (!goagain) {
958  	        goto finalize;
959  	    }
960  	
961  	    time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
962  	    timeout_left = cmd->timeout_orig - time_sum;
963  	    delay = cmd->timeout_orig / 10;
964  	
965  	    if (delay >= timeout_left && timeout_left > 20) {
966  	        delay = timeout_left/2;
967  	    }
968  	
969  	    delay = QB_MIN(2000, delay);
970  	    if (delay < timeout_left) {
971  	        cmd->start_delay = delay;
972  	        cmd->timeout = timeout_left;
973  	
974  	        if (pcmk__result_ok(&(cmd->result))) {
975  	            pcmk__debug("%s %s may still be in progress: re-scheduling "
976  	                        "(elapsed=%dms, remaining=%dms, start_delay=%dms)",
977  	                        cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
978  	                        delay);
979  	
980  	        } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
981  	            pcmk__info("%s %s is still in progress: re-scheduling "
982  	                       "(elapsed=%dms, remaining=%dms, start_delay=%dms)",
983  	                       cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
984  	
985  	        } else {
986  	            pcmk__notice("%s %s failed: %s: Re-scheduling (remaining timeout "
987  	                         "%s) "
988  	                         QB_XS " exitstatus=%d elapsed=%dms start_delay=%dms)",
989  	                         cmd->rsc_id, cmd->action,
990  	                         crm_exit_str(cmd->result.exit_status),
991  	                         pcmk__readable_interval(timeout_left),
992  	                         cmd->result.exit_status, time_sum, delay);
993  	        }
994  	
995  	        cmd_reset(cmd);
996  	        if (rsc) {
997  	            rsc->active = NULL;
998  	        }
999  	        schedule_lrmd_cmd(rsc, cmd);
1000 	
1001 	        /* Don't finalize cmd, we're not done with it yet */
1002 	        return;
1003 	
1004 	    } else {
1005 	        pcmk__notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, "
1006 	                     "remaining=%dms)",
1007 	                     cmd->rsc_id, pcmk__s(cmd->real_action, cmd->action),
1008 	                     cmd->result.exit_status, time_sum, timeout_left);
1009 	        pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1010 	                         PCMK_EXEC_TIMEOUT,
1011 	                         "Investigate reason for timeout, and adjust "
1012 	                         "configured operation timeout if necessary");
1013 	        cmd_original_times(cmd);
1014 	    }
1015 	#endif
1016 	
1017 	finalize:
1018 	    pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
1019 	                            services__grab_stderr(action));
1020 	    cmd_finalize(cmd, rsc);
1021 	}
1022 	
1023 	/*!
1024 	 * \internal
1025 	 * \brief Process the result of a fence device action (start, stop, or monitor)
1026 	 *
1027 	 * \param[in,out] cmd               Fence device action that completed
1028 	 * \param[in]     exit_status       Fencer API exit status for action
1029 	 * \param[in]     execution_status  Fencer API execution status for action
1030 	 * \param[in]     exit_reason       Human-friendly detail, if action failed
1031 	 */
1032 	static void
1033 	fencing_rsc_action_complete(lrmd_cmd_t *cmd, int exit_status,
1034 	                            enum pcmk_exec_status execution_status,
1035 	                            const char *exit_reason)
1036 	{
1037 	    // This can be NULL if resource was removed before command completed
1038 	    lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
1039 	
1040 	    // Simplify fencer exit status to uniform exit status
1041 	    if (exit_status != CRM_EX_OK) {
1042 	        exit_status = PCMK_OCF_UNKNOWN_ERROR;
1043 	    }
1044 	
1045 	    if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
1046 	        /* An in-flight fence action was cancelled. The execution status is
1047 	         * already correct, so don't overwrite it.
1048 	         */
1049 	        execution_status = PCMK_EXEC_CANCELLED;
1050 	
1051 	    } else {
1052 	        /* Some execution status codes have specific meanings for the fencer
1053 	         * that executor clients may not expect, so map them to a simple error
1054 	         * status.
1055 	         */
1056 	        switch (execution_status) {
1057 	            case PCMK_EXEC_NOT_CONNECTED:
1058 	            case PCMK_EXEC_INVALID:
1059 	                execution_status = PCMK_EXEC_ERROR;
1060 	                break;
1061 	
1062 	            case PCMK_EXEC_NO_FENCE_DEVICE:
1063 	                /* This should be possible only for probes in practice, but
1064 	                 * interpret for all actions to be safe.
1065 	                 */
1066 	                if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1067 	                                 pcmk__str_none)) {
1068 	                    exit_status = PCMK_OCF_NOT_RUNNING;
1069 	
1070 	                } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1071 	                                        pcmk__str_none)) {
1072 	                    exit_status = PCMK_OCF_OK;
1073 	
1074 	                } else {
1075 	                    exit_status = PCMK_OCF_NOT_INSTALLED;
1076 	                }
1077 	                execution_status = PCMK_EXEC_ERROR;
1078 	                break;
1079 	
1080 	            case PCMK_EXEC_NOT_SUPPORTED:
1081 	                exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
1082 	                break;
1083 	
1084 	            default:
1085 	                break;
1086 	        }
1087 	    }
1088 	
1089 	    pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
1090 	
1091 	    // Certain successful actions change the known state of the resource
1092 	    if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
1093 	
1094 	        if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1095 	            pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
1096 	                             PCMK_EXEC_DONE, NULL); // "running"
1097 	
1098 	        } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1099 	                                pcmk__str_casei)) {
1100 	            pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1101 	                             PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
1102 	        }
1103 	    }
1104 	
1105 	    /* The recurring timer should not be running at this point in any case, but
1106 	     * as a failsafe, stop it if it is.
1107 	     */
1108 	    stop_recurring_timer(cmd);
1109 	
1110 	    /* Reschedule this command if appropriate. If a recurring command is *not*
1111 	     * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1112 	     * not be removed from recurring_ops by cmd_finalize().
1113 	     */
1114 	    if (rsc && (cmd->interval_ms > 0)
1115 	        && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1116 	        start_recurring_timer(cmd);
1117 	    }
1118 	
1119 	    cmd_finalize(cmd, rsc);
1120 	}
1121 	
1122 	void
1123 	execd_fencer_connection_failed(void)
1124 	{
1125 	    GHashTableIter iter;
1126 	    lrmd_rsc_t *rsc = NULL;
1127 	
1128 	    pcmk__warn("Connection to fencer lost (any pending operations for fence "
1129 	               "devices will be considered failed)");
1130 	
1131 	    g_hash_table_iter_init(&iter, rsc_list);
1132 	    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
1133 	        if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1134 	                          pcmk__str_none)) {
1135 	            continue;
1136 	        }
1137 	
1138 	        /* If we registered this fence device, we don't know whether the
1139 	         * fencer still has the registration or not. Cause future probes to
1140 	         * return an error until the resource is stopped or started
1141 	         * successfully. This is especially important if the controller also
1142 	         * went away (possibly due to a cluster layer restart) and won't
1143 	         * receive our client notification of any monitors finalized below.
1144 	         */
1145 	        if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
1146 	            pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1147 	                             PCMK_EXEC_NOT_CONNECTED,
1148 	                             "Lost connection to fencer");
1149 	        }
1150 	
1151 	        // Consider any active, pending, or recurring operations as failed
1152 	
1153 	        for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
1154 	            lrmd_cmd_t *cmd = op->data;
1155 	
1156 	            /* This won't free a recurring op but instead restart its timer.
1157 	             * If cmd is rsc->active, this will set rsc->active to NULL, so we
1158 	             * don't have to worry about finalizing it a second time below.
1159 	             */
1160 	            fencing_rsc_action_complete(cmd, CRM_EX_ERROR,
1161 	                                        PCMK_EXEC_NOT_CONNECTED,
1162 	                                        "Lost connection to fencer");
1163 	        }
1164 	
1165 	        if (rsc->active != NULL) {
1166 	            rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
1167 	        }
1168 	        while (rsc->pending_ops != NULL) {
1169 	            // This will free the op and remove it from rsc->pending_ops
1170 	            fencing_rsc_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
1171 	                                        CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1172 	                                        "Lost connection to fencer");
1173 	        }
1174 	    }
1175 	}
1176 	
1177 	/*!
1178 	 * \internal
1179 	 * \brief Execute a fencing resource "start" action
1180 	 *
1181 	 * Start a fencing resource by registering it with the fencer. (Fencing agents
1182 	 * don't have a start command.)
1183 	 *
1184 	 * \param[in,out] fencer_api  Connection to fencer
1185 	 * \param[in]     rsc         Fencing resource to start
1186 	 * \param[in]     cmd         Start command to execute
1187 	 *
1188 	 * \return pcmk_ok on success, -errno otherwise
1189 	 */
1190 	static int
1191 	start_fencing_rsc(stonith_t *fencer_api, const lrmd_rsc_t *rsc,
1192 	                  const lrmd_cmd_t *cmd)
1193 	{
1194 	    char *key = NULL;
1195 	    char *value = NULL;
1196 	    stonith_key_value_t *device_params = NULL;
1197 	    int rc = pcmk_ok;
1198 	
1199 	    // Convert command parameters to fencer API key/values
1200 	    if (cmd->params) {
1201 	        GHashTableIter iter;
1202 	
1203 	        g_hash_table_iter_init(&iter, cmd->params);
1204 	        while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1205 	            device_params = stonith__key_value_add(device_params, key, value);
1206 	        }
1207 	    }
1208 	
1209 	    /* The fencer will automatically register devices via CIB notifications
1210 	     * when the CIB changes, but to avoid a possible race condition between
1211 	     * the fencer receiving the notification and the executor requesting that
1212 	     * resource, the executor registers the device as well. The fencer knows how
1213 	     * to handle duplicate registrations.
1214 	     */
1215 	    rc = fencer_api->cmds->register_device(fencer_api, st_opt_sync_call,
1216 	                                           cmd->rsc_id, rsc->provider,
1217 	                                           rsc->type, device_params);
1218 	
1219 	    stonith__key_value_freeall(device_params, true, true);
1220 	    return rc;
1221 	}
1222 	
1223 	/*!
1224 	 * \internal
1225 	 * \brief Execute a fencing resource "stop" action
1226 	 *
1227 	 * Stop a fencing resource by unregistering it with the fencer. (Fencing agents
1228 	 * don't have a stop command.)
1229 	 *
1230 	 * \param[in,out] fencer_api  Connection to fencer
1231 	 * \param[in]     rsc         Fencing resource to stop
1232 	 *
1233 	 * \return pcmk_ok on success, -errno otherwise
1234 	 */
1235 	static inline int
1236 	stop_fencing_rsc(stonith_t *fencer_api, const lrmd_rsc_t *rsc)
1237 	{
1238 	    /* @TODO Failure would indicate a problem communicating with fencer;
1239 	     * perhaps we should try reconnecting and retrying a few times?
1240 	     */
1241 	    return fencer_api->cmds->remove_device(fencer_api, st_opt_sync_call,
1242 	                                           rsc->rsc_id);
1243 	}
1244 	
1245 	static void
1246 	fencing_rsc_monitor_cb(stonith_t *stonith, stonith_callback_data_t *data)
1247 	{
1248 	    if ((data == NULL) || (data->userdata == NULL)) {
1249 	        pcmk__err("Ignoring fencing resource monitor result: "
1250 	                  "Invalid callback arguments (bug?)");
1251 	    } else {
1252 	        fencing_rsc_action_complete((lrmd_cmd_t *) data->userdata,
1253 	                                    stonith__exit_status(data),
1254 	                                    stonith__execution_status(data),
1255 	                                    stonith__exit_reason(data));
1256 	    }
1257 	}
1258 	
1259 	/*!
1260 	 * \internal
1261 	 * \brief Initiate a fencing resource recurring "monitor" action
1262 	 *
1263 	 * \param[in,out] fencer_api  Connection to fencer
1264 	 * \param[in,out] rsc         Fencing resource to monitor
1265 	 * \param[in]     cmd         Monitor command being executed
1266 	 *
1267 	 * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1268 	 */
1269 	static inline int
1270 	monitor_fencing_rsc(stonith_t *fencer_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1271 	{
1272 	    int rc = fencer_api->cmds->monitor(fencer_api, 0, cmd->rsc_id,
1273 	                                       pcmk__timeout_ms2s(cmd->timeout));
1274 	
1275 	    rc = fencer_api->cmds->register_callback(fencer_api, rc, 0, 0, cmd,
1276 	                                             "fencing_rsc_monitor_cb",
1277 	                                             fencing_rsc_monitor_cb);
1278 	    if (rc == TRUE) {
1279 	        rsc->active = cmd;
1280 	        rc = pcmk_ok;
1281 	    } else {
1282 	        rc = -pcmk_err_generic;
1283 	    }
1284 	    return rc;
1285 	}
1286 	
1287 	static void
1288 	execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1289 	{
1290 	    int rc = pcmk_ok;
1291 	    const char *rc_s = NULL;
1292 	    bool do_monitor = false;
1293 	
1294 	    // Don't free; belongs to pacemaker-execd.c
1295 	    stonith_t *fencer_api = execd_get_fencer_connection();
1296 	
1297 	    if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)
1298 	        && (cmd->interval_ms == 0)) {
1299 	        // Probes don't require a fencer connection
1300 	        fencing_rsc_action_complete(cmd, rsc->fence_probe_result.exit_status,
1301 	                                    rsc->fence_probe_result.execution_status,
1302 	                                    rsc->fence_probe_result.exit_reason);
1303 	        return;
1304 	    }
1305 	
1306 	    if (fencer_api == NULL) {
1307 	        fencing_rsc_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
1308 	                                    PCMK_EXEC_NOT_CONNECTED,
1309 	                                    "No connection to fencer");
1310 	        return;
1311 	    }
1312 	
1313 	    if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1314 	        rc = start_fencing_rsc(fencer_api, rsc, cmd);
1315 	        if (rc == pcmk_ok) {
1316 	            do_monitor = true;
1317 	        }
1318 	
1319 	    } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
1320 	        rc = stop_fencing_rsc(fencer_api, rsc);
1321 	
1322 	    } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1323 	                            pcmk__str_casei)) {
1324 	        do_monitor = true;
1325 	
1326 	    } else {
1327 	        fencing_rsc_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
1328 	                                    PCMK_EXEC_ERROR,
1329 	                                    "Invalid fence device action (bug?)");
1330 	        return;
1331 	    }
1332 	
1333 	    if (do_monitor) {
1334 	        rc = monitor_fencing_rsc(fencer_api, rsc, cmd);
1335 	        if (rc == pcmk_ok) {
1336 	            // Don't clean up yet. We will get the result of the monitor later.
1337 	            return;
1338 	        }
1339 	    }
1340 	
1341 	    if (rc != -pcmk_err_generic) {
1342 	        rc_s = pcmk_strerror(rc);
1343 	    }
1344 	    fencing_rsc_action_complete(cmd,
1345 	                                ((rc == pcmk_rc_ok)? CRM_EX_OK : CRM_EX_ERROR),
1346 	                                stonith__legacy2status(rc), rc_s);
1347 	}
1348 	
1349 	static void
1350 	execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
1351 	{
1352 	    svc_action_t *action = NULL;
1353 	    GHashTable *params_copy = NULL;
1354 	
1355 	    pcmk__assert((rsc != NULL) && (cmd != NULL));
1356 	
1357 	    pcmk__trace("Creating action, resource:%s action:%s class:%s provider:%s "
1358 	                "agent:%s",
1359 	                rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1360 	
1361 	    params_copy = pcmk__str_table_dup(cmd->params);
1362 	
1363 	    action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1364 	                                     rsc->type,
1365 	                                     normalize_action_name(rsc, cmd->action),
1366 	                                     cmd->interval_ms, cmd->timeout,
1367 	                                     params_copy, cmd->service_flags);
1368 	
1369 	    if (action == NULL) {
1370 	        pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1371 	                         PCMK_EXEC_ERROR, strerror(ENOMEM));
1372 	        cmd_finalize(cmd, rsc);
1373 	        return;
1374 	    }
1375 	
1376 	    if (action->rc != PCMK_OCF_UNKNOWN) {
1377 	        services__copy_result(action, &(cmd->result));
1378 	        services_action_free(action);
1379 	        cmd_finalize(cmd, rsc);
1380 	        return;
1381 	    }
1382 	
1383 	    action->cb_data = cmd;
1384 	
1385 	    if (services_action_async(action, action_complete)) {
1386 	        /* The services library has taken responsibility for the action. It
1387 	         * could be pending, blocked, or merged into a duplicate recurring
1388 	         * action, in which case the action callback (action_complete())
1389 	         * will be called when the action completes, otherwise the callback has
1390 	         * already been called.
1391 	         *
1392 	         * action_complete() calls cmd_finalize() which can free cmd, so cmd
1393 	         * cannot be used here.
1394 	         */
1395 	    } else {
1396 	        /* This is a recurring action that is not being cancelled and could not
1397 	         * be initiated. It has been rescheduled, and the action callback
1398 	         * (action_complete()) has been called, which in this case has already
1399 	         * called cmd_finalize(), which in this case should only reset (not
1400 	         * free) cmd.
1401 	         */
1402 	        services__copy_result(action, &(cmd->result));
1403 	        services_action_free(action);
1404 	    }
1405 	}
1406 	
1407 	static gboolean
1408 	execute_resource_action(gpointer user_data)
1409 	{
1410 	    lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
1411 	    lrmd_cmd_t *cmd = NULL;
1412 	
1413 	    CRM_CHECK(rsc != NULL, return FALSE);
1414 	
1415 	    if (rsc->active) {
1416 	        pcmk__trace("%s is still active", rsc->rsc_id);
1417 	        return TRUE;
1418 	    }
1419 	
1420 	    if (rsc->pending_ops) {
1421 	        GList *first = rsc->pending_ops;
1422 	
1423 	        cmd = first->data;
1424 	        if (cmd->delay_id) {
1425 	            pcmk__trace("Command %s %s was asked to run too early, waiting for "
1426 	                        "start_delay timeout of %dms",
1427 	                        cmd->rsc_id, cmd->action, cmd->start_delay);
1428 	            return TRUE;
1429 	        }
1430 	        rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1431 	        g_list_free_1(first);
1432 	
1433 	#ifdef PCMK__TIME_USE_CGT
1434 	        get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1435 	#endif
1436 	        cmd->epoch_last_run = time(NULL);
1437 	    }
1438 	
1439 	    if (!cmd) {
1440 	        pcmk__trace("Nothing further to do for %s", rsc->rsc_id);
1441 	        return TRUE;
1442 	    }
1443 	
1444 	    rsc->active = cmd;          /* only one op at a time for a rsc */
1445 	    if (cmd->interval_ms) {
1446 	        rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1447 	    }
1448 	
1449 	    log_execute(cmd);
1450 	
1451 	    if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1452 	        execute_stonith_action(rsc, cmd);
1453 	    } else {
1454 	        execute_nonstonith_action(rsc, cmd);
1455 	    }
1456 	
1457 	    return TRUE;
1458 	}
1459 	
1460 	void
1461 	execd_free_rsc(gpointer data)
1462 	{
1463 	    GList *gIter = NULL;
1464 	    lrmd_rsc_t *rsc = data;
1465 	    bool is_fencing_rsc = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1466 	                                       pcmk__str_casei);
1467 	
1468 	    gIter = rsc->pending_ops;
1469 	    while (gIter != NULL) {
1470 	        GList *next = gIter->next;
1471 	        lrmd_cmd_t *cmd = gIter->data;
1472 	
1473 	        /* command was never executed */
1474 	        cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1475 	        cmd_finalize(cmd, NULL);
1476 	
1477 	        gIter = next;
1478 	    }
1479 	    /* frees list, but not list elements. */
1480 	    g_list_free(rsc->pending_ops);
1481 	
1482 	    gIter = rsc->recurring_ops;
1483 	    while (gIter != NULL) {
1484 	        GList *next = gIter->next;
1485 	        lrmd_cmd_t *cmd = gIter->data;
1486 	
1487 	        if (is_fencing_rsc) {
1488 	            cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1489 	            /* If a fencing resource's recurring operation is in-flight, just
1490 	             * mark it as cancelled. It is not safe to finalize/free the cmd
1491 	             * until the fencer API says it has either completed or timed out.
1492 	             */
1493 	            if (rsc->active != cmd) {
1494 	                cmd_finalize(cmd, NULL);
1495 	            }
1496 	        } else {
1497 	            /* This command is already handed off to service library,
1498 	             * let service library cancel it and tell us via the callback
1499 	             * when it is cancelled. The rsc can be safely destroyed
1500 	             * even if we are waiting for the cancel result */
1501 	            services_action_cancel(rsc->rsc_id,
1502 	                                   normalize_action_name(rsc, cmd->action),
1503 	                                   cmd->interval_ms);
1504 	        }
1505 	
1506 	        gIter = next;
1507 	    }
1508 	    /* frees list, but not list elements. */
1509 	    g_list_free(rsc->recurring_ops);
1510 	
1511 	    free(rsc->rsc_id);
1512 	    free(rsc->class);
1513 	    free(rsc->provider);
1514 	    free(rsc->type);
1515 	    mainloop_destroy_trigger(rsc->work);
1516 	
1517 	    free(rsc);
1518 	}
1519 	
1520 	int
1521 	execd_process_signon(pcmk__client_t *client, xmlNode *request, int call_id,
1522 	                     xmlNode **reply)
1523 	{
1524 	    int rc = pcmk_rc_ok;
1525 	    time_t now = time(NULL);
1526 	    const char *protocol_version = pcmk__xe_get(request,
1527 	                                                PCMK__XA_LRMD_PROTOCOL_VERSION);
1528 	    const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
1529 	
1530 	    if (pcmk__compare_versions(protocol_version,
1531 	                               LRMD_COMPATIBLE_PROTOCOL) < 0) {
1532 	        pcmk__err("Cluster API version must be greater than or equal to "
1533 	                  LRMD_COMPATIBLE_PROTOCOL " , not %s",
1534 	                  protocol_version);
1535 	        rc = EPROTO;
1536 	    }
1537 	
1538 	    if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) {
1539 	#ifdef PCMK__COMPILE_REMOTE
1540 	        if ((client->remote != NULL)
1541 	            && pcmk__is_set(client->flags,
1542 	                            pcmk__client_tls_handshake_complete)) {
1543 	            const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP);
1544 	
1545 	            // This is a remote connection from a cluster node's controller
1546 	            ipc_proxy_add_provider(client);
1547 	
1548 	            /* @TODO Allowing multiple proxies makes no sense given that clients
1549 	             * have no way to choose between them. Maybe always use the most
1550 	             * recent one and switch any existing IPC connections to use it,
1551 	             * by iterating over ipc_clients here, and if client->id doesn't
1552 	             * match the client's userdata, replace the userdata with the new
1553 	             * ID. After the iteration, call lrmd_remote_client_destroy() on any
1554 	             * of the replaced values in ipc_providers.
1555 	             */
1556 	
1557 	            /* If this was a register operation, also ask for new schema files but
1558 	             * only if it's supported by the protocol version.
1559 	             */
1560 	            if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) &&
1561 	                LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) {
1562 	                remoted_request_cib_schema_files();
1563 	            }
1564 	        } else {
1565 	            rc = EACCES;
1566 	        }
1567 	#else
1568 	        rc = EPROTONOSUPPORT;
1569 	#endif
1570 	    }
1571 	
1572 	    pcmk__assert(reply != NULL);
1573 	
1574 	    *reply = execd_create_reply(pcmk_rc2legacy(rc), call_id);
1575 	    pcmk__xe_set(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
1576 	    pcmk__xe_set(*reply, PCMK__XA_LRMD_CLIENTID, client->id);
1577 	    pcmk__xe_set(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1578 	    pcmk__xe_set_time(*reply, PCMK__XA_UPTIME, now - start_time);
1579 	
1580 	    if (start_state) {
1581 	        pcmk__xe_set(*reply, PCMK__XA_NODE_START_STATE, start_state);
1582 	    }
1583 	
1584 	    return rc;
1585 	}
1586 	
1587 	void
1588 	execd_process_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
1589 	{
1590 	    lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1591 	    lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1592 	
1593 	    if (dup &&
1594 	        pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1595 	        pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) &&
1596 	        pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1597 	
1598 	        pcmk__notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1599 	        execd_free_rsc(rsc);
1600 	        return;
1601 	    }
1602 	
1603 	    g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1604 	    pcmk__info("Cached agent information for '%s'", rsc->rsc_id);
1605 	}
1606 	
1607 	int
1608 	execd_process_get_rsc_info(xmlNode *request, int call_id, xmlNode **reply)
1609 	{
1610 	    int rc = pcmk_rc_ok;
1611 	    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1612 	                                            "//" PCMK__XE_LRMD_RSC,
1613 	                                            LOG_ERR);
1614 	    const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1615 	    lrmd_rsc_t *rsc = NULL;
1616 	
1617 	    if (rsc_id == NULL) {
1618 	        rc = ENODEV;
1619 	    } else {
1620 	        rsc = g_hash_table_lookup(rsc_list, rsc_id);
1621 	        if (rsc == NULL) {
1622 	            pcmk__info("Agent information for '%s' not in cache", rsc_id);
1623 	            rc = ENODEV;
1624 	        }
1625 	    }
1626 	
1627 	    CRM_LOG_ASSERT(reply != NULL);
1628 	
1629 	    *reply = execd_create_reply(pcmk_rc2legacy(rc), call_id);
1630 	    if (rsc) {
1631 	        pcmk__xe_set(*reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1632 	        pcmk__xe_set(*reply, PCMK__XA_LRMD_CLASS, rsc->class);
1633 	        pcmk__xe_set(*reply, PCMK__XA_LRMD_PROVIDER, rsc->provider);
1634 	        pcmk__xe_set(*reply, PCMK__XA_LRMD_TYPE, rsc->type);
1635 	    }
1636 	
1637 	    return rc;
1638 	}
1639 	
1640 	int
1641 	execd_process_rsc_unregister(pcmk__client_t *client, xmlNode *request)
1642 	{
1643 	    int rc = pcmk_rc_ok;
1644 	    lrmd_rsc_t *rsc = NULL;
1645 	    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1646 	                                            "//" PCMK__XE_LRMD_RSC,
1647 	                                            LOG_ERR);
1648 	    const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1649 	
1650 	    if (!rsc_id) {
1651 	        return ENODEV;
1652 	    }
1653 	
1654 	    rsc = g_hash_table_lookup(rsc_list, rsc_id);
1655 	    if (rsc == NULL) {
1656 	        pcmk__info("Ignoring unregistration of resource '%s', which is not "
1657 	                   "registered", rsc_id);
1658 	        return pcmk_rc_ok;
1659 	    }
1660 	
1661 	    if (rsc->active) {
1662 	        /* let the caller know there are still active ops on this rsc to watch for */
1663 	        pcmk__trace("Operation (%p) still in progress for unregistered "
1664 	                    "resource %s", rsc->active, rsc_id);
1665 	        rc = EINPROGRESS;
1666 	    }
1667 	
1668 	    g_hash_table_remove(rsc_list, rsc_id);
1669 	
1670 	    return rc;
1671 	}
1672 	
1673 	int
1674 	execd_process_rsc_exec(pcmk__client_t *client, xmlNode *request)
1675 	{
1676 	    lrmd_rsc_t *rsc = NULL;
1677 	    lrmd_cmd_t *cmd = NULL;
1678 	    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1679 	                                            "//" PCMK__XE_LRMD_RSC,
1680 	                                            LOG_ERR);
1681 	    const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1682 	
1683 	    if (!rsc_id) {
1684 	        return EINVAL;
1685 	    }
1686 	
1687 	    if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1688 	        pcmk__info("Resource '%s' not found (%d active resources)", rsc_id,
1689 	                   g_hash_table_size(rsc_list));
1690 	        return ENODEV;
1691 	    }
1692 	
1693 	    cmd = create_lrmd_cmd(request, client);
1694 	
1695 	    /* Don't reference cmd after handing it off to be scheduled.
1696 	     * The cmd could get merged and freed. */
1697 	    schedule_lrmd_cmd(rsc, cmd);
1698 	
1699 	    return pcmk_rc_ok;
1700 	}
1701 	
1702 	static int
1703 	cancel_op(const char *rsc_id, const char *action, guint interval_ms)
1704 	{
1705 	    GList *gIter = NULL;
1706 	    lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1707 	
1708 	    /* How to cancel an action.
1709 	     * 1. Check pending ops list, if it hasn't been handed off
1710 	     *    to the service library or stonith recurring list remove
1711 	     *    it there and that will stop it.
1712 	     * 2. If it isn't in the pending ops list, then it's either a
1713 	     *    recurring op in the stonith recurring list, or the service
1714 	     *    library's recurring list.  Stop it there
1715 	     * 3. If not found in any lists, then this operation has either
1716 	     *    been executed already and is not a recurring operation, or
1717 	     *    never existed.
1718 	     */
1719 	    if (!rsc) {
1720 	        return ENODEV;
1721 	    }
1722 	
1723 	    for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1724 	        lrmd_cmd_t *cmd = gIter->data;
1725 	
1726 	        if (action_matches(cmd, action, interval_ms)) {
1727 	            cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1728 	            cmd_finalize(cmd, rsc);
1729 	            return pcmk_rc_ok;
1730 	        }
1731 	    }
1732 	
1733 	    if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1734 	        /* The service library does not handle stonith operations.
1735 	         * We have to handle recurring stonith operations ourselves. */
1736 	        for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1737 	            lrmd_cmd_t *cmd = gIter->data;
1738 	
1739 	            if (action_matches(cmd, action, interval_ms)) {
1740 	                cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1741 	                if (rsc->active != cmd) {
1742 	                    cmd_finalize(cmd, rsc);
1743 	                }
1744 	                return pcmk_rc_ok;
1745 	            }
1746 	        }
1747 	    } else if (services_action_cancel(rsc_id,
1748 	                                      normalize_action_name(rsc, action),
1749 	                                      interval_ms) == TRUE) {
1750 	        /* The service library will tell the action_complete callback function
1751 	         * this action was cancelled, which will destroy the cmd and remove
1752 	         * it from the recurring_op list. Do not do that in this function
1753 	         * if the service library says it cancelled it. */
1754 	        return pcmk_rc_ok;
1755 	    }
1756 	
1757 	    return EOPNOTSUPP;
1758 	}
1759 	
1760 	static void
1761 	cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
1762 	{
1763 	    GList *cmd_list = NULL;
1764 	    GList *cmd_iter = NULL;
1765 	
1766 	    /* Notice a copy of each list is created when concat is called.
1767 	     * This prevents odd behavior from occurring when the cmd_list
1768 	     * is iterated through later on.  It is possible the cancel_op
1769 	     * function may end up modifying the recurring_ops and pending_ops
1770 	     * lists.  If we did not copy those lists, our cmd_list iteration
1771 	     * could get messed up.*/
1772 	    if (rsc->recurring_ops) {
1773 	        cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1774 	    }
1775 	    if (rsc->pending_ops) {
1776 	        cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1777 	    }
1778 	    if (!cmd_list) {
1779 	        return;
1780 	    }
1781 	
1782 	    for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1783 	        lrmd_cmd_t *cmd = cmd_iter->data;
1784 	
1785 	        if (cmd->interval_ms == 0) {
1786 	            continue;
1787 	        }
1788 	
1789 	        if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1790 	            continue;
1791 	        }
1792 	
1793 	        cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1794 	    }
1795 	    /* frees only the copied list data, not the cmds */
1796 	    g_list_free(cmd_list);
1797 	}
1798 	
1799 	int
1800 	execd_process_rsc_cancel(pcmk__client_t *client, xmlNode *request)
1801 	{
1802 	    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1803 	                                            "//" PCMK__XE_LRMD_RSC,
1804 	                                            LOG_ERR);
1805 	    const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1806 	    const char *action = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
1807 	    guint interval_ms = 0;
1808 	
1809 	    pcmk__xe_get_guint(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms);
1810 	
1811 	    if (!rsc_id || !action) {
1812 	        return EINVAL;
1813 	    }
1814 	
1815 	    return cancel_op(rsc_id, action, interval_ms);
1816 	}
1817 	
1818 	static void
1819 	add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
1820 	{
1821 	    xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC);
1822 	
1823 	    pcmk__xe_set(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1824 	    for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1825 	        lrmd_cmd_t *cmd = item->data;
1826 	        xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP);
1827 	
1828 	        pcmk__xe_set(op_xml, PCMK__XA_LRMD_RSC_ACTION,
1829 	                     pcmk__s(cmd->real_action, cmd->action));
1830 	        pcmk__xe_set_guint(op_xml, PCMK__XA_LRMD_RSC_INTERVAL,
1831 	                           cmd->interval_ms);
1832 	        pcmk__xe_set_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig);
1833 	    }
1834 	}
1835 	
1836 	int
1837 	execd_process_get_recurring(xmlNode *request, int call_id, xmlNode **reply)
1838 	{
1839 	    int rc = pcmk_rc_ok;
1840 	    const char *rsc_id = NULL;
1841 	    lrmd_rsc_t *rsc = NULL;
1842 	    xmlNode *rsc_xml = NULL;
1843 	
1844 	    // Resource ID is optional
1845 	    rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL);
1846 	    if (rsc_xml) {
1847 	        rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL);
1848 	    }
1849 	    if (rsc_xml) {
1850 	        rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1851 	    }
1852 	
1853 	    // If resource ID is specified, resource must exist
1854 	    if (rsc_id != NULL) {
1855 	        rsc = g_hash_table_lookup(rsc_list, rsc_id);
1856 	        if (rsc == NULL) {
1857 	            pcmk__info("Resource '%s' not found (%d active resources)", rsc_id,
1858 	                       g_hash_table_size(rsc_list));
1859 	            rc = ENODEV;
1860 	        }
1861 	    }
1862 	
1863 	    CRM_LOG_ASSERT(reply != NULL);
1864 	
1865 	    *reply = execd_create_reply(pcmk_rc2legacy(rc), call_id);
1866 	
1867 	    // If resource ID is not specified, check all resources
1868 	    if (rsc_id == NULL) {
1869 	        GHashTableIter iter;
1870 	        char *key = NULL;
1871 	
1872 	        g_hash_table_iter_init(&iter, rsc_list);
1873 	        while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1874 	                                      (gpointer *) &rsc)) {
1875 	            add_recurring_op_xml(*reply, rsc);
1876 	        }
1877 	    } else if (rsc) {
1878 	        add_recurring_op_xml(*reply, rsc);
1879 	    }
1880 	
1881 	    return rc;
1882 	}
1883