1    	/*
2    	 * Copyright 2012-2026 the Pacemaker project contributors
3    	 *
4    	 * The version control history for this file may have further details.
5    	 *
6    	 * This source code is licensed under the GNU General Public License version 2
7    	 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8    	 */
9    	
10   	#include <crm_internal.h>
11   	
12   	#include <errno.h>
13   	#include <stdbool.h>
14   	
15   	#include <crm/crm.h>
16   	#include <crm/common/xml.h>
17   	#include <crm/lrmd_internal.h>          // lrmd__*
18   	
19   	#include <pacemaker-internal.h>
20   	#include <pacemaker-controld.h>
21   	
22   	static GHashTable *lrm_state_table = NULL;
23   	
24   	static void
25   	free_rsc_info(gpointer value)
26   	{
27   	    lrmd_rsc_info_t *rsc_info = value;
28   	
29   	    lrmd_free_rsc_info(rsc_info);
30   	}
31   	
32   	static void
33   	free_deletion_op(gpointer value)
34   	{
35   	    struct pending_deletion_op_s *op = value;
36   	
37   	    free(op->rsc);
38   	    delete_ha_msg_input(op->input);
39   	    free(op);
40   	}
41   	
42   	static void
43   	free_recurring_op(gpointer value)
44   	{
45   	    active_op_t *op = value;
46   	
47   	    free(op->user_data);
48   	    free(op->rsc_id);
49   	    free(op->op_type);
50   	    free(op->op_key);
51   	    g_clear_pointer(&op->params, g_hash_table_destroy);
52   	    free(op);
53   	}
54   	
55   	static gboolean
56   	fail_pending_op(gpointer key, gpointer value, gpointer user_data)
57   	{
58   	    lrmd_event_data_t event = { 0, };
59   	    lrm_state_t *lrm_state = user_data;
60   	    active_op_t *op = value;
61   	
62   	    pcmk__trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)",
63   	                op->rsc_id, op->op_type, op->interval_ms,
64   	                lrm_state->node_name, (const char *) key, op->user_data);
65   	
66   	    event.type = lrmd_event_exec_complete;
67   	    event.rsc_id = op->rsc_id;
68   	    event.op_type = op->op_type;
69   	    event.user_data = op->user_data;
70   	    event.timeout = 0;
71   	    event.interval_ms = op->interval_ms;
72   	    lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED,
73   	                     "Action was pending when executor connection was dropped");
74   	    event.t_run = op->start_time;
75   	    event.t_rcchange = op->start_time;
76   	
77   	    event.call_id = op->call_id;
78   	    event.remote_nodename = lrm_state->node_name;
79   	    event.params = op->params;
80   	
81   	    process_lrm_event(lrm_state, &event, op, NULL);
82   	    lrmd__reset_result(&event);
83   	    return TRUE;
84   	}
85   	
86   	gboolean
87   	lrm_state_is_local(lrm_state_t *lrm_state)
88   	{
89   	    return (lrm_state != NULL) && controld_is_local_node(lrm_state->node_name);
90   	}
91   	
92   	/*!
93   	 * \internal
94   	 * \brief Create executor state entry for a node and add it to the state table
95   	 *
96   	 * \param[in]  node_name  Node to create entry for
97   	 *
98   	 * \return Newly allocated executor state object initialized for \p node_name
99   	 */
100  	static lrm_state_t *
101  	lrm_state_create(const char *node_name)
102  	{
103  	    lrm_state_t *state = NULL;
104  	
105  	    if (!node_name) {
106  	        pcmk__err("No node name given for lrm state object");
107  	        return NULL;
108  	    }
109  	
110  	    state = pcmk__assert_alloc(1, sizeof(lrm_state_t));
111  	
112  	    state->node_name = pcmk__str_copy(node_name);
113  	    state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info);
114  	    state->deletion_ops = pcmk__strkey_table(free, free_deletion_op);
115  	    state->active_ops = pcmk__strkey_table(free, free_recurring_op);
116  	    state->resource_history = pcmk__strkey_table(NULL, history_free);
117  	    state->metadata_cache = metadata_cache_new();
118  	
119  	    g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
120  	    return state;
121  	}
122  	
123  	static void
124  	internal_lrm_state_destroy(gpointer data)
125  	{
126  	    lrm_state_t *lrm_state = data;
127  	
(1) Event path: Condition "!lrm_state", taking false branch.
128  	    if (!lrm_state) {
129  	        return;
130  	    }
131  	
132  	    /* Rather than directly remove the recorded proxy entries from proxy_table,
133  	     * make sure any connected proxies get disconnected. So that
134  	     * remote_proxy_disconnected() will be called and as well remove the
135  	     * entries from proxy_table.
136  	     */
137  	    controld_remote_proxy_disconnect_node(lrm_state->node_name);
138  	
139  	    remote_ra_cleanup(lrm_state);
140  	    lrmd_api_delete(lrm_state->conn);
141  	
(2) Event path: Condition "_p", taking true branch.
142  	    g_clear_pointer(&lrm_state->rsc_info_cache, g_hash_table_destroy);
(3) Event path: Condition "_p", taking true branch.
143  	    g_clear_pointer(&lrm_state->resource_history, g_hash_table_destroy);
(4) Event path: Condition "_p", taking true branch.
144  	    g_clear_pointer(&lrm_state->deletion_ops, g_hash_table_destroy);
CID (unavailable; MK=e869f4c7ad785dcbc90410a40d8872c1) (#4 of 4): Inconsistent C union access (INCONSISTENT_UNION_ACCESS):
(5) Event assign_union_field: The union field "in" of "_pp" is written.
(6) Event inconsistent_union_field_access: In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in".
145  	    g_clear_pointer(&lrm_state->active_ops, g_hash_table_destroy);
146  	
147  	    metadata_cache_free(lrm_state->metadata_cache);
148  	
149  	    free((char *)lrm_state->node_name);
150  	    free(lrm_state);
151  	}
152  	
153  	void
154  	lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
155  	{
156  	    if (lrm_state->resource_history) {
157  	        pcmk__trace("Resetting resource history cache with %u members",
158  	                    g_hash_table_size(lrm_state->resource_history));
159  	        g_hash_table_remove_all(lrm_state->resource_history);
160  	    }
161  	    if (lrm_state->deletion_ops) {
162  	        pcmk__trace("Resetting deletion operations cache with %u members",
163  	                    g_hash_table_size(lrm_state->deletion_ops));
164  	        g_hash_table_remove_all(lrm_state->deletion_ops);
165  	    }
166  	    if (lrm_state->active_ops != NULL) {
167  	        pcmk__trace("Resetting active operations cache with %u members",
168  	                    g_hash_table_size(lrm_state->active_ops));
169  	        g_hash_table_remove_all(lrm_state->active_ops);
170  	    }
171  	    if (lrm_state->rsc_info_cache) {
172  	        pcmk__trace("Resetting resource information cache with %u members",
173  	                    g_hash_table_size(lrm_state->rsc_info_cache));
174  	        g_hash_table_remove_all(lrm_state->rsc_info_cache);
175  	    }
176  	    if (reset_metadata) {
177  	        metadata_cache_reset(lrm_state->metadata_cache);
178  	    }
179  	}
180  	
181  	void
182  	controld_execd_state_table_init(void)
183  	{
184  	    if (lrm_state_table != NULL) {
185  	        return;
186  	    }
187  	
188  	    lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy);
189  	}
190  	
191  	void
192  	controld_execd_state_table_free(void)
193  	{
194  	    g_clear_pointer(&lrm_state_table, g_hash_table_destroy);
195  	}
196  	
197  	/*!
198  	 * \internal
199  	 * \brief Get executor state object
200  	 *
201  	 * \param[in] node_name  Get executor state for this node (local node if NULL)
202  	 * \param[in] create     If true, create executor state if it doesn't exist
203  	 *
204  	 * \return Executor state object for \p node_name
205  	 */
206  	lrm_state_t *
207  	controld_get_executor_state(const char *node_name, bool create)
208  	{
209  	    lrm_state_t *state = NULL;
210  	
211  	    if ((node_name == NULL) && (controld_globals.cluster != NULL)) {
212  	        node_name = controld_globals.cluster->priv->node_name;
213  	    }
214  	    if ((node_name == NULL) || (lrm_state_table == NULL)) {
215  	        return NULL;
216  	    }
217  	
218  	    state = g_hash_table_lookup(lrm_state_table, node_name);
219  	    if ((state == NULL) && create) {
220  	        state = lrm_state_create(node_name);
221  	    }
222  	    return state;
223  	}
224  	
225  	/* @TODO the lone caller just needs to iterate over the values, so replace this
226  	 * with a g_hash_table_foreach() wrapper instead
227  	 */
228  	GList *
229  	lrm_state_get_list(void)
230  	{
231  	    if (lrm_state_table == NULL) {
232  	        return NULL;
233  	    }
234  	    return g_hash_table_get_values(lrm_state_table);
235  	}
236  	
237  	void
238  	lrm_state_disconnect_only(lrm_state_t * lrm_state)
239  	{
240  	    guint removed = 0;
241  	
242  	    if (!lrm_state->conn) {
243  	        return;
244  	    }
245  	    pcmk__trace("Disconnecting %s", lrm_state->node_name);
246  	
247  	    controld_remote_proxy_disconnect_node(lrm_state->node_name);
248  	
249  	    lrm_state->conn->cmds->disconnect(lrm_state->conn);
250  	
251  	    if (!pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
252  	        removed = g_hash_table_foreach_remove(lrm_state->active_ops,
253  	                                              fail_pending_op, lrm_state);
254  	        pcmk__trace("Synthesized %u operation failures for %s", removed,
255  	                    lrm_state->node_name);
256  	    }
257  	}
258  	
259  	void
260  	lrm_state_disconnect(lrm_state_t * lrm_state)
261  	{
262  	    lrm_state_disconnect_only(lrm_state);
263  	    g_clear_pointer(&lrm_state->conn, lrmd_api_delete);
264  	}
265  	
266  	int
267  	lrm_state_is_connected(lrm_state_t * lrm_state)
268  	{
269  	    if (!lrm_state->conn) {
270  	        return FALSE;
271  	    }
272  	    return lrm_state->conn->cmds->is_connected(lrm_state->conn);
273  	}
274  	
275  	int
276  	lrm_state_poke_connection(lrm_state_t * lrm_state)
277  	{
278  	
279  	    if (!lrm_state->conn) {
280  	        return -ENOTCONN;
281  	    }
282  	    return lrm_state->conn->cmds->poke_connection(lrm_state->conn);
283  	}
284  	
285  	// \return Standard Pacemaker return code
286  	int
287  	controld_connect_local_executor(lrm_state_t *lrm_state)
288  	{
289  	    int rc = pcmk_rc_ok;
290  	
291  	    if (lrm_state->conn == NULL) {
292  	        lrm_state->conn = lrmd_api_new();
293  	        lrm_state->conn->cmds->set_callback(lrm_state->conn, lrm_op_callback);
294  	    }
295  	
296  	    rc = lrm_state->conn->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
297  	    rc = pcmk_legacy2rc(rc);
298  	
299  	    if (rc == pcmk_rc_ok) {
300  	        lrm_state->num_lrm_register_fails = 0;
301  	    } else {
302  	        lrm_state->num_lrm_register_fails++;
303  	    }
304  	    return rc;
305  	}
306  	
307  	// \return Standard Pacemaker return code
308  	int
309  	controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
310  	                                 int port, int timeout_ms)
311  	{
312  	    int rc = pcmk_rc_ok;
313  	
314  	    if (lrm_state->conn == NULL) {
315  	        lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server,
316  	                                              port);
317  	        lrm_state->conn->cmds->set_callback(lrm_state->conn,
318  	                                            remote_lrm_op_callback);
319  	        lrmd__proxy_set_callback(lrm_state->conn, lrm_state,
320  	                                 controld_remote_proxy_cb);
321  	    }
322  	
323  	    pcmk__trace("Initiating remote connection to %s:%d with timeout %dms",
324  	                server, port, timeout_ms);
325  	    rc = lrm_state->conn->cmds->connect_async(lrm_state->conn,
326  	                                              lrm_state->node_name, timeout_ms);
327  	    if (rc == pcmk_ok) {
328  	        lrm_state->num_lrm_register_fails = 0;
329  	    } else {
330  	        lrm_state->num_lrm_register_fails++; // Ignored for remote connections
331  	    }
332  	    return pcmk_legacy2rc(rc);
333  	}
334  	
335  	int
336  	lrm_state_get_metadata(lrm_state_t * lrm_state,
337  	                       const char *class,
338  	                       const char *provider,
339  	                       const char *agent, char **output, enum lrmd_call_options options)
340  	{
341  	    lrmd_key_value_t *params = NULL;
342  	
343  	    if (!lrm_state->conn) {
344  	        return -ENOTCONN;
345  	    }
346  	
347  	    /* Add the node name to the environment, as is done with normal resource
348  	     * action calls. Meta-data calls shouldn't need it, but some agents are
349  	     * written with an ocf_local_nodename call at the beginning regardless of
350  	     * action. Without the environment variable, the agent would try to contact
351  	     * the controller to get the node name -- but the controller would be
352  	     * blocking on the synchronous meta-data call.
353  	     *
354  	     * At this point, we have to assume that agents are unlikely to make other
355  	     * calls that require the controller, such as crm_node --quorum or
356  	     * --cluster-id.
357  	     *
358  	     * @TODO Make meta-data calls asynchronous. (This will be part of a larger
359  	     * project to make meta-data calls via the executor rather than directly.)
360  	     */
361  	    params = lrmd_key_value_add(params, CRM_META "_" PCMK__META_ON_NODE,
362  	                                lrm_state->node_name);
363  	
364  	    return lrm_state->conn->cmds->get_metadata_params(lrm_state->conn, class,
365  	                                                      provider, agent, output,
366  	                                                      options, params);
367  	}
368  	
369  	int
370  	lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
371  	                 guint interval_ms)
372  	{
373  	    if (!lrm_state->conn) {
374  	        return -ENOTCONN;
375  	    }
376  	
377  	    /* Figure out a way to make this async?
378  	     * NOTICE: Currently it's synced and directly acknowledged in
379  	     * controld_invoke_execd().
380  	     */
381  	    if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
382  	        return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
383  	    }
384  	    return lrm_state->conn->cmds->cancel(lrm_state->conn, rsc_id, action,
385  	                                         interval_ms);
386  	}
387  	
388  	lrmd_rsc_info_t *
389  	lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
390  	{
391  	    lrmd_rsc_info_t *rsc = NULL;
392  	
393  	    if (!lrm_state->conn) {
394  	        return NULL;
395  	    }
396  	    if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
397  	        return remote_ra_get_rsc_info(lrm_state, rsc_id);
398  	    }
399  	
400  	    rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
401  	    if (rsc == NULL) {
402  	        /* only contact the lrmd if we don't already have a cached rsc info */
403  	        rsc = lrm_state->conn->cmds->get_rsc_info(lrm_state->conn, rsc_id,
404  	                                                  options);
405  	        if (rsc == NULL) {
406  			    return NULL;
407  	        }
408  	        /* cache the result */
409  	        g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
410  	    }
411  	
412  	    return lrmd_copy_rsc_info(rsc);
413  	
414  	}
415  	
416  	/*!
417  	 * \internal
418  	 * \brief Initiate a resource agent action
419  	 *
420  	 * \param[in,out] lrm_state       Executor state object
421  	 * \param[in]     rsc_id          ID of resource for action
422  	 * \param[in]     action          Action to execute
423  	 * \param[in]     userdata        String to copy and pass to execution callback
424  	 * \param[in]     interval_ms     Action interval (in milliseconds)
425  	 * \param[in]     timeout_ms      Action timeout (in milliseconds)
426  	 * \param[in]     start_delay_ms  Delay (in ms) before initiating action
427  	 * \param[in]     parameters      Hash table of resource parameters
428  	 * \param[out]    call_id         Where to store call ID on success
429  	 *
430  	 * \return Standard Pacemaker return code
431  	 */
432  	int
433  	controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id,
434  	                                const char *action, const char *userdata,
435  	                                guint interval_ms, int timeout_ms,
436  	                                int start_delay_ms, GHashTable *parameters,
437  	                                int *call_id)
438  	{
439  	    int rc = pcmk_rc_ok;
440  	    lrmd_key_value_t *params = NULL;
441  	
442  	    if (lrm_state->conn == NULL) {
443  	        return ENOTCONN;
444  	    }
445  	
446  	    // Convert parameters from hash table to list
447  	    if (parameters != NULL) {
448  	        const char *key = NULL;
449  	        const char *value = NULL;
450  	        GHashTableIter iter;
451  	
452  	        g_hash_table_iter_init(&iter, parameters);
453  	        while (g_hash_table_iter_next(&iter, (gpointer *) &key,
454  	                                      (gpointer *) &value)) {
455  	            params = lrmd_key_value_add(params, key, value);
456  	        }
457  	    }
458  	
459  	    if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
460  	        rc = controld_execute_remote_agent(lrm_state, rsc_id, action,
461  	                                           userdata, interval_ms, timeout_ms,
462  	                                           start_delay_ms, params, call_id);
463  	
464  	    } else {
465  	        rc = lrm_state->conn->cmds->exec(lrm_state->conn, rsc_id, action,
466  	                                         userdata, interval_ms, timeout_ms,
467  	                                         start_delay_ms,
468  	                                         lrmd_opt_notify_changes_only, params);
469  	        if (rc < 0) {
470  	            rc = pcmk_legacy2rc(rc);
471  	        } else {
472  	            *call_id = rc;
473  	            rc = pcmk_rc_ok;
474  	        }
475  	    }
476  	    return rc;
477  	}
478  	
479  	int
480  	lrm_state_register_rsc(lrm_state_t *lrm_state, const char *rsc_id,
481  	                       const char *class, const char *provider,
482  	                       const char *agent, enum lrmd_call_options options)
483  	{
484  	    if (lrm_state->conn == NULL) {
485  	        return -ENOTCONN;
486  	    }
487  	
488  	    if (is_remote_lrmd_ra(agent, provider, NULL)) {
489  	        return controld_get_executor_state(rsc_id, true)? pcmk_ok : -EINVAL;
490  	    }
491  	
492  	    /* @TODO Implement an asynchronous version of this (currently a blocking
493  	     * call to the lrmd).
494  	     */
495  	    return lrm_state->conn->cmds->register_rsc(lrm_state->conn, rsc_id, class,
496  	                                               provider, agent, options);
497  	}
498  	
499  	int
500  	lrm_state_unregister_rsc(lrm_state_t *lrm_state, const char *rsc_id,
501  	                         enum lrmd_call_options options)
502  	{
503  	    if (lrm_state->conn == NULL) {
504  	        return -ENOTCONN;
505  	    }
506  	
507  	    if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
508  	        g_hash_table_remove(lrm_state_table, rsc_id);
509  	        return pcmk_ok;
510  	    }
511  	
512  	    g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
513  	
514  	    /* @TODO Optimize this ... this function is a blocking round trip from
515  	     * client to daemon. The controld_execd_state.c code path that uses this
516  	     * function should always treat it as an async operation. The executor API
517  	     * should make an async version available.
518  	     */
519  	    return lrm_state->conn->cmds->unregister_rsc(lrm_state->conn, rsc_id,
520  	                                                 options);
521  	}
522