1 /*
2 * Copyright 2012-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <errno.h>
13 #include <stdbool.h>
14
15 #include <crm/crm.h>
16 #include <crm/common/xml.h>
17 #include <crm/lrmd_internal.h> // lrmd__*
18
19 #include <pacemaker-internal.h>
20 #include <pacemaker-controld.h>
21
22 static GHashTable *lrm_state_table = NULL;
23
24 static void
25 free_rsc_info(gpointer value)
26 {
27 lrmd_rsc_info_t *rsc_info = value;
28
29 lrmd_free_rsc_info(rsc_info);
30 }
31
32 static void
33 free_deletion_op(gpointer value)
34 {
35 struct pending_deletion_op_s *op = value;
36
37 free(op->rsc);
38 delete_ha_msg_input(op->input);
39 free(op);
40 }
41
42 static void
43 free_recurring_op(gpointer value)
44 {
45 active_op_t *op = value;
46
47 free(op->user_data);
48 free(op->rsc_id);
49 free(op->op_type);
50 free(op->op_key);
51 g_clear_pointer(&op->params, g_hash_table_destroy);
52 free(op);
53 }
54
55 static gboolean
56 fail_pending_op(gpointer key, gpointer value, gpointer user_data)
57 {
58 lrmd_event_data_t event = { 0, };
59 lrm_state_t *lrm_state = user_data;
60 active_op_t *op = value;
61
62 pcmk__trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)",
63 op->rsc_id, op->op_type, op->interval_ms,
64 lrm_state->node_name, (const char *) key, op->user_data);
65
66 event.type = lrmd_event_exec_complete;
67 event.rsc_id = op->rsc_id;
68 event.op_type = op->op_type;
69 event.user_data = op->user_data;
70 event.timeout = 0;
71 event.interval_ms = op->interval_ms;
72 lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED,
73 "Action was pending when executor connection was dropped");
74 event.t_run = op->start_time;
75 event.t_rcchange = op->start_time;
76
77 event.call_id = op->call_id;
78 event.remote_nodename = lrm_state->node_name;
79 event.params = op->params;
80
81 process_lrm_event(lrm_state, &event, op, NULL);
82 lrmd__reset_result(&event);
83 return TRUE;
84 }
85
86 gboolean
87 lrm_state_is_local(lrm_state_t *lrm_state)
88 {
89 return (lrm_state != NULL) && controld_is_local_node(lrm_state->node_name);
90 }
91
92 /*!
93 * \internal
94 * \brief Create executor state entry for a node and add it to the state table
95 *
96 * \param[in] node_name Node to create entry for
97 *
98 * \return Newly allocated executor state object initialized for \p node_name
99 */
100 static lrm_state_t *
101 lrm_state_create(const char *node_name)
102 {
103 lrm_state_t *state = NULL;
104
105 if (!node_name) {
106 pcmk__err("No node name given for lrm state object");
107 return NULL;
108 }
109
110 state = pcmk__assert_alloc(1, sizeof(lrm_state_t));
111
112 state->node_name = pcmk__str_copy(node_name);
113 state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info);
114 state->deletion_ops = pcmk__strkey_table(free, free_deletion_op);
115 state->active_ops = pcmk__strkey_table(free, free_recurring_op);
116 state->resource_history = pcmk__strkey_table(NULL, history_free);
117 state->metadata_cache = metadata_cache_new();
118
119 g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
120 return state;
121 }
122
123 static void
124 internal_lrm_state_destroy(gpointer data)
125 {
126 lrm_state_t *lrm_state = data;
127
|
(1) Event path: |
Condition "!lrm_state", taking false branch. |
128 if (!lrm_state) {
129 return;
130 }
131
132 /* Rather than directly remove the recorded proxy entries from proxy_table,
133 * make sure any connected proxies get disconnected. So that
134 * remote_proxy_disconnected() will be called and as well remove the
135 * entries from proxy_table.
136 */
137 controld_remote_proxy_disconnect_node(lrm_state->node_name);
138
139 remote_ra_cleanup(lrm_state);
140 lrmd_api_delete(lrm_state->conn);
141
|
(2) Event path: |
Condition "_p", taking true branch. |
142 g_clear_pointer(&lrm_state->rsc_info_cache, g_hash_table_destroy);
|
CID (unavailable; MK=e869f4c7ad785dcbc90410a40d8872c1) (#2 of 4): Inconsistent C union access (INCONSISTENT_UNION_ACCESS): |
|
(3) Event assign_union_field: |
The union field "in" of "_pp" is written. |
|
(4) Event inconsistent_union_field_access: |
In "_pp.out", the union field used: "out" is inconsistent with the field most recently stored: "in". |
143 g_clear_pointer(&lrm_state->resource_history, g_hash_table_destroy);
144 g_clear_pointer(&lrm_state->deletion_ops, g_hash_table_destroy);
145 g_clear_pointer(&lrm_state->active_ops, g_hash_table_destroy);
146
147 metadata_cache_free(lrm_state->metadata_cache);
148
149 free((char *)lrm_state->node_name);
150 free(lrm_state);
151 }
152
153 void
154 lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
155 {
156 if (lrm_state->resource_history) {
157 pcmk__trace("Resetting resource history cache with %u members",
158 g_hash_table_size(lrm_state->resource_history));
159 g_hash_table_remove_all(lrm_state->resource_history);
160 }
161 if (lrm_state->deletion_ops) {
162 pcmk__trace("Resetting deletion operations cache with %u members",
163 g_hash_table_size(lrm_state->deletion_ops));
164 g_hash_table_remove_all(lrm_state->deletion_ops);
165 }
166 if (lrm_state->active_ops != NULL) {
167 pcmk__trace("Resetting active operations cache with %u members",
168 g_hash_table_size(lrm_state->active_ops));
169 g_hash_table_remove_all(lrm_state->active_ops);
170 }
171 if (lrm_state->rsc_info_cache) {
172 pcmk__trace("Resetting resource information cache with %u members",
173 g_hash_table_size(lrm_state->rsc_info_cache));
174 g_hash_table_remove_all(lrm_state->rsc_info_cache);
175 }
176 if (reset_metadata) {
177 metadata_cache_reset(lrm_state->metadata_cache);
178 }
179 }
180
181 void
182 controld_execd_state_table_init(void)
183 {
184 if (lrm_state_table != NULL) {
185 return;
186 }
187
188 lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy);
189 }
190
191 void
192 controld_execd_state_table_free(void)
193 {
194 g_clear_pointer(&lrm_state_table, g_hash_table_destroy);
195 }
196
197 /*!
198 * \internal
199 * \brief Get executor state object
200 *
201 * \param[in] node_name Get executor state for this node (local node if NULL)
202 * \param[in] create If true, create executor state if it doesn't exist
203 *
204 * \return Executor state object for \p node_name
205 */
206 lrm_state_t *
207 controld_get_executor_state(const char *node_name, bool create)
208 {
209 lrm_state_t *state = NULL;
210
211 if ((node_name == NULL) && (controld_globals.cluster != NULL)) {
212 node_name = controld_globals.cluster->priv->node_name;
213 }
214 if ((node_name == NULL) || (lrm_state_table == NULL)) {
215 return NULL;
216 }
217
218 state = g_hash_table_lookup(lrm_state_table, node_name);
219 if ((state == NULL) && create) {
220 state = lrm_state_create(node_name);
221 }
222 return state;
223 }
224
225 /* @TODO the lone caller just needs to iterate over the values, so replace this
226 * with a g_hash_table_foreach() wrapper instead
227 */
228 GList *
229 lrm_state_get_list(void)
230 {
231 if (lrm_state_table == NULL) {
232 return NULL;
233 }
234 return g_hash_table_get_values(lrm_state_table);
235 }
236
237 void
238 lrm_state_disconnect_only(lrm_state_t * lrm_state)
239 {
240 guint removed = 0;
241
242 if (!lrm_state->conn) {
243 return;
244 }
245 pcmk__trace("Disconnecting %s", lrm_state->node_name);
246
247 controld_remote_proxy_disconnect_node(lrm_state->node_name);
248
249 lrm_state->conn->cmds->disconnect(lrm_state->conn);
250
251 if (!pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
252 removed = g_hash_table_foreach_remove(lrm_state->active_ops,
253 fail_pending_op, lrm_state);
254 pcmk__trace("Synthesized %u operation failures for %s", removed,
255 lrm_state->node_name);
256 }
257 }
258
259 void
260 lrm_state_disconnect(lrm_state_t * lrm_state)
261 {
262 lrm_state_disconnect_only(lrm_state);
263 g_clear_pointer(&lrm_state->conn, lrmd_api_delete);
264 }
265
266 int
267 lrm_state_is_connected(lrm_state_t * lrm_state)
268 {
269 if (!lrm_state->conn) {
270 return FALSE;
271 }
272 return lrm_state->conn->cmds->is_connected(lrm_state->conn);
273 }
274
275 int
276 lrm_state_poke_connection(lrm_state_t * lrm_state)
277 {
278
279 if (!lrm_state->conn) {
280 return -ENOTCONN;
281 }
282 return lrm_state->conn->cmds->poke_connection(lrm_state->conn);
283 }
284
285 // \return Standard Pacemaker return code
286 int
287 controld_connect_local_executor(lrm_state_t *lrm_state)
288 {
289 int rc = pcmk_rc_ok;
290
291 if (lrm_state->conn == NULL) {
292 lrm_state->conn = lrmd_api_new();
293 lrm_state->conn->cmds->set_callback(lrm_state->conn, lrm_op_callback);
294 }
295
296 rc = lrm_state->conn->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
297 rc = pcmk_legacy2rc(rc);
298
299 if (rc == pcmk_rc_ok) {
300 lrm_state->num_lrm_register_fails = 0;
301 } else {
302 lrm_state->num_lrm_register_fails++;
303 }
304 return rc;
305 }
306
307 // \return Standard Pacemaker return code
308 int
309 controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server,
310 int port, int timeout_ms)
311 {
312 int rc = pcmk_rc_ok;
313
314 if (lrm_state->conn == NULL) {
315 lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server,
316 port);
317 lrm_state->conn->cmds->set_callback(lrm_state->conn,
318 remote_lrm_op_callback);
319 lrmd__proxy_set_callback(lrm_state->conn, lrm_state,
320 controld_remote_proxy_cb);
321 }
322
323 pcmk__trace("Initiating remote connection to %s:%d with timeout %dms",
324 server, port, timeout_ms);
325 rc = lrm_state->conn->cmds->connect_async(lrm_state->conn,
326 lrm_state->node_name, timeout_ms);
327 if (rc == pcmk_ok) {
328 lrm_state->num_lrm_register_fails = 0;
329 } else {
330 lrm_state->num_lrm_register_fails++; // Ignored for remote connections
331 }
332 return pcmk_legacy2rc(rc);
333 }
334
335 int
336 lrm_state_get_metadata(lrm_state_t * lrm_state,
337 const char *class,
338 const char *provider,
339 const char *agent, char **output, enum lrmd_call_options options)
340 {
341 lrmd_key_value_t *params = NULL;
342
343 if (!lrm_state->conn) {
344 return -ENOTCONN;
345 }
346
347 /* Add the node name to the environment, as is done with normal resource
348 * action calls. Meta-data calls shouldn't need it, but some agents are
349 * written with an ocf_local_nodename call at the beginning regardless of
350 * action. Without the environment variable, the agent would try to contact
351 * the controller to get the node name -- but the controller would be
352 * blocking on the synchronous meta-data call.
353 *
354 * At this point, we have to assume that agents are unlikely to make other
355 * calls that require the controller, such as crm_node --quorum or
356 * --cluster-id.
357 *
358 * @TODO Make meta-data calls asynchronous. (This will be part of a larger
359 * project to make meta-data calls via the executor rather than directly.)
360 */
361 params = lrmd_key_value_add(params, CRM_META "_" PCMK__META_ON_NODE,
362 lrm_state->node_name);
363
364 return lrm_state->conn->cmds->get_metadata_params(lrm_state->conn, class,
365 provider, agent, output,
366 options, params);
367 }
368
369 int
370 lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
371 guint interval_ms)
372 {
373 if (!lrm_state->conn) {
374 return -ENOTCONN;
375 }
376
377 /* Figure out a way to make this async?
378 * NOTICE: Currently it's synced and directly acknowledged in
379 * controld_invoke_execd().
380 */
381 if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
382 return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
383 }
384 return lrm_state->conn->cmds->cancel(lrm_state->conn, rsc_id, action,
385 interval_ms);
386 }
387
388 lrmd_rsc_info_t *
389 lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
390 {
391 lrmd_rsc_info_t *rsc = NULL;
392
393 if (!lrm_state->conn) {
394 return NULL;
395 }
396 if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
397 return remote_ra_get_rsc_info(lrm_state, rsc_id);
398 }
399
400 rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
401 if (rsc == NULL) {
402 /* only contact the lrmd if we don't already have a cached rsc info */
403 rsc = lrm_state->conn->cmds->get_rsc_info(lrm_state->conn, rsc_id,
404 options);
405 if (rsc == NULL) {
406 return NULL;
407 }
408 /* cache the result */
409 g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
410 }
411
412 return lrmd_copy_rsc_info(rsc);
413
414 }
415
416 /*!
417 * \internal
418 * \brief Initiate a resource agent action
419 *
420 * \param[in,out] lrm_state Executor state object
421 * \param[in] rsc_id ID of resource for action
422 * \param[in] action Action to execute
423 * \param[in] userdata String to copy and pass to execution callback
424 * \param[in] interval_ms Action interval (in milliseconds)
425 * \param[in] timeout_ms Action timeout (in milliseconds)
426 * \param[in] start_delay_ms Delay (in ms) before initiating action
427 * \param[in] parameters Hash table of resource parameters
428 * \param[out] call_id Where to store call ID on success
429 *
430 * \return Standard Pacemaker return code
431 */
432 int
433 controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id,
434 const char *action, const char *userdata,
435 guint interval_ms, int timeout_ms,
436 int start_delay_ms, GHashTable *parameters,
437 int *call_id)
438 {
439 int rc = pcmk_rc_ok;
440 lrmd_key_value_t *params = NULL;
441
442 if (lrm_state->conn == NULL) {
443 return ENOTCONN;
444 }
445
446 // Convert parameters from hash table to list
447 if (parameters != NULL) {
448 const char *key = NULL;
449 const char *value = NULL;
450 GHashTableIter iter;
451
452 g_hash_table_iter_init(&iter, parameters);
453 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
454 (gpointer *) &value)) {
455 params = lrmd_key_value_add(params, key, value);
456 }
457 }
458
459 if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
460 rc = controld_execute_remote_agent(lrm_state, rsc_id, action,
461 userdata, interval_ms, timeout_ms,
462 start_delay_ms, params, call_id);
463
464 } else {
465 rc = lrm_state->conn->cmds->exec(lrm_state->conn, rsc_id, action,
466 userdata, interval_ms, timeout_ms,
467 start_delay_ms,
468 lrmd_opt_notify_changes_only, params);
469 if (rc < 0) {
470 rc = pcmk_legacy2rc(rc);
471 } else {
472 *call_id = rc;
473 rc = pcmk_rc_ok;
474 }
475 }
476 return rc;
477 }
478
479 int
480 lrm_state_register_rsc(lrm_state_t *lrm_state, const char *rsc_id,
481 const char *class, const char *provider,
482 const char *agent, enum lrmd_call_options options)
483 {
484 if (lrm_state->conn == NULL) {
485 return -ENOTCONN;
486 }
487
488 if (is_remote_lrmd_ra(agent, provider, NULL)) {
489 return controld_get_executor_state(rsc_id, true)? pcmk_ok : -EINVAL;
490 }
491
492 /* @TODO Implement an asynchronous version of this (currently a blocking
493 * call to the lrmd).
494 */
495 return lrm_state->conn->cmds->register_rsc(lrm_state->conn, rsc_id, class,
496 provider, agent, options);
497 }
498
499 int
500 lrm_state_unregister_rsc(lrm_state_t *lrm_state, const char *rsc_id,
501 enum lrmd_call_options options)
502 {
503 if (lrm_state->conn == NULL) {
504 return -ENOTCONN;
505 }
506
507 if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
508 g_hash_table_remove(lrm_state_table, rsc_id);
509 return pcmk_ok;
510 }
511
512 g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
513
514 /* @TODO Optimize this ... this function is a blocking round trip from
515 * client to daemon. The controld_execd_state.c code path that uses this
516 * function should always treat it as an async operation. The executor API
517 * should make an async version available.
518 */
519 return lrm_state->conn->cmds->unregister_rsc(lrm_state->conn, rsc_id,
520 options);
521 }
522