1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <sys/time.h>
14 #include <sys/resource.h>
15
16 #include <crm/crm.h>
17 #include <crm/common/mainloop.h>
18 #include <crm/common/xml.h>
19
20 #include <crm/cluster/internal.h>
21 #include <crm/cluster/election_internal.h>
22 #include "crmcluster_private.h"
23
/* Length of the election storm detection window, and also how long get_uptime()
 * reuses its cached resource-usage reading
 */
#define STORM_INTERVAL   2      /* in seconds */

/* Election state for one cluster object. It is allocated by election_init(),
 * and released by election_fini().
 */
struct pcmk__election {
    enum election_result state;     // Current state of election
    guint count;                    // How many times local node has voted
    void (*cb)(pcmk_cluster_t *);   // Function to call if election is won
    GHashTable *voted;              // Key = node name, value = how node voted
                                    // (NULL until the first vote is recorded)
    mainloop_timer_t *timeout;      // When to abort if all votes not received
    int election_wins;              // Track wins, for storm detection
    bool wrote_blackbox;            // Write a storm blackbox at most once
    time_t expires;                 // When storm detection period ends
    time_t last_election_loss;      // Time of most recent election loss, or 0
                                    // if none (or dampening was cleared)
};
37
38 static void
39 election_complete(pcmk_cluster_t *cluster)
40 {
41 pcmk__assert((cluster != NULL) && (cluster->priv->election != NULL));
42 cluster->priv->election->state = election_won;
43 if (cluster->priv->election->cb != NULL) {
44 cluster->priv->election->cb(cluster);
45 }
46 election_reset(cluster);
47 }
48
49 static gboolean
50 election_timer_cb(gpointer user_data)
51 {
52 pcmk_cluster_t *cluster = user_data;
53
54 pcmk__info("Declaring local node as winner after election timed out");
55 election_complete(cluster);
56 return FALSE;
57 }
58
59 /*!
60 * \internal
61 * \brief Get current state of an election
62 *
63 * \param[in] cluster Cluster with election
64 *
65 * \return Current state of \e
66 */
67 enum election_result
68 election_state(const pcmk_cluster_t *cluster)
69 {
70 if ((cluster == NULL) || (cluster->priv->election == NULL)) {
71 return election_error;
72 }
73 return cluster->priv->election->state;
74 }
75
76 /* The local node will be declared the winner if missing votes are not received
77 * within this time. The value is chosen to be the same as the default for the
78 * election-timeout cluster option.
79 */
80 #define ELECTION_TIMEOUT_MS 120000
81
82 /*!
83 * \internal
84 * \brief Track election state in a cluster
85 *
86 * Every node that wishes to participate in an election must initialize the
87 * election once, typically at start-up.
88 *
89 * \param[in] cluster Cluster that election is for
90 * \param[in] cb Function to call if local node wins election
91 */
92 void
93 election_init(pcmk_cluster_t *cluster, void (*cb)(pcmk_cluster_t *))
94 {
95 const char *name = pcmk__s(crm_system_name, "election");
96
97 CRM_CHECK(cluster->priv->election == NULL, return);
98
|
CID (unavailable; MK=bf77ad6f79598a6146f5533ef1330eae) (#1 of 1): Resource not released (INCOMPLETE_DEALLOCATOR): |
|
(1) Event allocation: |
Memory is allocated. [details] |
|
(2) Event allocation: |
The field "cluster->priv->election" is allocated, but not released in the identified deallocator. |
| Also see events: |
[deallocator] |
99 cluster->priv->election = pcmk__assert_alloc(1, sizeof(pcmk__election_t));
100 cluster->priv->election->cb = cb;
101 cluster->priv->election->timeout = mainloop_timer_add(name,
102 ELECTION_TIMEOUT_MS,
103 FALSE,
104 election_timer_cb,
105 cluster);
106 }
107
108 /*!
109 * \internal
110 * \brief Disregard any previous vote by specified peer
111 *
112 * This discards any recorded vote from a specified peer. Election users should
113 * call this whenever a voting peer becomes inactive.
114 *
115 * \param[in,out] cluster Cluster with election
116 * \param[in] uname Name of peer to disregard
117 */
118 void
119 election_remove(pcmk_cluster_t *cluster, const char *uname)
120 {
121 if ((cluster != NULL) && (cluster->priv->election != NULL)
122 && (uname != NULL) && (cluster->priv->election->voted != NULL)) {
123 pcmk__trace("Discarding (no-)vote from lost peer %s", uname);
124 g_hash_table_remove(cluster->priv->election->voted, uname);
125 }
126 }
127
128 /*!
129 * \internal
130 * \brief Stop election timer and disregard all votes
131 *
132 * \param[in,out] cluster Cluster with election
133 */
134 void
135 election_reset(pcmk_cluster_t *cluster)
136 {
137 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
138 pcmk__trace("Resetting election");
139 mainloop_timer_stop(cluster->priv->election->timeout);
140 g_clear_pointer(&cluster->priv->election->voted, g_hash_table_destroy);
141 }
142 }
143
144 /*!
145 * \internal
146 * \brief Free an election object
147 *
148 * Free all memory associated with an election object, stopping its
149 * election timer (if running).
150 *
151 * \param[in,out] cluster Cluster with election
152 */
153 void
|
(3) Event deallocator: |
Deallocator for "struct pcmk__cluster". |
| Also see events: |
[allocation][allocation] |
154 election_fini(pcmk_cluster_t *cluster)
155 {
156 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
157 election_reset(cluster);
158 pcmk__trace("Destroying election");
159 mainloop_timer_del(cluster->priv->election->timeout);
160 g_clear_pointer(&cluster->priv->election, free);
161 }
162 }
163
164 static void
165 election_timeout_start(pcmk_cluster_t *cluster)
166 {
167 mainloop_timer_start(cluster->priv->election->timeout);
168 }
169
170 /*!
171 * \internal
172 * \brief Stop an election's timer, if running
173 *
174 * \param[in,out] cluster Cluster with election
175 */
176 void
177 election_timeout_stop(pcmk_cluster_t *cluster)
178 {
179 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
180 mainloop_timer_stop(cluster->priv->election->timeout);
181 }
182 }
183
184 /*!
185 * \internal
186 * \brief Change an election's timeout (restarting timer if running)
187 *
188 * \param[in,out] cluster Cluster with election
189 * \param[in] period New timeout
190 */
191 void
192 election_timeout_set_period(pcmk_cluster_t *cluster, guint period)
193 {
194 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
195 mainloop_timer_set_period(cluster->priv->election->timeout, period);
196 }
197
198 static int
199 get_uptime(struct timeval *output)
200 {
201 static time_t expires = 0;
202 static struct rusage info;
203
204 time_t tm_now = time(NULL);
205
206 if (expires < tm_now) {
207 int rc = 0;
208
209 output->tv_sec = 0;
210 output->tv_usec = 0;
211
212 info.ru_utime.tv_sec = 0;
213 info.ru_utime.tv_usec = 0;
214
215 rc = getrusage(RUSAGE_SELF, &info);
216 if (rc < 0) {
217 pcmk__err("Could not calculate the current uptime: %s",
218 strerror(errno));
219 expires = 0;
220 return -1;
221 }
222
223 pcmk__debug("Current CPU usage is: %llds, %lldus",
224 (long long) info.ru_utime.tv_sec,
225 (long long) info.ru_utime.tv_usec);
226 }
227
228 expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */
229 output->tv_sec = info.ru_utime.tv_sec;
230 output->tv_usec = info.ru_utime.tv_usec;
231
232 return 1;
233 }
234
/*!
 * \internal
 * \brief Compare the local node's age (per get_uptime()) with a peer's
 *
 * Seconds are compared first; microseconds break ties.
 *
 * \param[in] your_age  Age reported by the peer
 *
 * \return 1 if the local node is older (wins), -1 if the peer is older (local
 *         node loses), or 0 if the ages are equal
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    /* The return value is deliberately ignored: if an error occurred, our_age
     * will be compared as {0,0}
     */
    get_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            pcmk__debug("Win: %lld vs %lld (seconds)",
                        (long long) our_age.tv_sec,
                        (long long) your_age.tv_sec);
            return 1;
        }
        pcmk__debug("Lose: %lld vs %lld (seconds)",
                    (long long) our_age.tv_sec, (long long) your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            pcmk__debug("Win: %lld.%06lld vs %lld.%06lld (usec)",
                        (long long) our_age.tv_sec,
                        (long long) our_age.tv_usec,
                        (long long) your_age.tv_sec,
                        (long long) your_age.tv_usec);
            return 1;
        }
        pcmk__debug("Lose: %lld.%06lld vs %lld.%06lld (usec)",
                    (long long) our_age.tv_sec, (long long) our_age.tv_usec,
                    (long long) your_age.tv_sec, (long long) your_age.tv_usec);
        return -1;
    }

    return 0;
}
264
265 /*!
266 * \internal
267 * \brief Start a new election by offering local node's candidacy
268 *
269 * Broadcast a "vote" election message containing the local node's ID,
270 * (incremented) election counter, and uptime, and start the election timer.
271 *
272 * \param[in,out] cluster Cluster with election
273 *
274 * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
275 * all active peers do so, or if the election times out, the local node
276 * wins the election. (If we lose to any peer vote, we will stop the
277 * timer, so a timeout means we did not lose -- either some peer did not
278 * vote, or we did not call election_check() in time.)
279 */
280 void
281 election_vote(pcmk_cluster_t *cluster)
282 {
283 struct timeval age;
284 xmlNode *vote = NULL;
285 pcmk__node_status_t *our_node = NULL;
286 const char *message_type = NULL;
287
288 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
289
290 if (cluster->priv->node_name == NULL) {
291 pcmk__err("Cannot start an election: Local node name unknown");
292 return;
293 }
294
295 our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
296 pcmk__node_search_cluster_member);
297 if (!pcmk__cluster_is_node_active(our_node)) {
298 pcmk__trace("Cannot vote yet: local node not connected to cluster");
299 return;
300 }
301
302 election_reset(cluster);
303 cluster->priv->election->state = election_in_progress;
304 message_type = pcmk__server_message_type(cluster->priv->server);
305
306 /* @COMPAT We use message_type as the sender and recipient system for
307 * backward compatibility (see T566).
308 */
309 vote = pcmk__new_request(cluster->priv->server, message_type,
310 NULL, message_type, CRM_OP_VOTE, NULL);
311
312 cluster->priv->election->count++;
313 pcmk__xe_set(vote, PCMK__XA_ELECTION_OWNER,
314 pcmk__cluster_get_xml_id(our_node));
315 pcmk__xe_set_int(vote, PCMK__XA_ELECTION_ID,
316 cluster->priv->election->count);
317
318 // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
319 get_uptime(&age);
320 pcmk__xe_set_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
321 PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
322
323 pcmk__cluster_send_message(NULL, cluster->priv->server, vote);
324 pcmk__xml_free(vote);
325
326 pcmk__debug("Started election round %u", cluster->priv->election->count);
327 election_timeout_start(cluster);
328 }
329
330 /*!
331 * \internal
332 * \brief Check whether local node has won an election
333 *
334 * If all known peers have sent no-vote messages, stop the election timer, set
335 * the election state to won, and call any registered win callback.
336 *
337 * \param[in,out] cluster Cluster with election
338 *
339 * \return TRUE if local node has won, FALSE otherwise
340 * \note If all known peers have sent no-vote messages, but the election owner
341 * does not call this function, the election will not be won (and the
342 * callback will not be called) until the election times out.
343 * \note This should be called when election_count_vote() returns
344 * \c election_in_progress.
345 */
346 bool
347 election_check(pcmk_cluster_t *cluster)
348 {
349 int voted_size = 0;
350 int num_members = 0;
351
352 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL),
353 return false);
354
355 if (cluster->priv->election->voted == NULL) {
356 pcmk__trace("Election check requested, but no votes received yet");
357 return FALSE;
358 }
359
360 voted_size = g_hash_table_size(cluster->priv->election->voted);
361 num_members = pcmk__cluster_num_active_nodes();
362
363 /* in the case of #voted > #members, it is better to
364 * wait for the timeout and give the cluster time to
365 * stabilize
366 */
367 if (voted_size >= num_members) {
368 /* we won and everyone has voted */
369 election_timeout_stop(cluster);
370 if (voted_size > num_members) {
371 GHashTableIter gIter;
372 const pcmk__node_status_t *node = NULL;
373 char *key = NULL;
374
375 pcmk__warn("Received too many votes in election");
376 g_hash_table_iter_init(&gIter, pcmk__peer_cache);
377 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
378 if (pcmk__cluster_is_node_active(node)) {
379 pcmk__warn("* expected vote: %s", node->name);
380 }
381 }
382
383 g_hash_table_iter_init(&gIter, cluster->priv->election->voted);
384 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
385 pcmk__warn("* actual vote: %s", key);
386 }
387
388 }
389
390 pcmk__info("Election won by local node");
391 election_complete(cluster);
392 return TRUE;
393
394 } else {
395 pcmk__debug("Election still waiting on %d of %d vote%s",
396 (num_members - voted_size), num_members,
397 pcmk__plural_s(num_members));
398 }
399
400 return FALSE;
401 }
402
/* After losing an election, the local node will not start a new election for
 * this long, even if it would win against a peer's vote
 */
#define LOSS_DAMPEN 2   /* in seconds */

/* Fields unpacked from an election message by parse_election_message(). The
 * string members point into the message XML and are valid only for its
 * lifetime.
 */
struct vote {
    const char *op;             // Value of PCMK__XA_CRM_TASK (vote or no-vote)
    const char *from;           // Value of PCMK__XA_SRC (sending node's name)
    const char *version;        // Value of PCMK_XA_VERSION
    const char *election_owner; // Value of PCMK__XA_ELECTION_OWNER (node ID)
    int election_id;            // Value of PCMK__XA_ELECTION_ID (-1 if unset)
    struct timeval age;         // Sender's uptime (set only for vote ops)
};
413
414 /*!
415 * \internal
416 * \brief Unpack an election message
417 *
418 * \param[in] message Election message XML
419 * \param[out] vote Parsed fields from message
420 *
421 * \return TRUE if election message and election are valid, FALSE otherwise
422 * \note The parsed struct's pointer members are valid only for the lifetime of
423 * the message argument.
424 */
425 static bool
426 parse_election_message(const xmlNode *message, struct vote *vote)
427 {
428 CRM_CHECK(message && vote, return FALSE);
429
430 vote->election_id = -1;
431 vote->age.tv_sec = -1;
432 vote->age.tv_usec = -1;
433
434 vote->op = pcmk__xe_get(message, PCMK__XA_CRM_TASK);
435 vote->from = pcmk__xe_get(message, PCMK__XA_SRC);
436 vote->version = pcmk__xe_get(message, PCMK_XA_VERSION);
437 vote->election_owner = pcmk__xe_get(message, PCMK__XA_ELECTION_OWNER);
438
439 pcmk__xe_get_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
440
441 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
442 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
443
444 pcmk__warn("Invalid %s message from %s", pcmk__s(vote->op, "election"),
445 pcmk__s(vote->from, "unspecified node"));
446 pcmk__log_xml_trace(message, "bad-vote");
447 return FALSE;
448 }
449
450 // Op-specific validation
451
452 if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
453 /* Only vote ops have uptime.
454 Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
455 */
456 if ((pcmk__xe_get_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
457 PCMK__XA_ELECTION_AGE_NANO_SEC,
458 &(vote->age)) != pcmk_rc_ok)
459 || (vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
460
461 pcmk__warn("Cannot count election %s from %s because uptime is "
462 "missing or invalid",
463 vote->op, vote->from);
464 return FALSE;
465 }
466
467 } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
468 pcmk__info("Cannot process election message from %s because %s is not "
469 "a known election op",
470 vote->from, vote->op);
471 return FALSE;
472 }
473
474 /* If the membership cache is NULL, we REALLY shouldn't be voting --
475 * the question is how we managed to get here.
476 */
477 if (pcmk__peer_cache == NULL) {
478 pcmk__info("Cannot count election %s from %s becasue no peer "
479 "information available",
480 vote->op, vote->from);
481 return FALSE;
482 }
483 return TRUE;
484 }
485
486 static void
487 record_vote(pcmk_cluster_t *cluster, struct vote *vote)
488 {
489 pcmk__assert((vote->from != NULL) && (vote->op != NULL));
490
491 if (cluster->priv->election->voted == NULL) {
492 cluster->priv->election->voted = pcmk__strkey_table(free, free);
493 }
494 pcmk__insert_dup(cluster->priv->election->voted, vote->from, vote->op);
495 }
496
497 static void
498 send_no_vote(pcmk_cluster_t *cluster, pcmk__node_status_t *peer,
499 struct vote *vote)
500 {
501 const char *message_type = NULL;
502 xmlNode *novote = NULL;
503
504 message_type = pcmk__server_message_type(cluster->priv->server);
505 novote = pcmk__new_request(cluster->priv->server, message_type,
506 vote->from, message_type, CRM_OP_NOVOTE, NULL);
507 pcmk__xe_set(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
508 pcmk__xe_set_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
509
510 pcmk__cluster_send_message(peer, cluster->priv->server, novote);
511 pcmk__xml_free(novote);
512 }
513
514 /*!
515 * \internal
516 * \brief Process an election message (vote or no-vote) from a peer
517 *
518 * \param[in,out] cluster Cluster with election
519 * \param[in] message Election message XML from peer
520 * \param[in] can_win Whether local node is eligible to win
521 *
522 * \return Election state after new vote is considered
523 * \note If the peer message is a vote, and we prefer the peer to win, this will
524 * send a no-vote reply to the peer.
525 * \note The situations "we lost to this vote" from "this is a late no-vote
526 * after we've already lost" both return election_lost. If a caller needs
527 * to distinguish them, it should save the current state before calling
528 * this function, and then compare the result.
529 */
530 enum election_result
531 election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message,
532 bool can_win)
533 {
534 int log_level = LOG_INFO;
535 gboolean done = FALSE;
536 gboolean we_lose = FALSE;
537 const char *reason = NULL;
538 bool we_are_owner = FALSE;
539 pcmk__node_status_t *our_node = NULL;
540 pcmk__node_status_t *your_node = NULL;
541 time_t tm_now = time(NULL);
542 struct vote vote;
543
544 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL)
545 && (message != NULL) && (cluster->priv->node_name != NULL),
546 return election_error);
547
548 if (!parse_election_message(message, &vote)) {
549 return election_error;
550 }
551
552 your_node = pcmk__get_node(0, vote.from, NULL,
553 pcmk__node_search_cluster_member);
554 our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
555 pcmk__node_search_cluster_member);
556 we_are_owner = (our_node != NULL)
557 && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node),
558 vote.election_owner, pcmk__str_none);
559
560 if (!can_win) {
561 reason = "Not eligible";
562 we_lose = TRUE;
563
564 } else if (!pcmk__cluster_is_node_active(our_node)) {
565 reason = "We are not part of the cluster";
566 log_level = LOG_ERR;
567 we_lose = TRUE;
568
569 } else if (we_are_owner
570 && (vote.election_id != cluster->priv->election->count)) {
571 log_level = LOG_TRACE;
572 reason = "Superseded";
573 done = TRUE;
574
575 } else if (!pcmk__cluster_is_node_active(your_node)) {
576 /* Possibly we cached the message in the FSA queue at a point that it wasn't */
577 reason = "Peer is not part of our cluster";
578 log_level = LOG_WARNING;
579 done = TRUE;
580
581 } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
582 || pcmk__str_eq(vote.from, cluster->priv->node_name,
583 pcmk__str_casei)) {
584 /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
585 * for us to win
586 */
587 if (!we_are_owner) {
588 pcmk__warn("Cannot count election round %d %s from %s because we "
589 "did not start election (node ID %s did)",
590 vote.election_id, vote.op, vote.from,
591 vote.election_owner);
592 return election_error;
593 }
594 if (cluster->priv->election->state != election_in_progress) {
595 // Should only happen if we already lost
596 pcmk__debug("Not counting election round %d %s from %s because no "
597 "election in progress",
598 vote.election_id, vote.op, vote.from);
599 return cluster->priv->election->state;
600 }
601 record_vote(cluster, &vote);
602 reason = "Recorded";
603 done = TRUE;
604
605 } else {
606 // A peer vote requires a comparison to determine which node is better
607 int age_result = compare_age(vote.age);
608 int version_result = pcmk__compare_versions(vote.version,
609 CRM_FEATURE_SET);
610
611 if (version_result < 0) {
612 reason = "Version";
613 we_lose = TRUE;
614
615 } else if (version_result > 0) {
616 reason = "Version";
617
618 } else if (age_result < 0) {
619 reason = "Uptime";
620 we_lose = TRUE;
621
622 } else if (age_result > 0) {
623 reason = "Uptime";
624
625 } else if (strcasecmp(cluster->priv->node_name, vote.from) > 0) {
626 reason = "Host name";
627 we_lose = TRUE;
628
629 } else {
630 reason = "Host name";
631 }
632 }
633
634 if (cluster->priv->election->expires < tm_now) {
635 cluster->priv->election->election_wins = 0;
636 cluster->priv->election->expires = tm_now + STORM_INTERVAL;
637
638 } else if (done == FALSE && we_lose == FALSE) {
639 int peers = 1 + g_hash_table_size(pcmk__peer_cache);
640
641 /* If every node has to vote down every other node, thats N*(N-1) total elections
642 * Allow some leeway before _really_ complaining
643 */
644 cluster->priv->election->election_wins++;
645 if (cluster->priv->election->election_wins > (peers * peers)) {
646 pcmk__warn("Election storm detected: %d wins in %d seconds",
647 cluster->priv->election->election_wins, STORM_INTERVAL);
648 cluster->priv->election->election_wins = 0;
649 cluster->priv->election->expires = tm_now + STORM_INTERVAL;
650 if (!(cluster->priv->election->wrote_blackbox)) {
651 /* It's questionable whether a black box (from every node in the
652 * cluster) would be truly helpful in diagnosing an election
653 * storm. It's also highly doubtful a production environment
654 * would get multiple election storms from distinct causes, so
655 * saving one blackbox per process lifetime should be
656 * sufficient. Alternatives would be to save a timestamp of the
657 * last blackbox write instead of a boolean, and write a new one
658 * if some amount of time has passed; or to save a storm count,
659 * write a blackbox on every Nth occurrence.
660 */
661 crm_write_blackbox(0, NULL);
662 cluster->priv->election->wrote_blackbox = true;
663 }
664 }
665 }
666
667 if (done) {
668 do_crm_log(log_level + 1,
669 "Processed election round %u %s (current round %d) "
670 "from %s (%s)",
671 vote.election_id, vote.op, cluster->priv->election->count,
672 vote.from, reason);
673 return cluster->priv->election->state;
674
675 } else if (we_lose == FALSE) {
676 /* We track the time of the last election loss to implement an election
677 * dampening period, reducing the likelihood of an election storm. If
678 * this node has lost within the dampening period, don't start a new
679 * election, even if we win against a peer's vote -- the peer we lost to
680 * should win again.
681 *
682 * @TODO This has a problem case: if an election winner immediately
683 * leaves the cluster, and a new election is immediately called, all
684 * nodes could lose, with no new winner elected. The ideal solution
685 * would be to tie the election structure with the peer caches, which
686 * would allow us to clear the dampening when the previous winner
687 * leaves (and would allow other improvements as well).
688 */
689 if ((cluster->priv->election->last_election_loss == 0)
690 || ((tm_now - cluster->priv->election->last_election_loss)
691 > (time_t) LOSS_DAMPEN)) {
692
693 do_crm_log(log_level,
694 "Election round %d (started by node ID %s) pass: "
695 "%s from %s (%s)",
696 vote.election_id, vote.election_owner, vote.op,
697 vote.from, reason);
698
699 cluster->priv->election->last_election_loss = 0;
700 election_timeout_stop(cluster);
701
702 /* Start a new election by voting down this, and other, peers */
703 cluster->priv->election->state = election_start;
704 return cluster->priv->election->state;
705 } else {
706 char *loss_time = NULL;
707
708 loss_time = ctime(&(cluster->priv->election->last_election_loss));
709 if (loss_time) {
710 // Show only HH:MM:SS
711 loss_time += 11;
712 loss_time[8] = '\0';
713 }
714 pcmk__info("Ignoring election round %d (started by node ID %s) "
715 "pass vs %s because we lost less than %ds ago at %s",
716 vote.election_id, vote.election_owner, vote.from,
717 LOSS_DAMPEN, pcmk__s(loss_time, "unknown"));
718 }
719 }
720
721 cluster->priv->election->last_election_loss = tm_now;
722
723 do_crm_log(log_level,
724 "Election round %d (started by node ID %s) lost: "
725 "%s from %s (%s)",
726 vote.election_id, vote.election_owner, vote.op,
727 vote.from, reason);
728
729 election_reset(cluster);
730 send_no_vote(cluster, your_node, &vote);
731 cluster->priv->election->state = election_lost;
732 return cluster->priv->election->state;
733 }
734
735 /*!
736 * \internal
737 * \brief Reset any election dampening currently in effect
738 *
739 * \param[in,out] cluster Cluster with election
740 */
741 void
742 election_clear_dampening(pcmk_cluster_t *cluster)
743 {
744 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
745 cluster->priv->election->last_election_loss = 0;
746 }
747 }
748