1 /*
2 * Copyright 2004-2026 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #include <crm_internal.h>
11
12 #include <stdbool.h>
13 #include <sys/time.h>
14 #include <sys/resource.h>
15
16 #include <crm/crm.h>
17 #include <crm/common/mainloop.h>
18 #include <crm/common/xml.h>
19
20 #include <crm/cluster/internal.h>
21 #include <crm/cluster/election_internal.h>
22 #include "crmcluster_private.h"
23
24 #define STORM_INTERVAL 2 /* in seconds */
25
26 struct pcmk__election {
27 enum election_result state; // Current state of election
28 guint count; // How many times local node has voted
29 void (*cb)(pcmk_cluster_t *); // Function to call if election is won
30 GHashTable *voted; // Key = node name, value = how node voted
31 mainloop_timer_t *timeout; // When to abort if all votes not received
32 int election_wins; // Track wins, for storm detection
33 bool wrote_blackbox; // Write a storm blackbox at most once
34 time_t expires; // When storm detection period ends
35 time_t last_election_loss; // When dampening period ends
36 };
37
38 static void
39 election_complete(pcmk_cluster_t *cluster)
40 {
41 pcmk__assert((cluster != NULL) && (cluster->priv->election != NULL));
42 cluster->priv->election->state = election_won;
43 if (cluster->priv->election->cb != NULL) {
44 cluster->priv->election->cb(cluster);
45 }
46 election_reset(cluster);
47 }
48
49 static gboolean
50 election_timer_cb(gpointer user_data)
51 {
52 pcmk_cluster_t *cluster = user_data;
53
54 pcmk__info("Declaring local node as winner after election timed out");
55 election_complete(cluster);
56 return FALSE;
57 }
58
59 /*!
60 * \internal
61 * \brief Get current state of an election
62 *
63 * \param[in] cluster Cluster with election
64 *
65 * \return Current state of \e
66 */
67 enum election_result
68 election_state(const pcmk_cluster_t *cluster)
69 {
70 if ((cluster == NULL) || (cluster->priv->election == NULL)) {
71 return election_error;
72 }
73 return cluster->priv->election->state;
74 }
75
76 /* The local node will be declared the winner if missing votes are not received
77 * within this time. The value is chosen to be the same as the default for the
78 * election-timeout cluster option.
79 */
80 #define ELECTION_TIMEOUT_MS 120000
81
82 /*!
83 * \internal
84 * \brief Track election state in a cluster
85 *
86 * Every node that wishes to participate in an election must initialize the
87 * election once, typically at start-up.
88 *
89 * \param[in] cluster Cluster that election is for
90 * \param[in] cb Function to call if local node wins election
91 */
92 void
93 election_init(pcmk_cluster_t *cluster, void (*cb)(pcmk_cluster_t *))
94 {
95 const char *name = pcmk__s(crm_system_name, "election");
96
97 CRM_CHECK(cluster->priv->election == NULL, return);
98
|
CID (unavailable; MK=bf77ad6f79598a6146f5533ef1330eae) (#1 of 1): Resource not released (INCOMPLETE_DEALLOCATOR): |
|
(1) Event allocation: |
Memory is allocated. [details] |
|
(2) Event allocation: |
The field "cluster->priv->election" is allocated, but not released in the identified deallocator. |
| Also see events: |
[deallocator] |
99 cluster->priv->election = pcmk__assert_alloc(1, sizeof(pcmk__election_t));
100 cluster->priv->election->cb = cb;
101 cluster->priv->election->timeout = mainloop_timer_add(name,
102 ELECTION_TIMEOUT_MS,
103 FALSE,
104 election_timer_cb,
105 cluster);
106 }
107
108 /*!
109 * \internal
110 * \brief Disregard any previous vote by specified peer
111 *
112 * This discards any recorded vote from a specified peer. Election users should
113 * call this whenever a voting peer becomes inactive.
114 *
115 * \param[in,out] cluster Cluster with election
116 * \param[in] uname Name of peer to disregard
117 */
118 void
119 election_remove(pcmk_cluster_t *cluster, const char *uname)
120 {
121 if ((cluster != NULL) && (cluster->priv->election != NULL)
122 && (uname != NULL) && (cluster->priv->election->voted != NULL)) {
123 pcmk__trace("Discarding (no-)vote from lost peer %s", uname);
124 g_hash_table_remove(cluster->priv->election->voted, uname);
125 }
126 }
127
128 /*!
129 * \internal
130 * \brief Stop election timer and disregard all votes
131 *
132 * \param[in,out] cluster Cluster with election
133 */
134 void
135 election_reset(pcmk_cluster_t *cluster)
136 {
137 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
138 pcmk__trace("Resetting election");
139 mainloop_timer_stop(cluster->priv->election->timeout);
140 if (cluster->priv->election->voted != NULL) {
141 g_hash_table_destroy(cluster->priv->election->voted);
142 cluster->priv->election->voted = NULL;
143 }
144 }
145 }
146
147 /*!
148 * \internal
149 * \brief Free an election object
150 *
151 * Free all memory associated with an election object, stopping its
152 * election timer (if running).
153 *
154 * \param[in,out] cluster Cluster with election
155 */
156 void
157 election_fini(pcmk_cluster_t *cluster)
158 {
159 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
160 election_reset(cluster);
161 pcmk__trace("Destroying election");
162 mainloop_timer_del(cluster->priv->election->timeout);
163 free(cluster->priv->election);
164 cluster->priv->election = NULL;
165 }
166 }
167
168 static void
169 election_timeout_start(pcmk_cluster_t *cluster)
170 {
171 mainloop_timer_start(cluster->priv->election->timeout);
172 }
173
174 /*!
175 * \internal
176 * \brief Stop an election's timer, if running
177 *
178 * \param[in,out] cluster Cluster with election
179 */
180 void
181 election_timeout_stop(pcmk_cluster_t *cluster)
182 {
183 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
184 mainloop_timer_stop(cluster->priv->election->timeout);
185 }
186 }
187
188 /*!
189 * \internal
190 * \brief Change an election's timeout (restarting timer if running)
191 *
192 * \param[in,out] cluster Cluster with election
193 * \param[in] period New timeout
194 */
195 void
196 election_timeout_set_period(pcmk_cluster_t *cluster, guint period)
197 {
198 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
199 mainloop_timer_set_period(cluster->priv->election->timeout, period);
200 }
201
202 static int
203 get_uptime(struct timeval *output)
204 {
205 static time_t expires = 0;
206 static struct rusage info;
207
208 time_t tm_now = time(NULL);
209
210 if (expires < tm_now) {
211 int rc = 0;
212
213 output->tv_sec = 0;
214 output->tv_usec = 0;
215
216 info.ru_utime.tv_sec = 0;
217 info.ru_utime.tv_usec = 0;
218
219 rc = getrusage(RUSAGE_SELF, &info);
220 if (rc < 0) {
221 pcmk__err("Could not calculate the current uptime: %s",
222 strerror(errno));
223 expires = 0;
224 return -1;
225 }
226
227 pcmk__debug("Current CPU usage is: %llds, %lldus",
228 (long long) info.ru_utime.tv_sec,
229 (long long) info.ru_utime.tv_usec);
230 }
231
232 expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */
233 output->tv_sec = info.ru_utime.tv_sec;
234 output->tv_usec = info.ru_utime.tv_usec;
235
236 return 1;
237 }
238
/*!
 * \internal
 * \brief Compare the local node's age (per get_uptime()) against a peer's
 *
 * Seconds are compared first; microseconds are considered only when the
 * seconds are equal.
 *
 * \param[in] your_age  Age reported by the peer
 *
 * \return 1 if the local age is greater (win), -1 if it is less (lose), or 0
 *         if the two are equal
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    // On error, get_uptime() leaves our_age as {0,0}, which is what we compare
    get_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            pcmk__debug("Win: %lld vs %lld (seconds)",
                        (long long) our_age.tv_sec,
                        (long long) your_age.tv_sec);
            return 1;
        }
        pcmk__debug("Lose: %lld vs %lld (seconds)",
                    (long long) our_age.tv_sec, (long long) your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            pcmk__debug("Win: %lld.%06lld vs %lld.%06lld (usec)",
                        (long long) our_age.tv_sec,
                        (long long) our_age.tv_usec,
                        (long long) your_age.tv_sec,
                        (long long) your_age.tv_usec);
            return 1;
        }
        pcmk__debug("Lose: %lld.%06lld vs %lld.%06lld (usec)",
                    (long long) our_age.tv_sec, (long long) our_age.tv_usec,
                    (long long) your_age.tv_sec, (long long) your_age.tv_usec);
        return -1;
    }

    return 0;
}
268
269 /*!
270 * \internal
271 * \brief Start a new election by offering local node's candidacy
272 *
273 * Broadcast a "vote" election message containing the local node's ID,
274 * (incremented) election counter, and uptime, and start the election timer.
275 *
276 * \param[in,out] cluster Cluster with election
277 *
278 * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
279 * all active peers do so, or if the election times out, the local node
280 * wins the election. (If we lose to any peer vote, we will stop the
281 * timer, so a timeout means we did not lose -- either some peer did not
282 * vote, or we did not call election_check() in time.)
283 */
284 void
285 election_vote(pcmk_cluster_t *cluster)
286 {
287 struct timeval age;
288 xmlNode *vote = NULL;
289 pcmk__node_status_t *our_node = NULL;
290 const char *message_type = NULL;
291
292 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
293
294 if (cluster->priv->node_name == NULL) {
295 pcmk__err("Cannot start an election: Local node name unknown");
296 return;
297 }
298
299 our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
300 pcmk__node_search_cluster_member);
301 if (!pcmk__cluster_is_node_active(our_node)) {
302 pcmk__trace("Cannot vote yet: local node not connected to cluster");
303 return;
304 }
305
306 election_reset(cluster);
307 cluster->priv->election->state = election_in_progress;
308 message_type = pcmk__server_message_type(cluster->priv->server);
309
310 /* @COMPAT We use message_type as the sender and recipient system for
311 * backward compatibility (see T566).
312 */
313 vote = pcmk__new_request(cluster->priv->server, message_type,
314 NULL, message_type, CRM_OP_VOTE, NULL);
315
316 cluster->priv->election->count++;
317 pcmk__xe_set(vote, PCMK__XA_ELECTION_OWNER,
318 pcmk__cluster_get_xml_id(our_node));
319 pcmk__xe_set_int(vote, PCMK__XA_ELECTION_ID,
320 cluster->priv->election->count);
321
322 // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
323 get_uptime(&age);
324 pcmk__xe_set_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
325 PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
326
327 pcmk__cluster_send_message(NULL, cluster->priv->server, vote);
328 pcmk__xml_free(vote);
329
330 pcmk__debug("Started election round %u", cluster->priv->election->count);
331 election_timeout_start(cluster);
332 }
333
334 /*!
335 * \internal
336 * \brief Check whether local node has won an election
337 *
338 * If all known peers have sent no-vote messages, stop the election timer, set
339 * the election state to won, and call any registered win callback.
340 *
341 * \param[in,out] cluster Cluster with election
342 *
343 * \return TRUE if local node has won, FALSE otherwise
344 * \note If all known peers have sent no-vote messages, but the election owner
345 * does not call this function, the election will not be won (and the
346 * callback will not be called) until the election times out.
347 * \note This should be called when election_count_vote() returns
348 * \c election_in_progress.
349 */
350 bool
351 election_check(pcmk_cluster_t *cluster)
352 {
353 int voted_size = 0;
354 int num_members = 0;
355
356 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL),
357 return false);
358
359 if (cluster->priv->election->voted == NULL) {
360 pcmk__trace("Election check requested, but no votes received yet");
361 return FALSE;
362 }
363
364 voted_size = g_hash_table_size(cluster->priv->election->voted);
365 num_members = pcmk__cluster_num_active_nodes();
366
367 /* in the case of #voted > #members, it is better to
368 * wait for the timeout and give the cluster time to
369 * stabilize
370 */
371 if (voted_size >= num_members) {
372 /* we won and everyone has voted */
373 election_timeout_stop(cluster);
374 if (voted_size > num_members) {
375 GHashTableIter gIter;
376 const pcmk__node_status_t *node = NULL;
377 char *key = NULL;
378
379 pcmk__warn("Received too many votes in election");
380 g_hash_table_iter_init(&gIter, pcmk__peer_cache);
381 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
382 if (pcmk__cluster_is_node_active(node)) {
383 pcmk__warn("* expected vote: %s", node->name);
384 }
385 }
386
387 g_hash_table_iter_init(&gIter, cluster->priv->election->voted);
388 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
389 pcmk__warn("* actual vote: %s", key);
390 }
391
392 }
393
394 pcmk__info("Election won by local node");
395 election_complete(cluster);
396 return TRUE;
397
398 } else {
399 pcmk__debug("Election still waiting on %d of %d vote%s",
400 (num_members - voted_size), num_members,
401 pcmk__plural_s(num_members));
402 }
403
404 return FALSE;
405 }
406
// How long (in seconds) after losing an election that wins are dampened
#define LOSS_DAMPEN 2

/*!
 * \internal
 * \brief Fields parsed from an election message
 *
 * The string members point into the message that was parsed, so they are
 * valid only as long as that message is.
 */
struct vote {
    //! Election operation (task) named in the message
    const char *op;

    //! Name of the node that sent the message
    const char *from;

    //! Version advertised by the sender
    const char *version;

    //! ID of the node that started this election round
    const char *election_owner;

    //! Election round number (-1 if not present in the message)
    int election_id;

    //! Sender's uptime (meaningful only for vote operations)
    struct timeval age;
};
417
418 /*!
419 * \internal
420 * \brief Unpack an election message
421 *
422 * \param[in] message Election message XML
423 * \param[out] vote Parsed fields from message
424 *
425 * \return TRUE if election message and election are valid, FALSE otherwise
426 * \note The parsed struct's pointer members are valid only for the lifetime of
427 * the message argument.
428 */
429 static bool
430 parse_election_message(const xmlNode *message, struct vote *vote)
431 {
432 CRM_CHECK(message && vote, return FALSE);
433
434 vote->election_id = -1;
435 vote->age.tv_sec = -1;
436 vote->age.tv_usec = -1;
437
438 vote->op = pcmk__xe_get(message, PCMK__XA_CRM_TASK);
439 vote->from = pcmk__xe_get(message, PCMK__XA_SRC);
440 vote->version = pcmk__xe_get(message, PCMK_XA_VERSION);
441 vote->election_owner = pcmk__xe_get(message, PCMK__XA_ELECTION_OWNER);
442
443 pcmk__xe_get_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
444
445 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
446 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
447
448 pcmk__warn("Invalid %s message from %s", pcmk__s(vote->op, "election"),
449 pcmk__s(vote->from, "unspecified node"));
450 pcmk__log_xml_trace(message, "bad-vote");
451 return FALSE;
452 }
453
454 // Op-specific validation
455
456 if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
457 /* Only vote ops have uptime.
458 Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
459 */
460 if ((pcmk__xe_get_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
461 PCMK__XA_ELECTION_AGE_NANO_SEC,
462 &(vote->age)) != pcmk_rc_ok)
463 || (vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
464
465 pcmk__warn("Cannot count election %s from %s because uptime is "
466 "missing or invalid",
467 vote->op, vote->from);
468 return FALSE;
469 }
470
471 } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
472 pcmk__info("Cannot process election message from %s because %s is not "
473 "a known election op",
474 vote->from, vote->op);
475 return FALSE;
476 }
477
478 /* If the membership cache is NULL, we REALLY shouldn't be voting --
479 * the question is how we managed to get here.
480 */
481 if (pcmk__peer_cache == NULL) {
482 pcmk__info("Cannot count election %s from %s becasue no peer "
483 "information available",
484 vote->op, vote->from);
485 return FALSE;
486 }
487 return TRUE;
488 }
489
490 static void
491 record_vote(pcmk_cluster_t *cluster, struct vote *vote)
492 {
493 pcmk__assert((vote->from != NULL) && (vote->op != NULL));
494
495 if (cluster->priv->election->voted == NULL) {
496 cluster->priv->election->voted = pcmk__strkey_table(free, free);
497 }
498 pcmk__insert_dup(cluster->priv->election->voted, vote->from, vote->op);
499 }
500
501 static void
502 send_no_vote(pcmk_cluster_t *cluster, pcmk__node_status_t *peer,
503 struct vote *vote)
504 {
505 const char *message_type = NULL;
506 xmlNode *novote = NULL;
507
508 message_type = pcmk__server_message_type(cluster->priv->server);
509 novote = pcmk__new_request(cluster->priv->server, message_type,
510 vote->from, message_type, CRM_OP_NOVOTE, NULL);
511 pcmk__xe_set(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
512 pcmk__xe_set_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
513
514 pcmk__cluster_send_message(peer, cluster->priv->server, novote);
515 pcmk__xml_free(novote);
516 }
517
518 /*!
519 * \internal
520 * \brief Process an election message (vote or no-vote) from a peer
521 *
522 * \param[in,out] cluster Cluster with election
523 * \param[in] message Election message XML from peer
524 * \param[in] can_win Whether local node is eligible to win
525 *
526 * \return Election state after new vote is considered
527 * \note If the peer message is a vote, and we prefer the peer to win, this will
528 * send a no-vote reply to the peer.
529 * \note The situations "we lost to this vote" from "this is a late no-vote
530 * after we've already lost" both return election_lost. If a caller needs
531 * to distinguish them, it should save the current state before calling
532 * this function, and then compare the result.
533 */
534 enum election_result
535 election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message,
536 bool can_win)
537 {
538 int log_level = LOG_INFO;
539 gboolean done = FALSE;
540 gboolean we_lose = FALSE;
541 const char *reason = NULL;
542 bool we_are_owner = FALSE;
543 pcmk__node_status_t *our_node = NULL;
544 pcmk__node_status_t *your_node = NULL;
545 time_t tm_now = time(NULL);
546 struct vote vote;
547
548 CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL)
549 && (message != NULL) && (cluster->priv->node_name != NULL),
550 return election_error);
551
552 if (!parse_election_message(message, &vote)) {
553 return election_error;
554 }
555
556 your_node = pcmk__get_node(0, vote.from, NULL,
557 pcmk__node_search_cluster_member);
558 our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
559 pcmk__node_search_cluster_member);
560 we_are_owner = (our_node != NULL)
561 && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node),
562 vote.election_owner, pcmk__str_none);
563
564 if (!can_win) {
565 reason = "Not eligible";
566 we_lose = TRUE;
567
568 } else if (!pcmk__cluster_is_node_active(our_node)) {
569 reason = "We are not part of the cluster";
570 log_level = LOG_ERR;
571 we_lose = TRUE;
572
573 } else if (we_are_owner
574 && (vote.election_id != cluster->priv->election->count)) {
575 log_level = LOG_TRACE;
576 reason = "Superseded";
577 done = TRUE;
578
579 } else if (!pcmk__cluster_is_node_active(your_node)) {
580 /* Possibly we cached the message in the FSA queue at a point that it wasn't */
581 reason = "Peer is not part of our cluster";
582 log_level = LOG_WARNING;
583 done = TRUE;
584
585 } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
586 || pcmk__str_eq(vote.from, cluster->priv->node_name,
587 pcmk__str_casei)) {
588 /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
589 * for us to win
590 */
591 if (!we_are_owner) {
592 pcmk__warn("Cannot count election round %d %s from %s because we "
593 "did not start election (node ID %s did)",
594 vote.election_id, vote.op, vote.from,
595 vote.election_owner);
596 return election_error;
597 }
598 if (cluster->priv->election->state != election_in_progress) {
599 // Should only happen if we already lost
600 pcmk__debug("Not counting election round %d %s from %s because no "
601 "election in progress",
602 vote.election_id, vote.op, vote.from);
603 return cluster->priv->election->state;
604 }
605 record_vote(cluster, &vote);
606 reason = "Recorded";
607 done = TRUE;
608
609 } else {
610 // A peer vote requires a comparison to determine which node is better
611 int age_result = compare_age(vote.age);
612 int version_result = pcmk__compare_versions(vote.version,
613 CRM_FEATURE_SET);
614
615 if (version_result < 0) {
616 reason = "Version";
617 we_lose = TRUE;
618
619 } else if (version_result > 0) {
620 reason = "Version";
621
622 } else if (age_result < 0) {
623 reason = "Uptime";
624 we_lose = TRUE;
625
626 } else if (age_result > 0) {
627 reason = "Uptime";
628
629 } else if (strcasecmp(cluster->priv->node_name, vote.from) > 0) {
630 reason = "Host name";
631 we_lose = TRUE;
632
633 } else {
634 reason = "Host name";
635 }
636 }
637
638 if (cluster->priv->election->expires < tm_now) {
639 cluster->priv->election->election_wins = 0;
640 cluster->priv->election->expires = tm_now + STORM_INTERVAL;
641
642 } else if (done == FALSE && we_lose == FALSE) {
643 int peers = 1 + g_hash_table_size(pcmk__peer_cache);
644
645 /* If every node has to vote down every other node, thats N*(N-1) total elections
646 * Allow some leeway before _really_ complaining
647 */
648 cluster->priv->election->election_wins++;
649 if (cluster->priv->election->election_wins > (peers * peers)) {
650 pcmk__warn("Election storm detected: %d wins in %d seconds",
651 cluster->priv->election->election_wins, STORM_INTERVAL);
652 cluster->priv->election->election_wins = 0;
653 cluster->priv->election->expires = tm_now + STORM_INTERVAL;
654 if (!(cluster->priv->election->wrote_blackbox)) {
655 /* It's questionable whether a black box (from every node in the
656 * cluster) would be truly helpful in diagnosing an election
657 * storm. It's also highly doubtful a production environment
658 * would get multiple election storms from distinct causes, so
659 * saving one blackbox per process lifetime should be
660 * sufficient. Alternatives would be to save a timestamp of the
661 * last blackbox write instead of a boolean, and write a new one
662 * if some amount of time has passed; or to save a storm count,
663 * write a blackbox on every Nth occurrence.
664 */
665 crm_write_blackbox(0, NULL);
666 cluster->priv->election->wrote_blackbox = true;
667 }
668 }
669 }
670
671 if (done) {
672 do_crm_log(log_level + 1,
673 "Processed election round %u %s (current round %d) "
674 "from %s (%s)",
675 vote.election_id, vote.op, cluster->priv->election->count,
676 vote.from, reason);
677 return cluster->priv->election->state;
678
679 } else if (we_lose == FALSE) {
680 /* We track the time of the last election loss to implement an election
681 * dampening period, reducing the likelihood of an election storm. If
682 * this node has lost within the dampening period, don't start a new
683 * election, even if we win against a peer's vote -- the peer we lost to
684 * should win again.
685 *
686 * @TODO This has a problem case: if an election winner immediately
687 * leaves the cluster, and a new election is immediately called, all
688 * nodes could lose, with no new winner elected. The ideal solution
689 * would be to tie the election structure with the peer caches, which
690 * would allow us to clear the dampening when the previous winner
691 * leaves (and would allow other improvements as well).
692 */
693 if ((cluster->priv->election->last_election_loss == 0)
694 || ((tm_now - cluster->priv->election->last_election_loss)
695 > (time_t) LOSS_DAMPEN)) {
696
697 do_crm_log(log_level,
698 "Election round %d (started by node ID %s) pass: "
699 "%s from %s (%s)",
700 vote.election_id, vote.election_owner, vote.op,
701 vote.from, reason);
702
703 cluster->priv->election->last_election_loss = 0;
704 election_timeout_stop(cluster);
705
706 /* Start a new election by voting down this, and other, peers */
707 cluster->priv->election->state = election_start;
708 return cluster->priv->election->state;
709 } else {
710 char *loss_time = NULL;
711
712 loss_time = ctime(&(cluster->priv->election->last_election_loss));
713 if (loss_time) {
714 // Show only HH:MM:SS
715 loss_time += 11;
716 loss_time[8] = '\0';
717 }
718 pcmk__info("Ignoring election round %d (started by node ID %s) "
719 "pass vs %s because we lost less than %ds ago at %s",
720 vote.election_id, vote.election_owner, vote.from,
721 LOSS_DAMPEN, pcmk__s(loss_time, "unknown"));
722 }
723 }
724
725 cluster->priv->election->last_election_loss = tm_now;
726
727 do_crm_log(log_level,
728 "Election round %d (started by node ID %s) lost: "
729 "%s from %s (%s)",
730 vote.election_id, vote.election_owner, vote.op,
731 vote.from, reason);
732
733 election_reset(cluster);
734 send_no_vote(cluster, your_node, &vote);
735 cluster->priv->election->state = election_lost;
736 return cluster->priv->election->state;
737 }
738
739 /*!
740 * \internal
741 * \brief Reset any election dampening currently in effect
742 *
743 * \param[in,out] cluster Cluster with election
744 */
745 void
746 election_clear_dampening(pcmk_cluster_t *cluster)
747 {
748 if ((cluster != NULL) && (cluster->priv->election != NULL)) {
749 cluster->priv->election->last_election_loss = 0;
750 }
751 }
752