1 /*
2 * Copyright (c) 2009-2020 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Authors: Christine Caulfield (ccaulfie@redhat.com)
7 * Fabio M. Di Nitto (fdinitto@redhat.com)
8 *
9 * This software licensed under BSD license, the text of which follows:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are met:
13 *
14 * - Redistributions of source code must retain the above copyright notice,
15 * this list of conditions and the following disclaimer.
16 * - Redistributions in binary form must reproduce the above copyright notice,
17 * this list of conditions and the following disclaimer in the documentation
18 * and/or other materials provided with the distribution.
19 * - Neither the name of the MontaVista Software, Inc. nor the names of its
20 * contributors may be used to endorse or promote products derived from this
21 * software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33 * THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <config.h>
37
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <fcntl.h>
41 #include <stdint.h>
42 #include <unistd.h>
43
44 #include <qb/qblist.h>
45 #include <qb/qbipc_common.h>
46
47 #include "quorum.h"
48 #include <corosync/corodefs.h>
49 #include <corosync/logsys.h>
50 #include <corosync/coroapi.h>
51 #include <corosync/icmap.h>
52 #include <corosync/votequorum.h>
53 #include <corosync/ipc_votequorum.h>
54
55 #include "service.h"
56 #include "util.h"
57
58 LOGSYS_DECLARE_SUBSYS ("VOTEQ");
59
60 /*
61 * interface with corosync
62 */
63
64 static struct corosync_api_v1 *corosync_api;
65
66 /*
67 * votequorum global config vars
68 */
69
70
71 static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
72 static struct cluster_node *qdevice = NULL;
73 static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
74 static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
75 static uint8_t qdevice_can_operate = 1;
76 static void *qdevice_reg_conn = NULL;
77 static uint8_t qdevice_master_wins = 0;
78
79 static uint8_t two_node = 0;
80
81 static uint8_t wait_for_all = 0;
82 static uint8_t wait_for_all_status = 0;
83 static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */
84
85 static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE;
86 static int lowest_node_id = -1;
87 static int highest_node_id = -1;
88
89 #define DEFAULT_LMS_WIN 10000
90 static uint8_t last_man_standing = 0;
91 static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
92
93 static uint8_t allow_downscale = 0;
94 static uint32_t ev_barrier = 0;
95
96 static uint8_t ev_tracking = 0;
97 static uint32_t ev_tracking_barrier = 0;
98 static int ev_tracking_fd = -1;
99
100 /*
101 * votequorum_exec defines/structs/forward definitions
102 */
103
104 struct req_exec_quorum_nodeinfo {
105 struct qb_ipc_request_header header __attribute__((aligned(8)));
106 uint32_t nodeid;
107 uint32_t votes;
108 uint32_t expected_votes;
109 uint32_t flags;
110 } __attribute__((packed));
111
112 struct req_exec_quorum_reconfigure {
113 struct qb_ipc_request_header header __attribute__((aligned(8)));
114 uint32_t nodeid;
115 uint32_t value;
116 uint8_t param;
117 uint8_t _pad0;
118 uint8_t _pad1;
119 uint8_t _pad2;
120 } __attribute__((packed));
121
122 struct req_exec_quorum_qdevice_reg {
123 struct qb_ipc_request_header header __attribute__((aligned(8)));
124 uint32_t operation;
125 char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
126 } __attribute__((packed));
127
128 struct req_exec_quorum_qdevice_reconfigure {
129 struct qb_ipc_request_header header __attribute__((aligned(8)));
130 char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
131 char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
132 } __attribute__((packed));
133
134 struct req_exec_quorum_qdevice_set_extra_info {
135 struct qb_ipc_request_header header __attribute__((aligned(8)));
136 uint32_t nodeid; ///< the ID of the node whose info needs updating
137 uint32_t ei_size; ///< length of the extra info
138 char extra_info[];
139 } __attribute__((packed));
140
141 /*
142 * votequorum_exec onwire version (via totem)
143 */
144
145 #include "votequorum.h"
146
147 /*
148 * votequorum_exec onwire messages (via totem)
149 */
150
/*
 * Message identifiers; each value is the index of the matching entry in
 * votequorum_exec_engine[].
 */
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO               0
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE            1
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG            2
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE    3
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_EXTRA_NODEINFO 4

/* notification senders */
static void votequorum_exec_send_expectedvotes_notification(void);
static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context);
static int votequorum_exec_send_all_qdevice_extra_info_notification(void *conn, uint64_t context);

/*
 * values for the "param" argument of votequorum_exec_send_reconfigure()
 */
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES     2
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA     3

static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);

/*
 * used by req_exec_quorum_qdevice_reg
 */
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER   1

/*
 * votequorum internal node status/view
 *
 * Bit flags kept in cluster_node.flags.
 */
#define NODE_FLAGS_QUORATE                (1 << 0)
#define NODE_FLAGS_LEAVING                (1 << 1)
#define NODE_FLAGS_WFASTATUS              (1 << 2)
#define NODE_FLAGS_FIRST                  (1 << 3)
#define NODE_FLAGS_QDEVICE_REGISTERED     (1 << 4)
#define NODE_FLAGS_QDEVICE_ALIVE          (1 << 5)
#define NODE_FLAGS_QDEVICE_CAST_VOTE      (1 << 6)
#define NODE_FLAGS_QDEVICE_MASTER_WINS    (1 << 7)
#define NODE_FLAGS_QDEVICE_EXTRA_INFO_SET (1 << 8)
187
188 typedef enum {
189 NODESTATE_MEMBER=1,
190 NODESTATE_DEAD,
191 NODESTATE_LEAVING
192 } nodestate_t;
193
194 struct cluster_node {
195 int node_id;
196 nodestate_t state;
197 uint32_t votes;
198 uint32_t expected_votes;
199 uint32_t flags;
200
201 uint32_t ei_size;
202 char extra_nodeinfo[VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE];
203
204 struct qb_list_head list;
205 };
206
207 /*
208 * votequorum internal quorum status
209 */
210
211 static uint8_t quorum;
212 static uint8_t cluster_is_quorate;
213
214 /*
215 * votequorum membership data
216 */
217
218 static struct cluster_node *us;
219 static struct qb_list_head cluster_members_list;
220 static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
221 static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX];
222 static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX];
223 static int quorum_members_entries = 0;
224 static int previous_quorum_members_entries = 0;
225 static int atb_nodelist_entries = 0;
226 static struct memb_ring_id quorum_ringid;
227
228 /*
229 * pre allocate all cluster_nodes + one for qdevice
230 */
231 static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
232 static int cluster_nodes_entries = 0;
233
234 /*
235 * votequorum tracking
236 */
237 struct quorum_pd {
238 unsigned char track_flags;
239 int tracking_enabled;
240 uint64_t tracking_context;
241 struct qb_list_head list;
242 void *conn;
243 };
244
245 static struct qb_list_head trackers_list;
246
247 /*
248 * votequorum timers
249 */
250
251 static corosync_timer_handle_t qdevice_timer;
252 static int qdevice_timer_set = 0;
253 static corosync_timer_handle_t last_man_standing_timer;
254 static int last_man_standing_timer_set = 0;
255 static int sync_nodeinfo_sent = 0;
256 static int sync_wait_for_poll_or_timeout = 0;
257
258 /*
259 * Service Interfaces required by service_message_handler struct
260 */
261
262 static int sync_in_progress = 0;
263
264 static void votequorum_sync_init (
265 const unsigned int *trans_list,
266 size_t trans_list_entries,
267 const unsigned int *member_list,
268 size_t member_list_entries,
269 const struct memb_ring_id *ring_id);
270
271 static int votequorum_sync_process (void);
272 static void votequorum_sync_activate (void);
273 static void votequorum_sync_abort (void);
274
275 static quorum_set_quorate_fn_t quorum_callback;
276
277 /*
278 * votequorum_exec handler and definitions
279 */
280
281 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
282 static int votequorum_exec_exit_fn (void);
283 static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
284 static int votequorum_exec_send_set_qdevice_extra_info(uint32_t nodeid);
285
286 static void message_handler_req_exec_votequorum_nodeinfo (
287 const void *message,
288 unsigned int nodeid);
289 static void exec_votequorum_nodeinfo_endian_convert (void *message);
290
291 static void message_handler_req_exec_votequorum_reconfigure (
292 const void *message,
293 unsigned int nodeid);
294 static void exec_votequorum_reconfigure_endian_convert (void *message);
295
296 static void message_handler_req_exec_votequorum_qdevice_reg (
297 const void *message,
298 unsigned int nodeid);
299 static void exec_votequorum_qdevice_reg_endian_convert (void *message);
300
301 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
302 const void *message,
303 unsigned int nodeid);
304 static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
305
306 static void message_handler_req_exec_votequorum_qdevice_set_extra_info (
307 const void *message,
308 unsigned int nodeid);
309 static void exec_votequorum_qdevice_set_extra_info_endian_convert (void *message);
310
311 static struct corosync_exec_handler votequorum_exec_engine[] =
312 {
313 { /* 0 */
314 .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
315 .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
316 },
317 { /* 1 */
318 .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
319 .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
320 },
321 { /* 2 */
322 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
323 .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
324 },
325 { /* 3 */
326 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
327 .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
328 },
329 { /* 4 */
330 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_set_extra_info,
331 .exec_endian_convert_fn = exec_votequorum_qdevice_set_extra_info_endian_convert
332 },
333 };
334
335 /*
336 * Library Handler and Functions Definitions
337 */
338
/* library connection life cycle */
static int quorum_lib_init_fn (void *conn);
static int quorum_lib_exit_fn (void *conn);

static void qdevice_timer_fn(void *arg);

/* library request handlers, listed in quorum_lib_service[] order */
static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message);
static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message);
static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message);
static void message_handler_req_lib_votequorum_trackstart (void *conn, const void *message);
static void message_handler_req_lib_votequorum_trackstop (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_register (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_update (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_poll (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_set_extra_info (void *conn, const void *message);
static void message_handler_req_lib_votequorum_qdevice_get_extra_info (void *conn, const void *message);
380
381 static struct corosync_lib_handler quorum_lib_service[] =
382 {
383 { /* 0 */
384 .lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
385 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
386 },
387 { /* 1 */
388 .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
389 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
390 },
391 { /* 2 */
392 .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
393 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
394 },
395 { /* 3 */
396 .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
397 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
398 },
399 { /* 4 */
400 .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
401 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
402 },
403 { /* 5 */
404 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
405 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
406 },
407 { /* 6 */
408 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
409 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
410 },
411 { /* 7 */
412 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
413 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
414 },
415 { /* 8 */
416 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
417 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
418 },
419 { /* 9 */
420 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
421 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
422 },
423 { /* 10 */
424 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_set_extra_info,
425 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
426 },
427 { /* 11 */
428 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_get_extra_info,
429 .flow_control = COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
430 },
431 };
432
433 static struct corosync_service_engine votequorum_service_engine = {
434 .name = "corosync vote quorum service v1.0",
435 .id = VOTEQUORUM_SERVICE,
436 .priority = 2,
437 .private_data_size = sizeof (struct quorum_pd),
438 .allow_inquorate = CS_LIB_ALLOW_INQUORATE,
439 .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED,
440 .lib_init_fn = quorum_lib_init_fn,
441 .lib_exit_fn = quorum_lib_exit_fn,
442 .lib_engine = quorum_lib_service,
443 .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
444 .exec_init_fn = votequorum_exec_init_fn,
445 .exec_exit_fn = votequorum_exec_exit_fn,
446 .exec_engine = votequorum_exec_engine,
447 .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
448 .sync_init = votequorum_sync_init,
449 .sync_process = votequorum_sync_process,
450 .sync_activate = votequorum_sync_activate,
451 .sync_abort = votequorum_sync_abort
452 };
453
454 struct corosync_service_engine *votequorum_get_service_engine_ver0 (void)
455 {
456 return (&votequorum_service_engine);
457 }
458
459 static struct default_service votequorum_service[] = {
460 {
461 .name = "corosync_votequorum",
462 .ver = 0,
463 .loader = votequorum_get_service_engine_ver0
464 },
465 };
466
467 /*
468 * common/utility macros/functions
469 */
470
/*
 * Larger of two values.  Both arguments appear twice in the expansion, so
 * do not pass expressions with side effects.
 */
#define max(a,b) \
	((a) > (b) ? (a) : (b))
472
473 static void node_add_ordered(struct cluster_node *newnode)
474 {
475 struct cluster_node *node = NULL;
476 struct qb_list_head *tmp;
477
478 ENTER();
479
480 qb_list_for_each(tmp, &cluster_members_list) {
481 node = qb_list_entry(tmp, struct cluster_node, list);
482 if (newnode->node_id < node->node_id) {
483 break;
484 }
485 }
486
487 if (!node) {
488 qb_list_add(&newnode->list, &cluster_members_list);
489 } else {
490 qb_list_add_tail(&newnode->list, &node->list);
491 }
492
493 LEAVE();
494 }
495
496 static struct cluster_node *allocate_node(unsigned int nodeid)
497 {
498 struct cluster_node *cl = NULL;
499 struct qb_list_head *tmp;
500
501 ENTER();
502
503 if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
504 cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
505 cluster_nodes_entries++;
506 } else {
507 qb_list_for_each(tmp, &cluster_members_list) {
508 cl = qb_list_entry(tmp, struct cluster_node, list);
509 if (cl->state == NODESTATE_DEAD) {
510 break;
511 }
512 }
513 /*
514 * this should never happen
515 */
516 if (!cl) {
517 log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node " CS_PRI_NODE_ID " data!!", nodeid);
518 goto out;
519 }
520 qb_list_del(tmp);
521 }
522
523 memset(cl, 0, sizeof(struct cluster_node));
524 cl->node_id = nodeid;
525 if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
526 node_add_ordered(cl);
527 }
528
529 out:
530 LEAVE();
531
532 return cl;
533 }
534
535 static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
536 {
537 struct cluster_node *node;
538 struct qb_list_head *tmp;
539
540 ENTER();
541
|
(1) Event path: |
Condition "nodeid == us->node_id", taking false branch. |
542 if (nodeid == us->node_id) {
543 LEAVE();
544 return us;
545 }
546
|
(2) Event path: |
Condition "nodeid == 0", taking false branch. |
547 if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
548 LEAVE();
549 return qdevice;
550 }
551
|
(3) Event path: |
Condition "tmp != &cluster_members_list", taking true branch. |
|
(6) Event path: |
Condition "tmp != &cluster_members_list", taking false branch. |
552 qb_list_for_each(tmp, &cluster_members_list) {
553 node = qb_list_entry(tmp, struct cluster_node, list);
|
(4) Event path: |
Condition "node->node_id == nodeid", taking false branch. |
554 if (node->node_id == nodeid) {
555 LEAVE();
556 return node;
557 }
|
(5) Event path: |
Jumping back to the beginning of the loop. |
558 }
559
560 LEAVE();
|
(7) Event return_null: |
Explicitly returning null. |
561 return NULL;
562 }
563
564 static void get_lowest_node_id(void)
565 {
566 struct cluster_node *node = NULL;
567 struct qb_list_head *tmp;
568
569 ENTER();
570
571 lowest_node_id = us->node_id;
572
573 qb_list_for_each(tmp, &cluster_members_list) {
574 node = qb_list_entry(tmp, struct cluster_node, list);
575 if ((node->state == NODESTATE_MEMBER) &&
576 (node->node_id < lowest_node_id)) {
577 lowest_node_id = node->node_id;
578 }
579 }
580 log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, lowest_node_id, us->node_id);
581 icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
582
583 LEAVE();
584 }
585
586 static void get_highest_node_id(void)
587 {
588 struct cluster_node *node = NULL;
589 struct qb_list_head *tmp;
590
591 ENTER();
592
593 highest_node_id = us->node_id;
594
595 qb_list_for_each(tmp, &cluster_members_list) {
596 node = qb_list_entry(tmp, struct cluster_node, list);
597 if ((node->state == NODESTATE_MEMBER) &&
598 (node->node_id > highest_node_id)) {
599 highest_node_id = node->node_id;
600 }
601 }
602 log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, highest_node_id, us->node_id);
603 icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id);
604
605 LEAVE();
606 }
607
608 static int check_low_node_id_partition(void)
609 {
610 struct cluster_node *node = NULL;
611 struct qb_list_head *tmp;
612 int found = 0;
613
614 ENTER();
615
616 qb_list_for_each(tmp, &cluster_members_list) {
617 node = qb_list_entry(tmp, struct cluster_node, list);
618 if ((node->state == NODESTATE_MEMBER) &&
619 (node->node_id == lowest_node_id)) {
620 found = 1;
621 }
622 }
623
624 LEAVE();
625 return found;
626 }
627
628 static int check_high_node_id_partition(void)
629 {
630 struct cluster_node *node = NULL;
631 struct qb_list_head *tmp;
632 int found = 0;
633
634 ENTER();
635
636 qb_list_for_each(tmp, &cluster_members_list) {
637 node = qb_list_entry(tmp, struct cluster_node, list);
638 if ((node->state == NODESTATE_MEMBER) &&
639 (node->node_id == highest_node_id)) {
640 found = 1;
641 }
642 }
643
644 LEAVE();
645 return found;
646 }
647
/*
 * Linear search for nodeid in the first "entries" elements of members.
 * Returns 1 on a match and 0 otherwise (also for entries <= 0).
 */
static int is_in_nodelist(int nodeid, unsigned int *members, int entries)
{
	int hit = 0;
	int idx = 0;

	ENTER();

	while (!hit && idx < entries) {
		hit = (nodeid == members[idx]);
		idx++;
	}

	LEAVE();
	return hit;
}
662
663 /*
664 * The algorithm for a list of tie-breaker nodes is:
665 * travel the list of nodes in the auto_tie_breaker list,
666 * if the node IS in our current partition, check if the
667 * nodes earlier in the atb list are in the 'previous' partition;
668 * If none are found then we are safe to be quorate, if any are
669 * then we cannot be as we don't know if that node is up or down.
670 * If we don't have a node in the current list we are NOT quorate.
671 * Obviously if we find the first node in the atb list in our
672 * partition then we are quorate.
673 *
674 * Special cases lowest nodeid, and highest nodeid are handled separately.
675 */
676 static int check_auto_tie_breaker(void)
677 {
678 int i, j;
679 int res;
680 ENTER();
681
682 if (auto_tie_breaker == ATB_LOWEST) {
683 res = check_low_node_id_partition();
684 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res);
685 LEAVE();
686 return res;
687 }
688 if (auto_tie_breaker == ATB_HIGHEST) {
689 res = check_high_node_id_partition();
690 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res);
691 LEAVE();
692 return res;
693 }
694
695 /* Assume ATB_LIST, we should never be called for ATB_NONE */
696 for (i=0; i < atb_nodelist_entries; i++) {
697 if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
698 /*
699 * Node is in our partition, if any of its predecessors are
700 * in the previous quorum partition then it might be in the
701 * 'other half' (as we've got this far without seeing it here)
702 * and so we can't be quorate.
703 */
704 for (j=0; j<i; j++) {
705 if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
706 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in previous partition but not here, quorum denied", atb_nodelist[j]);
707 LEAVE();
708 return 0;
709 }
710 }
711
712 /*
713 * None of the other list nodes were in the previous partition, if there
714 * are enough votes, we can be quorate
715 */
716 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in current partition, we can be quorate", atb_nodelist[i]);
717 LEAVE();
718 return 1;
719 }
720 }
721 log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate");
722 LEAVE();
723 return 0;
724 }
725
726 /*
727 * atb_string can be either:
728 * 'lowest'
729 * 'highest'
730 * a list of nodeids
731 */
732 static void parse_atb_string(char *atb_string)
733 {
734 char *ptr;
735 long num;
736
737 ENTER();
738 auto_tie_breaker = ATB_NONE;
739
740 if (!strcmp(atb_string, "lowest"))
741 auto_tie_breaker = ATB_LOWEST;
742
743 if (!strcmp(atb_string, "highest"))
744 auto_tie_breaker = ATB_HIGHEST;
745
746 if (atoi(atb_string)) {
747
748 atb_nodelist_entries = 0;
749 ptr = atb_string;
750 do {
751 num = strtol(ptr, &ptr, 10);
752 if (num) {
753 log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num);
754 atb_nodelist[atb_nodelist_entries++] = num;
755 }
756 } while (num);
757
758 if (atb_nodelist_entries) {
759 auto_tie_breaker = ATB_LIST;
760 }
761 }
762 icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
763 log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker);
764
765 /* Make sure we got something */
766 if (auto_tie_breaker == ATB_NONE) {
767 log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
768 auto_tie_breaker = ATB_NONE;
769 }
770 LEAVE();
771 }
772
773 static int check_qdevice_master(void)
774 {
775 struct cluster_node *node = NULL;
776 struct qb_list_head *tmp;
777 int found = 0;
778
779 ENTER();
780
781 qb_list_for_each(tmp, &cluster_members_list) {
782 node = qb_list_entry(tmp, struct cluster_node, list);
783 if ((node->state == NODESTATE_MEMBER) &&
784 (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) &&
785 (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE)) {
786 found = 1;
787 }
788 }
789
790 LEAVE();
791 return found;
792 }
793
794 static void decode_flags(uint32_t flags)
795 {
796 ENTER();
797
798 log_printf(LOGSYS_LEVEL_DEBUG,
799 "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
800 (flags & NODE_FLAGS_QUORATE)?"Yes":"No",
801 (flags & NODE_FLAGS_LEAVING)?"Yes":"No",
802 (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
803 (flags & NODE_FLAGS_FIRST)?"Yes":"No",
804 (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No",
805 (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
806 (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
807 (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No");
808
809 LEAVE();
810 }
811
812 /*
813 * load/save are copied almost pristine from totemsrp,c
814 */
815 static int load_ev_tracking_barrier(void)
816 {
817 int res = 0;
818 char filename[PATH_MAX];
819
820 ENTER();
821
822 snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_state_dir());
823
824 ev_tracking_fd = open(filename, O_RDWR, 0700);
825 if (ev_tracking_fd != -1) {
826 res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
827 close(ev_tracking_fd);
828 if (res == sizeof (uint32_t)) {
829 LEAVE();
830 return 0;
831 }
832 }
833
834 ev_tracking_barrier = 0;
835 umask(0);
836 ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
837 if (ev_tracking_fd != -1) {
838 res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
839 if ((res == -1) || (res != sizeof (uint32_t))) {
840 log_printf(LOGSYS_LEVEL_WARNING,
841 "Unable to write to %s", filename);
842 }
843 close(ev_tracking_fd);
844 LEAVE();
845 return 0;
846 }
847 log_printf(LOGSYS_LEVEL_WARNING,
848 "Unable to create %s file", filename);
849
850 LEAVE();
851
852 return -1;
853 }
854
855 static void update_wait_for_all_status(uint8_t wfa_status)
856 {
857 ENTER();
858
859 wait_for_all_status = wfa_status;
860 if (wait_for_all_status) {
861 us->flags |= NODE_FLAGS_WFASTATUS;
862 } else {
863 us->flags &= ~NODE_FLAGS_WFASTATUS;
864 }
865 icmap_set_uint8("runtime.votequorum.wait_for_all_status",
866 wait_for_all_status);
867
868 LEAVE();
869 }
870
871 static void update_two_node(void)
872 {
873 ENTER();
874
875 icmap_set_uint8("runtime.votequorum.two_node", two_node);
876
877 LEAVE();
878 }
879
880 static void update_ev_barrier(uint32_t expected_votes)
881 {
882 ENTER();
883
884 ev_barrier = expected_votes;
885 icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
886
887 LEAVE();
888 }
889
890 static void update_qdevice_can_operate(uint8_t status)
891 {
892 ENTER();
893
894 qdevice_can_operate = status;
895 icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
896
897 LEAVE();
898 }
899
900 static void update_qdevice_master_wins(uint8_t allow)
901 {
902 ENTER();
903
904 qdevice_master_wins = allow;
905 icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
906
907 LEAVE();
908 }
909
910 static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
911 {
912 int res;
913
914 ENTER();
915
916 ev_tracking_barrier = ev_t_barrier;
917 icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
918
919 if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
920 log_printf(LOGSYS_LEVEL_WARNING,
921 "Unable to update ev_tracking_barrier on disk data!!!");
922 LEAVE();
923 return;
924 }
925
926 res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
927 if (res != sizeof (uint32_t)) {
928 log_printf(LOGSYS_LEVEL_WARNING,
929 "Unable to update ev_tracking_barrier on disk data!!!");
930 }
931 #ifdef HAVE_FDATASYNC
932 fdatasync(ev_tracking_fd);
933 #else
934 fsync(ev_tracking_fd);
935 #endif
936
937 LEAVE();
938 }
939
940 /*
941 * quorum calculation core bits
942 */
943
944 static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
945 {
946 struct qb_list_head *nodelist;
947 struct cluster_node *node;
948 unsigned int total_votes = 0;
949 unsigned int highest_expected = 0;
950 unsigned int newquorum, q1, q2;
951 unsigned int total_nodes = 0;
952
953 ENTER();
954
955 if ((allow_downscale) && (allow_decrease) && (max_expected)) {
956 max_expected = max(ev_barrier, max_expected);
957 }
958
959 qb_list_for_each(nodelist, &cluster_members_list) {
960 node = qb_list_entry(nodelist, struct cluster_node, list);
961
962 log_printf(LOGSYS_LEVEL_DEBUG, "node " CS_PRI_NODE_ID " state=%d, votes=%u, expected=%u",
963 node->node_id, node->state, node->votes, node->expected_votes);
964
965 if (node->state == NODESTATE_MEMBER) {
966 highest_expected = max(highest_expected, node->expected_votes);
967 total_votes += node->votes;
968 total_nodes++;
969 }
970 }
971
972 if (us->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
973 log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
974 total_votes += qdevice->votes;
975 total_nodes++;
976 }
977
978 if (max_expected > 0) {
979 highest_expected = max_expected;
980 }
981
982 /*
983 * This quorum calculation is taken from the OpenVMS Cluster Systems
984 * manual, but, then, you guessed that didn't you
985 */
986 q1 = (highest_expected + 2) / 2;
987 q2 = (total_votes + 2) / 2;
988 newquorum = max(q1, q2);
989
990 /*
991 * Normally quorum never decreases but the system administrator can
992 * force it down by setting expected votes to a maximum value
993 */
994 if (!allow_decrease) {
995 newquorum = max(quorum, newquorum);
996 }
997
998 /*
999 * The special two_node mode allows each of the two nodes to retain
1000 * quorum if the other fails. Only one of the two should live past
1001 * fencing (as both nodes try to fence each other in split-brain.)
1002 * Also: if there are more than two nodes, force us inquorate to avoid
1003 * any damage or confusion.
1004 */
1005 if (two_node && total_nodes <= 2) {
1006 newquorum = 1;
1007 }
1008
1009 if (ret_total_votes) {
1010 *ret_total_votes = total_votes;
1011 }
1012
1013 LEAVE();
1014 return newquorum;
1015 }
1016
1017 static void update_node_expected_votes(int new_expected_votes)
1018 {
1019 struct qb_list_head *nodelist;
1020 struct cluster_node *node;
1021
1022 if (new_expected_votes) {
1023 qb_list_for_each(nodelist, &cluster_members_list) {
1024 node = qb_list_entry(nodelist, struct cluster_node, list);
1025
1026 if (node->state == NODESTATE_MEMBER) {
1027 node->expected_votes = new_expected_votes;
1028 }
1029 }
1030 }
1031 }
1032
1033 static void are_we_quorate(unsigned int total_votes)
1034 {
1035 int quorate;
1036 int quorum_change = 0;
1037
1038 ENTER();
1039
1040 /*
1041 * wait for all nodes to show up before granting quorum
1042 */
1043
1044 if ((wait_for_all) && (wait_for_all_status)) {
1045 if (total_votes != us->expected_votes) {
1046 log_printf(LOGSYS_LEVEL_NOTICE,
1047 "Waiting for all cluster members. "
1048 "Current votes: %d expected_votes: %d",
1049 total_votes, us->expected_votes);
1050 assert(!cluster_is_quorate);
1051 return;
1052 }
1053 update_wait_for_all_status(0);
1054 }
1055
1056 if (quorum > total_votes) {
1057 quorate = 0;
1058 } else {
1059 quorate = 1;
1060 get_lowest_node_id();
1061 get_highest_node_id();
1062 }
1063
1064 if ((auto_tie_breaker != ATB_NONE) &&
1065 /* Must be a half (or half-1) split */
1066 (total_votes == (us->expected_votes / 2)) &&
1067 /* If the 'other' partition in a split might have quorum then we can't run ATB */
1068 (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1069 (check_auto_tie_breaker() == 1)) {
1070 quorate = 1;
1071 }
1072
1073 if ((qdevice_master_wins) &&
1074 (!quorate) &&
1075 (check_qdevice_master() == 1)) {
1076 log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
1077 quorate = 1;
1078 }
1079
1080 if (cluster_is_quorate && !quorate) {
1081 quorum_change = 1;
1082 log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
1083 }
1084 if (!cluster_is_quorate && quorate) {
1085 quorum_change = 1;
1086 log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
1087 }
1088
1089 cluster_is_quorate = quorate;
1090 if (cluster_is_quorate) {
1091 us->flags |= NODE_FLAGS_QUORATE;
1092 } else {
1093 us->flags &= ~NODE_FLAGS_QUORATE;
1094 }
1095
1096 if (wait_for_all) {
1097 if (quorate) {
1098 update_wait_for_all_status(0);
1099 } else {
1100 update_wait_for_all_status(1);
1101 }
1102 }
1103
1104 if ((quorum_change) &&
1105 (sync_in_progress == 0)) {
1106 quorum_callback(quorum_members, quorum_members_entries,
1107 cluster_is_quorate, &quorum_ringid);
1108 votequorum_exec_send_quorum_notification(NULL, 0L);
1109 }
1110
1111 LEAVE();
1112 }
1113
1114 static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
1115 {
1116 unsigned int total_votes = 0;
1117 unsigned int cluster_members = 0;
1118 struct qb_list_head *nodelist;
1119 struct cluster_node *node;
1120
1121 ENTER();
1122
1123 qb_list_for_each(nodelist, &cluster_members_list) {
1124 node = qb_list_entry(nodelist, struct cluster_node, list);
1125 if (node->state == NODESTATE_MEMBER) {
1126 cluster_members++;
1127 total_votes += node->votes;
1128 }
1129 }
1130
1131 if (qdevice->votes) {
1132 total_votes += qdevice->votes;
1133 cluster_members++;
1134 }
1135
1136 *totalvotes = total_votes;
1137 *current_members = cluster_members;
1138
1139 LEAVE();
1140 }
1141
1142 /*
1143 * Recalculate cluster quorum, set quorate and notify changes
1144 */
1145 static void recalculate_quorum(int allow_decrease, int by_current_nodes)
1146 {
1147 unsigned int total_votes = 0;
1148 unsigned int cluster_members = 0;
1149
1150 ENTER();
1151
1152 get_total_votes(&total_votes, &cluster_members);
1153
1154 if (!by_current_nodes) {
1155 cluster_members = 0;
1156 }
1157
1158 /*
1159 * Keep expected_votes at the highest number of votes in the cluster
1160 */
1161 log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
1162 if (total_votes > us->expected_votes) {
1163 us->expected_votes = total_votes;
1164 votequorum_exec_send_expectedvotes_notification();
1165 }
1166
1167 if ((ev_tracking) &&
1168 (us->expected_votes > ev_tracking_barrier)) {
1169 update_ev_tracking_barrier(us->expected_votes);
1170 }
1171
1172 quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1173 update_node_expected_votes(cluster_members);
1174
1175 are_we_quorate(total_votes);
1176
1177 LEAVE();
1178 }
1179
1180 /*
1181 * configuration bits and pieces
1182 */
1183
1184 static int votequorum_read_nodelist_configuration(uint32_t *votes,
1185 uint32_t *nodes,
1186 uint32_t *expected_votes)
1187 {
1188 icmap_iter_t iter;
1189 const char *iter_key;
1190 char tmp_key[ICMAP_KEYNAME_MAXLEN];
1191 uint32_t our_pos, node_pos, last_node_pos=-1;
1192 uint32_t nodecount = 0;
1193 uint32_t nodelist_expected_votes = 0;
1194 uint32_t node_votes = 0;
1195 int res = 0;
1196
1197 ENTER();
1198
1199 if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
1200 log_printf(LOGSYS_LEVEL_DEBUG,
1201 "No nodelist defined or our node is not in the nodelist");
1202 return 0;
1203 }
1204
1205 iter = icmap_iter_init("nodelist.node.");
1206
1207 while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
1208
1209 res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
1210 if (res != 2) {
1211 continue;
1212 }
1213
1214 /*
1215 * If current node_pos is the same as the last_node_pos then skip it
1216 * so we only do the code below once per node.
1217 * (icmap keys are always in order)
1218 */
1219 if (last_node_pos == node_pos) {
1220 continue;
1221 }
1222 last_node_pos = node_pos;
1223
1224 nodecount++;
1225
1226 snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
1227 if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
1228 node_votes = 1;
1229 }
1230
1231 nodelist_expected_votes = nodelist_expected_votes + node_votes;
1232
1233 if (node_pos == our_pos) {
1234 *votes = node_votes;
1235 }
1236 }
1237
1238 *expected_votes = nodelist_expected_votes;
1239 *nodes = nodecount;
1240
1241 icmap_iter_finalize(iter);
1242
1243 LEAVE();
1244
1245 return 1;
1246 }
1247
1248 static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1249 {
1250 char *qdevice_model = NULL;
1251 int ret = 0;
1252
1253 ENTER();
1254
1255 if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) {
1256 if (strlen(qdevice_model)) {
1257 if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
1258 *qdevice_votes = -1;
1259 }
1260 if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
1261 qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
1262 }
1263 if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) {
1264 qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
1265 }
1266 update_qdevice_can_operate(1);
1267 ret = 1;
1268 }
1269
1270 free(qdevice_model);
1271 }
1272
1273 LEAVE();
1274
1275 return ret;
1276 }
1277
/*
 * Values for the "runtime" argument of votequorum_readconfig():
 * 0 selects the startup behaviour (the !runtime branches), non-zero the
 * behaviour used when the configuration is re-read while running.
 */
#define VOTEQUORUM_READCONFIG_STARTUP 0
#define VOTEQUORUM_READCONFIG_RUNTIME 1
1280
/*
 * Read the votequorum settings from icmap, validate them and apply them to
 * the module state (us, qdevice and the feature flags).
 *
 * runtime: 0 for the initial load, non-zero for a re-read while running.
 *          Feature flags such as allow_downscale, wait_for_all,
 *          last_man_standing, expected_votes_tracking and auto_tie_breaker
 *          are only read when runtime is 0.
 *
 * Returns NULL when no error is reported, otherwise a pointer to a string
 * literal describing the configuration error (the caller must not free it).
 * In the checks that test "runtime", a problem found during a re-read is
 * logged and handled (typically by disabling quorum device operations)
 * instead of being returned as an error.
 */
static char *votequorum_readconfig(int runtime)
{
	uint32_t node_votes = 0, qdevice_votes = 0;
	uint32_t node_expected_votes = 0, expected_votes = 0;
	uint32_t node_count = 0;
	uint8_t atb = 0;
	int have_nodelist, have_qdevice;
	char *atb_string = NULL;
	char *error = NULL;

	ENTER();

	log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);

	/*
	 * Set the few things we re-read at runtime back to their defaults
	 */
	if (runtime) {
		two_node = 0;
		expected_votes = 0;
		/* auto_tie_breaker cannot be changed by config reload, but
		 * we automatically disable it on odd-sized clusters without
		 * wait_for_all.
		 * We may need to re-enable it when membership changes to ensure
		 * that auto_tie_breaker is consistent across all nodes */
		auto_tie_breaker = initial_auto_tie_breaker;
		icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
	}

	/*
	 * gather basic data here
	 */
	(void)icmap_get_uint32("quorum.expected_votes", &expected_votes);
	have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
	have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
	(void)icmap_get_uint8("quorum.two_node", &two_node);

	/*
	 * do config verification and enablement
	 */

	if ((!have_nodelist) && (!expected_votes)) {
		if (!runtime) {
			error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
			log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
		}
		goto out;
	}

	/*
	 * two_node and qdevice are not compatible in the same config.
	 * try to make an educated guess of what to do
	 */

	if ((two_node) && (have_qdevice)) {
		if (!runtime) {
			error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
			goto out;
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
			if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
				log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
				two_node = 0;
			} else {
				log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
				update_qdevice_can_operate(0);
			}
		}
	}

	/*
	 * Enable special features
	 */
	if (!runtime) {
		(void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
		/* remember that wait_for_all was not set explicitly, see below */
		if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) {
			wait_for_all_autoset = 1;
		}
		(void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
		(void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
		(void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
		(void)icmap_get_uint8("quorum.auto_tie_breaker", &atb);
		(void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);

		/* auto_tie_breaker defaults to LOWEST */
		if (atb) {
			auto_tie_breaker = ATB_LOWEST;
			icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
		}
		else {
			auto_tie_breaker = ATB_NONE;
			if (atb_string) {
				log_printf(LOGSYS_LEVEL_WARNING,
					   "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
			}
		}

		if (atb && atb_string) {
			parse_atb_string(atb_string);
		}
		free(atb_string);
		/* saved so that a runtime re-read can restore it (see the top of this function) */
		initial_auto_tie_breaker = auto_tie_breaker;

		/* allow_downscale requires ev_tracking */
		if (allow_downscale) {
			ev_tracking = 1;
		}

		if (ev_tracking) {
			if (load_ev_tracking_barrier() < 0) {
				LEAVE();
				return ((char *)"Unable to load ev_tracking file!");
			}
			update_ev_tracking_barrier(ev_tracking_barrier);
		}

	}

	/*
	 * Changing of wait_for_all during runtime is not supported, but changing of two_node is
	 * and two_node may set wfa if not configured explicitly. It is safe to unset it
	 * (or set it back) when two_node changes.
	 */
	if (wait_for_all_autoset) {
		wait_for_all = two_node;
	}

	/* two_node and auto_tie_breaker are not compatible as two_node uses
	 * a fence race to decide quorum whereas ATB decides based on node id
	 */
	if (two_node && auto_tie_breaker != ATB_NONE) {
		log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible.");
		log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf");
		two_node = 0;
	}

	/* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs
	 * to be set so that an isolated half+1 without the tie breaker node
	 * does not have quorum on reboot.
	 */
	if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) &&
	    (!wait_for_all)) {
		if (last_man_standing) {
			/* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what
			 * they might want so we'll just quit.
			 */
			log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n");
			log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n");
			log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n");
			log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n");
			log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n");
			/* note: this error is set regardless of the runtime argument */
			error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
			goto out;
		}
		else {
			log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n");
			log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n");
			log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n");
			auto_tie_breaker = ATB_NONE;
			icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
		}
	}

	/*
	 * quorum device is not compatible with last_man_standing and auto_tie_breaker
	 * neither lms or atb can be set at runtime, so there is no need to check for
	 * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
	 * been enabled at startup.
	 */

	if ((have_qdevice) && (last_man_standing)) {
		if (!runtime) {
			error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
			goto out;
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
			log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
			update_qdevice_can_operate(0);
		}
	}

	if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) {
		if (!runtime) {
			error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
			goto out;
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
			log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
			update_qdevice_can_operate(0);
		}
	}

	if ((have_qdevice) && (allow_downscale)) {
		if (!runtime) {
			error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
			goto out;
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
			log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
			update_qdevice_can_operate(0);
		}
	}

	/*
	 * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
	 * we don't know what the quorum device should vote.
	 */

	/* qdevice_votes is uint32_t; -1 is the "not configured" marker set by votequorum_qdevice_is_configured */
	if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
		if (!runtime) {
			error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
			goto out;
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
			log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
			update_qdevice_can_operate(0);
		}
	}

	/*
	 * if user specifies a node list with uneven votes and no device.votes
	 * we cannot autocalculate the votes
	 */

	if ((have_qdevice) &&
	    (qdevice_votes == -1) &&
	    (have_nodelist) &&
	    (node_count != node_expected_votes)) {
		if (!runtime) {
			error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
			goto out;
		} else {
			log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
			log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
			update_qdevice_can_operate(0);
		}
	}

	/*
	 * validate quorum device votes vs expected_votes
	 */

	if ((qdevice_votes > 0) && (expected_votes)) {
		int delta = expected_votes - qdevice_votes;
		if (delta < 2) {
			if (!runtime) {
				error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
				goto out;
			} else {
				log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
				log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
				update_qdevice_can_operate(0);
			}
		}
	}

	/*
	 * automatically calculate device votes and adjust expected_votes from nodelist
	 */

	if ((have_qdevice) &&
	    (qdevice_votes == -1) &&
	    (!expected_votes) &&
	    (have_nodelist) &&
	    (node_count == node_expected_votes)) {
		qdevice_votes = node_expected_votes - 1;
		node_expected_votes = node_expected_votes + qdevice_votes;
	}

	/*
	 * set this node votes and expected_votes
	 */
	log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);

	/* with tracking enabled the stored barrier replaces the configured expected_votes */
	if (ev_tracking) {
		expected_votes = ev_tracking_barrier;
	}

	if (have_nodelist) {
		us->votes = node_votes;
		us->expected_votes = node_expected_votes;
	} else {
		us->votes = 1;
		(void)icmap_get_uint32("quorum.votes", &us->votes);
	}

	/* a non-zero expected_votes takes precedence over the nodelist-derived value */
	if (expected_votes) {
		us->expected_votes = expected_votes;
	}

	/*
	 * set qdevice votes
	 */

	if (!have_qdevice) {
		qdevice->votes = 0;
	}

	if (qdevice_votes != -1) {
		qdevice->votes = qdevice_votes;
	}

	update_ev_barrier(us->expected_votes);
	update_two_node();
	if (wait_for_all) {
		if (!runtime) {
			update_wait_for_all_status(1);
		}
	} else if (wait_for_all_autoset && wait_for_all_status) {
		/*
		 * Reset wait for all status for consistency when wfa is auto-unset by 2node.
		 * wait_for_all_status would be ignored by are_we_quorate anyway.
		 */
		update_wait_for_all_status(0);
	}

out:
	LEAVE();
	return error;
}
1603
1604 static void votequorum_refresh_config(
1605 int32_t event,
1606 const char *key_name,
1607 struct icmap_notify_value new_val,
1608 struct icmap_notify_value old_val,
1609 void *user_data)
1610 {
1611 int old_votes, old_expected_votes;
1612 uint8_t reloading;
1613 uint8_t cancel_wfa;
1614 int32_t reload_status;
1615
1616 ENTER();
1617
1618 /*
1619 * If a full reload is in progress then don't do anything until it's done and
1620 * can reconfigure it all atomically
1621 */
1622 if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
1623 return;
1624 }
1625
1626 /* If a full reload failed, then don't reconfigure */
1627 if ( (strcmp(key_name, "config.totemconfig_reload_in_progress") == 0) &&
1628 (icmap_get_int32("config.reload_status", &reload_status) == CS_OK) &&
1629 (reload_status != CS_OK) ) {
1630 return;
1631 }
1632
1633 (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
1634 if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
1635 cancel_wfa >= 1) {
1636 icmap_set_uint8("quorum.cancel_wait_for_all", 0);
1637 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA,
1638 us->node_id, 0)) {
1639 log_printf(LOGSYS_LEVEL_ERROR, "Failed to send Cancel WFA message to other nodes");
1640 }
1641 return;
1642 }
1643
1644 old_votes = us->votes;
1645 old_expected_votes = us->expected_votes;
1646
1647 /*
1648 * Reload the configuration
1649 */
1650 votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
1651
1652 /*
1653 * activate new config
1654 */
1655 votequorum_exec_send_nodeinfo(us->node_id);
1656 votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1657 if (us->votes != old_votes) {
1658 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES,
1659 us->node_id, us->votes)) {
1660 log_printf(LOGSYS_LEVEL_ERROR, "Failed to send new votes message to other nodes");
1661 }
1662 }
1663 if (us->expected_votes != old_expected_votes) {
1664 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES,
1665 us->node_id, us->expected_votes)) {
1666 log_printf(LOGSYS_LEVEL_ERROR, "Failed to send expected votes message to other nodes");
1667 }
1668 }
1669
1670 LEAVE();
1671 }
1672
1673 static void votequorum_exec_add_config_notification(void)
1674 {
1675 icmap_track_t icmap_track_nodelist = NULL;
1676 icmap_track_t icmap_track_quorum = NULL;
1677 icmap_track_t icmap_track_reload = NULL;
1678
1679 ENTER();
1680
1681 icmap_track_add("nodelist.",
1682 ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
1683 votequorum_refresh_config,
1684 NULL,
1685 &icmap_track_nodelist);
1686
1687 icmap_track_add("quorum.",
1688 ICMAP_TRACK_ADD | ICMAP_TRACK_DELETE | ICMAP_TRACK_MODIFY | ICMAP_TRACK_PREFIX,
1689 votequorum_refresh_config,
1690 NULL,
1691 &icmap_track_quorum);
1692
1693 icmap_track_add("config.totemconfig_reload_in_progress",
1694 ICMAP_TRACK_ADD | ICMAP_TRACK_MODIFY,
1695 votequorum_refresh_config,
1696 NULL,
1697 &icmap_track_reload);
1698
1699 LEAVE();
1700 }
1701
1702 /*
1703 * votequorum_exec core
1704 */
1705
1706 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
1707 {
1708 struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure;
1709 struct iovec iov[1];
1710 int ret;
1711
1712 ENTER();
1713
1714 req_exec_quorum_reconfigure.nodeid = nodeid;
1715 req_exec_quorum_reconfigure.value = value;
1716 req_exec_quorum_reconfigure.param = param;
1717 req_exec_quorum_reconfigure._pad0 = 0;
1718 req_exec_quorum_reconfigure._pad1 = 0;
1719 req_exec_quorum_reconfigure._pad2 = 0;
1720
1721 req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE);
1722 req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure);
1723
1724 iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
1725 iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
1726
1727 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1728
1729 LEAVE();
1730 return ret;
1731 }
1732
1733 static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1734 {
1735 struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo;
1736 struct iovec iov[1];
1737 struct cluster_node *node;
1738 int ret;
1739
1740 ENTER();
1741
1742 node = find_node_by_nodeid(nodeid);
1743 if (!node) {
1744 return -1;
1745 }
1746
1747 memset(&req_exec_quorum_nodeinfo, 0, sizeof(req_exec_quorum_nodeinfo));
1748 req_exec_quorum_nodeinfo.nodeid = nodeid;
1749 req_exec_quorum_nodeinfo.votes = node->votes;
1750 req_exec_quorum_nodeinfo.expected_votes = node->expected_votes;
1751 req_exec_quorum_nodeinfo.flags = node->flags;
1752 if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
1753 decode_flags(node->flags);
1754 }
1755
1756 req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO);
1757 req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo);
1758
1759 iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
1760 iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
1761
1762 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1763
1764 LEAVE();
1765 return ret;
1766 }
1767
1768 static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
1769 {
1770 struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure;
1771 struct iovec iov[1];
1772 int ret;
1773
1774 ENTER();
1775
1776 req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE);
1777 req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure);
1778
1779 assert(strlen(oldname) < sizeof(req_exec_quorum_qdevice_reconfigure.oldname));
1780 strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname);
1781
1782 assert(strlen(newname) < sizeof(req_exec_quorum_qdevice_reconfigure.newname));
1783 strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname);
1784
1785 iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
1786 iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
1787
1788 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1789
1790 LEAVE();
1791 return ret;
1792 }
1793
1794 static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
1795 {
1796 struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg;
1797 struct iovec iov[1];
1798 int ret;
1799
1800 ENTER();
1801
1802 req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG);
1803 req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg);
1804 req_exec_quorum_qdevice_reg.operation = operation;
1805
1806 assert(strlen(qdevice_name_req) < sizeof(req_exec_quorum_qdevice_reg.qdevice_name));
1807 strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
1808
1809 iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
1810 iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
1811
1812 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1813
1814 LEAVE();
1815 return ret;
1816 }
1817
1818 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
1819 {
1820 struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification;
1821 struct qb_list_head *tmp;
1822 struct cluster_node *node;
1823 int i = 0;
1824 int cluster_members = 0;
1825 int size;
1826 char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
1827
1828 ENTER();
1829
1830 log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate);
1831
1832 qb_list_for_each(tmp, &cluster_members_list) {
1833 node = qb_list_entry(tmp, struct cluster_node, list);
1834 cluster_members++;
1835 }
1836 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1837 cluster_members++;
1838 }
1839
1840 size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members;
1841
1842 res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf;
1843 res_lib_votequorum_notification->quorate = cluster_is_quorate;
1844 res_lib_votequorum_notification->context = context;
1845 res_lib_votequorum_notification->node_list_entries = cluster_members;
1846 res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION;
1847 res_lib_votequorum_notification->header.size = size;
1848 res_lib_votequorum_notification->header.error = CS_OK;
1849
1850 /* Send all known nodes and their states */
1851 qb_list_for_each(tmp, &cluster_members_list) {
1852 node = qb_list_entry(tmp, struct cluster_node, list);
1853 res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
1854 res_lib_votequorum_notification->node_list[i++].state = node->state;
1855 }
1856 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1857 res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
1858 res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
1859 }
1860
1861 /* Send it to all interested parties */
1862 if (conn) {
1863 int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1864 LEAVE();
1865 return ret;
1866 } else {
1867 struct quorum_pd *qpd;
1868
1869 qb_list_for_each(tmp, &trackers_list) {
1870 qpd = qb_list_entry(tmp, struct quorum_pd, list);
1871 res_lib_votequorum_notification->context = qpd->tracking_context;
1872 corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1873 }
1874 }
1875
1876 LEAVE();
1877
1878 return 0;
1879 }
1880
1881 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context)
1882 {
1883 struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification;
1884 int i = 0;
1885 int size;
1886 struct qb_list_head *tmp;
1887 char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries];
1888
1889 ENTER();
1890
1891 log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. ring_id = " CS_PRI_RING_ID, quorum_ringid.nodeid, quorum_ringid.seq);
1892
1893 size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries;
1894
1895 res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf;
1896 res_lib_votequorum_notification->node_list_entries = quorum_members_entries;
1897 res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.nodeid;
1898 res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq;
1899 res_lib_votequorum_notification->context = context;
1900
1901 for (i=0; i<quorum_members_entries; i++) {
1902 res_lib_votequorum_notification->node_list[i] = quorum_members[i];
1903 }
1904
1905 res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION;
1906 res_lib_votequorum_notification->header.size = size;
1907 res_lib_votequorum_notification->header.error = CS_OK;
1908
1909 /* Send it to all interested parties */
1910 if (conn) {
1911 int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1912 LEAVE();
1913 return ret;
1914 } else {
1915 struct quorum_pd *qpd;
1916
1917 qb_list_for_each(tmp, &trackers_list) {
1918 qpd = qb_list_entry(tmp, struct quorum_pd, list);
1919 res_lib_votequorum_notification->context = qpd->tracking_context;
1920 corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1921 }
1922 }
1923
1924 LEAVE();
1925
1926 return 0;
1927 }
1928
1929 static void votequorum_exec_send_expectedvotes_notification(void)
1930 {
1931 struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification;
1932 struct quorum_pd *qpd;
1933 struct qb_list_head *tmp;
1934
1935 ENTER();
1936
1937 log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
1938
1939 res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION;
1940 res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification);
1941 res_lib_votequorum_expectedvotes_notification.header.error = CS_OK;
1942 res_lib_votequorum_expectedvotes_notification.expected_votes = us->expected_votes;
1943
1944 qb_list_for_each(tmp, &trackers_list) {
1945 qpd = qb_list_entry(tmp, struct quorum_pd, list);
1946 res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context;
1947 corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification,
1948 sizeof(struct res_lib_votequorum_expectedvotes_notification));
1949 }
1950
1951 LEAVE();
1952 }
1953
/*
 * Endian-conversion hook for the qdevice reconfigure message. The body does
 * nothing apart from the ENTER()/LEAVE() markers; the sender in this file
 * fills only the header and the oldname/newname strings.
 * NOTE(review): presumably no byte swapping is needed because the payload
 * is character data - confirm against the struct definition.
 */
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
{
	ENTER();

	LEAVE();
}
1960
1961 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1962 const void *message,
1963 unsigned int nodeid)
1964 {
1965 const struct req_exec_quorum_qdevice_reconfigure *req_exec_quorum_qdevice_reconfigure = message;
1966
1967 ENTER();
1968
1969 log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node " CS_PRI_NODE_ID " [from: %s to: %s]",
1970 nodeid,
1971 req_exec_quorum_qdevice_reconfigure->oldname,
1972 req_exec_quorum_qdevice_reconfigure->newname);
1973
1974 if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
1975 log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
1976 memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
1977 strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
1978 /*
1979 * TODO: notify qdevices about name change?
1980 * this is not relevant for now and can wait later on since
1981 * qdevices are local only and libvotequorum is not final
1982 */
1983 }
1984
1985 LEAVE();
1986 }
1987
1988 static void exec_votequorum_qdevice_reg_endian_convert (void *message)
1989 {
1990 struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message;
1991
1992 ENTER();
1993
1994 req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation);
1995
1996 LEAVE();
1997 }
1998
1999 static void message_handler_req_exec_votequorum_qdevice_reg (
2000 const void *message,
2001 unsigned int nodeid)
2002 {
2003 const struct req_exec_quorum_qdevice_reg *req_exec_quorum_qdevice_reg = message;
2004 struct res_lib_votequorum_status res_lib_votequorum_status;
2005 int wipe_qdevice_name = 1;
2006 struct cluster_node *node = NULL;
2007 struct qb_list_head *tmp;
2008 cs_error_t error = CS_OK;
2009
2010 ENTER();
2011
2012 log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node " CS_PRI_NODE_ID " [%s]",
2013 req_exec_quorum_qdevice_reg->operation,
2014 nodeid, req_exec_quorum_qdevice_reg->qdevice_name);
2015
2016 switch(req_exec_quorum_qdevice_reg->operation)
2017 {
2018 case VOTEQUORUM_QDEVICE_OPERATION_REGISTER:
2019 if (nodeid != us->node_id) {
2020 if (!strlen(qdevice_name)) {
2021 log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
2022 strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
2023 }
2024 LEAVE();
2025 return;
2026 }
2027
2028 /*
2029 * protect against the case where we broadcast qdevice registration
2030 * to new memebers, we receive the message back, but there is no registration
2031 * connection in progress
2032 */
2033 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2034 LEAVE();
2035 return;
2036 }
2037
2038 /*
2039 * this should NEVER happen
2040 */
2041 if (!qdevice_reg_conn) {
2042 log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
2043 LEAVE();
2044 return;
2045 }
2046
2047 /*
2048 * registering our own device in this case
2049 */
2050 if (!strlen(qdevice_name)) {
2051 strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
2052 }
2053
2054 /*
2055 * check if it is our device or something else
2056 */
2057 if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name,
2058 qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2059 us->flags |= NODE_FLAGS_QDEVICE_REGISTERED;
2060 votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2061 votequorum_exec_send_nodeinfo(us->node_id);
2062 } else {
2063 log_printf(LOGSYS_LEVEL_WARNING,
2064 "A new qdevice with different name (new: %s old: %s) is trying to register!",
2065 req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name);
2066 error = CS_ERR_EXIST;
2067 }
2068
2069 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2070 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2071 res_lib_votequorum_status.header.error = error;
2072 corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2073 qdevice_reg_conn = NULL;
2074 break;
2075 case VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER:
2076 qb_list_for_each(tmp, &cluster_members_list) {
2077 node = qb_list_entry(tmp, struct cluster_node, list);
2078 if ((node->state == NODESTATE_MEMBER) &&
2079 (node->flags & NODE_FLAGS_QDEVICE_REGISTERED)) {
2080 wipe_qdevice_name = 0;
2081 }
2082 }
2083
2084 if (wipe_qdevice_name) {
2085 memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2086 }
2087
2088 break;
2089 }
2090 LEAVE();
2091 }
2092
2093 static void exec_votequorum_nodeinfo_endian_convert (void *message)
2094 {
2095 struct req_exec_quorum_nodeinfo *nodeinfo = message;
2096
2097 ENTER();
2098
2099 nodeinfo->nodeid = swab32(nodeinfo->nodeid);
2100 nodeinfo->votes = swab32(nodeinfo->votes);
2101 nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
2102 nodeinfo->flags = swab32(nodeinfo->flags);
2103
2104 LEAVE();
2105 }
2106
2107 static void message_handler_req_exec_votequorum_nodeinfo (
2108 const void *message,
2109 unsigned int sender_nodeid)
2110 {
2111 const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
2112 struct cluster_node *node = NULL;
2113 int old_votes;
2114 int old_expected;
2115 uint32_t old_flags;
2116 nodestate_t old_state;
2117 int new_node = 0;
2118 int allow_downgrade = 0;
2119 int by_node = 0;
2120 unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
2121
2122 ENTER();
2123
2124 log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node " CS_PRI_NODE_ID, sender_nodeid);
2125 log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[" CS_PRI_NODE_ID "]: votes: %d, expected: %d flags: %d",
2126 nodeid,
2127 req_exec_quorum_nodeinfo->votes,
2128 req_exec_quorum_nodeinfo->expected_votes,
2129 req_exec_quorum_nodeinfo->flags);
2130
2131 if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
2132 decode_flags(req_exec_quorum_nodeinfo->flags);
2133 }
2134
2135 node = find_node_by_nodeid(nodeid);
2136 if (!node) {
2137 node = allocate_node(nodeid);
2138 new_node = 1;
2139 }
2140 if (!node) {
2141 corosync_api->error_memory_failure();
2142 LEAVE();
2143 return;
2144 }
2145
2146 if (new_node) {
2147 old_votes = 0;
2148 old_expected = 0;
2149 old_state = NODESTATE_DEAD;
2150 old_flags = 0;
2151 } else {
2152 old_votes = node->votes;
2153 old_expected = node->expected_votes;
2154 old_state = node->state;
2155 old_flags = node->flags;
2156 }
2157
2158 if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2159 struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2160
2161 assert(sender_node != NULL);
2162
2163 if ((!cluster_is_quorate) &&
2164 (sender_node->flags & NODE_FLAGS_QUORATE)) {
2165 node->votes = req_exec_quorum_nodeinfo->votes;
2166 } else {
2167 node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
2168 }
2169 goto recalculate;
2170 }
2171
2172 /* Update node state */
2173 node->flags = req_exec_quorum_nodeinfo->flags;
2174 node->votes = req_exec_quorum_nodeinfo->votes;
2175 node->state = NODESTATE_MEMBER;
2176
2177 if (node->flags & NODE_FLAGS_LEAVING) {
2178 node->state = NODESTATE_LEAVING;
2179 allow_downgrade = 1;
2180 by_node = 1;
2181 }
2182
2183 if ((!cluster_is_quorate) &&
2184 (node->flags & NODE_FLAGS_QUORATE)) {
2185 allow_downgrade = 1;
2186 us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2187 }
2188
2189 if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2190 node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2191 } else {
2192 node->expected_votes = us->expected_votes;
2193 }
2194
2195 if ((last_man_standing) && (node->votes > 1)) {
2196 log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
2197 "cluster nodes votes are set to 1. Disabling LMS.");
2198 last_man_standing = 0;
2199 if (last_man_standing_timer_set) {
2200 corosync_api->timer_delete(last_man_standing_timer);
2201 last_man_standing_timer_set = 0;
2202 }
2203 }
2204
2205 recalculate:
2206 if ((new_node) ||
2207 (nodeid == us->node_id) ||
2208 (node->flags & NODE_FLAGS_FIRST) ||
2209 (old_votes != node->votes) ||
2210 (old_expected != node->expected_votes) ||
2211 (old_flags != node->flags) ||
2212 (old_state != node->state)) {
2213 recalculate_quorum(allow_downgrade, by_node);
2214 }
2215
2216 if ((wait_for_all) &&
2217 (!(node->flags & NODE_FLAGS_WFASTATUS)) &&
2218 (node->flags & NODE_FLAGS_QUORATE)) {
2219 update_wait_for_all_status(0);
2220 }
2221
2222 LEAVE();
2223 }
2224
2225 static void exec_votequorum_reconfigure_endian_convert (void *message)
2226 {
2227 struct req_exec_quorum_reconfigure *reconfigure = message;
2228
2229 ENTER();
2230
2231 reconfigure->nodeid = swab32(reconfigure->nodeid);
2232 reconfigure->value = swab32(reconfigure->value);
2233
2234 LEAVE();
2235 }
2236
2237 static void message_handler_req_exec_votequorum_reconfigure (
2238 const void *message,
2239 unsigned int nodeid)
2240 {
2241 const struct req_exec_quorum_reconfigure *req_exec_quorum_reconfigure = message;
2242 struct cluster_node *node;
2243
2244 ENTER();
2245
2246 log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node " CS_PRI_NODE_ID " for " CS_PRI_NODE_ID,
2247 nodeid, req_exec_quorum_reconfigure->nodeid);
2248
2249 switch(req_exec_quorum_reconfigure->param)
2250 {
2251 case VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES:
2252 update_node_expected_votes(req_exec_quorum_reconfigure->value);
2253 votequorum_exec_send_expectedvotes_notification();
2254 update_ev_barrier(req_exec_quorum_reconfigure->value);
2255 if (ev_tracking) {
2256 us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
2257 }
2258 recalculate_quorum(1, 0); /* Allow decrease */
2259 break;
2260
2261 case VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES:
2262 node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
2263 if (!node) {
2264 LEAVE();
2265 return;
2266 }
2267 node->votes = req_exec_quorum_reconfigure->value;
2268 recalculate_quorum(1, 0); /* Allow decrease */
2269 break;
2270
2271 case VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA:
2272 update_wait_for_all_status(0);
2273 log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node " CS_PRI_NODE_ID ".",
2274 req_exec_quorum_reconfigure->nodeid);
2275 recalculate_quorum(0, 0);
2276
2277 break;
2278
2279 }
2280
2281 LEAVE();
2282 }
2283
2284 static int votequorum_exec_exit_fn (void)
2285 {
2286 int ret = 0;
2287
2288 ENTER();
2289
2290 /*
2291 * tell the other nodes we are leaving
2292 */
2293
2294 if (allow_downscale) {
2295 us->flags |= NODE_FLAGS_LEAVING;
2296 ret = votequorum_exec_send_nodeinfo(us->node_id);
2297 }
2298
2299 if ((ev_tracking) && (ev_tracking_fd != -1)) {
2300 close(ev_tracking_fd);
2301 }
2302
2303
2304 LEAVE();
2305 return ret;
2306 }
2307
2308 static void votequorum_set_icmap_ro_keys(void)
2309 {
2310 icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE);
2311 icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE);
2312 icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE);
2313 icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE);
2314 icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE);
2315 icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE);
2316 icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE);
2317 }
2318
2319 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
2320 {
2321 char *error = NULL;
2322
2323 ENTER();
2324
2325 /*
2326 * make sure we start clean
2327 */
2328 qb_list_init(&cluster_members_list);
2329 qb_list_init(&trackers_list);
2330 qdevice = NULL;
2331 us = NULL;
2332 memset(cluster_nodes, 0, sizeof(cluster_nodes));
2333
2334 /*
2335 * Allocate a cluster_node for qdevice
2336 */
2337 qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
2338 if (!qdevice) {
2339 LEAVE();
2340 return ((char *)"Could not allocate node.");
2341 }
2342 qdevice->votes = 0;
2343 memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2344
2345 /*
2346 * Allocate a cluster_node for us
2347 */
2348 us = allocate_node(corosync_api->totem_nodeid_get());
2349 if (!us) {
2350 LEAVE();
2351 return ((char *)"Could not allocate node.");
2352 }
2353
2354 icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
2355
2356 us->state = NODESTATE_MEMBER;
2357 us->votes = 1;
2358 us->flags |= NODE_FLAGS_FIRST;
2359
2360 error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
2361 if (error) {
2362 return error;
2363 }
2364 recalculate_quorum(0, 0);
2365
2366 /*
2367 * Set RO keys in icmap
2368 */
2369 votequorum_set_icmap_ro_keys();
2370
2371 /*
2372 * Listen for changes
2373 */
2374 votequorum_exec_add_config_notification();
2375
2376 /*
2377 * Start us off with one node
2378 */
2379 votequorum_exec_send_nodeinfo(us->node_id);
2380
2381 LEAVE();
2382
2383 return (NULL);
2384 }
2385
2386 /*
2387 * votequorum service core
2388 */
2389
2390 static void votequorum_last_man_standing_timer_fn(void *arg)
2391 {
2392 ENTER();
2393
2394 last_man_standing_timer_set = 0;
2395 if (cluster_is_quorate) {
2396 recalculate_quorum(1,1);
2397 }
2398
2399 LEAVE();
2400 }
2401
2402 static void votequorum_sync_init (
2403 const unsigned int *trans_list, size_t trans_list_entries,
2404 const unsigned int *member_list, size_t member_list_entries,
2405 const struct memb_ring_id *ring_id)
2406 {
2407 int i, j;
2408 int found;
2409 int left_nodes;
2410 struct cluster_node *node;
2411
2412 ENTER();
2413
2414 sync_in_progress = 1;
2415 sync_nodeinfo_sent = 0;
2416 sync_wait_for_poll_or_timeout = 0;
2417
2418 if (member_list_entries > 1) {
2419 us->flags &= ~NODE_FLAGS_FIRST;
2420 }
2421
2422 /*
2423 * we don't need to track which nodes have left directly,
2424 * since that info is in the node db, but we need to know
2425 * if somebody has left for last_man_standing
2426 */
2427 left_nodes = 0;
2428 for (i = 0; i < quorum_members_entries; i++) {
2429 found = 0;
2430 for (j = 0; j < member_list_entries; j++) {
2431 if (quorum_members[i] == member_list[j]) {
2432 found = 1;
2433 break;
2434 }
2435 }
2436 if (found == 0) {
2437 left_nodes = 1;
2438 node = find_node_by_nodeid(quorum_members[i]);
2439 if (node) {
2440 node->state = NODESTATE_DEAD;
2441 }
2442 }
2443 }
2444
2445 if (last_man_standing) {
2446 if (((member_list_entries >= quorum) && (left_nodes)) ||
2447 ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) {
2448 if (last_man_standing_timer_set) {
2449 corosync_api->timer_delete(last_man_standing_timer);
2450 last_man_standing_timer_set = 0;
2451 }
2452 corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
2453 NULL, votequorum_last_man_standing_timer_fn,
2454 &last_man_standing_timer);
2455 last_man_standing_timer_set = 1;
2456 }
2457 }
2458
2459 memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries);
2460 previous_quorum_members_entries = quorum_members_entries;
2461
2462 memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
2463 quorum_members_entries = member_list_entries;
2464 memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
2465
2466 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && us->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2467 /*
2468 * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout
2469 */
2470 if (qdevice_timer_set) {
2471 corosync_api->timer_delete(qdevice_timer);
2472 }
2473 corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2474 qdevice_timer_fn, &qdevice_timer);
2475 qdevice_timer_set = 1;
2476 sync_wait_for_poll_or_timeout = 1;
2477
2478 log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)",
2479 qdevice_name, qdevice_sync_timeout);
2480 }
2481
2482 LEAVE();
2483 }
2484
2485 static int votequorum_sync_process (void)
2486 {
2487 if (!sync_nodeinfo_sent) {
2488 votequorum_exec_send_nodeinfo(us->node_id);
2489 if(us->flags & NODE_FLAGS_QDEVICE_EXTRA_INFO_SET) {
2490 votequorum_exec_send_set_qdevice_extra_info(us->node_id);
2491 }
2492 votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2493 if (strlen(qdevice_name)) {
2494 votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2495 qdevice_name);
2496 }
2497 votequorum_exec_send_nodelist_notification(NULL, 0LL);
2498 sync_nodeinfo_sent = 1;
2499 }
2500
2501 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2502 /*
2503 * Waiting for qdevice to poll with new ringid or timeout
2504 */
2505
2506 return (-1);
2507 }
2508
2509 return 0;
2510 }
2511
2512 static void votequorum_sync_activate (void)
2513 {
2514 recalculate_quorum(0, 0);
2515 quorum_callback(quorum_members, quorum_members_entries,
2516 cluster_is_quorate, &quorum_ringid);
2517 votequorum_exec_send_quorum_notification(NULL, 0L);
2518
2519 sync_in_progress = 0;
2520 }
2521
/*
 * Sync abort callback: does nothing.
 */
static void votequorum_sync_abort (void)
{
	return;
}
2526
2527 char *votequorum_init(struct corosync_api_v1 *api,
2528 quorum_set_quorate_fn_t q_set_quorate_fn)
2529 {
2530 char *error;
2531
2532 ENTER();
2533
2534 if (q_set_quorate_fn == NULL) {
2535 return ((char *)"Quorate function not set");
2536 }
2537
2538 corosync_api = api;
2539 quorum_callback = q_set_quorate_fn;
2540
2541 error = corosync_service_link_and_init(corosync_api,
2542 &votequorum_service[0]);
2543 if (error) {
2544 return (error);
2545 }
2546
2547 LEAVE();
2548
2549 return (NULL);
2550 }
2551
2552 /*
2553 * Library Handler init/fini
2554 */
2555
2556 static int quorum_lib_init_fn (void *conn)
2557 {
2558 struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2559
2560 ENTER();
2561
2562 qb_list_init (&pd->list);
2563 pd->conn = conn;
2564
2565 LEAVE();
2566 return (0);
2567 }
2568
2569 static int quorum_lib_exit_fn (void *conn)
2570 {
2571 struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2572
2573 ENTER();
2574
2575 if (quorum_pd->tracking_enabled) {
2576 qb_list_del (&quorum_pd->list);
2577 qb_list_init (&quorum_pd->list);
2578 }
2579
2580 LEAVE();
2581
2582 return (0);
2583 }
2584
2585 /*
2586 * library internal functions
2587 */
2588
2589 static void qdevice_timer_fn(void *arg)
2590 {
2591 ENTER();
2592
2593 if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2594 (!qdevice_timer_set)) {
2595 LEAVE();
2596 return;
2597 }
2598
2599 us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2600 us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
2601 log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
2602 votequorum_exec_send_nodeinfo(us->node_id);
2603
2604 qdevice_timer_set = 0;
2605 sync_wait_for_poll_or_timeout = 0;
2606
2607 LEAVE();
2608 }
2609
2610 /*
2611 * Library Handler Functions
2612 */
2613
2614 static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
2615 {
2616 const struct req_lib_votequorum_getinfo *req_lib_votequorum_getinfo = message;
2617 struct res_lib_votequorum_getinfo res_lib_votequorum_getinfo;
2618 struct cluster_node *node;
2619 unsigned int highest_expected = 0;
2620 unsigned int total_votes = 0;
2621 cs_error_t error = CS_OK;
2622 uint32_t nodeid = req_lib_votequorum_getinfo->nodeid;
2623
2624 ENTER();
2625
2626 log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node " CS_PRI_NODE_ID, conn, req_lib_votequorum_getinfo->nodeid);
2627
2628 if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2629 nodeid = us->node_id;
2630 }
2631
2632 node = find_node_by_nodeid(nodeid);
2633 if (node) {
2634 struct cluster_node *iternode;
2635 struct qb_list_head *nodelist;
2636
2637 qb_list_for_each(nodelist, &cluster_members_list) {
2638 iternode = qb_list_entry(nodelist, struct cluster_node, list);
2639
2640 if (iternode->state == NODESTATE_MEMBER) {
2641 highest_expected =
2642 max(highest_expected, iternode->expected_votes);
2643 total_votes += iternode->votes;
2644 }
2645 }
2646
2647 if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2648 total_votes += qdevice->votes;
2649 }
2650
2651 switch(node->state) {
2652 case NODESTATE_MEMBER:
2653 res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER;
2654 break;
2655 case NODESTATE_DEAD:
2656 res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD;
2657 break;
2658 case NODESTATE_LEAVING:
2659 res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING;
2660 break;
2661 default:
2662 res_lib_votequorum_getinfo.state = node->state;
2663 break;
2664 }
2665 res_lib_votequorum_getinfo.state = node->state;
2666 res_lib_votequorum_getinfo.votes = node->votes;
2667 res_lib_votequorum_getinfo.expected_votes = node->expected_votes;
2668 res_lib_votequorum_getinfo.highest_expected = highest_expected;
2669
2670 res_lib_votequorum_getinfo.quorum = quorum;
2671 res_lib_votequorum_getinfo.total_votes = total_votes;
2672 res_lib_votequorum_getinfo.flags = 0;
2673 res_lib_votequorum_getinfo.nodeid = node->node_id;
2674
2675 if (two_node) {
2676 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE;
2677 }
2678 if (cluster_is_quorate) {
2679 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE;
2680 }
2681 if (wait_for_all) {
2682 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL;
2683 }
2684 if (last_man_standing) {
2685 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING;
2686 }
2687 if (auto_tie_breaker != ATB_NONE) {
2688 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
2689 }
2690 if (allow_downscale) {
2691 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE;
2692 }
2693
2694 memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2695 strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
2696 res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes;
2697
2698 if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2699 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED;
2700 }
2701 if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2702 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE;
2703 }
2704 if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2705 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE;
2706 }
2707 if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) {
2708 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS;
2709 }
2710 if (node->flags & NODE_FLAGS_QDEVICE_EXTRA_INFO_SET) {
2711 res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_HAS_EXTRA_INFO;
2712 }
2713 } else {
2714 error = CS_ERR_NOT_EXIST;
2715 }
2716
2717 res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo);
2718 res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO;
2719 res_lib_votequorum_getinfo.header.error = error;
2720 corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo));
2721 log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
2722
2723 LEAVE();
2724 }
2725
2726 static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
2727 {
2728 const struct req_lib_votequorum_setexpected *req_lib_votequorum_setexpected = message;
2729 struct res_lib_votequorum_status res_lib_votequorum_status;
2730 cs_error_t error = CS_OK;
2731 unsigned int newquorum;
2732 unsigned int total_votes;
2733 uint8_t allow_downscale_status = 0;
2734
2735 ENTER();
2736
2737 allow_downscale_status = allow_downscale;
2738 allow_downscale = 0;
2739
2740 /*
2741 * Validate new expected votes
2742 */
2743 newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
2744 allow_downscale = allow_downscale_status;
2745 /*
2746 * Setting expected_votes < total_votes doesn't make sense.
2747 * For quorate cluster prevent cluster to become unquorate.
2748 */
2749 if (req_lib_votequorum_setexpected->expected_votes < total_votes ||
2750 (cluster_is_quorate && (newquorum > total_votes))) {
2751 error = CS_ERR_INVALID_PARAM;
2752 goto error_exit;
2753 }
2754 update_node_expected_votes(req_lib_votequorum_setexpected->expected_votes);
2755
2756 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
2757 req_lib_votequorum_setexpected->expected_votes)) {
2758 error = CS_ERR_NO_RESOURCES;
2759 }
2760
2761 error_exit:
2762 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2763 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2764 res_lib_votequorum_status.header.error = error;
2765 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2766
2767 LEAVE();
2768 }
2769
2770 static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
2771 {
2772 const struct req_lib_votequorum_setvotes *req_lib_votequorum_setvotes = message;
2773 struct res_lib_votequorum_status res_lib_votequorum_status;
2774 struct cluster_node *node;
2775 unsigned int newquorum;
2776 unsigned int total_votes;
2777 unsigned int saved_votes;
2778 cs_error_t error = CS_OK;
2779 unsigned int nodeid;
2780
2781 ENTER();
2782
2783 nodeid = req_lib_votequorum_setvotes->nodeid;
2784 node = find_node_by_nodeid(nodeid);
2785 if (!node) {
2786 error = CS_ERR_NAME_NOT_FOUND;
2787 goto error_exit;
2788 }
2789
2790 /*
2791 * Check votes is valid
2792 */
2793 saved_votes = node->votes;
2794 node->votes = req_lib_votequorum_setvotes->votes;
2795
2796 newquorum = calculate_quorum(1, 0, &total_votes);
2797
2798 if (newquorum < total_votes / 2 ||
2799 newquorum > total_votes) {
2800 node->votes = saved_votes;
2801 error = CS_ERR_INVALID_PARAM;
2802 goto error_exit;
2803 }
2804
2805 if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
2806 req_lib_votequorum_setvotes->votes)) {
2807 error = CS_ERR_NO_RESOURCES;
2808 }
2809
2810 error_exit:
2811 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2812 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2813 res_lib_votequorum_status.header.error = error;
2814 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2815
2816 LEAVE();
2817 }
2818
2819 static void message_handler_req_lib_votequorum_trackstart (void *conn,
2820 const void *message)
2821 {
2822 const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message;
2823 struct res_lib_votequorum_status res_lib_votequorum_status;
2824 struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2825 cs_error_t error = CS_OK;
2826
2827 ENTER();
2828
2829 /*
2830 * If an immediate listing of the current cluster membership
2831 * is requested, generate membership list
2832 */
2833 if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT ||
2834 req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) {
2835 log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
2836 votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context);
2837 votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
2838 votequorum_exec_send_all_qdevice_extra_info_notification(conn, req_lib_votequorum_trackstart->context);
2839 }
2840
2841 if (quorum_pd->tracking_enabled) {
2842 error = CS_ERR_EXIST;
2843 goto response_send;
2844 }
2845
2846 /*
2847 * Record requests for tracking
2848 */
2849 if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES ||
2850 req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) {
2851
2852 quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags;
2853 quorum_pd->tracking_enabled = 1;
2854 quorum_pd->tracking_context = req_lib_votequorum_trackstart->context;
2855
2856 qb_list_add (&quorum_pd->list, &trackers_list);
2857 }
2858
2859 response_send:
2860 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2861 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2862 res_lib_votequorum_status.header.error = error;
2863 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2864
2865 LEAVE();
2866 }
2867
2868 static void message_handler_req_lib_votequorum_trackstop (void *conn,
2869 const void *message)
2870 {
2871 struct res_lib_votequorum_status res_lib_votequorum_status;
2872 struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2873 int error = CS_OK;
2874
2875 ENTER();
2876
2877 if (quorum_pd->tracking_enabled) {
2878 error = CS_OK;
2879 quorum_pd->tracking_enabled = 0;
2880 qb_list_del (&quorum_pd->list);
2881 qb_list_init (&quorum_pd->list);
2882 } else {
2883 error = CS_ERR_NOT_EXIST;
2884 }
2885
2886 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2887 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2888 res_lib_votequorum_status.header.error = error;
2889 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2890
2891 LEAVE();
2892 }
2893
2894 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
2895 const void *message)
2896 {
2897 const struct req_lib_votequorum_qdevice_register *req_lib_votequorum_qdevice_register = message;
2898 struct res_lib_votequorum_status res_lib_votequorum_status;
2899 cs_error_t error = CS_OK;
2900
2901 ENTER();
2902
2903 if (!qdevice_can_operate) {
2904 log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2905 error = CS_ERR_ACCESS;
2906 goto out;
2907 }
2908
2909 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2910 if ((!strncmp(req_lib_votequorum_qdevice_register->name,
2911 qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2912 goto out;
2913 } else {
2914 log_printf(LOGSYS_LEVEL_WARNING,
2915 "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2916 req_lib_votequorum_qdevice_register->name, qdevice_name);
2917 error = CS_ERR_EXIST;
2918 goto out;
2919 }
2920 } else {
2921 if (qdevice_reg_conn != NULL) {
2922 log_printf(LOGSYS_LEVEL_WARNING,
2923 "Registration request already in progress");
2924 error = CS_ERR_TRY_AGAIN;
2925 goto out;
2926 }
2927 qdevice_reg_conn = conn;
2928 if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2929 req_lib_votequorum_qdevice_register->name) != 0) {
2930 log_printf(LOGSYS_LEVEL_WARNING,
2931 "Unable to send qdevice registration request to cluster");
2932 error = CS_ERR_TRY_AGAIN;
2933 qdevice_reg_conn = NULL;
2934 } else {
2935 LEAVE();
2936 return;
2937 }
2938 }
2939
2940 out:
2941
2942 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2943 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2944 res_lib_votequorum_status.header.error = error;
2945 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2946
2947 LEAVE();
2948 }
2949
2950 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
2951 const void *message)
2952 {
2953 const struct req_lib_votequorum_qdevice_unregister *req_lib_votequorum_qdevice_unregister = message;
2954 struct res_lib_votequorum_status res_lib_votequorum_status;
2955 cs_error_t error = CS_OK;
2956
2957 ENTER();
2958
2959 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2960 if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2961 error = CS_ERR_INVALID_PARAM;
2962 goto out;
2963 }
2964 if (qdevice_timer_set) {
2965 corosync_api->timer_delete(qdevice_timer);
2966 qdevice_timer_set = 0;
2967 sync_wait_for_poll_or_timeout = 0;
2968 }
2969 us->flags &= ~NODE_FLAGS_QDEVICE_REGISTERED;
2970 us->flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2971 us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
2972 us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS;
2973
2974 us->flags &= ~NODE_FLAGS_QDEVICE_EXTRA_INFO_SET;
2975 us->ei_size = 0;
2976 votequorum_exec_send_set_qdevice_extra_info(us->node_id);
2977
2978 votequorum_exec_send_nodeinfo(us->node_id);
2979 votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
2980 req_lib_votequorum_qdevice_unregister->name);
2981 } else {
2982 error = CS_ERR_NOT_EXIST;
2983 }
2984
2985 out:
2986 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2987 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2988 res_lib_votequorum_status.header.error = error;
2989 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2990
2991 LEAVE();
2992 }
2993
2994 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
2995 const void *message)
2996 {
2997 const struct req_lib_votequorum_qdevice_update *req_lib_votequorum_qdevice_update = message;
2998 struct res_lib_votequorum_status res_lib_votequorum_status;
2999 cs_error_t error = CS_OK;
3000
3001 ENTER();
3002
3003 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
3004 if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
3005 error = CS_ERR_INVALID_PARAM;
3006 goto out;
3007 }
3008 votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
3009 req_lib_votequorum_qdevice_update->newname);
3010 } else {
3011 error = CS_ERR_NOT_EXIST;
3012 }
3013
3014 out:
3015 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
3016 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
3017 res_lib_votequorum_status.header.error = error;
3018 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
3019
3020 LEAVE();
3021 }
3022
3023 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
3024 const void *message)
3025 {
3026 const struct req_lib_votequorum_qdevice_poll *req_lib_votequorum_qdevice_poll = message;
3027 struct res_lib_votequorum_status res_lib_votequorum_status;
3028 cs_error_t error = CS_OK;
3029 uint32_t oldflags;
3030
3031 ENTER();
3032
3033 if (!qdevice_can_operate) {
3034 error = CS_ERR_ACCESS;
3035 goto out;
3036 }
3037
3038 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
3039 if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.nodeid &&
3040 req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) {
3041 log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (" CS_PRI_RING_ID ") != last sync "
3042 "ring id (" CS_PRI_RING_ID "). Ignoring poll call.",
3043 req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq,
3044 quorum_ringid.nodeid, quorum_ringid.seq);
3045 error = CS_ERR_MESSAGE_ERROR;
3046 goto out;
3047 }
3048 if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
3049 error = CS_ERR_INVALID_PARAM;
3050 goto out;
3051 }
3052
3053 if (qdevice_timer_set) {
3054 corosync_api->timer_delete(qdevice_timer);
3055 qdevice_timer_set = 0;
3056 }
3057
3058 oldflags = us->flags;
3059
3060 us->flags |= NODE_FLAGS_QDEVICE_ALIVE;
3061
3062 if (req_lib_votequorum_qdevice_poll->cast_vote) {
3063 us->flags |= NODE_FLAGS_QDEVICE_CAST_VOTE;
3064 } else {
3065 us->flags &= ~NODE_FLAGS_QDEVICE_CAST_VOTE;
3066 }
3067
3068 if (us->flags != oldflags) {
3069 votequorum_exec_send_nodeinfo(us->node_id);
3070 }
3071
3072 corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
3073 qdevice_timer_fn, &qdevice_timer);
3074 qdevice_timer_set = 1;
3075 sync_wait_for_poll_or_timeout = 0;
3076 } else {
3077 error = CS_ERR_NOT_EXIST;
3078 }
3079
3080 out:
3081 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
3082 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
3083 res_lib_votequorum_status.header.error = error;
3084 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
3085
3086 LEAVE();
3087 }
3088
3089 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
3090 const void *message)
3091 {
3092 const struct req_lib_votequorum_qdevice_master_wins *req_lib_votequorum_qdevice_master_wins = message;
3093 struct res_lib_votequorum_status res_lib_votequorum_status;
3094 cs_error_t error = CS_OK;
3095 uint32_t oldflags = us->flags;
3096
3097 ENTER();
3098
3099 if (!qdevice_can_operate) {
3100 error = CS_ERR_ACCESS;
3101 goto out;
3102 }
3103
3104 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
3105 if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
3106 error = CS_ERR_INVALID_PARAM;
3107 goto out;
3108 }
3109
3110 if (req_lib_votequorum_qdevice_master_wins->allow) {
3111 us->flags |= NODE_FLAGS_QDEVICE_MASTER_WINS;
3112 } else {
3113 us->flags &= ~NODE_FLAGS_QDEVICE_MASTER_WINS;
3114 }
3115
3116 if (us->flags != oldflags) {
3117 votequorum_exec_send_nodeinfo(us->node_id);
3118 }
3119
3120 update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
3121 } else {
3122 error = CS_ERR_NOT_EXIST;
3123 }
3124
3125 out:
3126 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
3127 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
3128 res_lib_votequorum_status.header.error = error;
3129 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
3130
3131 LEAVE();
3132 }
3133
3134 static void fill_extra_info_notification(struct res_lib_votequorum_qdevice_extra_info_notification *res_lib_votequorum_qdevice_extra_info_notification, struct cluster_node *node)
3135 {
3136 log_printf(LOGSYS_LEVEL_DEBUG, "Sending extra info notification for node %" PRIu32 " %" PRIu32 " bytes.", node->node_id, node->ei_size);
3137 if(node->flags & NODE_FLAGS_QDEVICE_EXTRA_INFO_SET) {
3138 memcpy(res_lib_votequorum_qdevice_extra_info_notification->extra_info, node->extra_nodeinfo, node->ei_size);
3139 res_lib_votequorum_qdevice_extra_info_notification->ei_size = node->ei_size;
3140 } else {
3141 res_lib_votequorum_qdevice_extra_info_notification->ei_size = 0;
3142 }
3143 res_lib_votequorum_qdevice_extra_info_notification->nodeid = node->node_id;
3144
3145 res_lib_votequorum_qdevice_extra_info_notification->header.id = MESSAGE_RES_VOTEQUORUM_QDEVICE_EXTRA_INFO_NOTIFICATION;
3146 res_lib_votequorum_qdevice_extra_info_notification->header.size = sizeof(struct res_lib_votequorum_qdevice_extra_info_notification) + res_lib_votequorum_qdevice_extra_info_notification->ei_size;
3147 res_lib_votequorum_qdevice_extra_info_notification->header.error = CS_OK;
3148 }
3149
3150 static int votequorum_exec_send_all_qdevice_extra_info_notification(void *conn, uint64_t context)
3151 {
3152 char buf[sizeof(struct res_lib_votequorum_qdevice_extra_info_notification) + VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE] = { 0 };
3153 struct res_lib_votequorum_qdevice_extra_info_notification *res_lib_votequorum_qdevice_extra_info_notification = (struct res_lib_votequorum_qdevice_extra_info_notification*)buf;
3154 struct qb_list_head *tmp;
3155 struct cluster_node *node;
3156
3157 ENTER();
3158
3159 qb_list_for_each(tmp, &cluster_members_list) {
3160 node = qb_list_entry(tmp, struct cluster_node, list);
3161 if(node->flags & NODE_FLAGS_QDEVICE_EXTRA_INFO_SET) {
3162 fill_extra_info_notification(res_lib_votequorum_qdevice_extra_info_notification, node);
3163 res_lib_votequorum_qdevice_extra_info_notification->context = context;
3164 corosync_api->ipc_dispatch_send(conn, res_lib_votequorum_qdevice_extra_info_notification,
3165 res_lib_votequorum_qdevice_extra_info_notification->header.size);
3166 }
3167 }
3168
3169 LEAVE();
3170
3171 return CS_OK;
3172 }
3173
3174 static int
3175 votequorum_exec_send_qdevice_extra_info_notification(uint32_t nodeid)
3176 {
3177 char buf[sizeof(struct res_lib_votequorum_qdevice_extra_info_notification) + VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE] = { 0 };
3178 struct res_lib_votequorum_qdevice_extra_info_notification *res_lib_votequorum_qdevice_extra_info_notification = (struct res_lib_votequorum_qdevice_extra_info_notification*)buf;
3179 struct quorum_pd *qpd;
3180 struct qb_list_head *tmp;
3181
3182 ENTER();
3183
3184 struct cluster_node *node = find_node_by_nodeid(nodeid);
3185
|
CID (unavailable; MK=9f02865ed1a1d9640552360ff46c4551) (#1 of 1): Dereference null return value (NULL_RETURNS): |
3186 log_printf(LOGSYS_LEVEL_DEBUG, "Sending extra info notification for node %" PRIu32 " %" PRIu32 " bytes.", nodeid, node->ei_size);
3187
3188 fill_extra_info_notification(res_lib_votequorum_qdevice_extra_info_notification, node);
3189
3190 qb_list_for_each(tmp, &trackers_list) {
3191 qpd = qb_list_entry(tmp, struct quorum_pd, list);
3192 log_printf(LOGSYS_LEVEL_DEBUG, "Sending extra info notification for node %" PRIu32 " message (ctx: %" PRIx64 ")", nodeid, qpd->tracking_context);
3193 res_lib_votequorum_qdevice_extra_info_notification->context = qpd->tracking_context;
3194 corosync_api->ipc_dispatch_send(qpd->conn, res_lib_votequorum_qdevice_extra_info_notification,
3195 res_lib_votequorum_qdevice_extra_info_notification->header.size);
3196 }
3197
3198 LEAVE();
3199
3200 return CS_OK;
3201 }
3202
3203 static int
3204 votequorum_exec_send_set_qdevice_extra_info(uint32_t nodeid)
3205 {
3206 char buf[sizeof(struct req_exec_quorum_qdevice_set_extra_info) + VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE];
3207 struct req_exec_quorum_qdevice_set_extra_info *req_exec_quorum_qdevice_set_extra_info = (struct req_exec_quorum_qdevice_set_extra_info *)buf;
3208 struct iovec iov[1];
3209 struct cluster_node *node = NULL;
3210 int ret = 0;
3211
3212 ENTER();
3213
3214 node = find_node_by_nodeid(nodeid);
3215 if(!node) {
3216 log_printf(LOGSYS_LEVEL_ERROR, "Sending mcast extra info for node %" PRIu32 " failed because node doesn't exist", nodeid);
3217 ret = CS_ERR_NOT_EXIST;
3218 goto exit;
3219 }
3220
3221 log_printf(LOGSYS_LEVEL_DEBUG, "Sending mcast extra info for node %" PRIu32 " %" PRIu32 " bytes.", nodeid, node->ei_size);
3222
3223 req_exec_quorum_qdevice_set_extra_info->nodeid = nodeid;
3224 if(node->flags & NODE_FLAGS_QDEVICE_EXTRA_INFO_SET) {
3225 req_exec_quorum_qdevice_set_extra_info->ei_size = node->ei_size;
3226 memcpy(req_exec_quorum_qdevice_set_extra_info->extra_info, node->extra_nodeinfo, node->ei_size);
3227 } else {
3228 req_exec_quorum_qdevice_set_extra_info->ei_size = 0;
3229 }
3230
3231 req_exec_quorum_qdevice_set_extra_info->header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_EXTRA_NODEINFO);
3232 req_exec_quorum_qdevice_set_extra_info->header.size = sizeof(struct req_exec_quorum_qdevice_set_extra_info) + req_exec_quorum_qdevice_set_extra_info->ei_size;
3233
3234 iov[0].iov_base = (void *)buf;
3235 iov[0].iov_len = req_exec_quorum_qdevice_set_extra_info->header.size;
3236 ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
3237
3238 exit:
3239 LEAVE();
3240
3241 return ret;
3242 }
3243
3244 static void message_handler_req_exec_votequorum_qdevice_set_extra_info (
3245 const void *message,
3246 unsigned int nodeid)
3247 {
3248 const struct req_exec_quorum_qdevice_set_extra_info *req_exec_quorum_qdevice_set_extra_info = message;
3249 struct cluster_node *node = NULL;
3250
3251 ENTER();
3252
3253 log_printf(LOGSYS_LEVEL_DEBUG, "Setting extra info for node %" PRIu32, nodeid);
3254
3255 if(req_exec_quorum_qdevice_set_extra_info->ei_size > VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE) {
3256 log_printf(LOGSYS_LEVEL_ERROR, "Oversized extra info for node %" PRIu32 "! Ignoring", nodeid);
3257 return;
3258 }
3259
3260 node = find_node_by_nodeid(nodeid);
3261 if(node) {
3262 log_printf(LOGSYS_LEVEL_DEBUG, "New extra info for node %" PRIu32 " is %" PRIu32 " bytes", nodeid, req_exec_quorum_qdevice_set_extra_info->ei_size);
3263 if(req_exec_quorum_qdevice_set_extra_info->ei_size == 0) {
3264 node->flags &= ~NODE_FLAGS_QDEVICE_EXTRA_INFO_SET;
3265 node->ei_size = 0;
3266 } else {
3267 node->ei_size = req_exec_quorum_qdevice_set_extra_info->ei_size;
3268 memcpy(node->extra_nodeinfo, req_exec_quorum_qdevice_set_extra_info->extra_info, req_exec_quorum_qdevice_set_extra_info->ei_size);
3269 node->flags |= NODE_FLAGS_QDEVICE_EXTRA_INFO_SET;
3270 }
3271 votequorum_exec_send_qdevice_extra_info_notification(nodeid);
3272 } else {
3273 log_printf(LOGSYS_LEVEL_ERROR, "Failed to find node %u for set extra info", nodeid);
3274 }
3275
3276 LEAVE();
3277 }
3278
3279 static void exec_votequorum_qdevice_set_extra_info_endian_convert (void *message)
3280 {
3281 struct req_exec_quorum_qdevice_set_extra_info *set_extra_info = message;
3282 ENTER();
3283
3284 set_extra_info->ei_size = swab32(set_extra_info->ei_size);
3285 set_extra_info->nodeid = swab32(set_extra_info->nodeid);
3286
3287 LEAVE();
3288 }
3289
3290
3291 static void message_handler_req_lib_votequorum_qdevice_set_extra_info (void *conn,
3292 const void *message)
3293 {
3294 const struct req_lib_votequorum_set_qdevice_extra_info *req_lib_votequorum_qdevice_set_extra_info = message;
3295 struct res_lib_votequorum_status res_lib_votequorum_status;
3296 cs_error_t error = CS_OK;
3297
3298 ENTER();
3299
3300 log_printf(LOGSYS_LEVEL_DEBUG, "Local set extra info %" PRIu32 " bytes", req_lib_votequorum_qdevice_set_extra_info->ei_size);
3301
3302 if (!qdevice_can_operate) {
3303 error = CS_ERR_ACCESS;
3304 goto out;
3305 }
3306
3307 if(req_lib_votequorum_qdevice_set_extra_info->ei_size > VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE) {
3308 log_printf(LOGSYS_LEVEL_ERROR, "Failed to set extra info, oversized.");
3309 error = CS_ERR_INVALID_PARAM;
3310 goto out;
3311 }
3312
3313 if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
3314 if(req_lib_votequorum_qdevice_set_extra_info->ei_size == 0) {
3315 us->flags &= ~NODE_FLAGS_QDEVICE_EXTRA_INFO_SET;
3316 us->ei_size = 0;
3317 } else {
3318 us->ei_size = req_lib_votequorum_qdevice_set_extra_info->ei_size;
3319 memcpy(us->extra_nodeinfo, req_lib_votequorum_qdevice_set_extra_info->extra_info, req_lib_votequorum_qdevice_set_extra_info->ei_size);
3320 us->flags |= NODE_FLAGS_QDEVICE_EXTRA_INFO_SET;
3321 }
3322 votequorum_exec_send_set_qdevice_extra_info(us->node_id);
3323 } else {
3324 error = CS_ERR_NOT_EXIST;
3325 }
3326
3327 out:
3328 res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
3329 res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
3330 res_lib_votequorum_status.header.error = error;
3331 corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
3332
3333 LEAVE();
3334 }
3335
3336 static void message_handler_req_lib_votequorum_qdevice_get_extra_info (void *conn,
3337 const void *message)
3338 {
3339 const struct req_lib_votequorum_get_qdevice_extra_info *req_lib_votequorum_qdevice_get_extra_info = message;
3340 char buf[sizeof(struct res_lib_votequorum_get_qdevice_extra_info) + VOTEQUORUM_QDEVICE_EXTRA_NODEINFO_MAXSIZE];
3341 struct res_lib_votequorum_get_qdevice_extra_info *res_lib_votequorum_qdevice_get_extra_info = (struct res_lib_votequorum_get_qdevice_extra_info *)buf;
3342 size_t ei_size = 0;
3343 cs_error_t error = CS_OK;
3344
3345 ENTER();
3346
3347 if (!qdevice_can_operate) {
3348 error = CS_ERR_ACCESS;
3349 goto out;
3350 }
3351
3352 struct cluster_node *node = find_node_by_nodeid(req_lib_votequorum_qdevice_get_extra_info->nodeid);
3353
3354 if(!node) {
3355 error = CS_ERR_NOT_EXIST;
3356 goto out;
3357 }
3358
3359 if (node->flags & NODE_FLAGS_QDEVICE_EXTRA_INFO_SET) {
3360 res_lib_votequorum_qdevice_get_extra_info->ei_size = ei_size = node->ei_size;
3361 memcpy(res_lib_votequorum_qdevice_get_extra_info->extra_info, node->extra_nodeinfo, node->ei_size);
3362 } else {
3363 res_lib_votequorum_qdevice_get_extra_info->ei_size = 0;
3364 }
3365
3366 out:
3367 res_lib_votequorum_qdevice_get_extra_info->header.size = sizeof(struct res_lib_votequorum_get_qdevice_extra_info) + ei_size;
3368 res_lib_votequorum_qdevice_get_extra_info->header.id = MESSAGE_RES_VOTEQUORUM_QDEVICE_GET_EXTRA_INFO;
3369 res_lib_votequorum_qdevice_get_extra_info->header.error = error;
3370 corosync_api->ipc_response_send(conn, res_lib_votequorum_qdevice_get_extra_info, res_lib_votequorum_qdevice_get_extra_info->header.size);
3371
3372 LEAVE();
3373 }
3374