1 /*
2 * Copyright 2004-2012 Red Hat, Inc.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v2 or (at your option) any later version.
7 */
8
9 #define EXTERN
10 #include "dlm_daemon.h"
11 #include <ctype.h>
12 #include <pthread.h>
13 #include <linux/netlink.h>
14 #include <linux/genetlink.h>
15 #include <uuid/uuid.h>
16
17 #ifdef USE_SD_NOTIFY
18 #include <systemd/sd-daemon.h>
19 #endif
20
21 #include "copyright.cf"
22 #include "version.cf"
23
24 #define CLIENT_NALLOC 32
25 static int client_maxi;
26 static int client_size = 0;
27 static struct client *client = NULL;
28 static struct pollfd *pollfd = NULL;
29 static pthread_t query_thread;
30 static pthread_mutex_t query_mutex;
31 static struct list_head fs_register_list;
32 static int kernel_monitor_fd;
33
34 int helper_ci;
35 int helper_pid;
36 int helper_req_fd;
37 int helper_status_fd;
38 uint64_t helper_last_status;
39 uint32_t helper_full_count;
40
/* Per-connection state, one slot per fd; fd == -1 marks a free slot. */
struct client {
	int fd;			/* socket fd, -1 when the slot is unused */
	void *workfn;		/* void (*)(int ci): called when POLLIN fires */
	void *deadfn;		/* void (*)(int ci): called on POLLHUP/error */
	struct lockspace *ls;
};
47
/* Indexes into uevent_vars[]/uevent_vals[] for the uevent keys we parse. */
enum {
	Env_ACTION = 0,
	Env_DEVPATH,
	Env_SUBSYSTEM,
	Env_LOCKSPACE,
	Env_Last, /* Flag for end of vars */
};
55
/* "KEY=" prefixes matched against each NUL-separated uevent string;
   indexed by the Env_* enum above. */
static const char *uevent_vars[] = {
	[Env_ACTION] = "ACTION=",
	[Env_DEVPATH] = "DEVPATH=",
	[Env_SUBSYSTEM] = "SUBSYSTEM=",
	[Env_LOCKSPACE] = "LOCKSPACE=",
};
62
/*
 * Parse a uevent buffer of NUL-separated "KEY=value" strings.
 *
 * buf/len: raw uevent data (len bytes, strings separated by '\0')
 * vars/nvars: "KEY=" prefixes to look for
 * vals: out array (nvars entries); vals[i] points into buf at the value
 *       for vars[i], or NULL if that key was not present.
 *
 * Fix: the original used strlen() on the final fragment; if the buffer
 * did not end with a NUL inside len, strlen read past the buffer and
 * the unsigned "len -= slen" underflowed, turning the loop into an
 * out-of-bounds scan.  Use strnlen() and drop an unterminated tail.
 */
static void decode_uevent(const char *buf, unsigned len, const char *vars[],
		  unsigned nvars, const char *vals[])
{
	const char *ptr;
	unsigned int i;
	size_t slen, vlen;

	memset(vals, 0, sizeof(const char *) * nvars);

	while (len > 0) {
		ptr = buf;
		slen = strnlen(ptr, len);
		if (slen == len) {
			/* trailing fragment with no terminator: a value
			   pointer into it would not be a valid C string */
			break;
		}
		buf += slen + 1;	/* skip string and its NUL */
		len -= slen + 1;

		for (i = 0; i < nvars; i++) {
			vlen = strlen(vars[i]);
			if (vlen > slen)
				continue;
			if (memcmp(vars[i], ptr, vlen) != 0)
				continue;
			vals[i] = ptr + vlen;
			break;
		}
	}
}
91
/*
 * Read exactly count bytes from fd, retrying on EINTR.
 * Returns 0 on success, -1 on read error or EOF before count bytes.
 *
 * Fix: use size_t/ssize_t internally instead of int so the offset and
 * the "off < count" comparison never mix signedness with size_t count.
 */
int do_read(int fd, void *buf, size_t count)
{
	size_t off = 0;
	ssize_t rv;

	while (off < count) {
		rv = read(fd, (char *)buf + off, count - off);
		if (rv == 0)
			return -1;	/* EOF before full count */
		if (rv == -1 && errno == EINTR)
			continue;
		if (rv == -1)
			return -1;
		off += rv;
	}
	return 0;
}
108
/*
 * Write exactly count bytes to fd, retrying on EINTR and on partial
 * writes.  Returns 0 on success, or the negative write() result after
 * logging the errno.
 */
int do_write(int fd, void *buf, size_t count)
{
	char *p = buf;
	int rv;

	for (;;) {
		rv = write(fd, p, count);
		if (rv == -1 && errno == EINTR)
			continue;
		if (rv < 0) {
			log_error("write errno %d", errno);
			return rv;
		}
		if (rv == count)
			return 0;
		/* partial write: advance and send the remainder */
		count -= rv;
		p += rv;
	}
}
129
130 uint64_t monotime(void)
131 {
132 struct timespec ts;
133 clock_gettime(CLOCK_MONOTONIC, &ts);
134 return ts.tv_sec;
135 }
136
137 static void client_alloc(void)
138 {
139 int i;
140
141 if (!client) {
142 client = malloc(CLIENT_NALLOC * sizeof(struct client));
143 pollfd = malloc(CLIENT_NALLOC * sizeof(struct pollfd));
144 } else {
145 client = realloc(client, (client_size + CLIENT_NALLOC) *
146 sizeof(struct client));
147 pollfd = realloc(pollfd, (client_size + CLIENT_NALLOC) *
148 sizeof(struct pollfd));
149 if (!pollfd)
150 log_error("can't alloc for pollfd");
151 }
152 if (!client || !pollfd)
153 log_error("can't alloc for client array");
154
155 for (i = client_size; i < client_size + CLIENT_NALLOC; i++) {
156 client[i].workfn = NULL;
157 client[i].deadfn = NULL;
158 client[i].fd = -1;
159 pollfd[i].fd = -1;
160 pollfd[i].revents = 0;
161 }
162 client_size += CLIENT_NALLOC;
163 }
164
/* Close connection slot ci and mark it free.  fd == -1 lets client_add
   reuse the slot; deadfn is left stale and is reset on reuse. */
void client_dead(int ci)
{
	close(client[ci].fd);
	client[ci].workfn = NULL;
	client[ci].fd = -1;
	pollfd[ci].fd = -1;
}
172
173 int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci))
174 {
175 int i;
176
177 if (!client)
178 client_alloc();
179 again:
180 for (i = 0; i < client_size; i++) {
181 if (client[i].fd == -1) {
182 client[i].workfn = workfn;
183 if (deadfn)
184 client[i].deadfn = deadfn;
185 else
186 client[i].deadfn = client_dead;
187 client[i].fd = fd;
188 pollfd[i].fd = fd;
189 pollfd[i].events = POLLIN;
190 if (i > client_maxi)
191 client_maxi = i;
192 return i;
193 }
194 }
195
196 client_alloc();
197 goto again;
198 }
199
/* Return the socket fd for connection slot ci. */
int client_fd(int ci)
{
	return client[ci].fd;
}
204
/* Temporarily stop polling slot ci (pair with client_back).  The fd
   parameter is unused here; only the poll entry is cleared. */
void client_ignore(int ci, int fd)
{
	pollfd[ci].fd = -1;
	pollfd[ci].events = 0;
}
210
/* Resume polling slot ci with fd after a client_ignore(). */
void client_back(int ci, int fd)
{
	pollfd[ci].fd = fd;
	pollfd[ci].events = POLLIN;
}
216
/* SIGTERM: request a clean shutdown of the main loop.
   NOTE(review): daemon_quit comes from dlm_daemon.h; for strict
   async-signal safety it should be volatile sig_atomic_t -- confirm. */
static void sigterm_handler(int sig)
{
	daemon_quit = 1;
}
221
/* SIGCHLD: no-op handler; child exit status is collected elsewhere
   (see helper_dead's waitpid). */
static void sigchld_handler(int sig)
{
}
225
/* Find an in-progress run operation on run_ops by uuid string; returns
   NULL if no match. */
struct run *find_run(char *uuid_str)
{
	struct run *run;

	list_for_each_entry(run, &run_ops, list) {
		if (!strcmp(run->uuid, uuid_str))
			return run;
	}
	return NULL;
}
236
/* Tear down both helper pipes and release the helper's poll slot.
   helper_pid is intentionally left set so helper_dead() can still
   waitpid() the child. */
static void close_helper(void)
{
	close(helper_req_fd);
	close(helper_status_fd);
	helper_req_fd = -1;
	helper_status_fd = -1;
	pollfd[helper_ci].fd = -1;
	pollfd[helper_ci].events = 0;
	helper_ci = -1;

	/* don't set helper_pid = -1 until we've tried waitpid */
}
249
250 /*
251 * We cannot block the main thread on this write, so the pipe is NONBLOCK, and
252 * write fails with EAGAIN when the pipe is full. With around 1.5K request
253 * size and 64k default pipe size, the pipe will be full if we quickly send
254 * around 40 requests to the helper. We retry the message once a second, so
255 * we'll retry the write again in a second.
256 *
257 * By setting the pipe size to 1MB in setup_helper, we could quickly send many
258 * more requests before getting EAGAIN.
259 */
260
261 void send_helper_run_request(struct run_request *req)
262 {
263 int rv;
264
265 if (helper_req_fd == -1) {
266 log_error("send_helper_run_request no fd");
267 return;
268 }
269
270 retry:
271 rv = write(helper_req_fd, req, sizeof(struct run_request));
272 if (rv == -1 && errno == EINTR)
273 goto retry;
274
275 /* pipe is full, we'll try again in a second */
276 if (rv == -1 && errno == EAGAIN) {
277 helper_full_count++;
278 log_debug("send_helper_run_request full_count %u",
279 helper_full_count);
280 return;
281 }
282
283 /* helper exited or closed fd, quit using helper */
284 if (rv == -1 && errno == EPIPE) {
285 log_error("send_helper_run_request EPIPE");
286 close_helper();
287 return;
288 }
289
290 if (rv != sizeof(struct run_request)) {
291 /* this shouldn't happen */
292 log_error("send_helper_run_request %d %d", rv, errno);
293 close_helper();
294 return;
295 }
296 }
297
298 static void send_helper_run_cancel(struct run *run)
299 {
300 struct run_request req;
301 int rv;
302
303 if (helper_req_fd == -1) {
304 log_error("send_helper_run_cancel no fd");
305 return;
306 }
307
308 memset(&req, 0, sizeof(req));
309 memcpy(req.uuid, run->uuid, RUN_UUID_LEN);
310
311 rv = write(helper_req_fd, &req, sizeof(struct run_request));
312 if (rv < 0)
313 log_error("send_helper_run_cancel write error");
314 }
315
316 /*
317 * first pipe for daemon to send requests to helper; they are not acknowledged
318 * and the daemon does not get any result back for the requests.
319 *
320 * second pipe for helper to send general status/heartbeat back to the daemon
321 * every so often to confirm it's not dead/hung. If the helper gets stuck or
322 * killed, the daemon will not get the status and won't bother sending requests
323 * to the helper, and use SIGTERM instead
324 */
325
/*
 * Fork the helper process and wire up two non-blocking pipes:
 * daemon -> helper requests (helper_req_fd) and helper -> daemon
 * status/replies (helper_status_fd).  Returns 0 on success or -errno.
 * The child never returns: it runs run_helper() and exits.
 */
static int setup_helper(void)
{
	int pid;
	int pw_fd = -1; /* parent write */
	int cr_fd = -1; /* child read */
	int pr_fd = -1; /* parent read */
	int cw_fd = -1; /* child write */
	int pfd[2];

	/* we can't allow the main daemon thread to block */
	if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC))
		return -errno;

	/* uncomment for rhel7 where this should be available */
	/* fcntl(pfd[1], F_SETPIPE_SZ, 1024*1024); */

	cr_fd = pfd[0];
	pw_fd = pfd[1];

	if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC)) {
		close(cr_fd);
		close(pw_fd);
		return -errno;
	}

	pr_fd = pfd[0];
	cw_fd = pfd[1];

	pid = fork();
	if (pid < 0) {
		/* fork failed: close both pipes entirely */
		close(cr_fd);
		close(pw_fd);
		close(pr_fd);
		close(cw_fd);
		return -errno;
	}

	if (pid) {
		/* parent keeps its ends, closes the child's ends */
		close(cr_fd);
		close(cw_fd);
		helper_req_fd = pw_fd;
		helper_status_fd = pr_fd;
		helper_pid = pid;
		return 0;
	} else {
		/* child keeps its ends, closes the parent's ends */
		close(pr_fd);
		close(pw_fd);
		run_helper(cr_fd, cw_fd, opt(daemon_debug_ind));
		exit(0);
	}
}
377
378 static void process_helper(int ci)
379 {
380 struct run_reply reply;
381 struct run_reply send_reply;
382 struct run *run;
383 int rv;
384
385 rv = read(client[ci].fd, &reply, sizeof(reply));
386 if (!rv || rv == -EAGAIN)
387 return;
388 if (rv < 0) {
389 log_error("process_helper rv %d errno %d", rv, errno);
390 goto fail;
391 }
392 if (rv != sizeof(reply)) {
393 log_error("process_helper recv size %d", rv);
394 goto fail;
395 }
396
397 if (!reply.header.type) {
398 /* log_debug("helper status"); */
399 helper_last_status = monotime();
400 return;
401 }
402
403 if (reply.header.type == DLM_MSG_RUN_REPLY) {
404 run = find_run(reply.uuid);
405 if (!run) {
406 log_error("helper reply no run uuid %s", reply.uuid);
407 return;
408 }
409
410 memset(&send_reply, 0, sizeof(send_reply));
411 memcpy(&send_reply.info, &run->info, sizeof(struct run_info));
412 memcpy(send_reply.uuid, run->uuid, RUN_UUID_LEN);
413 send_reply.header.type = DLM_MSG_RUN_REPLY;
414 send_reply.info.local_pid = reply.info.local_pid;
415 send_reply.info.local_result = reply.info.local_result;
416
417 log_debug("helper reply %s pid %d result %d",
418 send_reply.uuid, send_reply.info.local_pid, send_reply.info.local_result);
419
420 send_run_reply(run, &send_reply);
421 return;
422 }
423
424 return;
425
426 fail:
427 close_helper();
428 }
429
430 static void helper_dead(int ci)
431 {
432 int pid = helper_pid;
433 int rv, status;
434
435 close_helper();
436
437 helper_pid = -1;
438
439 rv = waitpid(pid, &status, WNOHANG);
440
441 if (rv != pid) {
442 /* should not happen */
443 log_error("helper pid %d dead wait %d", pid, rv);
444 return;
445 }
446
447 if (WIFEXITED(status)) {
448 log_error("helper pid %d exit status %d", pid,
449 WEXITSTATUS(status));
450 return;
451 }
452
453 if (WIFSIGNALED(status)) {
454 log_error("helper pid %d term signal %d", pid,
455 WTERMSIG(status));
456 return;
457 }
458
459 /* should not happen */
460 log_error("helper pid %d state change", pid);
461 }
462
/*
 * Begin a cluster-wide run: allocate a run op with a fresh uuid, send
 * the request to the cluster, and remember it on run_ops.  data/datalen
 * is the command line; uuid_out (RUN_UUID_LEN bytes) receives the new
 * uuid.  Returns the send result, or -1 on validation/alloc failure.
 */
static int start_run_operation(char *data, int datalen, int dest_nodeid, uint32_t flags, char *uuid_out)
{
	struct run *run;
	struct run_request req;
	uuid_t uu;
	int rv;

	if (!opt(enable_helper_ind)) {
		log_debug("ignore start_run helper not enabled");
		return -1;
	}

	if (datalen > RUN_COMMAND_LEN)
		return -1;

	if (!(run = malloc(sizeof(struct run))))
		return -1;

	memset(run, 0, sizeof(struct run));

	uuid_generate(uu);
	uuid_unparse_lower(uu, run->uuid);
	/* NOTE(review): if datalen == RUN_COMMAND_LEN and data is not
	   NUL-terminated, run->command relies on the command buffer
	   being at least RUN_COMMAND_LEN+1 -- confirm against the
	   struct run definition in dlm_daemon.h */
	strncpy(run->command, data, datalen);
	run->info.start_nodeid = our_nodeid;
	run->info.dest_nodeid = dest_nodeid;
	run->info.flags = flags;

	memset(&req, 0, sizeof(req));
	req.header.type = DLM_MSG_RUN_REQUEST;
	memcpy(&req.info, &run->info, sizeof(struct run_info));
	memcpy(req.uuid, run->uuid, RUN_UUID_LEN);
	strncpy(req.command, data, datalen);

	/* logged at error level so run starts always appear in the log */
	log_error("run start %s %.128s", run->uuid, run->command);

	rv = send_run_request(run, &req);

	memcpy(uuid_out, run->uuid, RUN_UUID_LEN);
	list_add(&run->list, &run_ops);

	/*
	 * This flag means the starting node should run the command itself
	 * at the time of the request and not wait to receive its own request.
	 */
	if (flags & DLMC_FLAG_RUN_START_NODE_FIRST)
		send_helper_run_request(&req);

	return rv;
}
512
/* Remove a finished/cancelled run from run_ops and free it.  The
   caller must not use run afterwards. */
void clear_run(struct run *run)
{
	log_debug("clear run %s", run->uuid);
	list_del(&run->list);
	free(run);
}
519
/*
 * Report (and optionally cancel or clear) the state of a run op to a
 * client.  Fills state->check_status with DLMC_RUN_STATUS_* bits.
 * Returns 0 on success, -1 if the helper is disabled or the uuid is
 * unknown.
 */
static int check_run_operation(char *uuid_str, uint32_t flags, struct dlmc_run_check_state *state)
{
	char nodes_buf[128];
	struct run *run;
	int pos, len, ret, i;

	if (!opt(enable_helper_ind)) {
		log_debug("ignore check_run helper not enabled");
		return -1;
	}

	run = find_run(uuid_str);
	if (!run) {
		log_debug("check_run no uuid %s", uuid_str);
		return -1;
	}

	if (flags & DLMC_FLAG_RUN_CHECK_CANCEL) {
		/* cancel: tell the helper, drop the op, report nothing */
		log_debug("cancel_run %s", run->uuid);
		send_helper_run_cancel(run);
		clear_run(run);
		return 0;
	}

	log_debug("check_run %s reply_count %d need_replies %d fail_count %d",
		  uuid_str, run->info.reply_count, run->info.need_replies, run->info.fail_count);

	if (run->info.need_replies)
		state->check_status |= DLMC_RUN_STATUS_WAITING;
	else
		state->check_status |= DLMC_RUN_STATUS_DONE;

	if (run->info.fail_count)
		state->check_status |= DLMC_RUN_STATUS_FAILED;

	if (!run->info.need_replies) {
		if (run->info.fail_count) {
			/* create a printable list of nodeids where the command failed */
			pos = 0;
			len = sizeof(nodes_buf);
			memset(nodes_buf, 0, len);
			for (i = 0; i < run->node_count; i++) {
				if (!run->node_results[i].result)
					continue;
				ret = snprintf(nodes_buf + pos, len - pos, "%d ", run->node_results[i].nodeid);
				/* stop when the buffer would overflow (snprintf
				   returns the would-be length) */
				if (ret >= len - pos)
					break;
				pos += ret;
			}
			nodes_buf[len-1] = '\0';

			log_error("run ended %s error from %d remote nodes with ids: %s", run->uuid, run->info.fail_count, nodes_buf);
		} else {
			log_error("run ended %s success from %d remote nodes", run->uuid, run->info.reply_count);
		}
	}

	if (!run->info.need_replies && (flags & DLMC_FLAG_RUN_CHECK_CLEAR))
		clear_run(run);

	return 0;
}
582
583 static struct lockspace *create_ls(const char *name)
584 {
585 struct lockspace *ls;
586
587 ls = malloc(sizeof(*ls));
588 if (!ls)
589 goto out;
590 memset(ls, 0, sizeof(struct lockspace));
591 strncpy(ls->name, name, DLM_LOCKSPACE_LEN);
592
593 INIT_LIST_HEAD(&ls->changes);
594 INIT_LIST_HEAD(&ls->node_history);
595 INIT_LIST_HEAD(&ls->saved_messages);
596 INIT_LIST_HEAD(&ls->plock_resources);
597 ls->plock_resources_root = RB_ROOT;
598 #if 0
599 INIT_LIST_HEAD(&ls->deadlk_nodes);
600 INIT_LIST_HEAD(&ls->transactions);
601 INIT_LIST_HEAD(&ls->resources);
602 #endif
603 setup_lockspace_config(ls);
604 out:
605 return ls;
606 }
607
608 struct lockspace *find_ls(const char *name)
609 {
610 struct lockspace *ls;
611
612 list_for_each_entry(ls, &lockspaces, list) {
613 if ((strlen(ls->name) == strlen(name)) &&
614 !strncmp(ls->name, name, strlen(name)))
615 return ls;
616 }
617 return NULL;
618 }
619
/* Find a lockspace by its global id; NULL if no match. */
struct lockspace *find_ls_id(uint32_t id)
{
	struct lockspace *ls;

	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->global_id == id)
			return ls;
	}
	return NULL;
}
630
/* Entry on fs_register_list: a lockspace name registered by a
   filesystem (e.g. gfs2) that wants fencing notifications. */
struct fs_reg {
	struct list_head list;
	char name[DLM_LOCKSPACE_LEN+1];
};
635
636 static int fs_register_check(char *name)
637 {
638 struct fs_reg *fs;
639 list_for_each_entry(fs, &fs_register_list, list) {
640 if (!strcmp(name, fs->name))
641 return 1;
642 }
643 return 0;
644 }
645
646 static int fs_register_add(char *name)
647 {
648 struct fs_reg *fs;
649
650 if (fs_register_check(name))
651 return -EALREADY;
652
653 fs = malloc(sizeof(struct fs_reg));
654 if (!fs)
655 return -ENOMEM;
656 strncpy(fs->name, name, DLM_LOCKSPACE_LEN);
657 list_add(&fs->list, &fs_register_list);
658 return 0;
659 }
660
661 static void fs_register_del(char *name)
662 {
663 struct fs_reg *fs;
664 list_for_each_entry(fs, &fs_register_list, list) {
665 if (!strcmp(name, fs->name)) {
666 list_del(&fs->list);
667 free(fs);
668 return;
669 }
670 }
671 }
672
673 const char *dlm_mode_str(int mode)
674 {
675 switch (mode) {
676 case DLM_LOCK_IV:
677 return "IV";
678 case DLM_LOCK_NL:
679 return "NL";
680 case DLM_LOCK_CR:
681 return "CR";
682 case DLM_LOCK_CW:
683 return "CW";
684 case DLM_LOCK_PR:
685 return "PR";
686 case DLM_LOCK_PW:
687 return "PW";
688 case DLM_LOCK_EX:
689 return "EX";
690 }
691 return "??";
692 }
693
694 /* recv "online" (join) and "offline" (leave) messages from dlm via uevents */
695
696 #define MAX_LINE_UEVENT 4096
697
698 static void process_uevent(int ci)
699 {
700 const char *uevent_vals[Env_Last];
701 struct lockspace *ls;
702 char buf[MAX_LINE_UEVENT];
703 int rv;
704
705 memset(buf, 0, sizeof(buf));
706
707 retry_recv:
708 rv = recv(client[ci].fd, &buf, sizeof(buf), 0);
709 if (rv < 0) {
710 if (errno == EINTR)
711 goto retry_recv;
712 if (errno != EAGAIN)
713 log_error("uevent recv error %d errno %d", rv, errno);
714 return;
715 }
716
717 buf[MAX_LINE_UEVENT-1] = '\0';
718
719 decode_uevent(buf, rv, uevent_vars, Env_Last, uevent_vals);
720
721 if (!uevent_vals[Env_ACTION] ||
722 !uevent_vals[Env_DEVPATH] ||
723 !uevent_vals[Env_SUBSYSTEM] ||
724 !uevent_vals[Env_LOCKSPACE]) {
725 log_debug("failed to validate uevent, action: %p, devpath: %p, subsystem: %p, lockspace: %p",
726 uevent_vals[Env_ACTION], uevent_vals[Env_DEVPATH],
727 uevent_vals[Env_SUBSYSTEM],
728 uevent_vals[Env_LOCKSPACE]);
729 return;
730 }
731
732 if (strcmp(uevent_vals[Env_SUBSYSTEM], "dlm")) {
733 log_debug("uevent looks like dlm but came not from dlm subsystem");
734 return;
735 }
736
737 log_debug("uevent action: %s, devpath: %s, devpath: %s, lockspace: %s",
738 uevent_vals[Env_ACTION], uevent_vals[Env_SUBSYSTEM],
739 uevent_vals[Env_DEVPATH], uevent_vals[Env_LOCKSPACE]);
740
741 rv = 0;
742
743 if (!strcmp(uevent_vals[Env_ACTION], "online")) {
744 ls = find_ls(uevent_vals[Env_LOCKSPACE]);
745 if (ls) {
746 rv = -EEXIST;
747 goto out;
748 }
749
750 ls = create_ls(uevent_vals[Env_LOCKSPACE]);
751 if (!ls) {
752 rv = -ENOMEM;
753 goto out;
754 }
755
756 if (fs_register_check(ls->name))
757 ls->fs_registered = 1;
758
759 rv = dlm_join_lockspace(ls);
760 if (rv) {
761 /* ls already freed */
762 goto out;
763 }
764
765 } else if (!strcmp(uevent_vals[Env_ACTION], "offline")) {
766 ls = find_ls(uevent_vals[Env_LOCKSPACE]);
767 if (!ls) {
768 rv = -ENOENT;
769 goto out;
770 }
771
772 dlm_leave_lockspace(ls);
773 }
774 out:
775 if (rv < 0)
776 log_error("%s action: %s, devpath: %s, devpath: %s, lockspace: %s - error %d errno %d",
777 __func__, uevent_vals[Env_ACTION],
778 uevent_vals[Env_SUBSYSTEM], uevent_vals[Env_DEVPATH],
779 uevent_vals[Env_LOCKSPACE], rv, errno);
780 }
781
/*
 * Open and bind a netlink socket that receives kernel uevents
 * (group 1), with ENOBUFS suppression or an enlarged receive buffer as
 * a fallback.  Returns the socket fd, or a negative value on error.
 */
static int setup_uevent(void)
{
	struct sockaddr_nl snl;
	int s, rv, val;

	s = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
	if (s < 0) {
		log_error("uevent netlink socket");
		return s;
	}

	/* man 7 netlink:
	 *
	 * However, reliable transmissions from kernel to user are impossible in
	 * any case. The kernel can't send a netlink message if the socket buffer
	 * is full: the message will be dropped and the kernel and the user-space
	 * process will no longer have the same view of kernel state. It is up to
	 * the application to detect when this happens (via the ENOBUFS error
	 * returned by recvmsg(2)) and resynchronize.
	 *
	 * To avoid ENOBUFS errors we set the netlink socket to realiable
	 * transmission mode which can be turned on by NETLINK_NO_ENOBUFS
	 * option. This option is available since kernel 2.6.30. If this setting
	 * fails we fallback to increase the netlink socket receive buffer.
	 */
	val = 1;
	rv = setsockopt(s, SOL_NETLINK, NETLINK_NO_ENOBUFS, &val, sizeof(val));
	if (rv == -1) {
		/* Fallback handling if NETLINK_NO_ENOBUFS fails to set.
		 *
		 * To prevent ENOBUFS errors we just set the receive buffer to
		 * two megabyte as other applications do it. This will not
		 * ensure that we never receive ENOBUFS but it's more unlikely.
		 */
		val = DEFAULT_NETLINK_RCVBUF;
		log_error("uevent netlink NETLINK_NO_ENOBUFS errno %d, will set rcvbuf to %d bytes", errno, val);

		/* SO_RCVBUF is capped by rmem_max; SO_RCVBUFFORCE (with
		   CAP_NET_ADMIN) can exceed it -- try both */
		rv = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
		if (rv == -1)
			log_error("uevent netlink SO_RCVBUF errno %d", errno);

		rv = setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val));
		if (rv == -1)
			log_error("uevent netlink SO_RCVBUFFORCE errno %d", errno);
	}

	memset(&snl, 0, sizeof(snl));
	snl.nl_family = AF_NETLINK;
	snl.nl_pid = getpid();
	snl.nl_groups = 1;	/* multicast group 1 = kernel uevents */

	rv = bind(s, (struct sockaddr *) &snl, sizeof(snl));
	if (rv < 0) {
		log_error("uevent bind error %d errno %d", rv, errno);
		close(s);
		return rv;
	}

	return s;
}
842
/* Copy name into h->name without requiring NUL-termination within len.
   The -Wstringop-truncation warning is suppressed deliberately:
   init_header() zeroes the whole header first, so the field stays
   terminated as long as the real name fits. */
static inline void init_header_name(struct dlmc_header *h,
				    const char *name, size_t len)
{
#pragma GCC diagnostic push
#if __GNUC__ >= 8
#pragma GCC diagnostic ignored "-Wstringop-truncation"
#endif
	strncpy(h->name, name, len);
#pragma GCC diagnostic pop
}
853
854 static void init_header(struct dlmc_header *h, int cmd, char *name, int result,
855 int extra_len)
856 {
857 memset(h, 0, sizeof(struct dlmc_header));
858
859 h->magic = DLMC_MAGIC;
860 h->version = DLMC_VERSION;
861 h->len = sizeof(struct dlmc_header) + extra_len;
862 h->command = cmd;
863 h->data = result;
864
865 if (name)
866 init_header_name(h, name, DLM_LOCKSPACE_LEN);
867 }
868
869 static char copy_buf[LOG_DUMP_SIZE];
870
/*
 * Format every run op on run_ops into buf (at most LOG_DUMP_SIZE
 * bytes): one line of run_info fields followed by one line with the
 * command.  *len receives the number of bytes written.  Stops early
 * when the buffer would overflow.
 */
static void copy_run_list(char *buf, int *len)
{
	char tmp[1024];
	struct run *run;
	int ret, pos = 0;

	list_for_each_entry(run, &run_ops, list) {
		memset(tmp, 0, sizeof(tmp));

		snprintf(tmp, 1024, "run_uuid %s start_nodeid %d local_pid %d local_result %d need_replies %d reply_count %d fail_count %d flags %x\n",
			 run->uuid, run->info.start_nodeid, run->info.local_pid,
			 run->info.local_result, run->info.need_replies,
			 run->info.reply_count, run->info.fail_count, run->info.flags);

		if (pos + strlen(tmp) >= LOG_DUMP_SIZE)
			break;

		ret = sprintf(buf + pos, "%s", tmp);
		pos += ret;

		memset(tmp, 0, sizeof(tmp));

		snprintf(tmp, 1024, "run_command %.1000s\n", run->command);

		if (pos + strlen(tmp) >= LOG_DUMP_SIZE)
			break;

		ret = sprintf(buf + pos, "%s", tmp);
		pos += ret;

		/* TODO: dump node results */
	}

	*len = pos;
}
906
907 static void query_dump_run(int fd)
908 {
909 struct dlmc_header h;
910 int len = 0;
911
912 copy_run_list(copy_buf, &len);
913
914 init_header(&h, DLMC_CMD_DUMP_RUN, NULL, 0, len);
915 send(fd, &h, sizeof(h), MSG_NOSIGNAL);
916
917 if (len)
918 send(fd, copy_buf, len, MSG_NOSIGNAL);
919 }
920
921 static void query_dump_debug(int fd)
922 {
923 struct dlmc_header h;
924 int len = 0;
925
926 copy_log_dump(copy_buf, &len);
927
928 init_header(&h, DLMC_CMD_DUMP_DEBUG, NULL, 0, len);
929 send(fd, &h, sizeof(h), MSG_NOSIGNAL);
930
931 if (len)
932 send(fd, copy_buf, len, MSG_NOSIGNAL);
933 }
934
935 static void copy_options(char *buf, int *len)
936 {
937 struct dlm_option *o;
938 char tmp[256];
939 int i, ret, pos = 0, l = 0;
940
941 for (i = 0; i < dlm_options_max; i++) {
942 o = &dlm_options[i];
943
944 memset(tmp, 0, sizeof(tmp));
945
946 if (o->req_arg == req_arg_str)
947 l = snprintf(tmp, 240, "%s=%s", o->name, o->use_str);
948 else if (o->req_arg == req_arg_uint)
949 l = snprintf(tmp, 240, "%s=%u", o->name, o->use_uint);
950 else
951 l = snprintf(tmp, 240, "%s=%d", o->name, o->use_int);
952
953 if (o->dynamic_set)
954 snprintf(tmp + l, 15, " (set_config)\n");
955 else if (o->cli_set)
956 snprintf(tmp + l, 15, " (cli option)\n");
957 else if (o->file_set)
958 snprintf(tmp + l, 15, " (dlm.conf)\n");
959 else
960 snprintf(tmp + l, 15, "\n");
961
962 if (pos + strlen(tmp) >= LOG_DUMP_SIZE)
963 break;
964
965 ret = sprintf(buf + pos, "%s", tmp);
966 pos += ret;
967 }
968
969 *len = pos;
970 }
971
972 static void query_dump_config(int fd)
973 {
974 struct dlmc_header h;
975 int len = 0;
976
977 copy_options(copy_buf, &len);
978
979 init_header(&h, DLMC_CMD_DUMP_CONFIG, NULL, 0, len);
980 send(fd, &h, sizeof(h), MSG_NOSIGNAL);
981
982 if (len)
983 send(fd, copy_buf, len, MSG_NOSIGNAL);
984 }
985
986 static void query_dump_log_plock(int fd)
987 {
988 struct dlmc_header h;
989 int len = 0;
990
991 copy_log_dump_plock(copy_buf, &len);
992
993 init_header(&h, DLMC_CMD_DUMP_DEBUG, NULL, 0, len);
994 send(fd, &h, sizeof(h), MSG_NOSIGNAL);
995
996 if (len)
997 send(fd, copy_buf, len, MSG_NOSIGNAL);
998 }
999
/*
 * Send the plock state dump for lockspace name to a query client.
 * On an unknown lockspace the header carries -ENOENT and no payload
 * (len stays 0).
 */
static void query_dump_plocks(int fd, char *name)
{
	struct lockspace *ls;
	struct dlmc_header h;
	int len = 0;
	int rv;

	ls = find_ls(name);
	if (!ls) {
		rv = -ENOENT;
		goto out;
	}

	rv = copy_plock_state(ls, copy_buf, &len);
 out:
	init_header(&h, DLMC_CMD_DUMP_PLOCKS, name, rv, len);
	send(fd, &h, sizeof(h), MSG_NOSIGNAL);

	if (len)
		send(fd, copy_buf, len, MSG_NOSIGNAL);
}
1021
1022 /* combines a header and the data and sends it back to the client in
1023 a single do_write() call */
1024
1025 static void do_reply(int fd, int cmd, char *name, int result, int option,
1026 char *buf, int buflen)
1027 {
1028 struct dlmc_header *h;
1029 char *reply;
1030 int reply_len;
1031
|
(1) Event var_assign_alias: |
Assigning: "reply_len" = "96UL + buflen", which taints "reply_len". |
| Also see events: |
[taint_sink_lv_call] |
1032 reply_len = sizeof(struct dlmc_header) + buflen;
|
(2) Event taint_sink_lv_call: |
Passing tainted expression "reply_len" to taint sink "malloc". |
| Also see events: |
[var_assign_alias] |
1033 reply = malloc(reply_len);
1034 if (!reply)
1035 return;
1036 memset(reply, 0, reply_len);
1037 h = (struct dlmc_header *)reply;
1038
1039 init_header(h, cmd, name, result, buflen);
1040 h->option = option;
1041
1042 if (buf && buflen)
1043 memcpy(reply + sizeof(struct dlmc_header), buf, buflen);
1044
1045 do_write(fd, reply, reply_len);
1046
1047 free(reply);
1048 }
1049
1050 static void query_lockspace_info(int fd, char *name)
1051 {
1052 struct lockspace *ls;
1053 struct dlmc_lockspace lockspace;
1054 int rv;
1055
1056 ls = find_ls(name);
1057 if (!ls) {
1058 rv = -ENOENT;
1059 goto out;
1060 }
1061
1062 memset(&lockspace, 0, sizeof(lockspace));
1063
1064 rv = set_lockspace_info(ls, &lockspace);
1065 out:
1066 do_reply(fd, DLMC_CMD_LOCKSPACE_INFO, name, rv, 0,
1067 (char *)&lockspace, sizeof(lockspace));
1068 }
1069
1070 static void query_node_info(int fd, char *name, int nodeid)
1071 {
1072 struct lockspace *ls;
1073 struct dlmc_node node;
1074 int rv;
1075
1076 ls = find_ls(name);
1077 if (!ls) {
1078 rv = -ENOENT;
1079 goto out;
1080 }
1081
1082 memset(&node, 0, sizeof(node));
1083
1084 rv = set_node_info(ls, nodeid, &node);
1085 out:
1086 do_reply(fd, DLMC_CMD_NODE_INFO, name, rv, 0,
1087 (char *)&node, sizeof(node));
1088 }
1089
1090 static void query_lockspaces(int fd)
1091 {
1092 int ls_count = 0;
1093 struct dlmc_lockspace *lss = NULL;
1094 int rv, result;
1095
1096 rv = set_lockspaces(&ls_count, &lss);
1097 if (rv < 0) {
1098 result = rv;
1099 ls_count = 0;
1100 goto out;
1101 }
1102
1103 result = ls_count;
1104 out:
1105 do_reply(fd, DLMC_CMD_LOCKSPACES, NULL, result, 0,
1106 (char *)lss, ls_count * sizeof(struct dlmc_lockspace));
1107
1108 if (lss)
1109 free(lss);
1110 }
1111
1112 static void query_lockspace_nodes(int fd, char *name, int option, int max)
1113 {
1114 struct lockspace *ls;
1115 int node_count = 0;
1116 struct dlmc_node *nodes = NULL;
1117 int rv, result;
1118
1119 ls = find_ls(name);
|
(1) Event cond_false: |
Condition "!ls", taking false branch. |
1120 if (!ls) {
1121 result = -ENOENT;
1122 node_count = 0;
1123 goto out;
|
(2) Event if_end: |
End of if statement. |
1124 }
1125
1126 rv = set_lockspace_nodes(ls, option, &node_count, &nodes);
|
(3) Event cond_false: |
Condition "rv < 0", taking false branch. |
1127 if (rv < 0) {
1128 result = rv;
1129 node_count = 0;
1130 goto out;
|
(4) Event if_end: |
End of if statement. |
1131 }
1132
1133 /* node_count is the number of structs copied/returned; the caller's
1134 max may be less than that, in which case we copy as many as they
1135 asked for and return -E2BIG */
1136
|
(5) Event cond_true: |
Condition "node_count > max", taking true branch. |
|
(6) Event upper_bounds: |
Checking upper bounds of signed scalar "max" by taking the true branch of "node_count > max". |
| Also see events: |
[var_assign_parm][taint_sink_lv_call] |
1137 if (node_count > max) {
1138 result = -E2BIG;
1139 node_count = max;
|
(8) Event if_fallthrough: |
Falling through to end of if statement. |
1140 } else {
1141 result = node_count;
|
(9) Event if_end: |
End of if statement. |
1142 }
1143 out:
1144 do_reply(fd, DLMC_CMD_LOCKSPACE_NODES, name, result, 0,
1145 (char *)nodes, node_count * sizeof(struct dlmc_node));
1146
1147 if (nodes)
1148 free(nodes);
1149 }
1150
/*
 * Handle one request from a libdlmcontrol client connection: read the
 * fixed header, validate magic/version, read any variable-length
 * payload, then dispatch on the command.  Replies (where the command
 * has one) go back on the same fd via do_reply().
 */
static void process_connection(int ci)
{
	struct dlmc_header h;
	char uuid_str[RUN_UUID_LEN];
	char *extra = NULL;
	int rv, extra_len = 0;
	struct lockspace *ls;
	struct dlmc_run_check_state state;

	memset(uuid_str, 0, sizeof(uuid_str));

	rv = do_read(client[ci].fd, &h, sizeof(h));
	if (rv < 0) {
		log_debug("connection %d read error %d", ci, rv);
		goto out;
	}

	if (h.magic != DLMC_MAGIC) {
		log_debug("connection %d magic error %x", ci, h.magic);
		goto out;
	}

	/* only the major (upper 16 bits) of the version must match */
	if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
		log_debug("connection %d version error %x", ci, h.version);
		goto out;
	}

	/* h.len beyond the header means a variable-length payload follows */
	if (h.len > sizeof(h)) {
		extra_len = h.len - sizeof(h);
		extra = malloc(extra_len);
		if (!extra) {
			log_error("process_connection no mem %d", extra_len);
			goto out;
		}
		memset(extra, 0, extra_len);

		rv = do_read(client[ci].fd, extra, extra_len);
		if (rv < 0) {
			log_debug("connection %d extra read error %d", ci, rv);
			goto out;
		}
	}

	switch (h.command) {
	case DLMC_CMD_FENCE_ACK:
		/* h.name carries the nodeid as a decimal string */
		fence_ack_node(atoi(h.name));
		break;

	case DLMC_CMD_FS_REGISTER:
		if (opt(enable_fscontrol_ind)) {
			rv = fs_register_add(h.name);
			ls = find_ls(h.name);
			if (ls)
				ls->fs_registered = 1;
		} else {
			rv = -EOPNOTSUPP;
		}
		do_reply(client[ci].fd, DLMC_CMD_FS_REGISTER, h.name, rv, 0,
			 NULL, 0);
		break;

	case DLMC_CMD_FS_UNREGISTER:
		fs_register_del(h.name);
		ls = find_ls(h.name);
		if (ls)
			ls->fs_registered = 0;
		break;

	case DLMC_CMD_FS_NOTIFIED:
		ls = find_ls(h.name);
		if (ls)
			rv = set_fs_notified(ls, h.data);
		else
			rv = -ENOENT;
		/* pass back the nodeid provided by caller in option field */
		do_reply(client[ci].fd, DLMC_CMD_FS_NOTIFIED, h.name, rv,
			 h.data, NULL, 0);
		break;

	case DLMC_CMD_RUN_START:
		/* payload is the command line; h.data is the dest nodeid */
		if (!extra_len)
			rv = -EINVAL;
		else
			rv = start_run_operation(extra, extra_len, h.data, h.flags, uuid_str);
		do_reply(client[ci].fd, DLMC_CMD_RUN_START, uuid_str, rv, 0, NULL, 0);
		/* one-shot command: drop the connection after replying */
		client_dead(ci);
		break;

	case DLMC_CMD_RUN_CHECK:
		memset(&state, 0, sizeof(state));

		rv = check_run_operation(h.name, h.flags, &state);

		do_reply(client[ci].fd, DLMC_CMD_RUN_CHECK, NULL, rv, 0, (char *)&state, sizeof(state));
		/* dlmc_run_check may retry checks on the same connection */
		break;

#if 0
	case DLMC_CMD_DEADLOCK_CHECK:
		ls = find_ls(h.name);
		if (ls)
			send_cycle_start(ls);
		client_dead(ci);
		break;
#endif
	case DLMC_CMD_RELOAD_CONFIG:
		set_opt_file(1);
		break;

	case DLMC_CMD_SET_CONFIG:
		if (extra_len)
			set_opt_online(extra, extra_len);
		break;

	default:
		log_error("process_connection %d unknown command %d",
			  ci, h.command);
	}
 out:
	if (extra)
		free(extra);
}
1273
1274 static void process_listener(int ci)
1275 {
1276 int fd, i;
1277
1278 fd = accept(client[ci].fd, NULL, NULL);
1279 if (fd < 0) {
1280 log_error("process_listener: accept error %d %d", fd, errno);
1281 return;
1282 }
1283
1284 i = client_add(fd, process_connection, NULL);
1285
1286 log_debug("client connection %d fd %d", i, fd);
1287 }
1288
/*
 * Create a listening stream socket in the abstract unix namespace
 * (sun_path[0] stays '\0', the name starts at sun_path[1]).
 *
 * Returns the listening fd on success, or a negative value on error.
 *
 * Fix: the original strcpy'd sock_path into sun_path without a length
 * check, which could overflow sockaddr_un for an oversized path.
 */
static int setup_listener(const char *sock_path)
{
	struct sockaddr_un addr;
	socklen_t addrlen;
	int rv, s;

	/* we listen for new client connections on socket s */

	s = socket(AF_LOCAL, SOCK_STREAM, 0);
	if (s < 0) {
		log_error("socket error %d %d", s, errno);
		return s;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_LOCAL;

	/* -2: one byte for the leading '\0' of the abstract name, one to
	   keep the copied name NUL-terminated inside sun_path */
	if (strlen(sock_path) > sizeof(addr.sun_path) - 2) {
		log_error("socket path too long %s", sock_path);
		close(s);
		return -ENAMETOOLONG;
	}
	strcpy(&addr.sun_path[1], sock_path);
	addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;

	rv = bind(s, (struct sockaddr *) &addr, addrlen);
	if (rv < 0) {
		log_error("bind error %d %d", rv, errno);
		close(s);
		return rv;
	}

	rv = listen(s, 5);
	if (rv < 0) {
		log_error("listen error %d %d", rv, errno);
		close(s);
		return rv;
	}
	return s;
}
1323
/* Serialize the query thread (process_queries) against the main poll
   loop; both take this mutex around access to shared daemon state. */
static void query_lock(void)
{
	pthread_mutex_lock(&query_mutex);
}
1328
/* Release the mutex taken by query_lock(). */
static void query_unlock(void)
{
	pthread_mutex_unlock(&query_mutex);
}
1333
1334 /* This is a thread, so we have to be careful, don't call log_ functions.
1335 We need a thread to process queries because the main thread may block
1336 for long periods when writing to sysfs to stop dlm-kernel (any maybe
1337 other places). */
1338
1339 static void *process_queries(void *arg)
1340 {
1341 struct dlmc_header h;
1342 int s, f, rv;
1343
1344 rv = setup_listener(DLMC_QUERY_SOCK_PATH);
|
(1) Event cond_false: |
Condition "rv < 0", taking false branch. |
1345 if (rv < 0)
|
(2) Event if_end: |
End of if statement. |
1346 return NULL;
1347
1348 s = rv;
1349
|
(9) Event loop_begin: |
Jumped back to beginning of loop. |
1350 for (;;) {
1351 f = accept(s, NULL, NULL);
|
(3) Event cond_false: |
Condition "f < 0", taking false branch. |
|
(10) Event cond_false: |
Condition "f < 0", taking false branch. |
1352 if (f < 0)
|
(4) Event if_end: |
End of if statement. |
|
(11) Event if_end: |
End of if statement. |
1353 return NULL;
1354
1355 rv = do_read(f, &h, sizeof(h));
|
(5) Event cond_true: |
Condition "rv < 0", taking true branch. |
|
(13) Event cond_false: |
Condition "rv < 0", taking false branch. |
1356 if (rv < 0) {
|
(6) Event goto: |
Jumping to label "out". |
1357 goto out;
|
(14) Event if_end: |
End of if statement. |
1358 }
1359
|
(15) Event cond_false: |
Condition "h.magic != 3510423868U", taking false branch. |
1360 if (h.magic != DLMC_MAGIC) {
1361 goto out;
|
(16) Event if_end: |
End of if statement. |
1362 }
1363
|
(17) Event cond_false: |
Condition "(h.version & 0xffff0000U) != (65536U /* 65537 & 0xffff0000U */)", taking false branch. |
1364 if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
1365 goto out;
|
(18) Event if_end: |
End of if statement. |
1366 }
1367
1368 query_lock();
1369
|
(19) Event switch: |
Switch case value "6". |
1370 switch (h.command) {
1371 case DLMC_CMD_DUMP_DEBUG:
1372 query_dump_debug(f);
1373 break;
1374 case DLMC_CMD_DUMP_CONFIG:
1375 query_dump_config(f);
1376 break;
1377 case DLMC_CMD_DUMP_LOG_PLOCK:
1378 query_dump_log_plock(f);
1379 break;
1380 case DLMC_CMD_DUMP_PLOCKS:
1381 query_dump_plocks(f, h.name);
1382 break;
1383 case DLMC_CMD_LOCKSPACE_INFO:
1384 query_lockspace_info(f, h.name);
1385 break;
1386 case DLMC_CMD_NODE_INFO:
1387 query_node_info(f, h.name, h.data);
1388 break;
1389 case DLMC_CMD_LOCKSPACES:
1390 query_lockspaces(f);
1391 break;
|
(20) Event switch_case: |
Reached case "6". |
1392 case DLMC_CMD_LOCKSPACE_NODES:
|
(21) Event tainted_data: |
Passing tainted expression "h.data" to "query_lockspace_nodes", which uses it as an allocation size. [details] |
|
(22) Event remediation: |
Ensure that tainted values are properly sanitized, by checking that their values are within a permissible range. |
| Also see events: |
[tainted_argument] |
1393 query_lockspace_nodes(f, h.name, h.option, h.data);
1394 break;
1395 case DLMC_CMD_DUMP_STATUS:
1396 send_state_daemon(f);
1397 send_state_daemon_nodes(f);
1398 send_state_startup_nodes(f);
1399 break;
1400 case DLMC_CMD_DUMP_RUN:
1401 query_dump_run(f);
1402 break;
1403 default:
1404 break;
1405 }
1406 query_unlock();
1407
|
(7) Event label: |
Reached label "out". |
1408 out:
1409 close(f);
|
(8) Event loop: |
Jumping back to the beginning of the loop. |
1410 }
1411 }
1412
1413 static int setup_queries(void)
1414 {
1415 int rv;
1416
1417 pthread_mutex_init(&query_mutex, NULL);
1418
1419 rv = pthread_create(&query_thread, NULL, process_queries, NULL);
1420 if (rv < 0) {
1421 log_error("can't create query thread");
1422 return rv;
1423 }
1424 return 0;
1425 }
1426
1427 /* The dlm in kernels before 2.6.28 do not have the monitor device. We
1428 keep this fd open as long as we're running. If we exit/terminate while
1429 lockspaces exist in the kernel, the kernel will detect a close on this
1430 fd and stop the lockspaces. */
1431
1432 static void setup_monitor(void)
1433 {
1434 if (!monitor_minor)
1435 return;
1436
1437 kernel_monitor_fd = open("/dev/misc/dlm-monitor", O_RDONLY);
1438 log_debug("/dev/misc/dlm-monitor fd %d", kernel_monitor_fd);
1439 }
1440
/* Dead-connection callback for cluster fds: mark the cluster down and
   request daemon shutdown.  The error is logged only on the first
   transition (while cluster_down is still unset), so repeated callbacks
   don't spam the log. */
void cluster_dead(int ci)
{
	if (!cluster_down)
		log_error("cluster is down, exiting");
	daemon_quit = 1;
	cluster_down = 1;
}
1448
/*
 * Main daemon loop: one-time setup of all subsystems (query thread,
 * client listener, cluster interfaces, kernel config/misc devices,
 * uevents, daemon cpg, plocks), then poll every registered client fd
 * and dispatch its work/dead callback until shutdown is requested.
 * Returns < 0 on setup or poll failure, 0 on clean shutdown.
 */
static int loop(void)
{
	struct lockspace *ls;
	int poll_timeout = -1;
	int rv, i;
	void (*workfn) (int ci);
	void (*deadfn) (int ci);

	rv = setup_queries();
	if (rv < 0)
		goto out;

	rv = setup_listener(DLMC_SOCK_PATH);
	if (rv < 0)
		goto out;
	client_add(rv, process_listener, NULL);

	/* setup_cluster_cfg may return 0 with no fd to poll */
	rv = setup_cluster_cfg();
	if (rv < 0)
		goto out;
	if (rv > 0)
		client_add(rv, process_cluster_cfg, cluster_dead);

	rv = check_uncontrolled_lockspaces();
	if (rv < 0)
		goto out;

	/*
	 * unfence needs to happen after checking for uncontrolled dlm kernel
	 * state (for which we are probably currently fenced, the state must
	 * be cleared by a reboot). unfence needs to happen before joining
	 * the daemon cpg, after which it needs to be possible for someone to
	 * fence us.
	 */
	rv = unfence_node(our_nodeid);
	if (rv < 0)
		goto out;

	rv = setup_node_config();
	if (rv < 0)
		goto out;

	rv = setup_cluster();
	if (rv < 0)
		goto out;
	client_add(rv, process_cluster, cluster_dead);

	rv = setup_misc_devices();
	if (rv < 0)
		goto out;

	rv = setup_configfs_options();
	if (rv < 0)
		goto out;

	setup_monitor();

	rv = setup_configfs_members(); /* calls update_cluster() */
	if (rv < 0)
		goto out;

	rv = setup_uevent();
	if (rv < 0)
		goto out;
	client_add(rv, process_uevent, NULL);

	rv = setup_cpg_daemon();
	if (rv < 0)
		goto out;
	client_add(rv, process_cpg_daemon, cluster_dead);

	rv = set_protocol();
	if (rv < 0)
		goto out;

#if 0
	if (opt(enable_deadlk_ind)) {
		rv = setup_netlink();
		if (rv < 0)
			goto out;
		client_add(rv, process_netlink, NULL);

		setup_deadlock();
	}
#endif

	rv = setup_plocks();
	if (rv < 0)
		goto out;
	plock_fd = rv;
	plock_ci = client_add(rv, process_plocks, NULL);

	if (opt(enable_helper_ind))
		helper_ci = client_add(helper_status_fd, process_helper, helper_dead);

#ifdef USE_SD_NOTIFY
	sd_notify(0, "READY=1");
#endif

	/* We want to wait for our protocol to be set before
	   we start to process fencing. */
	daemon_fence_allow = 1;

	for (;;) {
		rv = poll(pollfd, client_maxi + 1, poll_timeout);
		if (rv == -1 && errno == EINTR) {
			/* a signal interrupted poll: quit only when no
			   lockspaces remain, otherwise refuse shutdown */
			if (daemon_quit && list_empty(&lockspaces)) {
				rv = 0;
				goto out;
			}
			if (daemon_quit) {
				log_error("shutdown ignored, active lockspaces");
				daemon_quit = 0;
			}
			continue;
		}
		if (rv < 0) {
			log_error("poll errno %d", errno);
			goto out;
		}

		query_lock();

		/* dispatch ready fds; dead callbacks run on error/hangup */
		for (i = 0; i <= client_maxi; i++) {
			if (client[i].fd < 0)
				continue;
			if (pollfd[i].revents & POLLIN) {
				workfn = client[i].workfn;
				workfn(i);
			}
			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
				deadfn = client[i].deadfn;
				deadfn(i);
			}
		}
		query_unlock();

		if (daemon_quit)
			break;

		query_lock();

		/* any pending retry work switches poll from blocking (-1)
		   to a 1 second timeout so it is re-attempted regularly */
		poll_timeout = -1;

		if (retry_fencing) {
			process_fencing_changes();
			poll_timeout = 1000;
		}

		if (poll_lockspaces || poll_fs) {
			process_lockspace_changes();
			poll_timeout = 1000;
		}

		if (poll_ignore_plock) {
			if (!limit_plocks()) {
				poll_ignore_plock = 0;
				client_back(plock_ci, plock_fd);
			}
			poll_timeout = 1000;
		}

		if (poll_drop_plock) {
			drop_resources_all();
			if (poll_drop_plock)
				poll_timeout = 1000;
		}

		query_unlock();
	}
 out:
	log_debug("shutdown");
	close_plocks();
	close_cpg_daemon();
	clear_configfs();
	close_cluster();
	close_cluster_cfg();

	list_for_each_entry(ls, &lockspaces, list)
		log_error("abandoned lockspace %s", ls->name);

	/* must be end */
	close_logging();
	return rv;
}
1634
1635 static int lockfile(const char *name)
1636 {
1637 char path[PATH_MAX];
1638 char buf[16];
1639 struct flock lock;
1640 mode_t old_umask;
1641 int fd, rv;
1642
1643 old_umask = umask(0022);
1644 rv = mkdir(SYS_VARDIR, 0775);
1645 if (rv < 0 && errno != EEXIST) {
1646 umask(old_umask);
1647 return rv;
1648 }
1649
1650 rv = mkdir(SYS_RUNDIR, 0775);
1651 if (rv < 0 && errno != EEXIST) {
1652 umask(old_umask);
1653 return rv;
1654 }
1655
1656 rv = mkdir(RUNDIR, 0775);
1657 if (rv < 0 && errno != EEXIST) {
1658 umask(old_umask);
1659 return rv;
1660 }
1661 umask(old_umask);
1662
1663 snprintf(path, PATH_MAX, "%s/%s", RUNDIR, name);
1664
1665 fd = open(path, O_CREAT|O_WRONLY|O_CLOEXEC, 0644);
1666 if (fd < 0) {
1667 log_error("lockfile open error %s: %s",
1668 path, strerror(errno));
1669 return -1;
1670 }
1671
1672 lock.l_type = F_WRLCK;
1673 lock.l_start = 0;
1674 lock.l_whence = SEEK_SET;
1675 lock.l_len = 0;
1676
1677 rv = fcntl(fd, F_SETLK, &lock);
1678 if (rv < 0) {
1679 log_error("lockfile setlk error %s: %s",
1680 path, strerror(errno));
1681 goto fail;
1682 }
1683
1684 rv = ftruncate(fd, 0);
1685 if (rv < 0) {
1686 log_error("lockfile truncate error %s: %s",
1687 path, strerror(errno));
1688 goto fail;
1689 }
1690
1691 memset(buf, 0, sizeof(buf));
1692 snprintf(buf, sizeof(buf), "%d\n", getpid());
1693
1694 rv = write(fd, buf, strlen(buf));
1695 if (rv <= 0) {
1696 log_error("lockfile write error %s: %s",
1697 path, strerror(errno));
1698 goto fail;
1699 }
1700
1701 return fd;
1702 fail:
1703 close(fd);
1704 return -1;
1705 }
1706
1707 static void unlink_lockfile(int fd, const char *dir, const char *name)
1708 {
1709 char path[PATH_MAX];
1710
1711 snprintf(path, PATH_MAX, "%s/%s", dir, name);
1712 unlink(path);
1713 close(fd);
1714 }
1715
1716 static const char *req_arg_s(int a)
1717 {
1718 switch (a) {
1719 case no_arg:
1720 return "";
1721 case req_arg_bool:
1722 return "0|1";
1723 case req_arg_int:
1724 return "<int>";
1725 case req_arg_str:
1726 return "<str>";
1727 default:
1728 return "<arg>";
1729 }
1730 }
1731
/* Print the command-line help text: one entry per option in
   dlm_options[], showing long/short forms, argument placeholder,
   description, and the default value in brackets. */
static void print_usage(void)
{
	struct dlm_option *o;
	int i;

	printf("Usage:\n");
	printf("\n");
	printf("dlm_controld [options]\n");
	printf("\n");
	printf("Option [arg]\n");
	printf("Description [default]\n");
	printf("\n");

	for (i = 0; i < dlm_options_max; i++) {
		o = &dlm_options[i];

		/* don't advertise options with no description */
		if (!strlen(o->desc))
			continue;

		printf(" --%s", o->name);

		if (o->letter) {
			printf(" | -%c", o->letter);
			if (o->req_arg)
				printf(" %s", req_arg_s(o->req_arg));
		} else {
			if (o->req_arg)
				printf(" %s", req_arg_s(o->req_arg));
		}

		printf("\n");

		printf(" %s", o->desc);

		/* append "[default]" formatted per argument type */
		if (o->req_arg == req_arg_str)
			printf(" [%s]\n", o->default_str ? o->default_str : "");
		else if (o->req_arg == req_arg_int)
			printf(" [%d]\n", o->default_int);
		else if (o->req_arg == req_arg_bool)
			printf(" [%d]\n", o->default_int);
		else if (o->req_arg == req_arg_uint)
			printf(" [%u]\n", o->default_uint);
		else if (o->req_arg == no_arg && !o->default_int)
			printf(" [0]\n");
		else
			printf("\n");

		printf("\n");
	}
}
1783
1784 static void set_opt_default(int ind, const char *name, char letter, int arg_type,
1785 int default_int, const char *default_str,
1786 unsigned int default_uint, char reload, const char *desc)
1787 {
1788 dlm_options[ind].name = name;
1789 dlm_options[ind].letter = letter;
1790 dlm_options[ind].req_arg = arg_type;
1791 dlm_options[ind].desc = desc;
1792 dlm_options[ind].reload = reload;
1793 dlm_options[ind].default_int = default_int;
1794 dlm_options[ind].default_str = default_str;
1795 dlm_options[ind].default_uint = default_uint;
1796 dlm_options[ind].use_int = default_int;
1797 dlm_options[ind].use_str = (char *)default_str;
1798 dlm_options[ind].use_uint = default_uint;
1799 }
1800
/* Populate dlm_options[] with every supported option and its default.
   Argument order per call: index, long name, short letter ('\0' for
   none), arg type, default int, default string, default uint,
   reloadable flag, description (empty string = not advertised). */
static void set_opt_defaults(void)
{
	set_opt_default(daemon_debug_ind,
			"daemon_debug", 'D', req_arg_bool,
			0, NULL, 0, 1,
			"enable debugging to stderr and don't fork");

	set_opt_default(foreground_ind,
			"foreground", '\0', req_arg_bool,
			0, NULL, 0, 0,
			"don't fork");

	set_opt_default(log_debug_ind,
			"log_debug", 'K', req_arg_bool,
			0, NULL, 0, 1,
			"enable kernel dlm debugging messages");

	set_opt_default(protocol_ind,
			"protocol", 'r', req_arg_str,
			0, "tcp", 0, 0,
			"dlm kernel lowcomms protocol: tcp, sctp");

	set_opt_default(port_ind,
			"port", 'R', req_arg_uint,
			-1, NULL, 21064, 0,
			"dlm kernel lowcomms protocol port");

	set_opt_default(mark_ind,
			"mark", '\0', req_arg_uint,
			0, NULL, 0, 0,
			"set mark value for DLM if not explicit by nodeid specified");

	set_opt_default(debug_logfile_ind,
			"debug_logfile", 'L', req_arg_bool,
			0, NULL, 0, 1,
			"write debugging to log file");

	set_opt_default(enable_fscontrol_ind,
			"enable_fscontrol", '\0', req_arg_bool,
			0, NULL, 0, 0,
			""); /* do not advertise */

	set_opt_default(enable_plock_ind,
			"enable_plock", 'p', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable posix lock support for cluster fs");

	set_opt_default(plock_debug_ind,
			"plock_debug", 'P', req_arg_bool,
			0, NULL, 0, 1,
			"enable plock debugging");

	set_opt_default(plock_rate_limit_ind,
			"plock_rate_limit", 'l', req_arg_int,
			0, NULL, 0, 1,
			"limit rate of plock operations (0 for none)");

	set_opt_default(plock_ownership_ind,
			"plock_ownership", 'o', req_arg_bool,
			0, NULL, 0, 0,
			"enable/disable plock ownership");

	set_opt_default(drop_resources_time_ind,
			"drop_resources_time", 't', req_arg_int,
			10000, NULL, 0, 1,
			"plock ownership drop resources time (milliseconds)");

	set_opt_default(drop_resources_count_ind,
			"drop_resources_count", 'c', req_arg_int,
			10, NULL, 0, 1,
			"plock ownership drop resources count");

	set_opt_default(drop_resources_age_ind,
			"drop_resources_age", 'a', req_arg_int,
			10000, NULL, 0, 1,
			"plock ownership drop resources age (milliseconds)");

	set_opt_default(post_join_delay_ind,
			"post_join_delay", 'j', req_arg_int,
			30, NULL, 0, 1,
			"seconds to delay fencing after cluster join");

	set_opt_default(enable_fencing_ind,
			"enable_fencing", 'f', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable fencing");

	set_opt_default(enable_concurrent_fencing_ind,
			"enable_concurrent_fencing", '\0', req_arg_bool,
			0, NULL, 0, 0,
			"enable/disable concurrent fencing");

	set_opt_default(enable_startup_fencing_ind,
			"enable_startup_fencing", 's', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable startup fencing");

	set_opt_default(repeat_failed_fencing_ind,
			"repeat_failed_fencing", '\0', req_arg_bool,
			1, NULL, 0, 1,
			"enable/disable retrying after fencing fails");

	set_opt_default(enable_quorum_fencing_ind,
			"enable_quorum_fencing", 'q', req_arg_bool,
			1, NULL, 0, 1,
			"enable/disable quorum requirement for fencing");

	set_opt_default(enable_quorum_lockspace_ind,
			"enable_quorum_lockspace", '\0', req_arg_bool,
			1, NULL, 0, 1,
			"enable/disable quorum requirement for lockspace operations");

	set_opt_default(enable_helper_ind,
			"enable_helper", '\0', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable helper process for running commands");

	set_opt_default(help_ind,
			"help", 'h', no_arg,
			-1, NULL, 0, 0,
			"print this help, then exit");

	set_opt_default(version_ind,
			"version", 'V', no_arg,
			-1, NULL, 0, 0,
			"Print program version information, then exit");
}
1928
1929 int get_ind_name(char *s)
1930 {
1931 char name[PATH_MAX];
1932 char *p = s;
1933 int i;
1934
1935 memset(name, 0, sizeof(name));
1936
1937 for (i = 0; i < strlen(s); i++) {
1938 if (*p == '=')
1939 break;
1940 if (*p == ' ')
1941 break;
1942 name[i] = *p;
1943 p++;
1944 }
1945
1946 for (i = 0; i < dlm_options_max; i++) {
1947 if (!strcmp(dlm_options[i].name, name))
1948 return i;
1949 }
1950 return -1;
1951 }
1952
1953 static int get_ind_letter(char c)
1954 {
1955 int i;
1956
1957 for (i = 0; i < dlm_options_max; i++) {
1958 if (dlm_options[i].letter == c)
1959 return i;
1960 }
1961 return -1;
1962 }
1963
1964 struct dlm_option *get_dlm_option(char *name)
1965 {
1966 int i;
1967 i = get_ind_name(name);
1968 if (i < 0)
1969 return NULL;
1970 return &dlm_options[i];
1971 }
1972
/*
 * Parse command-line options into dlm_options[], recording both the
 * cli_* value (what was given) and the effective use_* value.  Accepts
 * "--name", "-x", "--name=arg", "-xARG", space-separated args, and
 * bundled boolean letters ("-DKP").  Exits the process on usage errors
 * or after printing help/version.
 */
static void set_opt_cli(int argc, char **argv)
{
	struct dlm_option *o;
	char *arg1, *p, *arg_str, *endptr;
	char bool_str[] = "1";
	char bundled_letters[8];
	int b, blc = 0, blc_max = 8;
	int debug_options = 0;
	int i, ind, bundled;

	if (argc < 2)
		return;

	arg1 = argv[1];

	if (!strcmp(arg1, "help") || !strcmp(arg1, "--help") || !strcmp(arg1, "-h")) {
		print_usage();
		exit(EXIT_SUCCESS);
	}

	if (!strcmp(arg1, "version") || !strcmp(arg1, "--version") || !strcmp(arg1, "-V")) {
		printf("dlm_controld %s (built %s %s)\n",
		       RELEASE_VERSION, __DATE__, __TIME__);
		printf("%s\n", REDHAT_COPYRIGHT);
		exit(EXIT_SUCCESS);
	}

	for (i = 1; i < argc; ) {
		p = argv[i++];

		if (!strcmp(p, "--debug_options")) {
			debug_options = 1;
			continue;
		}

		/* resolve the option index from "--name" or "-x" form */
		if (p[0] == '-' && p[1] == '-')
			ind = get_ind_name(p + 2);
		else if (p[0] == '-')
			ind = get_ind_letter(p[1]);
		else {
			fprintf(stderr, "unknown option arg %s\n", p);
			exit(EXIT_FAILURE);
		}

		if (ind < 0) {
			fprintf(stderr, "unknown option %s\n", p);
			exit(EXIT_FAILURE);
		}

		o = &dlm_options[ind];
		o->cli_set++;

		if (!o->req_arg || (o->req_arg == req_arg_bool)) {
			bundled = 0;

			/* current for no_arg type, there is not possible to have bundled options.
			 * for req_arg_bool, bundled options, e.g. -DKP. all treat as "true".
			 * below code save bundled, arg-less, single letters */
			if ((p[0] == '-') && isalpha(p[1]) && (strlen(p) > 2)) {
				for (b = 2; b < strlen(p) && blc < blc_max; b++) {
					if (!isalpha(p[b]))
						break;
					bundled_letters[blc++] = p[b];
					bundled = 1;
				}
			}
			if (bundled) {
				/* "-x" has same effect as "-x 1" */
				o->cli_int = 1;
				o->use_int = 1;
				continue;
			}
		}

		arg_str = NULL;

		if (strstr(p, "=")) {
			/* arg starts after = for name or letter */
			arg_str = strstr(p, "=") + 1;

		} else if (strlen(p) > 2 && isalpha(p[1]) && isdigit(p[2])) {
			/* arg with no space between letter and digits */
			arg_str = p + 2;

		} else {
			/* space separates arg from name or letter */
			if (o->req_arg == req_arg_bool) {
				/* bool type treat empty arg as true */
				if (i >= argc || argv[i][0] == '-')
					arg_str = bool_str;
				else
					arg_str = argv[i++];
			} else {
				if (i >= argc) {
					fprintf(stderr, "option %s no arg\n", p);
					exit(EXIT_FAILURE);
				}
				arg_str = argv[i++];
			}
		}

		if (!arg_str || arg_str[0] == '-' || arg_str[0] == '\0') {
			fprintf(stderr, "option %s requires arg\n", p);
			exit(EXIT_FAILURE);
		}
		/* non-string options must be numeric: strtol signals
		   "no digits at all" via endptr == arg_str */
		if ((o->req_arg != req_arg_str) && !strtol(arg_str, &endptr, 10)) {
			if (endptr == arg_str) {
				fprintf(stderr, "option %s requires digit number\n", p);
				exit(EXIT_FAILURE);
			}
		}

		/* store the value in the slot matching the arg type */
		if (o->req_arg == req_arg_str) {
			o->cli_str = strdup(arg_str);
			o->use_str = o->cli_str;
		} else if (o->req_arg == req_arg_int) {
			o->cli_int = atoi(arg_str);
			o->use_int = o->cli_int;
		} else if (o->req_arg == req_arg_bool) {
			o->cli_int = atoi(arg_str) ? 1 : 0;
			o->use_int = o->cli_int;
		} else if (o->req_arg == req_arg_uint) {
			o->cli_uint = strtoul(arg_str, NULL, 0);
			o->use_uint = o->cli_uint;
		}
	}

	/* process bundled letters saved above */

	for (i = 0; i < blc; i++) {
		ind = get_ind_letter(bundled_letters[i]);
		if (ind < 0) {
			fprintf(stderr, "unknown option char %c\n", bundled_letters[i]);
			exit(EXIT_FAILURE);
		}
		/* bundled letter must be bool type, treat it with "true" value */
		o = &dlm_options[ind];
		o->cli_set++;
		o->cli_int = 1;
		o->use_int = 1;
	}

	if (debug_options && opt(daemon_debug_ind)) {
		for (i = 0; i < dlm_options_max; i++) {
			o = &dlm_options[i];
			printf("%-25s cli_set %d cli_int %d cli_str %s use_int %d use_str %s\n",
			       o->name, o->cli_set, o->cli_int, o->cli_str, o->use_int, o->use_str);
		}
	}

	/* environment override, useful for test and debug runs */
	if (getenv("DLM_CONTROLD_DEBUG")) {
		dlm_options[daemon_debug_ind].use_int = 1;
	}
}
2127
#if 0
/* When this is used, the systemd service file needs ControlGroup=cpu:/ */
/* NOTE: compiled out; kept for reference.  Would raise the daemon to
   the maximum SCHED_RR (realtime round-robin) priority. */
static void set_scheduler(void)
{
	struct sched_param sched_param;
	int rv;

	rv = sched_get_priority_max(SCHED_RR);
	if (rv != -1) {
		sched_param.sched_priority = rv;
		rv = sched_setscheduler(0, SCHED_RR, &sched_param);
		if (rv == -1)
			log_error("could not set SCHED_RR priority %d err %d",
				  sched_param.sched_priority, errno);
	} else {
		log_error("could not get maximum scheduler priority err %d",
			  errno);
	}
}
#endif
2148
/* Daemon entry point: load configuration (defaults, then cli, then
   config file), initialize state, optionally daemonize, take the pid
   lockfile, install signal handlers, then run the main loop until
   shutdown.  Returns 0 on clean exit, 1 on error. */
int main(int argc, char **argv)
{
	struct sigaction act;
	int fd, rv;

	/*
	 * config priority: cli, config file, default
	 * - explicit cli setting will override default,
	 * - explicit file setting will override default
	 * - explicit file setting will not override explicit cli setting
	 *
	 * "dlm reload_config" will trigger to reload config file, and
	 * reload action also follows the rule: not override explicit
	 * cli setting
	 */
	set_opt_defaults();
	set_opt_cli(argc, argv);
	set_opt_file(0);

	rv = node_config_init(CONF_FILE_PATH);
	if (rv)
		return 1;

	strcpy(fence_all_device.name, "fence_all");
	strcpy(fence_all_device.agent, "dlm_stonith");
	fence_all_device.unfence = 0;

	INIT_LIST_HEAD(&lockspaces);
	INIT_LIST_HEAD(&fs_register_list);
	INIT_LIST_HEAD(&run_ops);
	init_daemon();

	/* fork into the background unless debugging or explicitly
	   asked to stay in the foreground */
	if (!opt(daemon_debug_ind) && !opt(foreground_ind)) {
		if (daemon(0, 0) < 0) {
			perror("daemon error");
			exit(EXIT_FAILURE);
		}
	}

	init_logging();

	/* single-instance guard: exits if another daemon holds the lock */
	fd = lockfile(RUN_FILE_NAME);
	if (fd < 0)
		return 1;

	log_level(NULL, LOG_INFO, "dlm_controld %s started", RELEASE_VERSION);

	if (opt(enable_helper_ind))
		setup_helper();

	/* SIGTERM/SIGINT request shutdown, SIGHUP is ignored, SIGCHLD
	   reaps helper children (without stop notifications) */
	memset(&act, 0, sizeof(act));
	act.sa_handler = sigterm_handler;
	rv = sigaction(SIGTERM, &act, NULL);
	if (rv < 0)
		goto out;
	rv = sigaction(SIGINT, &act, NULL);
	if (rv < 0)
		goto out;

	memset(&act, 0, sizeof(act));
	act.sa_handler = SIG_IGN;
	rv = sigaction(SIGHUP, &act, NULL);
	if (rv < 0)
		goto out;

	memset(&act, 0, sizeof(act));
	act.sa_handler = sigchld_handler;
	act.sa_flags = SA_NOCLDSTOP;
	rv = sigaction(SIGCHLD, &act, NULL);
	if (rv < 0)
		goto out;

	/* set_scheduler(); */

	rv = loop();

 out:
	unlink_lockfile(fd, RUNDIR, RUN_FILE_NAME);
	return rv < 0 ? 1 : 0;
}
2229
2230