1 /*
2 * Copyright 2004-2012 Red Hat, Inc.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v2 or (at your option) any later version.
7 */
8
9 #define EXTERN
10 #include "dlm_daemon.h"
11 #include <ctype.h>
12 #include <pthread.h>
13 #include <linux/netlink.h>
14 #include <linux/genetlink.h>
15 #include <uuid/uuid.h>
16
17 #ifdef USE_SD_NOTIFY
18 #include <systemd/sd-daemon.h>
19 #endif
20
21 #include "copyright.cf"
22 #include "version.cf"
23
24 #define CLIENT_NALLOC 32
25 static int client_maxi;
26 static int client_size = 0;
27 static struct client *client = NULL;
28 static struct pollfd *pollfd = NULL;
29 static pthread_t query_thread;
30 static pthread_mutex_t query_mutex;
31 static struct list_head fs_register_list;
32 static int kernel_monitor_fd;
33
34 int helper_ci;
35 int helper_pid;
36 int helper_req_fd;
37 int helper_status_fd;
38 uint64_t helper_last_status;
39 uint32_t helper_full_count;
40
/* One slot in the poll loop's connection table.  workfn/deadfn are stored
   as void * and cast back to void (*)(int ci) at the call site. */
struct client {
	int fd;			/* -1 marks a free slot */
	void *workfn;		/* called when pollfd reports POLLIN */
	void *deadfn;		/* called on POLLHUP/error; defaults to client_dead */
	struct lockspace *ls;	/* associated lockspace, if any */
};
47
/* Indices into uevent_vars[]/uevent_vals[] for the uevent fields we parse. */
enum {
	Env_ACTION = 0,
	Env_DEVPATH,
	Env_SUBSYSTEM,
	Env_LOCKSPACE,
	Env_RELEASE_RECOVER,
	Env_Last, /* Flag for end of vars */
};
56
/* "KEY=" prefixes matched against each NUL-separated uevent string;
   decode_uevent() stores a pointer to the value (just past '=') */
static const char *uevent_vars[] = {
	[Env_ACTION] = "ACTION=",
	[Env_DEVPATH] = "DEVPATH=",
	[Env_SUBSYSTEM] = "SUBSYSTEM=",
	[Env_LOCKSPACE] = "LOCKSPACE=",
	[Env_RELEASE_RECOVER] = "RELEASE_RECOVER=",
};
64
/*
 * Split a uevent buffer of NUL-separated "KEY=value" strings and record,
 * for each prefix in vars[], a pointer to the value text inside buf.
 *
 * buf:   raw uevent payload (strings separated by '\0')
 * len:   number of valid bytes in buf
 * vars:  nvars "KEY=" prefixes to look for
 * vals:  out array (nvars entries); unmatched entries are set to NULL
 *
 * Fix: the old loop did "len -= slen; len--" with unsigned len, so a final
 * string without a '\0' terminator made len wrap around and the scan ran
 * past the buffer.  We now bound the string search with memchr() and stop
 * cleanly at an unterminated tail.
 */
static void decode_uevent(const char *buf, unsigned len, const char *vars[],
			  unsigned nvars, const char *vals[])
{
	const char *ptr, *end;
	unsigned int i;
	size_t slen, vlen;

	memset(vals, 0, sizeof(const char *) * nvars);

	while (len > 0) {
		ptr = buf;
		/* bounded strlen: never read past the len bytes we were given */
		end = memchr(ptr, '\0', len);
		slen = end ? (size_t)(end - ptr) : (size_t)len;

		for (i = 0; i < nvars; i++) {
			vlen = strlen(vars[i]);
			if (vlen > slen)
				continue;
			if (memcmp(vars[i], ptr, vlen) != 0)
				continue;
			vals[i] = ptr + vlen;
			break;
		}

		/* last entry had no terminator; stop instead of wrapping len */
		if (slen >= len)
			break;
		buf += slen + 1;
		len -= slen + 1;
	}
}
93
/*
 * Read exactly count bytes from fd, retrying on EINTR and short reads.
 * Returns 0 on success, -1 on EOF or error.
 *
 * Fix: the offset was an int compared against a size_t count, a
 * signed/unsigned comparison that breaks for counts above INT_MAX;
 * use size_t/ssize_t throughout.
 */
int do_read(int fd, void *buf, size_t count)
{
	size_t off = 0;
	ssize_t rv;

	while (off < count) {
		rv = read(fd, (char *)buf + off, count - off);
		if (rv == 0)
			return -1;	/* EOF before count bytes arrived */
		if (rv == -1 && errno == EINTR)
			continue;
		if (rv == -1)
			return -1;
		off += rv;
	}
	return 0;
}
110
/*
 * Coverity annotation (was injected as raw text, breaking compilation):
 * (1) Event noescape:
 * "do_write(int, void *, size_t)" does not free or save its parameter "fd".
 */
/*
 * Write exactly count bytes to fd, retrying on EINTR and short writes.
 * Returns 0 on success, the negative write() return on error.
 */
int do_write(int fd, void *buf, size_t count)
{
	int off = 0;
	int rv;

	for (;;) {
		rv = write(fd, (char *)buf + off, count);
		if (rv == -1 && errno == EINTR)
			continue;
		if (rv < 0) {
			log_error("write errno %d", errno);
			return rv;
		}
		if (rv == count)
			return 0;
		/* short write: advance past what was taken and go again */
		count -= rv;
		off += rv;
	}
}
131
/* Seconds from the monotonic clock (unaffected by wall-clock changes),
   used for timeouts and heartbeat age checks. */
uint64_t monotime(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (uint64_t)now.tv_sec;
}
138
139 static void client_alloc(void)
140 {
141 int i;
142
143 if (!client) {
144 client = malloc(CLIENT_NALLOC * sizeof(struct client));
145 pollfd = malloc(CLIENT_NALLOC * sizeof(struct pollfd));
146 } else {
147 client = realloc(client, (client_size + CLIENT_NALLOC) *
148 sizeof(struct client));
149 pollfd = realloc(pollfd, (client_size + CLIENT_NALLOC) *
150 sizeof(struct pollfd));
151 if (!pollfd)
152 log_error("can't alloc for pollfd");
153 }
154 if (!client || !pollfd)
155 log_error("can't alloc for client array");
156
157 for (i = client_size; i < client_size + CLIENT_NALLOC; i++) {
158 client[i].workfn = NULL;
159 client[i].deadfn = NULL;
160 client[i].fd = -1;
161 pollfd[i].fd = -1;
162 pollfd[i].revents = 0;
163 }
164 client_size += CLIENT_NALLOC;
165 }
166
/* Default deadfn: close the connection and free its slot (fd -1 lets
   client_add reuse it; pollfd -1 makes poll skip it). */
void client_dead(int ci)
{
	close(client[ci].fd);
	client[ci].workfn = NULL;
	client[ci].fd = -1;
	pollfd[ci].fd = -1;
}
174
175 int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci))
176 {
177 int i;
178
179 if (!client)
180 client_alloc();
181 again:
182 for (i = 0; i < client_size; i++) {
183 if (client[i].fd == -1) {
184 client[i].workfn = workfn;
185 if (deadfn)
186 client[i].deadfn = deadfn;
187 else
188 client[i].deadfn = client_dead;
189 client[i].fd = fd;
190 pollfd[i].fd = fd;
191 pollfd[i].events = POLLIN;
192 if (i > client_maxi)
193 client_maxi = i;
194 return i;
195 }
196 }
197
198 client_alloc();
199 goto again;
200 }
201
/* Return the fd stored in slot ci (-1 if the slot is free). */
int client_fd(int ci)
{
	return client[ci].fd;
}
206
/* Temporarily stop polling slot ci; undone by client_back().
   The fd parameter is unused here (kept for symmetry with client_back). */
void client_ignore(int ci, int fd)
{
	pollfd[ci].fd = -1;
	pollfd[ci].events = 0;
}
212
/* Resume polling slot ci with the given fd (pairs with client_ignore). */
void client_back(int ci, int fd)
{
	pollfd[ci].fd = fd;
	pollfd[ci].events = POLLIN;
}
218
/* SIGTERM: ask the main loop to exit.  NOTE(review): daemon_quit is
   declared elsewhere; confirm it is volatile sig_atomic_t (or atomic)
   for async-signal safety. */
static void sigterm_handler(int sig)
{
	daemon_quit = 1;
}
223
/* SIGCHLD: intentionally empty; installing a handler makes poll() return
   with EINTR so the main loop can reap children (see helper_dead). */
static void sigchld_handler(int sig)
{
}
227
/* Look up a run operation on the global run_ops list by its uuid string.
   Returns NULL when no run matches. */
struct run *find_run(char *uuid_str)
{
	struct run *run;

	list_for_each_entry(run, &run_ops, list) {
		if (!strcmp(run->uuid, uuid_str))
			return run;
	}
	return NULL;
}
238
/* Tear down both helper pipes and drop the helper from the poll loop,
   after which requests are no longer sent to it. */
static void close_helper(void)
{
	close(helper_req_fd);
	close(helper_status_fd);
	helper_req_fd = -1;
	helper_status_fd = -1;
	pollfd[helper_ci].fd = -1;
	pollfd[helper_ci].events = 0;
	helper_ci = -1;

	/* don't set helper_pid = -1 until we've tried waitpid */
}
251
252 /*
253 * We cannot block the main thread on this write, so the pipe is NONBLOCK, and
254 * write fails with EAGAIN when the pipe is full. With around 1.5K request
255 * size and 64k default pipe size, the pipe will be full if we quickly send
256 * around 40 requests to the helper. We retry the message once a second, so
257 * we'll retry the write again in a second.
258 *
259 * By setting the pipe size to 1MB in setup_helper, we could quickly send many
260 * more requests before getting EAGAIN.
261 */
262
263 void send_helper_run_request(struct run_request *req)
264 {
265 int rv;
266
267 if (helper_req_fd == -1) {
268 log_error("send_helper_run_request no fd");
269 return;
270 }
271
272 retry:
273 rv = write(helper_req_fd, req, sizeof(struct run_request));
274 if (rv == -1 && errno == EINTR)
275 goto retry;
276
277 /* pipe is full, we'll try again in a second */
278 if (rv == -1 && errno == EAGAIN) {
279 helper_full_count++;
280 log_debug("send_helper_run_request full_count %u",
281 helper_full_count);
282 return;
283 }
284
285 /* helper exited or closed fd, quit using helper */
286 if (rv == -1 && errno == EPIPE) {
287 log_error("send_helper_run_request EPIPE");
288 close_helper();
289 return;
290 }
291
292 if (rv != sizeof(struct run_request)) {
293 /* this shouldn't happen */
294 log_error("send_helper_run_request %d %d", rv, errno);
295 close_helper();
296 return;
297 }
298 }
299
300 static void send_helper_run_cancel(struct run *run)
301 {
302 struct run_request req;
303 int rv;
304
305 if (helper_req_fd == -1) {
306 log_error("send_helper_run_cancel no fd");
307 return;
308 }
309
310 memset(&req, 0, sizeof(req));
311 memcpy(req.uuid, run->uuid, RUN_UUID_LEN);
312
313 rv = write(helper_req_fd, &req, sizeof(struct run_request));
314 if (rv < 0)
315 log_error("send_helper_run_cancel write error");
316 }
317
318 /*
319 * first pipe for daemon to send requests to helper; they are not acknowledged
320 * and the daemon does not get any result back for the requests.
321 *
322 * second pipe for helper to send general status/heartbeat back to the daemon
323 * every so often to confirm it's not dead/hung. If the helper gets stuck or
324 * killed, the daemon will not get the status and won't bother sending requests
325 * to the helper, and use SIGTERM instead
326 */
327
/*
 * Fork the run-command helper process, connected by the two pipes
 * described above (requests out, status/heartbeat back).  Both pipes are
 * O_NONBLOCK so the main daemon thread never blocks on the helper, and
 * O_CLOEXEC so commands the helper spawns don't inherit them.
 * Returns 0 on success or -errno on pipe/fork failure.
 */
static int setup_helper(void)
{
	int pid;
	int pw_fd = -1; /* parent write */
	int cr_fd = -1; /* child read */
	int pr_fd = -1; /* parent read */
	int cw_fd = -1; /* child write */
	int pfd[2];

	/* we can't allow the main daemon thread to block */
	if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC))
		return -errno;

	/* uncomment for rhel7 where this should be available */
	/* fcntl(pfd[1], F_SETPIPE_SZ, 1024*1024); */

	cr_fd = pfd[0];
	pw_fd = pfd[1];

	if (pipe2(pfd, O_NONBLOCK | O_CLOEXEC)) {
		close(cr_fd);
		close(pw_fd);
		return -errno;
	}

	pr_fd = pfd[0];
	cw_fd = pfd[1];

	pid = fork();
	if (pid < 0) {
		close(cr_fd);
		close(pw_fd);
		close(pr_fd);
		close(cw_fd);
		return -errno;
	}

	if (pid) {
		/* parent: keep its ends, close the child's ends */
		close(cr_fd);
		close(cw_fd);
		helper_req_fd = pw_fd;
		helper_status_fd = pr_fd;
		helper_pid = pid;
		return 0;
	} else {
		/* child: close the parent's ends and run the helper loop */
		close(pr_fd);
		close(pw_fd);
		run_helper(cr_fd, cw_fd, opt(daemon_debug_ind));
		exit(0);
	}
}
379
380 static void process_helper(int ci)
381 {
382 struct run_reply reply;
383 struct run_reply send_reply;
384 struct run *run;
385 int rv;
386
387 rv = read(client[ci].fd, &reply, sizeof(reply));
388 if (!rv || rv == -EAGAIN)
389 return;
390 if (rv < 0) {
391 log_error("process_helper rv %d errno %d", rv, errno);
392 goto fail;
393 }
394 if (rv != sizeof(reply)) {
395 log_error("process_helper recv size %d", rv);
396 goto fail;
397 }
398
399 if (!reply.header.type) {
400 /* log_debug("helper status"); */
401 helper_last_status = monotime();
402 return;
403 }
404
405 if (reply.header.type == DLM_MSG_RUN_REPLY) {
406 run = find_run(reply.uuid);
407 if (!run) {
408 log_error("helper reply no run uuid %s", reply.uuid);
409 return;
410 }
411
412 memset(&send_reply, 0, sizeof(send_reply));
413 memcpy(&send_reply.info, &run->info, sizeof(struct run_info));
414 memcpy(send_reply.uuid, run->uuid, RUN_UUID_LEN);
415 send_reply.header.type = DLM_MSG_RUN_REPLY;
416 send_reply.info.local_pid = reply.info.local_pid;
417 send_reply.info.local_result = reply.info.local_result;
418
419 log_debug("helper reply %s pid %d result %d",
420 send_reply.uuid, send_reply.info.local_pid, send_reply.info.local_result);
421
422 send_run_reply(run, &send_reply);
423 return;
424 }
425
426 return;
427
428 fail:
429 close_helper();
430 }
431
/* Deadfn for the helper's status pipe: the helper closed its end, so
   tear down the pipes, reap the child, and log how it ended. */
static void helper_dead(int ci)
{
	int pid = helper_pid;
	int rv, status;

	close_helper();

	helper_pid = -1;

	/* WNOHANG: the child should already be a zombie at this point */
	rv = waitpid(pid, &status, WNOHANG);

	if (rv != pid) {
		/* should not happen */
		log_error("helper pid %d dead wait %d", pid, rv);
		return;
	}

	if (WIFEXITED(status)) {
		log_error("helper pid %d exit status %d", pid,
			  WEXITSTATUS(status));
		return;
	}

	if (WIFSIGNALED(status)) {
		log_error("helper pid %d term signal %d", pid,
			  WTERMSIG(status));
		return;
	}

	/* should not happen */
	log_error("helper pid %d state change", pid);
}
464
/*
 * Create a run operation for the command in data, broadcast the request
 * to the cluster, and copy the new run's uuid into uuid_out
 * (RUN_UUID_LEN bytes).  Returns the send result, or -1 when the helper
 * is disabled, the command is too long, or allocation fails.
 *
 * NOTE(review): datalen == RUN_COMMAND_LEN passes the length check, and
 * strncpy then fills run->command completely; termination is then only
 * guaranteed by the preceding memset if the command buffer is larger
 * than RUN_COMMAND_LEN — verify against the struct run definition.
 */
static int start_run_operation(char *data, int datalen, int dest_nodeid, uint32_t flags, char *uuid_out)
{
	struct run *run;
	struct run_request req;
	uuid_t uu;
	int rv;

	if (!opt(enable_helper_ind)) {
		log_debug("ignore start_run helper not enabled");
		return -1;
	}

	if (datalen > RUN_COMMAND_LEN)
		return -1;

	if (!(run = malloc(sizeof(struct run))))
		return -1;

	memset(run, 0, sizeof(struct run));

	uuid_generate(uu);
	uuid_unparse_lower(uu, run->uuid);
	strncpy(run->command, data, datalen);
	run->info.start_nodeid = our_nodeid;
	run->info.dest_nodeid = dest_nodeid;
	run->info.flags = flags;

	memset(&req, 0, sizeof(req));
	req.header.type = DLM_MSG_RUN_REQUEST;
	memcpy(&req.info, &run->info, sizeof(struct run_info));
	memcpy(req.uuid, run->uuid, RUN_UUID_LEN);
	strncpy(req.command, data, datalen);

	log_error("run start %s %.128s", run->uuid, run->command);

	rv = send_run_request(run, &req);

	/* the run stays on run_ops even if the send failed; the caller's
	   later check/cancel path cleans it up via clear_run */
	memcpy(uuid_out, run->uuid, RUN_UUID_LEN);
	list_add(&run->list, &run_ops);

	/*
	 * This flag means the starting node should run the command itself
	 * at the time of the request and not wait to receive its own request.
	 */
	if (flags & DLMC_FLAG_RUN_START_NODE_FIRST)
		send_helper_run_request(&req);

	return rv;
}
514
/* Remove a run from the global run_ops list and free it.  The caller
   must not use run afterwards. */
void clear_run(struct run *run)
{
	log_debug("clear run %s", run->uuid);
	list_del(&run->list);
	free(run);
}
521
/*
 * Report (or cancel) the state of a run operation identified by uuid.
 * Sets WAITING/DONE/FAILED bits in state->check_status; with
 * DLMC_FLAG_RUN_CHECK_CANCEL the run is cancelled and freed, and with
 * DLMC_FLAG_RUN_CHECK_CLEAR a finished run is freed after reporting.
 * Returns 0, or -1 when the helper is disabled or the uuid is unknown.
 */
static int check_run_operation(char *uuid_str, uint32_t flags, struct dlmc_run_check_state *state)
{
	char nodes_buf[128];
	struct run *run;
	int pos, len, ret, i;

	if (!opt(enable_helper_ind)) {
		log_debug("ignore check_run helper not enabled");
		return -1;
	}

	run = find_run(uuid_str);
	if (!run) {
		log_debug("check_run no uuid %s", uuid_str);
		return -1;
	}

	if (flags & DLMC_FLAG_RUN_CHECK_CANCEL) {
		log_debug("cancel_run %s", run->uuid);
		send_helper_run_cancel(run);
		clear_run(run);
		return 0;
	}

	log_debug("check_run %s reply_count %d need_replies %d fail_count %d",
		  uuid_str, run->info.reply_count, run->info.need_replies, run->info.fail_count);

	if (run->info.need_replies)
		state->check_status |= DLMC_RUN_STATUS_WAITING;
	else
		state->check_status |= DLMC_RUN_STATUS_DONE;

	if (run->info.fail_count)
		state->check_status |= DLMC_RUN_STATUS_FAILED;

	if (!run->info.need_replies) {
		if (run->info.fail_count) {
			/* create a printable list of nodeids where the command failed */
			pos = 0;
			len = sizeof(nodes_buf);
			memset(nodes_buf, 0, len);
			for (i = 0; i < run->node_count; i++) {
				if (!run->node_results[i].result)
					continue;
				ret = snprintf(nodes_buf + pos, len - pos, "%d ", run->node_results[i].nodeid);
				if (ret >= len - pos)
					break;	/* truncated; stop appending */
				pos += ret;
			}
			nodes_buf[len-1] = '\0';

			log_error("run ended %s error from %d remote nodes with ids: %s", run->uuid, run->info.fail_count, nodes_buf);
		} else {
			log_error("run ended %s success from %d remote nodes", run->uuid, run->info.reply_count);
		}
	}

	if (!run->info.need_replies && (flags & DLMC_FLAG_RUN_CHECK_CLEAR))
		clear_run(run);

	return 0;
}
584
/*
 * Allocate and initialize a lockspace struct for the given name.
 * Returns NULL on allocation failure.  The name is copied with strncpy;
 * termination relies on the preceding memset (assumes ls->name is larger
 * than DLM_LOCKSPACE_LEN — consistent with init_header's use of the
 * same limit).
 */
static struct lockspace *create_ls(const char *name)
{
	struct lockspace *ls;

	ls = malloc(sizeof(*ls));
	if (!ls)
		goto out;
	memset(ls, 0, sizeof(struct lockspace));
	strncpy(ls->name, name, DLM_LOCKSPACE_LEN);

	INIT_LIST_HEAD(&ls->changes);
	INIT_LIST_HEAD(&ls->node_history);
	INIT_LIST_HEAD(&ls->saved_messages);
	INIT_LIST_HEAD(&ls->plock_resources);
	ls->plock_resources_root = RB_ROOT;
#if 0
	INIT_LIST_HEAD(&ls->deadlk_nodes);
	INIT_LIST_HEAD(&ls->transactions);
	INIT_LIST_HEAD(&ls->resources);
#endif
	setup_lockspace_config(ls);
 out:
	return ls;
}
609
610 struct lockspace *find_ls(const char *name)
611 {
612 struct lockspace *ls;
613
614 list_for_each_entry(ls, &lockspaces, list) {
615 if ((strlen(ls->name) == strlen(name)) &&
616 !strncmp(ls->name, name, strlen(name)))
617 return ls;
618 }
619 return NULL;
620 }
621
/* Look up a lockspace by its global id.  Returns NULL when not found. */
struct lockspace *find_ls_id(uint32_t id)
{
	struct lockspace *ls;

	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->global_id == id)
			return ls;
	}
	return NULL;
}
632
/* Entry on fs_register_list: a lockspace name registered by a filesystem
   (e.g. gfs2) that wants notifications before recovery proceeds. */
struct fs_reg {
	struct list_head list;
	char name[DLM_LOCKSPACE_LEN+1];
};
637
/* Return 1 if a filesystem has registered the named lockspace, else 0. */
static int fs_register_check(char *name)
{
	struct fs_reg *fs;
	list_for_each_entry(fs, &fs_register_list, list) {
		if (!strcmp(name, fs->name))
			return 1;
	}
	return 0;
}
647
648 static int fs_register_add(char *name)
649 {
650 struct fs_reg *fs;
651
652 if (fs_register_check(name))
653 return -EALREADY;
654
655 fs = malloc(sizeof(struct fs_reg));
656 if (!fs)
657 return -ENOMEM;
658 strncpy(fs->name, name, DLM_LOCKSPACE_LEN);
659 list_add(&fs->list, &fs_register_list);
660 return 0;
661 }
662
/* Unregister the named lockspace: remove its entry from fs_register_list
   and free it.  No-op if the name is not registered. */
static void fs_register_del(char *name)
{
	struct fs_reg *fs;
	list_for_each_entry(fs, &fs_register_list, list) {
		if (!strcmp(name, fs->name)) {
			list_del(&fs->list);
			free(fs);
			return;
		}
	}
}
674
/* Map a DLM lock mode constant to its two-letter display name;
   "??" for unknown modes. */
const char *dlm_mode_str(int mode)
{
	switch (mode) {
	case DLM_LOCK_IV:
		return "IV";
	case DLM_LOCK_NL:
		return "NL";
	case DLM_LOCK_CR:
		return "CR";
	case DLM_LOCK_CW:
		return "CW";
	case DLM_LOCK_PR:
		return "PR";
	case DLM_LOCK_PW:
		return "PW";
	case DLM_LOCK_EX:
		return "EX";
	}
	return "??";
}
695
696 /* recv "online" (join) and "offline" (leave) messages from dlm via uevents */
697
698 #define MAX_LINE_UEVENT 4096
699
/*
 * Handle one kernel uevent from the netlink socket.  A dlm-subsystem
 * "online" event makes us create and join the lockspace; "offline" makes
 * us leave it.  Other events and malformed payloads are ignored.
 */
static void process_uevent(int ci)
{
	const char *uevent_vals[Env_Last];
	struct lockspace *ls;
	char buf[MAX_LINE_UEVENT];
	int rv;

	memset(buf, 0, sizeof(buf));

 retry_recv:
	rv = recv(client[ci].fd, &buf, sizeof(buf), 0);
	if (rv < 0) {
		if (errno == EINTR)
			goto retry_recv;
		if (errno != EAGAIN)
			log_error("uevent recv error %d errno %d", rv, errno);
		return;
	}

	/* guarantee the buffer ends in a NUL even if the event filled it */
	buf[MAX_LINE_UEVENT-1] = '\0';

	decode_uevent(buf, rv, uevent_vars, Env_Last, uevent_vals);

	if (!uevent_vals[Env_ACTION] ||
	    !uevent_vals[Env_DEVPATH] ||
	    !uevent_vals[Env_SUBSYSTEM] ||
	    !uevent_vals[Env_LOCKSPACE]) {
		log_debug("failed to validate uevent, action: %p, devpath: %p, subsystem: %p, lockspace: %p",
			  uevent_vals[Env_ACTION], uevent_vals[Env_DEVPATH],
			  uevent_vals[Env_SUBSYSTEM],
			  uevent_vals[Env_LOCKSPACE]);
		return;
	}

	if (strcmp(uevent_vals[Env_SUBSYSTEM], "dlm")) {
		log_debug("uevent looks like dlm but came not from dlm subsystem");
		return;
	}

	log_debug("uevent action: %s, devpath: %s, devpath: %s, lockspace: %s",
		  uevent_vals[Env_ACTION], uevent_vals[Env_SUBSYSTEM],
		  uevent_vals[Env_DEVPATH], uevent_vals[Env_LOCKSPACE]);

	rv = 0;

	if (!strcmp(uevent_vals[Env_ACTION], "online")) {
		/* join: the lockspace must not already exist */
		ls = find_ls(uevent_vals[Env_LOCKSPACE]);
		if (ls) {
			rv = -EEXIST;
			goto out;
		}

		ls = create_ls(uevent_vals[Env_LOCKSPACE]);
		if (!ls) {
			rv = -ENOMEM;
			goto out;
		}

		if (fs_register_check(ls->name))
			ls->fs_registered = 1;

		rv = dlm_join_lockspace(ls);
		if (rv) {
			/* ls already freed */
			goto out;
		}

	} else if (!strcmp(uevent_vals[Env_ACTION], "offline")) {
		ls = find_ls(uevent_vals[Env_LOCKSPACE]);
		if (!ls) {
			rv = -ENOENT;
			goto out;
		}

		dlm_leave_lockspace(ls, uevent_vals[Env_RELEASE_RECOVER]);
	}
 out:
	if (rv < 0)
		log_error("%s action: %s, devpath: %s, devpath: %s, lockspace: %s - error %d errno %d",
			  __func__, uevent_vals[Env_ACTION],
			  uevent_vals[Env_SUBSYSTEM], uevent_vals[Env_DEVPATH],
			  uevent_vals[Env_LOCKSPACE], rv, errno);
}
783
/*
 * Create and bind the netlink socket that receives kernel uevents
 * (NETLINK_KOBJECT_UEVENT, multicast group 1).  Returns the socket fd,
 * or a negative value on error.
 */
static int setup_uevent(void)
{
	struct sockaddr_nl snl;
	int s, rv, val;

	s = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
	if (s < 0) {
		log_error("uevent netlink socket");
		return s;
	}

	/* man 7 netlink:
	 *
	 * However, reliable transmissions from kernel to user are impossible in
	 * any case. The kernel can't send a netlink message if the socket buffer
	 * is full: the message will be dropped and the kernel and the user-space
	 * process will no longer have the same view of kernel state. It is up to
	 * the application to detect when this happens (via the ENOBUFS error
	 * returned by recvmsg(2)) and resynchronize.
	 *
	 * To avoid ENOBUFS errors we set the netlink socket to realiable
	 * transmission mode which can be turned on by NETLINK_NO_ENOBUFS
	 * option. This option is available since kernel 2.6.30. If this setting
	 * fails we fallback to increase the netlink socket receive buffer.
	 */
	val = 1;
	rv = setsockopt(s, SOL_NETLINK, NETLINK_NO_ENOBUFS, &val, sizeof(val));
	if (rv == -1) {
		/* Fallback handling if NETLINK_NO_ENOBUFS fails to set.
		 *
		 * To prevent ENOBUFS errors we just set the receive buffer to
		 * two megabyte as other applications do it. This will not
		 * ensure that we never receive ENOBUFS but it's more unlikely.
		 */
		val = DEFAULT_NETLINK_RCVBUF;
		log_error("uevent netlink NETLINK_NO_ENOBUFS errno %d, will set rcvbuf to %d bytes", errno, val);

		/* SO_RCVBUF is capped by rmem_max; SO_RCVBUFFORCE can exceed
		   it when we have CAP_NET_ADMIN; try both */
		rv = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
		if (rv == -1)
			log_error("uevent netlink SO_RCVBUF errno %d", errno);

		rv = setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val));
		if (rv == -1)
			log_error("uevent netlink SO_RCVBUFFORCE errno %d", errno);
	}

	memset(&snl, 0, sizeof(snl));
	snl.nl_family = AF_NETLINK;
	snl.nl_pid = getpid();
	snl.nl_groups = 1;

	rv = bind(s, (struct sockaddr *) &snl, sizeof(snl));
	if (rv < 0) {
		log_error("uevent bind error %d errno %d", rv, errno);
		close(s);
		return rv;
	}

	return s;
}
844
/* Copy a (possibly length-limited) name into the header.  The strncpy
   truncation warning is suppressed deliberately: h->name's terminator is
   provided by the memset in init_header. */
static inline void init_header_name(struct dlmc_header *h,
				    const char *name, size_t len)
{
#pragma GCC diagnostic push
#if __GNUC__ >= 8
#pragma GCC diagnostic ignored "-Wstringop-truncation"
#endif
	strncpy(h->name, name, len);
#pragma GCC diagnostic pop
}
855
/*
 * Fill in a reply/query header: magic, version, total length
 * (header + extra_len payload), command, and result (in the data field).
 * name is optional and truncated to DLM_LOCKSPACE_LEN.
 */
static void init_header(struct dlmc_header *h, int cmd, char *name, int result,
			int extra_len)
{
	memset(h, 0, sizeof(struct dlmc_header));

	h->magic = DLMC_MAGIC;
	h->version = DLMC_VERSION;
	h->len = sizeof(struct dlmc_header) + extra_len;
	h->command = cmd;
	h->data = result;

	if (name)
		init_header_name(h, name, DLM_LOCKSPACE_LEN);
}
870
871 static char copy_buf[LOG_DUMP_SIZE];
872
/*
 * Format the run_ops list into buf for "dump run" queries; *len is set
 * to the number of bytes written.  buf is assumed to hold at least
 * LOG_DUMP_SIZE bytes (callers pass copy_buf); output is truncated at
 * that limit.
 */
static void copy_run_list(char *buf, int *len)
{
	char tmp[1024];
	struct run *run;
	int ret, pos = 0;

	list_for_each_entry(run, &run_ops, list) {
		memset(tmp, 0, sizeof(tmp));

		snprintf(tmp, 1024, "run_uuid %s start_nodeid %d local_pid %d local_result %d need_replies %d reply_count %d fail_count %d flags %x\n",
			 run->uuid, run->info.start_nodeid, run->info.local_pid,
			 run->info.local_result, run->info.need_replies,
			 run->info.reply_count, run->info.fail_count, run->info.flags);

		if (pos + strlen(tmp) >= LOG_DUMP_SIZE)
			break;

		/* tmp is bounded above, so this sprintf cannot overrun */
		ret = sprintf(buf + pos, "%s", tmp);
		pos += ret;

		memset(tmp, 0, sizeof(tmp));

		snprintf(tmp, 1024, "run_command %.1000s\n", run->command);

		if (pos + strlen(tmp) >= LOG_DUMP_SIZE)
			break;

		ret = sprintf(buf + pos, "%s", tmp);
		pos += ret;

		/* TODO: dump node results */
	}

	*len = pos;
}
908
/* Answer a DLMC_CMD_DUMP_RUN query: header first, then the formatted
   run list if non-empty.  MSG_NOSIGNAL avoids SIGPIPE on a dead client. */
static void query_dump_run(int fd)
{
	struct dlmc_header h;
	int len = 0;

	copy_run_list(copy_buf, &len);

	init_header(&h, DLMC_CMD_DUMP_RUN, NULL, 0, len);
	send(fd, &h, sizeof(h), MSG_NOSIGNAL);

	if (len)
		send(fd, copy_buf, len, MSG_NOSIGNAL);
}
922
/* Answer a DLMC_CMD_DUMP_DEBUG query with the in-memory debug log. */
static void query_dump_debug(int fd)
{
	struct dlmc_header h;
	int len = 0;

	copy_log_dump(copy_buf, &len);

	init_header(&h, DLMC_CMD_DUMP_DEBUG, NULL, 0, len);
	send(fd, &h, sizeof(h), MSG_NOSIGNAL);

	if (len)
		send(fd, copy_buf, len, MSG_NOSIGNAL);
}
936
/*
 * Format every dlm option as "name=value" plus an origin tag
 * (set_config / cli option / dlm.conf) into buf for "dump config"
 * queries; *len receives the byte count.  Output is truncated at
 * LOG_DUMP_SIZE (buf is copy_buf in practice).
 */
static void copy_options(char *buf, int *len)
{
	struct dlm_option *o;
	char tmp[256];
	int i, ret, pos = 0, l = 0;

	for (i = 0; i < dlm_options_max; i++) {
		o = &dlm_options[i];

		memset(tmp, 0, sizeof(tmp));

		/* value formatting depends on the option's argument type */
		if (o->req_arg == req_arg_str)
			l = snprintf(tmp, 240, "%s=%s", o->name, o->use_str);
		else if (o->req_arg == req_arg_uint)
			l = snprintf(tmp, 240, "%s=%u", o->name, o->use_uint);
		else
			l = snprintf(tmp, 240, "%s=%d", o->name, o->use_int);

		if (o->dynamic_set)
			snprintf(tmp + l, 15, " (set_config)\n");
		else if (o->cli_set)
			snprintf(tmp + l, 15, " (cli option)\n");
		else if (o->file_set)
			snprintf(tmp + l, 15, " (dlm.conf)\n");
		else
			snprintf(tmp + l, 15, "\n");

		if (pos + strlen(tmp) >= LOG_DUMP_SIZE)
			break;

		ret = sprintf(buf + pos, "%s", tmp);
		pos += ret;
	}

	*len = pos;
}
973
/* Answer a DLMC_CMD_DUMP_CONFIG query with the formatted option list. */
static void query_dump_config(int fd)
{
	struct dlmc_header h;
	int len = 0;

	copy_options(copy_buf, &len);

	init_header(&h, DLMC_CMD_DUMP_CONFIG, NULL, 0, len);
	send(fd, &h, sizeof(h), MSG_NOSIGNAL);

	if (len)
		send(fd, copy_buf, len, MSG_NOSIGNAL);
}
987
/* Answer a DLMC_CMD_DUMP_LOG_PLOCK query with the plock debug log.
   NOTE(review): the reply header is tagged DLMC_CMD_DUMP_DEBUG rather
   than DUMP_LOG_PLOCK — verify the client side expects this before
   changing it. */
static void query_dump_log_plock(int fd)
{
	struct dlmc_header h;
	int len = 0;

	copy_log_dump_plock(copy_buf, &len);

	init_header(&h, DLMC_CMD_DUMP_DEBUG, NULL, 0, len);
	send(fd, &h, sizeof(h), MSG_NOSIGNAL);

	if (len)
		send(fd, copy_buf, len, MSG_NOSIGNAL);
}
1001
/* Answer a DLMC_CMD_DUMP_PLOCKS query for one lockspace; the result code
   (-ENOENT when the lockspace is unknown) travels in the header. */
static void query_dump_plocks(int fd, char *name)
{
	struct lockspace *ls;
	struct dlmc_header h;
	int len = 0;
	int rv;

	ls = find_ls(name);
	if (!ls) {
		rv = -ENOENT;
		goto out;
	}

	rv = copy_plock_state(ls, copy_buf, &len);
 out:
	init_header(&h, DLMC_CMD_DUMP_PLOCKS, name, rv, len);
	send(fd, &h, sizeof(h), MSG_NOSIGNAL);

	if (len)
		send(fd, copy_buf, len, MSG_NOSIGNAL);
}
1023
1024 /* combines a header and the data and sends it back to the client in
1025 a single do_write() call */
1026
1027 static void do_reply(int fd, int cmd, char *name, int result, int option,
1028 char *buf, int buflen)
1029 {
1030 struct dlmc_header *h;
1031 char *reply;
1032 int reply_len;
1033
1034 reply_len = sizeof(struct dlmc_header) + buflen;
1035 reply = malloc(reply_len);
1036 if (!reply)
1037 return;
1038 memset(reply, 0, reply_len);
1039 h = (struct dlmc_header *)reply;
1040
1041 init_header(h, cmd, name, result, buflen);
1042 h->option = option;
1043
1044 if (buf && buflen)
1045 memcpy(reply + sizeof(struct dlmc_header), buf, buflen);
1046
1047 do_write(fd, reply, reply_len);
1048
1049 free(reply);
1050 }
1051
1052 static void query_lockspace_info(int fd, char *name)
1053 {
1054 struct lockspace *ls;
1055 struct dlmc_lockspace lockspace;
1056 int rv;
1057
1058 ls = find_ls(name);
1059 if (!ls) {
1060 rv = -ENOENT;
1061 goto out;
1062 }
1063
1064 memset(&lockspace, 0, sizeof(lockspace));
1065
1066 rv = set_lockspace_info(ls, &lockspace);
1067 out:
1068 do_reply(fd, DLMC_CMD_LOCKSPACE_INFO, name, rv, 0,
1069 (char *)&lockspace, sizeof(lockspace));
1070 }
1071
1072 static void query_node_info(int fd, char *name, int nodeid)
1073 {
1074 struct lockspace *ls;
1075 struct dlmc_node node;
1076 int rv;
1077
1078 ls = find_ls(name);
1079 if (!ls) {
1080 rv = -ENOENT;
1081 goto out;
1082 }
1083
1084 memset(&node, 0, sizeof(node));
1085
1086 rv = set_node_info(ls, nodeid, &node);
1087 out:
1088 do_reply(fd, DLMC_CMD_NODE_INFO, name, rv, 0,
1089 (char *)&node, sizeof(node));
1090 }
1091
1092 static void query_lockspaces(int fd)
1093 {
1094 int ls_count = 0;
1095 struct dlmc_lockspace *lss = NULL;
1096 int rv, result;
1097
1098 rv = set_lockspaces(&ls_count, &lss);
1099 if (rv < 0) {
1100 result = rv;
1101 ls_count = 0;
1102 goto out;
1103 }
1104
1105 result = ls_count;
1106 out:
1107 do_reply(fd, DLMC_CMD_LOCKSPACES, NULL, result, 0,
1108 (char *)lss, ls_count * sizeof(struct dlmc_lockspace));
1109
1110 if (lss)
1111 free(lss);
1112 }
1113
/*
 * Answer a DLMC_CMD_LOCKSPACE_NODES query: up to max dlmc_node structs
 * for the named lockspace.  The header result is the node count,
 * -ENOENT for an unknown lockspace, or -E2BIG when the caller's max was
 * too small (in which case max entries are still returned).
 */
static void query_lockspace_nodes(int fd, char *name, int option, int max)
{
	struct lockspace *ls;
	int node_count = 0;
	struct dlmc_node *nodes = NULL;
	int rv, result;

	ls = find_ls(name);
	if (!ls) {
		result = -ENOENT;
		node_count = 0;
		goto out;
	}

	rv = set_lockspace_nodes(ls, option, &node_count, &nodes);
	if (rv < 0) {
		result = rv;
		node_count = 0;
		goto out;
	}

	/* node_count is the number of structs copied/returned; the caller's
	   max may be less than that, in which case we copy as many as they
	   asked for and return -E2BIG */

	if (node_count > max) {
		result = -E2BIG;
		node_count = max;
	} else {
		result = node_count;
	}
 out:
	do_reply(fd, DLMC_CMD_LOCKSPACE_NODES, name, result, 0,
		 (char *)nodes, node_count * sizeof(struct dlmc_node));

	if (nodes)
		free(nodes);
}
1152
/*
 * Workfn for client connections accepted by process_listener: read one
 * dlmc_header (plus optional variable-length payload) and dispatch on
 * h.command.  Replies, where a command produces one, go back on the same
 * fd via do_reply().
 */
static void process_connection(int ci)
{
	struct dlmc_header h;
	char uuid_str[RUN_UUID_LEN];
	char *extra = NULL;
	int rv, extra_len = 0;
	struct lockspace *ls;
	struct dlmc_run_check_state state;

	memset(uuid_str, 0, sizeof(uuid_str));

	rv = do_read(client[ci].fd, &h, sizeof(h));
	if (rv < 0) {
		log_debug("connection %d read error %d", ci, rv);
		goto out;
	}

	if (h.magic != DLMC_MAGIC) {
		log_debug("connection %d magic error %x", ci, h.magic);
		goto out;
	}

	/* only the major (upper 16 bits) of the version must match */
	if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
		log_debug("connection %d version error %x", ci, h.version);
		goto out;
	}

	/* h.len beyond the header means a trailing payload follows */
	if (h.len > sizeof(h)) {
		extra_len = h.len - sizeof(h);
		extra = malloc(extra_len);
		if (!extra) {
			log_error("process_connection no mem %d", extra_len);
			goto out;
		}
		memset(extra, 0, extra_len);

		rv = do_read(client[ci].fd, extra, extra_len);
		if (rv < 0) {
			log_debug("connection %d extra read error %d", ci, rv);
			goto out;
		}
	}

	switch (h.command) {
	case DLMC_CMD_FENCE_ACK:
		/* h.name carries the nodeid as a decimal string */
		fence_ack_node(atoi(h.name));
		break;

	case DLMC_CMD_FS_REGISTER:
		if (opt(enable_fscontrol_ind)) {
			rv = fs_register_add(h.name);
			ls = find_ls(h.name);
			if (ls)
				ls->fs_registered = 1;
		} else {
			rv = -EOPNOTSUPP;
		}
		do_reply(client[ci].fd, DLMC_CMD_FS_REGISTER, h.name, rv, 0,
			 NULL, 0);
		break;

	case DLMC_CMD_FS_UNREGISTER:
		fs_register_del(h.name);
		ls = find_ls(h.name);
		if (ls)
			ls->fs_registered = 0;
		break;

	case DLMC_CMD_FS_NOTIFIED:
		ls = find_ls(h.name);
		if (ls)
			rv = set_fs_notified(ls, h.data);
		else
			rv = -ENOENT;
		/* pass back the nodeid provided by caller in option field */
		do_reply(client[ci].fd, DLMC_CMD_FS_NOTIFIED, h.name, rv,
			 h.data, NULL, 0);
		break;

	case DLMC_CMD_RUN_START:
		if (!extra_len)
			rv = -EINVAL;
		else
			rv = start_run_operation(extra, extra_len, h.data, h.flags, uuid_str);
		do_reply(client[ci].fd, DLMC_CMD_RUN_START, uuid_str, rv, 0, NULL, 0);
		/* one-shot request: close the connection after replying */
		client_dead(ci);
		break;

	case DLMC_CMD_RUN_CHECK:
		memset(&state, 0, sizeof(state));

		rv = check_run_operation(h.name, h.flags, &state);

		do_reply(client[ci].fd, DLMC_CMD_RUN_CHECK, NULL, rv, 0, (char *)&state, sizeof(state));
		/* dlmc_run_check may retry checks on the same connection */
		break;

#if 0
	case DLMC_CMD_DEADLOCK_CHECK:
		ls = find_ls(h.name);
		if (ls)
			send_cycle_start(ls);
		client_dead(ci);
		break;
#endif
	case DLMC_CMD_RELOAD_CONFIG:
		set_opt_file(1);
		break;

	case DLMC_CMD_SET_CONFIG:
		if (extra_len)
			set_opt_online(extra, extra_len);
		break;

	default:
		log_error("process_connection %d unknown command %d",
			  ci, h.command);
	}
 out:
	if (extra)
		free(extra);
}
1275
/* Workfn for the listening socket: accept a new client connection and
   add it to the poll loop with process_connection as its workfn. */
static void process_listener(int ci)
{
	int fd, i;

	fd = accept(client[ci].fd, NULL, NULL);
	if (fd < 0) {
		log_error("process_listener: accept error %d %d", fd, errno);
		return;
	}

	i = client_add(fd, process_connection, NULL);

	log_debug("client connection %d fd %d", i, fd);
}
1290
/*
 * Create a listening unix stream socket in the Linux abstract namespace
 * (sun_path[0] left '\0', so no filesystem entry is created).
 * Returns the listening fd, or a negative value on error.
 */
static int setup_listener(const char *sock_path)
{
	struct sockaddr_un addr;
	socklen_t addrlen;
	int rv, s;

	/* we listen for new client connections on socket s */

	s = socket(AF_LOCAL, SOCK_STREAM, 0);
	if (s < 0) {
		log_error("socket error %d %d", s, errno);
		return s;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_LOCAL;
	/* abstract socket: name starts after the leading NUL byte */
	strcpy(&addr.sun_path[1], sock_path);
	addrlen = sizeof(sa_family_t) + strlen(addr.sun_path+1) + 1;

	rv = bind(s, (struct sockaddr *) &addr, addrlen);
	if (rv < 0) {
		log_error("bind error %d %d", rv, errno);
		close(s);
		return rv;
	}

	rv = listen(s, 5);
	if (rv < 0) {
		log_error("listen error %d %d", rv, errno);
		close(s);
		return rv;
	}
	return s;
}
1325
/* Serialize the query thread against the main thread; held around all
   query handling and around the main loop's client/state processing. */
static void query_lock(void)
{
	pthread_mutex_lock(&query_mutex);
}
1330
/* Release the lock taken by query_lock(). */
static void query_unlock(void)
{
	pthread_mutex_unlock(&query_mutex);
}
1335
/* This is a thread, so we have to be careful, don't call log_ functions.
   We need a thread to process queries because the main thread may block
   for long periods when writing to sysfs to stop dlm-kernel (and maybe
   other places). */
1341 static void *process_queries(void *arg)
1342 {
1343 struct dlmc_header h;
1344 int s, f, rv;
1345
1346 rv = setup_listener(DLMC_QUERY_SOCK_PATH);
1347 if (rv < 0)
1348 return NULL;
1349
1350 s = rv;
1351
1352 for (;;) {
1353 f = accept(s, NULL, NULL);
1354 if (f < 0)
1355 return NULL;
1356
1357 rv = do_read(f, &h, sizeof(h));
1358 if (rv < 0) {
1359 goto out;
1360 }
1361
1362 if (h.magic != DLMC_MAGIC) {
1363 goto out;
1364 }
1365
1366 if ((h.version & 0xFFFF0000) != (DLMC_VERSION & 0xFFFF0000)) {
1367 goto out;
1368 }
1369
1370 query_lock();
1371
1372 switch (h.command) {
1373 case DLMC_CMD_DUMP_DEBUG:
1374 query_dump_debug(f);
1375 break;
1376 case DLMC_CMD_DUMP_CONFIG:
1377 query_dump_config(f);
1378 break;
1379 case DLMC_CMD_DUMP_LOG_PLOCK:
1380 query_dump_log_plock(f);
1381 break;
1382 case DLMC_CMD_DUMP_PLOCKS:
1383 query_dump_plocks(f, h.name);
1384 break;
1385 case DLMC_CMD_LOCKSPACE_INFO:
1386 query_lockspace_info(f, h.name);
1387 break;
1388 case DLMC_CMD_NODE_INFO:
1389 query_node_info(f, h.name, h.data);
1390 break;
1391 case DLMC_CMD_LOCKSPACES:
1392 query_lockspaces(f);
1393 break;
1394 case DLMC_CMD_LOCKSPACE_NODES:
1395 query_lockspace_nodes(f, h.name, h.option, h.data);
1396 break;
1397 case DLMC_CMD_DUMP_STATUS:
1398 send_state_daemon(f);
1399 send_state_daemon_nodes(f);
1400 send_state_startup_nodes(f);
1401 break;
1402 case DLMC_CMD_DUMP_RUN:
1403 query_dump_run(f);
1404 break;
1405 default:
1406 break;
1407 }
1408 query_unlock();
1409
1410 out:
1411 close(f);
1412 }
1413 }
1414
1415 static int setup_queries(void)
1416 {
1417 int rv;
1418
1419 pthread_mutex_init(&query_mutex, NULL);
1420
1421 rv = pthread_create(&query_thread, NULL, process_queries, NULL);
1422 if (rv < 0) {
1423 log_error("can't create query thread");
1424 return rv;
1425 }
1426 return 0;
1427 }
1428
/* The dlm in kernels before 2.6.28 does not have the monitor device.  We
   keep this fd open as long as we're running.  If we exit/terminate while
   lockspaces exist in the kernel, the kernel will detect a close on this
   fd and stop the lockspaces. */
1433
/* Open the dlm-monitor misc device and hold it open for the life of the
   daemon; see the comment above for why.  monitor_minor is zero when the
   kernel lacks the device (pre-2.6.28), in which case this is a no-op.
   An open failure is tolerated: the fd is only logged, never checked. */
static void setup_monitor(void)
{
	if (!monitor_minor)
		return;

	kernel_monitor_fd = open("/dev/misc/dlm-monitor", O_RDONLY);
	log_debug("/dev/misc/dlm-monitor fd %d", kernel_monitor_fd);
}
1442
/* Dead-fd callback for cluster connections: request daemon shutdown.
   The error is logged only on the first invocation (while cluster_down
   is still 0) to avoid repeating it for every dead cluster fd. */
void cluster_dead(int ci)
{
	if (!cluster_down)
		log_error("cluster is down, exiting");
	daemon_quit = 1;
	cluster_down = 1;
}
1450
/* Main daemon loop: run all setup steps in dependency order, then poll
   registered client fds, dispatching their work/dead callbacks and doing
   periodic fencing/lockspace/plock processing until shutdown.
   Returns the last setup error (negative) or 0 on clean shutdown. */

static int loop(void)
{
	struct lockspace *ls;
	int poll_timeout = -1;
	int rv, i;
	void (*workfn) (int ci);
	void (*deadfn) (int ci);

	rv = setup_queries();
	if (rv < 0)
		goto out;

	/* socket for dlmc command connections */
	rv = setup_listener(DLMC_SOCK_PATH);
	if (rv < 0)
		goto out;
	client_add(rv, process_listener, NULL);

	/* setup_cluster_cfg may return 0 (nothing to poll) or an fd */
	rv = setup_cluster_cfg();
	if (rv < 0)
		goto out;
	if (rv > 0)
		client_add(rv, process_cluster_cfg, cluster_dead);

	rv = check_uncontrolled_lockspaces();
	if (rv < 0)
		goto out;

	/*
	 * unfence needs to happen after checking for uncontrolled dlm kernel
	 * state (for which we are probably currently fenced, the state must
	 * be cleared by a reboot). unfence needs to happen before joining
	 * the daemon cpg, after which it needs to be possible for someone to
	 * fence us.
	 */
	rv = unfence_node(our_nodeid);
	if (rv < 0)
		goto out;

	rv = setup_node_config();
	if (rv < 0)
		goto out;

	rv = setup_cluster();
	if (rv < 0)
		goto out;
	client_add(rv, process_cluster, cluster_dead);

	rv = setup_misc_devices();
	if (rv < 0)
		goto out;

	rv = setup_configfs_options();
	if (rv < 0)
		goto out;

	setup_monitor();

	rv = setup_configfs_members(); /* calls update_cluster() */
	if (rv < 0)
		goto out;

	rv = setup_uevent();
	if (rv < 0)
		goto out;
	client_add(rv, process_uevent, NULL);

	rv = setup_cpg_daemon();
	if (rv < 0)
		goto out;
	client_add(rv, process_cpg_daemon, cluster_dead);

	rv = set_protocol();
	if (rv < 0)
		goto out;

#if 0
	if (opt(enable_deadlk_ind)) {
		rv = setup_netlink();
		if (rv < 0)
			goto out;
		client_add(rv, process_netlink, NULL);

		setup_deadlock();
	}
#endif

	rv = setup_plocks();
	if (rv < 0)
		goto out;
	plock_fd = rv;
	plock_ci = client_add(rv, process_plocks, NULL);

	if (opt(enable_helper_ind))
		helper_ci = client_add(helper_status_fd, process_helper, helper_dead);

#ifdef USE_SD_NOTIFY
	sd_notify(0, "READY=1");
#endif

	/* We want to wait for our protocol to be set before
	   we start to process fencing. */
	daemon_fence_allow = 1;

	for (;;) {
		rv = poll(pollfd, client_maxi + 1, poll_timeout);
		if (rv == -1 && errno == EINTR) {
			/* a signal set daemon_quit; only exit once all
			   lockspaces are gone, otherwise refuse and keep
			   running */
			if (daemon_quit && list_empty(&lockspaces)) {
				rv = 0;
				goto out;
			}
			if (daemon_quit) {
				log_error("shutdown ignored, active lockspaces");
				daemon_quit = 0;
			}
			continue;
		}
		if (rv < 0) {
			log_error("poll errno %d", errno);
			goto out;
		}

		query_lock();

		/* dispatch ready fds; dead callbacks on error/hup */
		for (i = 0; i <= client_maxi; i++) {
			if (client[i].fd < 0)
				continue;
			if (pollfd[i].revents & POLLIN) {
				workfn = client[i].workfn;
				workfn(i);
			}
			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
				deadfn = client[i].deadfn;
				deadfn(i);
			}
		}
		query_unlock();

		if (daemon_quit)
			break;

		query_lock();

		/* any pending periodic work switches poll from blocking
		   (-1) to a 1 second timeout */
		poll_timeout = -1;

		if (retry_fencing) {
			process_fencing_changes();
			poll_timeout = 1000;
		}

		if (poll_lockspaces || poll_fs) {
			process_lockspace_changes();
			poll_timeout = 1000;
		}

		if (poll_ignore_plock) {
			if (!limit_plocks()) {
				poll_ignore_plock = 0;
				client_back(plock_ci, plock_fd);
			}
			poll_timeout = 1000;
		}

		if (poll_drop_plock) {
			drop_resources_all();
			if (poll_drop_plock)
				poll_timeout = 1000;
		}

		query_unlock();
	}
 out:
	log_debug("shutdown");
	close_plocks();
	close_cpg_daemon();
	clear_configfs();
	close_cluster();
	close_cluster_cfg();

	list_for_each_entry(ls, &lockspaces, list)
		log_error("abandoned lockspace %s", ls->name);

	/* must be end */
	close_logging();
	return rv;
}
1636
1637 static int lockfile(const char *name)
1638 {
1639 char path[PATH_MAX];
1640 char buf[16];
1641 struct flock lock;
1642 mode_t old_umask;
1643 int fd, rv;
1644
1645 old_umask = umask(0022);
1646 rv = mkdir(SYS_VARDIR, 0775);
1647 if (rv < 0 && errno != EEXIST) {
1648 umask(old_umask);
1649 return rv;
1650 }
1651
1652 rv = mkdir(SYS_RUNDIR, 0775);
1653 if (rv < 0 && errno != EEXIST) {
1654 umask(old_umask);
1655 return rv;
1656 }
1657
1658 rv = mkdir(RUNDIR, 0775);
1659 if (rv < 0 && errno != EEXIST) {
1660 umask(old_umask);
1661 return rv;
1662 }
1663 umask(old_umask);
1664
1665 snprintf(path, PATH_MAX, "%s/%s", RUNDIR, name);
1666
1667 fd = open(path, O_CREAT|O_WRONLY|O_CLOEXEC, 0644);
1668 if (fd < 0) {
1669 log_error("lockfile open error %s: %s",
1670 path, strerror(errno));
1671 return -1;
1672 }
1673
1674 lock.l_type = F_WRLCK;
1675 lock.l_start = 0;
1676 lock.l_whence = SEEK_SET;
1677 lock.l_len = 0;
1678
1679 rv = fcntl(fd, F_SETLK, &lock);
1680 if (rv < 0) {
1681 log_error("lockfile setlk error %s: %s",
1682 path, strerror(errno));
1683 goto fail;
1684 }
1685
1686 rv = ftruncate(fd, 0);
1687 if (rv < 0) {
1688 log_error("lockfile truncate error %s: %s",
1689 path, strerror(errno));
1690 goto fail;
1691 }
1692
1693 memset(buf, 0, sizeof(buf));
1694 snprintf(buf, sizeof(buf), "%d\n", getpid());
1695
1696 rv = write(fd, buf, strlen(buf));
1697 if (rv <= 0) {
1698 log_error("lockfile write error %s: %s",
1699 path, strerror(errno));
1700 goto fail;
1701 }
1702
1703 return fd;
1704 fail:
1705 close(fd);
1706 return -1;
1707 }
1708
1709 static void unlink_lockfile(int fd, const char *dir, const char *name)
1710 {
1711 char path[PATH_MAX];
1712
1713 snprintf(path, PATH_MAX, "%s/%s", dir, name);
1714 unlink(path);
1715 close(fd);
1716 }
1717
1718 static const char *req_arg_s(int a)
1719 {
1720 switch (a) {
1721 case no_arg:
1722 return "";
1723 case req_arg_bool:
1724 return "0|1";
1725 case req_arg_int:
1726 return "<int>";
1727 case req_arg_str:
1728 return "<str>";
1729 default:
1730 return "<arg>";
1731 }
1732 }
1733
1734 static void print_usage(void)
1735 {
1736 struct dlm_option *o;
1737 int i;
1738
1739 printf("Usage:\n");
1740 printf("\n");
1741 printf("dlm_controld [options]\n");
1742 printf("\n");
1743 printf("Option [arg]\n");
1744 printf("Description [default]\n");
1745 printf("\n");
1746
1747 for (i = 0; i < dlm_options_max; i++) {
1748 o = &dlm_options[i];
1749
1750 /* don't advertise options with no description */
1751 if (!strlen(o->desc))
1752 continue;
1753
1754 printf(" --%s", o->name);
1755
1756 if (o->letter) {
1757 printf(" | -%c", o->letter);
1758 if (o->req_arg)
1759 printf(" %s", req_arg_s(o->req_arg));
1760 } else {
1761 if (o->req_arg)
1762 printf(" %s", req_arg_s(o->req_arg));
1763 }
1764
1765 printf("\n");
1766
1767 printf(" %s", o->desc);
1768
1769 if (o->req_arg == req_arg_str)
1770 printf(" [%s]\n", o->default_str ? o->default_str : "");
1771 else if (o->req_arg == req_arg_int)
1772 printf(" [%d]\n", o->default_int);
1773 else if (o->req_arg == req_arg_bool)
1774 printf(" [%d]\n", o->default_int);
1775 else if (o->req_arg == req_arg_uint)
1776 printf(" [%u]\n", o->default_uint);
1777 else if (o->req_arg == no_arg && !o->default_int)
1778 printf(" [0]\n");
1779 else
1780 printf("\n");
1781
1782 printf("\n");
1783 }
1784 }
1785
1786 static void set_opt_default(int ind, const char *name, char letter, int arg_type,
1787 int default_int, const char *default_str,
1788 unsigned int default_uint, char reload, const char *desc)
1789 {
1790 dlm_options[ind].name = name;
1791 dlm_options[ind].letter = letter;
1792 dlm_options[ind].req_arg = arg_type;
1793 dlm_options[ind].desc = desc;
1794 dlm_options[ind].reload = reload;
1795 dlm_options[ind].default_int = default_int;
1796 dlm_options[ind].default_str = default_str;
1797 dlm_options[ind].default_uint = default_uint;
1798 dlm_options[ind].use_int = default_int;
1799 dlm_options[ind].use_str = (char *)default_str;
1800 dlm_options[ind].use_uint = default_uint;
1801 }
1802
/* Populate the dlm_options table with every supported option's name,
   short letter, argument type, defaults, reload flag and description.
   Arguments to set_opt_default are:
   (index, name, letter, arg_type, default_int, default_str,
    default_uint, reload, desc); an empty desc hides the option from
   print_usage. */
static void set_opt_defaults(void)
{
	set_opt_default(daemon_debug_ind,
			"daemon_debug", 'D', req_arg_bool,
			0, NULL, 0, 1,
			"enable debugging to stderr and don't fork");

	set_opt_default(foreground_ind,
			"foreground", '\0', req_arg_bool,
			0, NULL, 0, 0,
			"don't fork");

	set_opt_default(log_debug_ind,
			"log_debug", 'K', req_arg_bool,
			0, NULL, 0, 1,
			"enable kernel dlm debugging messages");

	set_opt_default(protocol_ind,
			"protocol", 'r', req_arg_str,
			0, "tcp", 0, 0,
			"dlm kernel lowcomms protocol: tcp, sctp");

	set_opt_default(port_ind,
			"port", 'R', req_arg_uint,
			-1, NULL, 21064, 0,
			"dlm kernel lowcomms protocol port");

	set_opt_default(mark_ind,
			"mark", '\0', req_arg_uint,
			0, NULL, 0, 0,
			"set mark value for DLM if not explicit by nodeid specified");

	set_opt_default(debug_logfile_ind,
			"debug_logfile", 'L', req_arg_bool,
			0, NULL, 0, 1,
			"write debugging to log file");

	set_opt_default(enable_fscontrol_ind,
			"enable_fscontrol", '\0', req_arg_bool,
			0, NULL, 0, 0,
			""); /* do not advertise */

	set_opt_default(enable_plock_ind,
			"enable_plock", 'p', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable posix lock support for cluster fs");

	set_opt_default(plock_debug_ind,
			"plock_debug", 'P', req_arg_bool,
			0, NULL, 0, 1,
			"enable plock debugging");

	set_opt_default(plock_rate_limit_ind,
			"plock_rate_limit", 'l', req_arg_int,
			0, NULL, 0, 1,
			"limit rate of plock operations (0 for none)");

	set_opt_default(plock_ownership_ind,
			"plock_ownership", 'o', req_arg_bool,
			0, NULL, 0, 0,
			"enable/disable plock ownership (do not enable if threads do fcntl locking)");

	set_opt_default(drop_resources_time_ind,
			"drop_resources_time", 't', req_arg_int,
			10000, NULL, 0, 1,
			"plock ownership drop resources time (milliseconds)");

	set_opt_default(drop_resources_count_ind,
			"drop_resources_count", 'c', req_arg_int,
			10, NULL, 0, 1,
			"plock ownership drop resources count");

	set_opt_default(drop_resources_age_ind,
			"drop_resources_age", 'a', req_arg_int,
			10000, NULL, 0, 1,
			"plock ownership drop resources age (milliseconds)");

	set_opt_default(post_join_delay_ind,
			"post_join_delay", 'j', req_arg_int,
			30, NULL, 0, 1,
			"seconds to delay fencing after cluster join");

	set_opt_default(enable_fencing_ind,
			"enable_fencing", 'f', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable fencing");

	set_opt_default(enable_concurrent_fencing_ind,
			"enable_concurrent_fencing", '\0', req_arg_bool,
			0, NULL, 0, 0,
			"enable/disable concurrent fencing");

	set_opt_default(enable_startup_fencing_ind,
			"enable_startup_fencing", 's', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable startup fencing");

	set_opt_default(repeat_failed_fencing_ind,
			"repeat_failed_fencing", '\0', req_arg_bool,
			1, NULL, 0, 1,
			"enable/disable retrying after fencing fails");

	set_opt_default(enable_quorum_fencing_ind,
			"enable_quorum_fencing", 'q', req_arg_bool,
			1, NULL, 0, 1,
			"enable/disable quorum requirement for fencing");

	set_opt_default(enable_quorum_lockspace_ind,
			"enable_quorum_lockspace", '\0', req_arg_bool,
			1, NULL, 0, 1,
			"enable/disable quorum requirement for lockspace operations");

	set_opt_default(enable_helper_ind,
			"enable_helper", '\0', req_arg_bool,
			1, NULL, 0, 0,
			"enable/disable helper process for running commands");

	set_opt_default(help_ind,
			"help", 'h', no_arg,
			-1, NULL, 0, 0,
			"print this help, then exit");

	set_opt_default(version_ind,
			"version", 'V', no_arg,
			-1, NULL, 0, 0,
			"Print program version information, then exit");
}
1930
1931 int get_ind_name(char *s)
1932 {
1933 char name[PATH_MAX];
1934 char *p = s;
1935 int i;
1936
1937 memset(name, 0, sizeof(name));
1938
1939 for (i = 0; i < strlen(s); i++) {
1940 if (*p == '=')
1941 break;
1942 if (*p == ' ')
1943 break;
1944 name[i] = *p;
1945 p++;
1946 }
1947
1948 for (i = 0; i < dlm_options_max; i++) {
1949 if (!strcmp(dlm_options[i].name, name))
1950 return i;
1951 }
1952 return -1;
1953 }
1954
1955 static int get_ind_letter(char c)
1956 {
1957 int i;
1958
1959 for (i = 0; i < dlm_options_max; i++) {
1960 if (dlm_options[i].letter == c)
1961 return i;
1962 }
1963 return -1;
1964 }
1965
1966 struct dlm_option *get_dlm_option(char *name)
1967 {
1968 int i;
1969 i = get_ind_name(name);
1970 if (i < 0)
1971 return NULL;
1972 return &dlm_options[i];
1973 }
1974
/* Parse command-line options into the dlm_options table, recording both
   the cli_* value (for priority over config-file settings) and the use_*
   value.  Supports "--name arg", "--name=arg", "-x arg", "-x=arg",
   "-xNNN", and bundled bool letters like "-DKP".  Exits the process on
   any parse error, and handles help/version immediately. */
static void set_opt_cli(int argc, char **argv)
{
	struct dlm_option *o;
	char *arg1, *p, *arg_str, *endptr;
	char bool_str[] = "1";
	char bundled_letters[8];
	int b, blc = 0, blc_max = 8;
	int debug_options = 0;
	int i, ind, bundled;

	if (argc < 2)
		return;

	arg1 = argv[1];

	if (!strcmp(arg1, "help") || !strcmp(arg1, "--help") || !strcmp(arg1, "-h")) {
		print_usage();
		exit(EXIT_SUCCESS);
	}

	if (!strcmp(arg1, "version") || !strcmp(arg1, "--version") || !strcmp(arg1, "-V")) {
		printf("dlm_controld %s (built %s %s)\n",
		       RELEASE_VERSION, __DATE__, __TIME__);
		printf("%s\n", REDHAT_COPYRIGHT);
		exit(EXIT_SUCCESS);
	}

	for (i = 1; i < argc; ) {
		p = argv[i++];

		if (!strcmp(p, "--debug_options")) {
			debug_options = 1;
			continue;
		}

		/* resolve option index from long name or short letter */
		if (p[0] == '-' && p[1] == '-')
			ind = get_ind_name(p + 2);
		else if (p[0] == '-')
			ind = get_ind_letter(p[1]);
		else {
			fprintf(stderr, "unknown option arg %s\n", p);
			exit(EXIT_FAILURE);
		}

		if (ind < 0) {
			fprintf(stderr, "unknown option %s\n", p);
			exit(EXIT_FAILURE);
		}

		o = &dlm_options[ind];
		o->cli_set++;

		if (!o->req_arg || (o->req_arg == req_arg_bool)) {
			bundled = 0;

			/* current for no_arg type, there is not possible to have bundled options.
			 * for req_arg_bool, bundled options, e.g. -DKP. all treat as "true".
			 * below code save bundled, arg-less, single letters */
			if ((p[0] == '-') && isalpha(p[1]) && (strlen(p) > 2)) {
				for (b = 2; b < strlen(p) && blc < blc_max; b++) {
					if (!isalpha(p[b]))
						break;
					bundled_letters[blc++] = p[b];
					bundled = 1;
				}
			}
			if (bundled) {
				/* "-x" has same effect as "-x 1" */
				o->cli_int = 1;
				o->use_int = 1;
				continue;
			}
		}

		/* locate the option's argument string */
		arg_str = NULL;

		if (strstr(p, "=")) {
			/* arg starts after = for name or letter */
			arg_str = strstr(p, "=") + 1;

		} else if (strlen(p) > 2 && isalpha(p[1]) && isdigit(p[2])) {
			/* arg with no space between letter and digits */
			arg_str = p + 2;

		} else {
			/* space separates arg from name or letter */
			if (o->req_arg == req_arg_bool) {
				/* bool type treat empty arg as true */
				if (i >= argc || argv[i][0] == '-')
					arg_str = bool_str;
				else
					arg_str = argv[i++];
			} else {
				if (i >= argc) {
					fprintf(stderr, "option %s no arg\n", p);
					exit(EXIT_FAILURE);
				}
				arg_str = argv[i++];
			}
		}

		if (!arg_str || arg_str[0] == '-' || arg_str[0] == '\0') {
			fprintf(stderr, "option %s requires arg\n", p);
			exit(EXIT_FAILURE);
		}
		/* non-string args must be numeric; strtol's endptr detects
		   an arg with no leading digits at all */
		if ((o->req_arg != req_arg_str) && !strtol(arg_str, &endptr, 10)) {
			if (endptr == arg_str) {
				fprintf(stderr, "option %s requires digit number\n", p);
				exit(EXIT_FAILURE);
			}
		}

		/* store the parsed value by argument type */
		if (o->req_arg == req_arg_str) {
			o->cli_str = strdup(arg_str);
			o->use_str = o->cli_str;
		} else if (o->req_arg == req_arg_int) {
			o->cli_int = atoi(arg_str);
			o->use_int = o->cli_int;
		} else if (o->req_arg == req_arg_bool) {
			o->cli_int = atoi(arg_str) ? 1 : 0;
			o->use_int = o->cli_int;
		} else if (o->req_arg == req_arg_uint) {
			o->cli_uint = strtoul(arg_str, NULL, 0);
			o->use_uint = o->cli_uint;
		}
	}

	/* process bundled letters saved above */

	for (i = 0; i < blc; i++) {
		ind = get_ind_letter(bundled_letters[i]);
		if (ind < 0) {
			fprintf(stderr, "unknown option char %c\n", bundled_letters[i]);
			exit(EXIT_FAILURE);
		}
		/* bundled letter must be bool type, treat it with "true" value */
		o = &dlm_options[ind];
		o->cli_set++;
		o->cli_int = 1;
		o->use_int = 1;
	}

	if (debug_options && opt(daemon_debug_ind)) {
		for (i = 0; i < dlm_options_max; i++) {
			o = &dlm_options[i];
			printf("%-25s cli_set %d cli_int %d cli_str %s use_int %d use_str %s\n",
			       o->name, o->cli_set, o->cli_int, o->cli_str, o->use_int, o->use_str);
		}
	}

	if (getenv("DLM_CONTROLD_DEBUG")) {
		dlm_options[daemon_debug_ind].use_int = 1;
	}
}
2129
#if 0
/* NOTE(review): disabled code, kept for reference — would give the
   daemon maximum SCHED_RR realtime priority. */
/* When this is used, the systemd service file needs ControlGroup=cpu:/ */
static void set_scheduler(void)
{
	struct sched_param sched_param;
	int rv;

	rv = sched_get_priority_max(SCHED_RR);
	if (rv != -1) {
		sched_param.sched_priority = rv;
		rv = sched_setscheduler(0, SCHED_RR, &sched_param);
		if (rv == -1)
			log_error("could not set SCHED_RR priority %d err %d",
				  sched_param.sched_priority, errno);
	} else {
		log_error("could not get maximum scheduler priority err %d",
			  errno);
	}
}
#endif
2150
/* Daemon entry point: load config (defaults, then cli, then file),
   daemonize unless debugging/foreground, take the pid lockfile, install
   signal handlers, and run the main loop.  Returns 0 on clean exit,
   1 on error. */
int main(int argc, char **argv)
{
	struct sigaction act;
	int fd, rv;

	/*
	 * config priority: cli, config file, default
	 * - explicit cli setting will override default,
	 * - explicit file setting will override default
	 * - explicit file setting will not override explicit cli setting
	 *
	 * "dlm reload_config" will trigger to reload config file, and
	 * reload action also follows the rule: not override explicit
	 * cli setting
	 */
	set_opt_defaults();
	set_opt_cli(argc, argv);
	set_opt_file(0);

	rv = node_config_init(CONF_FILE_PATH);
	if (rv)
		return 1;

	strcpy(fence_all_device.name, "fence_all");
	strcpy(fence_all_device.agent, "dlm_stonith");
	fence_all_device.unfence = 0;

	INIT_LIST_HEAD(&lockspaces);
	INIT_LIST_HEAD(&fs_register_list);
	INIT_LIST_HEAD(&run_ops);
	init_daemon();

	/* fork to background unless -D or --foreground was given */
	if (!opt(daemon_debug_ind) && !opt(foreground_ind)) {
		if (daemon(0, 0) < 0) {
			perror("daemon error");
			exit(EXIT_FAILURE);
		}
	}

	init_logging();

	/* the locked pid file prevents a second dlm_controld instance */
	fd = lockfile(RUN_FILE_NAME);
	if (fd < 0)
		return 1;

	log_level(NULL, LOG_INFO, "dlm_controld %s started", RELEASE_VERSION);

	if (opt(enable_helper_ind))
		setup_helper();

	/* SIGTERM/SIGINT request shutdown (handled via EINTR in loop) */
	memset(&act, 0, sizeof(act));
	act.sa_handler = sigterm_handler;
	rv = sigaction(SIGTERM, &act, NULL);
	if (rv < 0)
		goto out;
	rv = sigaction(SIGINT, &act, NULL);
	if (rv < 0)
		goto out;

	memset(&act, 0, sizeof(act));
	act.sa_handler = SIG_IGN;
	rv = sigaction(SIGHUP, &act, NULL);
	if (rv < 0)
		goto out;

	/* reap exited child processes (e.g. the helper) */
	memset(&act, 0, sizeof(act));
	act.sa_handler = sigchld_handler;
	act.sa_flags = SA_NOCLDSTOP;
	rv = sigaction(SIGCHLD, &act, NULL);
	if (rv < 0)
		goto out;

	/* set_scheduler(); */

	rv = loop();

 out:
	unlink_lockfile(fd, RUNDIR, RUN_FILE_NAME);
	return rv < 0 ? 1 : 0;
}
2231
2232