1    	/*
2    	 * Copyright (C) 2013 Lars Marowsky-Bree <lmb@suse.com>
3    	 *
4    	 * This program is free software; you can redistribute it and/or
5    	 * modify it under the terms of the GNU General Public
6    	 * License as published by the Free Software Foundation; either
7    	 * version 2 of the License, or (at your option) any later version.
8    	 *
9    	 * This software is distributed in the hope that it will be useful,
10   	 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   	 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   	 * General Public License for more details.
13   	 *
14   	 * You should have received a copy of the GNU General Public License along
15   	 * with this program; if not, write to the Free Software Foundation, Inc.,
16   	 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17   	 */
18   	
19   	#include "sbd.h"
20   	#include <sys/reboot.h>
21   	#include <sys/types.h>
22   	#include <sys/stat.h>
23   	#include <pwd.h>
24   	#include <unistd.h>
25   	#include <sys/time.h>
26   	#include <sys/resource.h>
27   	#include <limits.h>
28   	
29   	#ifdef _POSIX_MEMLOCK
30   	#  include <sys/mman.h>
31   	#endif
32   	
33   	/* Tunable defaults: */
34   	int  timeout_watchdog           = SBD_WATCHDOG_TIMEOUT_DEFAULT;
35   	int  timeout_msgwait            = 2 * SBD_WATCHDOG_TIMEOUT_DEFAULT;
36   	
37   	int  timeout_allocate           = 2;
38   	int  timeout_loop               = 1;
39   	int  timeout_io                 = 3;
40   	int  timeout_startup            = 120;
41   	
42   	int  watchdog_use               = 1;
43   	int  watchdog_set_timeout       = 1;
44   	int  timeout_watchdog_crashdump = 0;
45   	int  skip_rt                    = 0;
46   	int  debug                      = 0;
47   	int  debug_mode                 = 0;
48   	
49   	/* Global, non-tunable variables: */
50   	int  sector_size    = 0;
51   	int  servant_health = 0;
52   	
53   	const char *cmdname;
54   	char *local_uname;
55   	
56   	void
57   	usage(void)
58   	{
59   		fprintf(stderr,
60   	"Shared storage fencing tool.\n"
61   	"Syntax:\n"
62   	"	%s <options> <command> <cmdarguments>\n"
63   	"Options:\n"
64   	"-d <devname>	Block device to use (mandatory; can be specified up to 3 times)\n"
65   	"-h		Display this help.\n"
66   	"-n <node>	Set local node name; defaults to uname -n (optional)\n"
67   	"\n"
68   	"-R		Do NOT enable realtime priority (debugging only)\n"
69   	"-W		Use watchdog (recommended) (watch only)\n"
70   	"-w <dev>	Specify watchdog device (optional) (watch only)\n"
71   	"-T		Do NOT initialize the watchdog timeout (watch only)\n"
72   	"-S <0|1>	Set start mode if the node was previously fenced (watch only)\n"
73   	"-p <path>	Write pidfile to the specified path (watch only)\n"
74   	"-v|-vv|-vvv	Enable verbose|debug|debug-library logging (optional)\n"
75   	"\n"
76   	"-1 <N>		Set watchdog timeout to N seconds (optional, create only)\n"
77   	"-2 <N>		Set slot allocation timeout to N seconds (optional, create only)\n"
78   	"-3 <N>		Set daemon loop timeout to N seconds (optional, create only)\n"
79   	"-4 <N>		Set msgwait timeout to N seconds (optional, create only)\n"
80   	"-5 <N>		Warn if loop latency exceeds threshold (optional, watch only)\n"
81   	"			(default is 3, set to 0 to disable)\n"
82   	"-C <N>		Watchdog timeout to set before crashdumping\n"
83   	"			(def: 0s = disable gracefully, optional)\n"
84   	"-I <N>		Async IO read timeout (defaults to 3 * loop timeout, optional)\n"
85   	"-s <N>		Timeout to wait for devices to become available (def: 120s)\n"
86   	"-t <N>		Dampening delay before faulty servants are restarted (optional)\n"
87   	"			(default is 5, set to 0 to disable)\n"
88   	"-F <N>		# of failures before a servant is considered faulty (optional)\n"
89   	"			(default is 1, set to 0 to disable)\n"
90   	"-P		Check Pacemaker quorum and node health (optional, watch only)\n"
91   	"-Z		Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
92   	"-r		Set timeout-action to comma-separated combination of\n"
93   	"		noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n"
94   	"Commands:\n"
95   	#if SUPPORT_SHARED_DISK
96   	"create		initialize N slots on <dev> - OVERWRITES DEVICE!\n"
97   	"list		List all allocated slots on device, and messages.\n"
98   	"dump		Dump meta-data header from device.\n"
99   	"allocate <node>\n"
100  	"		Allocate a slot for node (optional)\n"
101  	"message <node> (test|reset|off|crashdump|clear|exit)\n"
102  	"		Writes the specified message to node's slot.\n"
103  	#endif
104  	"watch		Loop forever, monitoring own slot\n"
105  	"query-watchdog	Check for available watchdog-devices and print some info\n"
106  	"test-watchdog	Test the watchdog-device selected.\n"
107  	"		Attention: This will arm the watchdog and have your system reset\n"
108  	"		           in case your watchdog is working properly!\n"
109  	                , cmdname);
110  	}
111  	
/* This duplicates some definitions from linux/ioprio.h, because that header
 * is not shipped even with linux-kernel-headers. See also
 * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2). */
extern int sys_ioprio_set(int, int, int);
int ioprio_set(int which, int who, int ioprio);

/*
 * Issue the ioprio_set system call directly through syscall() and hand its
 * result back to the caller unchanged.
 */
inline int ioprio_set(int which, int who, int ioprio)
{
        const long rc = syscall(__NR_ioprio_set, which, who, ioprio);

        return (int) rc;
}
121  	
/* I/O scheduling classes; the class occupies the bits above
 * IOPRIO_CLASS_SHIFT of an ioprio value. */
enum {
        IOPRIO_CLASS_NONE,
        IOPRIO_CLASS_RT,
        IOPRIO_CLASS_BE,
        IOPRIO_CLASS_IDLE,
};

/* Selector passed as the "which" argument of ioprio_set(). */
enum {
        IOPRIO_WHO_PROCESS = 1,
        IOPRIO_WHO_PGRP,
        IOPRIO_WHO_USER,
};

#define IOPRIO_BITS             (16)
#define IOPRIO_CLASS_SHIFT      (13)
#define IOPRIO_PRIO_MASK        ((1UL << IOPRIO_CLASS_SHIFT) - 1)

/* Split an ioprio value into its class and per-class data. */
#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
#define IOPRIO_PRIO_DATA(mask)  ((mask) & IOPRIO_PRIO_MASK)

/* Combine class and data into one ioprio value. Both arguments are
 * parenthesised so that any expression can be passed safely. */
#define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | (data))
142  	
143  	static void
144  	sbd_stack_hogger(unsigned char * inbuf, int kbytes)
145  	{
146  	    unsigned char buf[1024];
147  	
148  	    if(kbytes <= 0) {
149  	        return;
150  	    }
151  	
152  	    if (inbuf == NULL) {
153  	        memset(buf, HOG_CHAR, sizeof(buf));
154  	    } else {
155  	        memcpy(buf, inbuf, sizeof(buf));
156  	    }
157  	
158  	    if (kbytes > 0) {
159  	        sbd_stack_hogger(buf, kbytes-1);
160  	    }
161  	
162  	    return;
163  	}
164  	
165  	static void
166  	sbd_malloc_hogger(int kbytes)
167  	{
168  	    int	j;
169  	    void**chunks;
170  	    int	 chunksize = 1024;
171  	
172  	    if(kbytes <= 0) {
173  	        return;
174  	    }
175  	
176  	    /*
177  	     * We could call mallopt(M_MMAP_MAX, 0) to disable it completely,
178  	     * but we've already called mlockall()
179  	     *
180  	     * We could also call mallopt(M_TRIM_THRESHOLD, -1) to prevent malloc
181  	     * from giving memory back to the system, but we've already called
182  	     * mlockall(MCL_FUTURE), so there's no need.
183  	     */
184  	
185  	    chunks = malloc(kbytes * sizeof(void *));
186  	    if (chunks == NULL) {
187  	        cl_log(LOG_WARNING, "Could not preallocate chunk array");
188  	        return;
189  	    }
190  	
191  	    for (j=0; j < kbytes; ++j) {
192  	        chunks[j] = malloc(chunksize);
193  	        if (chunks[j] == NULL) {
194  	            cl_log(LOG_WARNING, "Could not preallocate block %d", j);
195  	
196  	        } else {
197  	            memset(chunks[j], 0, chunksize);
198  	        }
199  	    }
200  	
201  	    for (j=0; j < kbytes; ++j) {
202  	        free(chunks[j]);
203  	    }
204  	
205  	    free(chunks);
206  	}
207  	
208  	static void sbd_memlock(int stackgrowK, int heapgrowK) 
209  	{
210  	
211  	#ifdef _POSIX_MEMLOCK
212  	    /*
213  	     * We could call setrlimit(RLIMIT_MEMLOCK,...) with a large
214  	     * number, but the mcp runs as root and mlock(2) says:
215  	     *
216  	     * Since Linux 2.6.9, no limits are placed on the amount of memory
217  	     * that a privileged process may lock, and this limit instead
218  	     * governs the amount of memory that an unprivileged process may
219  	     * lock.
220  	     */
221  	    if (mlockall(MCL_CURRENT|MCL_FUTURE) >= 0) {
222  	        cl_log(LOG_INFO, "Locked ourselves in memory");
223  	
224  	        /* Now allocate some extra pages (MCL_FUTURE will ensure they stay around) */
225  	        sbd_malloc_hogger(heapgrowK);
226  	        sbd_stack_hogger(NULL, stackgrowK);
227  	
228  	    } else {
229  	        cl_perror("Unable to lock ourselves into memory");
230  	    }
231  	
232  	#else
233  	    cl_log(LOG_ERR, "Unable to lock ourselves into memory");
234  	#endif
235  	}
236  	
237  	static int get_realtime_budget(void)
238  	{
239  	    FILE *f;
240  	    char fname[PATH_MAX];
241  	    int res = -1, lnum = 0, num;
242  	    char *cgroup = NULL, *namespecs = NULL;
243  	
244  	    snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
245  	    f = fopen(fname, "rt");
(1) Event cond_false: Condition "f == NULL", taking false branch.
246  	    if (f == NULL) {
247  	        cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
248  	                            (intmax_t)getpid());
249  	        goto exit_res;
(2) Event if_end: End of if statement.
250  	    }
(3) Event cond_true: Condition "(num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum, &namespecs, &cgroup)) != -1", taking true branch.
(14) Event loop_begin: Jumped back to beginning of loop.
(15) Event allocated_storage: Allocating storage in "namespecs" to hold parsed value.
(16) Event cond_false: Condition "(num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum, &namespecs, &cgroup)) != -1", taking false branch.
Also see events: [leaked_storage]
251  	    while( (num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum,
252  	                         &namespecs, &cgroup)) !=EOF ) {
(4) Event cond_true: Condition "namespecs", taking true branch.
(5) Event cond_false: Condition "strstr(namespecs, "cpuacct")", taking false branch.
253  	        if (namespecs && strstr(namespecs, "cpuacct")) {
254  	            free(namespecs);
255  	            break;
(6) Event if_end: End of if statement.
256  	        }
(7) Event cond_true: Condition "cgroup", taking true branch.
257  	        if (cgroup) {
258  	            free(cgroup);
259  	            cgroup = NULL;
260  	        }
(8) Event cond_true: Condition "namespecs", taking true branch.
261  	        if (namespecs) {
262  	            free(namespecs);
263  	            namespecs = NULL;
264  	        }
265  	        /* not to get stuck if format changes */
(9) Event cond_true: Condition "num < 3", taking true branch.
(10) Event cond_false: Condition "fscanf(f, "%*[^\n]") == -1", taking false branch.
(11) Event cond_false: Condition "fscanf(f, "\n") == -1", taking false branch.
266  	        if ((num < 3) && ((fscanf(f, "%*[^\n]") == EOF) ||
267  	            (fscanf(f, "\n") == EOF))) {
268  	            break;
(12) Event if_end: End of if statement.
269  	        }
(13) Event loop: Jumping back to the beginning of the loop.
(17) Event loop_end: Reached end of loop.
270  	    }
271  	    fclose(f);
(18) Event cond_true: Condition "cgroup == NULL", taking true branch.
272  	    if (cgroup == NULL) {
273  	        cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
274  	                            (intmax_t)getpid());
(19) Event goto: Jumping to label "exit_res".
275  	        goto exit_res;
276  	    }
277  	    snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us",
278  	                              cgroup);
279  	    f = fopen(fname, "rt");
280  	    if (f == NULL) {
281  	        cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
282  	            "doesn't for '%s'", cgroup);
283  	        goto exit_res;
284  	    }
285  	    if (fscanf(f, "%d", &res) != 1) {
286  	        cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
287  	    } else {
288  	        cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
289  	    }
290  	    fclose(f);
291  	
(20) Event label: Reached label "exit_res".
292  	exit_res:
(21) Event cond_false: Condition "cgroup", taking false branch.
293  	    if (cgroup) {
294  	        free(cgroup);
(22) Event if_end: End of if statement.
295  	    }
(23) Event leaked_storage: Variable "namespecs" going out of scope leaks the storage it points to.
Also see events: [allocated_storage]
296  	    return res;
297  	}
298  	
299  	/* stolen from corosync */
300  	static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
301  	    FILE *f;
302  	    int res = -1;
303  	
304  	    /*
305  	     * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
306  	     * using systemd and systemd uses hardcoded path of cgroup mount point.
307  	     *
308  	     * This feature is expected to be removed as soon as systemd gets support
309  	     * for managing RT configuration.
310  	     */
311  	    f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
312  	    if (f == NULL) {
313  	        cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
314  	            "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
315  	        res = 0;
316  	        goto exit_res;
317  	    }
318  	    fclose(f);
319  	
320  	    if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
321  	        cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
322  	                          "-> skip moving to root-slice");
323  	        res = 0;
324  	        goto exit_res;
325  	    }
326  	
327  	    f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
328  	    if (f == NULL) {
329  	        cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
330  	
331  	        goto exit_res;
332  	    }
333  	
334  	    if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
335  	        cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
336  	        goto close_and_exit_res;
337  	    }
338  	
339  	close_and_exit_res:
340  	    if (fclose(f) != 0) {
341  	        cl_log(LOG_WARNING, "Can't close cgroups tasks file");
342  	        goto exit_res;
343  	    }
344  	
345  	exit_res:
346  	    return (res);
347  	}
348  	
349  	void
350  	sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
351  	{
352  	    if(priority < 0) {
353  	        return;
354  	    }
355  	
356  	do {
357  	#ifdef SCHED_RR
358  	    if (move_to_root_cgroup) {
359  	        sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
360  	    }
361  	
362  	    {
363  	        int pmin = sched_get_priority_min(SCHED_RR);
364  	        int pmax = sched_get_priority_max(SCHED_RR);
365  	        struct sched_param sp;
366  	        int pcurrent;
367  	
368  	        if (priority == 0) {
369  	            priority = pmax;
370  	        } else if (priority < pmin) {
371  	            priority = pmin;
372  	        } else if (priority > pmax) {
373  	            priority = pmax;
374  	        }
375  	
376  	        if (sched_getparam(0, &sp) < 0) {
377  	            cl_perror("Unable to get scheduler priority");
378  	
379  	        } else if ((pcurrent = sched_getscheduler(0)) < 0) {
380  	            cl_perror("Unable to get scheduler policy");
381  	
382  	        } else if ((pcurrent == SCHED_RR) &&
383  	                   (sp.sched_priority >= priority)) {
384  	                cl_log(LOG_INFO,
385  	                       "Stay with priority (%d) for policy SCHED_RR",
386  	                       sp.sched_priority);
387  	                break;
388  	        } else {
389  	            memset(&sp, 0, sizeof(sp));
390  	            sp.sched_priority = priority;
391  	
392  	            if (sched_setscheduler(0, SCHED_RR, &sp) < 0) {
393  	                cl_perror(
394  	                    "Unable to set scheduler policy to SCHED_RR priority %d",
395  	                    priority);
396  	            } else {
397  	                cl_log(LOG_INFO,
398  	                       "Scheduler policy is now SCHED_RR priority %d",
399  	                       priority);
400  	                break;
401  	            }
402  	        }
403  	    }
404  	#else
405  	    cl_log(LOG_ERR, "System does not support updating the scheduler policy");
406  	#endif
407  	#ifdef PRIO_PGRP
408  	    if (setpriority(PRIO_PGRP, 0, INT_MIN) < 0) {
409  	        cl_perror("Unable to raise the scheduler priority");
410  	    } else {
411  	        cl_log(LOG_INFO, "Scheduler priority raised to the maximum");
412  		}
413  	#else
414  	    cl_perror("System does not support setting the scheduler priority");
415  	#endif
416  	} while (0);
417  	
418  	    sbd_memlock(heapgrowK, stackgrowK);
419  	}
420  	
421  	void
422  	maximize_priority(void)
423  	{
424  		if (skip_rt) {
425  			cl_log(LOG_INFO, "Not elevating to realtime (-R specified).");
426  			return;
427  		}
428  	
429  		sbd_make_realtime(0, 256, 256);
430  	
431  		if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(),
432  				IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) {
433  			cl_perror("ioprio_set() call failed.");
434  		}
435  	}
436  	
/*
 * Make sure the sysrq functions sbd relies on are enabled.
 *
 * The current mask is read from /proc/sys/kernel/sysrq; a value of exactly 1
 * is left untouched. Otherwise the bits 8 and 128 are added and the mask is
 * written back. An unparsable mask is treated as 0. Failures are logged and
 * the function returns without further action.
 */
void
sysrq_init(void)
{
	/* 8 for debugging dumps of processes,
	   128 for reboot/poweroff */
	enum { SYSRQ_DUMP_BIT = 8, SYSRQ_REBOOT_BIT = 128 };
	FILE* procf;
	int c;

	procf = fopen("/proc/sys/kernel/sysrq", "r");
	if (!procf) {
		cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
		return;
	}
	if (fscanf(procf, "%d", &c) != 1) {
		cl_perror("Parsing sysrq failed");
		c = 0;
	}
	fclose(procf);
	if (c == 1)
		return;

	c |= (SYSRQ_DUMP_BIT | SYSRQ_REBOOT_BIT);
	procf = fopen("/proc/sys/kernel/sysrq", "w");
	if (!procf) {
		cl_perror("cannot open /proc/sys/kernel/sysrq for writing");
		return;
	}
	if (fprintf(procf, "%d", c) < 0) {
		cl_perror("cannot write /proc/sys/kernel/sysrq");
		fclose(procf);
		return;
	}
	/* The stream is buffered: the actual write may only fail on close. */
	if (fclose(procf) != 0) {
		cl_perror("cannot write /proc/sys/kernel/sysrq");
	}
}
466  	
467  	void
468  	sysrq_trigger(char t)
469  	{
470  		FILE *procf;
471  	
472  		procf = fopen("/proc/sysrq-trigger", "a");
473  		if (!procf) {
474  			cl_perror("Opening sysrq-trigger failed.");
475  			return;
476  		}
477  		cl_log(LOG_INFO, "sysrq-trigger: %c\n", t);
478  		fprintf(procf, "%c\n", t);
479  		fclose(procf);
480  		return;
481  	}
482  	
483  	
484  	static void
485  	do_exit(char kind, bool do_flush)
486  	{
487  	    /* TODO: Turn debug_mode into a bit field? Delay + kdump for example */
488  	    const char *reason = NULL;
489  	
490  	    if (kind == 'c') {
491  	        cl_log(LOG_NOTICE, "Initiating kdump");
492  	
493  	    } else if (debug_mode == 1) {
494  	        cl_log(LOG_WARNING, "Initiating kdump instead of panicking the node (debug mode)");
495  	        kind = 'c';
496  	    }
497  	
498  	    if (debug_mode == 2) {
499  	        cl_log(LOG_WARNING, "Shutting down SBD instead of panicking the node (debug mode)");
500  	        watchdog_close(true);
501  	        exit(0);
502  	    }
503  	
504  	    if (debug_mode == 3) {
505  	        /* Give the system some time to flush logs to disk before rebooting. */
506  	        cl_log(LOG_WARNING, "Delaying node panic by 10s (debug mode)");
507  	
508  	        watchdog_close(true);
509  	        sync();
510  	
511  	        sleep(10);
512  	    }
513  	
514  	    switch(kind) {
515  	        case 'b':
516  	            reason = "reboot";
517  	            break;
518  	        case 'c':
519  	            reason = "crashdump";
520  	            break;
521  	        case 'o':
522  	            reason = "off";
523  	            break;
524  	        default:
525  	            reason = "unknown";
526  	            break;
527  	    }
528  	
529  	    cl_log(LOG_EMERG, "Rebooting system: %s", reason);
530  	    if (do_flush) {
531  	        sync();
532  	    }
533  	
534  	    if (kind == 'c') {
535  	        if (timeout_watchdog_crashdump) {
536  	            if (timeout_watchdog != timeout_watchdog_crashdump) {
537  	                timeout_watchdog = timeout_watchdog_crashdump;
538  	                watchdog_init_interval();
539  	            }
540  	            watchdog_close(false);
541  	        } else {
542  	            watchdog_close(true);
543  	        }
544  	        sysrq_trigger(kind);
545  	    } else {
546  	        watchdog_close(false);
547  	        sysrq_trigger(kind);
548  	        if (reboot((kind == 'o')?RB_POWER_OFF:RB_AUTOBOOT) < 0) {
549  	            cl_perror("%s failed", (kind == 'o')?"Poweroff":"Reboot");
550  	        }
551  	    }
552  	
553  	    exit(1);
554  	}
555  	
/* Trigger a crashdump through do_exit(), flushing first. */
void
do_crashdump(void)
{
    const bool flush_first = true;

    do_exit('c', flush_first);
}
561  	
/* Reboot the node through do_exit(), flushing first. */
void
do_reset(void)
{
    const bool flush_first = true;

    do_exit('b', flush_first);
}
567  	
/* Power the node off through do_exit(), flushing first. */
void
do_off(void)
{
    const bool flush_first = true;

    do_exit('o', flush_first);
}
573  	
574  	void
575  	do_timeout_action(void)
576  	{
577  		do_exit(timeout_sysrq_char, do_flush);
578  	}
579  	
580  	/*
581  	 * Change directory to the directory our core file needs to go in
582  	 * Call after you establish the userid you're running under.
583  	 */
584  	int
585  	sbd_cdtocoredir(void)
586  	{
587  		int		rc;
588  		static const char *dir = NULL;
589  	
590  		if (dir == NULL) {
591  			dir = CRM_CORE_DIR;
592  		}
593  		if ((rc=chdir(dir)) < 0) {
594  			int errsave = errno;
595  			cl_perror("Cannot chdir to [%s]", dir);
596  			errno = errsave;
597  		}
598  		return rc;
599  	}
600  	
601  	pid_t
602  	make_daemon(void)
603  	{
604  		pid_t			pid;
605  		const char *		devnull = "/dev/null";
606  	
607  		pid = fork();
608  		if (pid < 0) {
609  			cl_log(LOG_ERR, "%s: could not start daemon\n",
610  					cmdname);
611  			cl_perror("fork");
612  			exit(1);
613  		}else if (pid > 0) {
614  			return pid;
615  		}
616  	
617  	        qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE);
618  	
619  		/* This is the child; ensure privileges have not been lost. */
620  		maximize_priority();
621  		sysrq_init();
622  	
623  		umask(022);
624  		close(0);
625  		(void)open(devnull, O_RDONLY);
626  		close(1);
627  		(void)open(devnull, O_WRONLY);
628  		close(2);
629  		(void)open(devnull, O_WRONLY);
630  		sbd_cdtocoredir();
631  		return 0;
632  	}
633  	
634  	void
635  	sbd_get_uname(void)
636  	{
637  		struct utsname		uname_buf;
638  		int i;
639  	
640  		if (uname(&uname_buf) < 0) {
641  			cl_perror("uname() failed?");
642  			exit(1);
643  		}
644  	
645  		local_uname = strdup(uname_buf.nodename);
646  	
647  		for (i = 0; i < strlen(local_uname); i++)
648  			local_uname[i] = tolower(local_uname[i]);
649  	}
650  	
651  	
652  	#define FMT_MAX 256
653  	void
654  	sbd_set_format_string(int method, const char *daemon)
655  	{
656  	    int offset = 0;
657  	    char fmt[FMT_MAX];
658  	    struct utsname res;
659  	
660  	    switch(method) {
661  	        case QB_LOG_STDERR:
662  	            break;
663  	
664  	        case QB_LOG_SYSLOG:
665  	            if(daemon && strcmp(daemon, "sbd") != 0) {
666  	                offset += snprintf(fmt + offset, FMT_MAX - offset, "%10s: ", daemon);
667  	            }
668  	            break;
669  	
670  	        default:
671  	            /* When logging to a file */
672  	            if (uname(&res) == 0) {
673  	                offset +=
674  	                    snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %s %10s: ", getpid(),
675  	                             res.nodename, daemon);
676  	            } else {
677  	                offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %10s: ", getpid(), daemon);
678  	            }
679  	    }
680  	
681  	    if (debug && method >= QB_LOG_STDERR) {
682  	        offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n: ");
683  	    } else {
684  	        offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n: ");
685  	    }
686  	
687  	    if (method == QB_LOG_SYSLOG) {
688  	        offset += snprintf(fmt + offset, FMT_MAX - offset, "%%b");
689  	    } else {
690  	        offset += snprintf(fmt + offset, FMT_MAX - offset, "\t%%b");
691  	    }
692  	
693  	    if(offset > 0) {
694  	        qb_log_format_set(method, fmt);
695  	    }
696  	}
697  	
698  	int sigqueue_zero(pid_t pid, int sig)
699  	{
700  	union sigval signal_value;
701  	
702  	    memset(&signal_value, 0, sizeof(signal_value));
703  	
704  	    return sigqueue(pid, sig, signal_value);
705  	}
706  	
707  	void
708  	notify_parent(void)
709  	{
710  	    pid_t		ppid;
711  	
712  	    ppid = getppid();
713  	
714  	    if (ppid == 1) {
715  	        /* Our parent died unexpectedly. Triggering
716  	         * self-fence. */
717  	        cl_log(LOG_WARNING, "Our parent is dead.");
718  	        do_timeout_action();
719  	    }
720  	
721  	    switch (servant_health) {
722  	        case pcmk_health_pending:
723  	        case pcmk_health_shutdown:
724  	        case pcmk_health_transient:
725  	            DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health);
726  	            break;
727  	
728  	        case pcmk_health_unknown:
729  	        case pcmk_health_unclean:
730  	        case pcmk_health_noquorum:
731  	            DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY (%d)", servant_health);
732  	            sigqueue_zero(ppid, SIG_PCMK_UNHEALTHY);
733  	            break;
734  	
735  	        case pcmk_health_online:
736  	            DBGLOG(LOG_DEBUG, "Notifying parent: healthy");
737  	            sigqueue_zero(ppid, SIG_LIVENESS);
738  	            break;
739  	
740  	        default:
741  	            DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY %d", servant_health);
742  	            sigqueue_zero(ppid, SIG_PCMK_UNHEALTHY);
743  	            break;
744  	    }
745  	}
746  	
747  	void
748  	set_servant_health(enum pcmk_health state, int level, char const *format, ...)
749  	{
750  	    if (servant_health != state) {
751  	        va_list ap;
752  	        int len = 0;
753  	        char *string = NULL;
754  	
755  	        servant_health = state;
756  	
757  	        va_start(ap, format);
758  	        len = vasprintf (&string, format, ap);
759  	
760  	        if(len > 0) {
761  	            cl_log(level, "%s", string);
762  	        }
763  	        
764  	        va_end(ap);
765  	        free(string);
766  	    }
767  	}
768  	
769  	bool
770  	sbd_is_disk(struct servants_list_item *servant)
771  	{
772  	    if ((servant != NULL) &&
773  	        (servant->devname != NULL) &&
774  	        (servant->devname[0] == '/')) {
775  	        return true;
776  	    }
777  	    return false;
778  	}
779  	
780  	bool
781  	sbd_is_cluster(struct servants_list_item *servant)
782  	{
783  	    if ((servant != NULL) &&
784  	        (servant->devname != NULL) &&
785  	        (strcmp("cluster", servant->devname) == 0)) {
786  	        return true;
787  	    }
788  	    return false;
789  	}
790  	
791  	bool
792  	sbd_is_pcmk(struct servants_list_item *servant)
793  	{
794  	    if ((servant != NULL) &&
795  	        (servant->devname != NULL) &&
796  	        (strcmp("pcmk", servant->devname) == 0)) {
797  	        return true;
798  	    }
799  	    return false;
800  	}
801  	
802  	#define MAX_LEGITIMATE_AGE 3600 /* 1h should be plenty */
803  	
804  	int
805  	seconds_diff_time_t(time_t a, time_t b)
806  	{
807  	    long long diff;
808  	
809  	    diff = a - b;
810  	
811  	    if ((diff > -MAX_LEGITIMATE_AGE) && (diff < MAX_LEGITIMATE_AGE)) {
812  	        return (int) diff;
813  	    }
814  	
815  	    DBGLOG(LOG_WARNING, "Detected unreasonable age (%lld)", diff);
816  	    return MAX_LEGITIMATE_AGE; /* something is fishy - provoke timeout */
817  	}
818  	
/*
 * Difference a - b of two timespecs in whole seconds. Both are converted to
 * timevals, subtracted with timersub(), and the seconds part of the result
 * is passed through seconds_diff_time_t() for range limiting.
 */
int
seconds_diff_timespec(struct timespec *a, struct timespec *b)
{
    struct timeval lhs;
    struct timeval rhs;
    struct timeval delta;

    TIMESPEC_TO_TIMEVAL(&lhs, a);
    TIMESPEC_TO_TIMEVAL(&rhs, b);
    timersub(&lhs, &rhs, &delta);

    return seconds_diff_time_t(delta.tv_sec, 0);
}
833