1    	/*
2    	 * Copyright 2004-2012 Red Hat, Inc.
3    	 *
4    	 * This copyrighted material is made available to anyone wishing to use,
5    	 * modify, copy, or redistribute it subject to the terms and conditions
6    	 * of the GNU General Public License v2 or (at your option) any later version.
7    	 */
8    	
9    	#include "dlm_daemon.h"
10   	
11   	/* protocol_version flags */
12   	#define PV_STATEFUL 0x0001
13   	
/* Rate-limited logging for operations retried once a second:
   debug level for the first minute, one error at the one-minute mark,
   then one error per hour while the retries continue. */
#define log_retry(cur_count, fmt, args...) ({ \
	if (cur_count < 60) \
		log_debug(fmt, ##args); \
	else if (cur_count == 60) \
		log_error(fmt, ##args); \
	else if (!(cur_count % 3600)) \
		log_error(fmt, ##args); \
})
23   	
/* One protocol version triple plus flags.  major/minor are compared in
   dlm_header_validate; messages with a mismatched major/minor are rejected. */
struct protocol_version {
	uint16_t major;
	uint16_t minor;
	uint16_t patch;
	uint16_t flags;		/* PV_ flags, e.g. PV_STATEFUL */
};
30   	
/* Protocol state exchanged between daemons.  Each anonymous union lets the
   same 8 bytes be viewed either as a structured version (xx_ver) or as a
   uint16_t[4] array (used for wire encoding and element-wise compares):
   daemon_max/kernel_max are the maximum versions a node supports,
   daemon_run/kernel_run are the versions actually agreed on and running. */
struct protocol {
	union {
		struct protocol_version dm_ver;
		uint16_t                daemon_max[4];
	};
	union {
		struct protocol_version km_ver;
		uint16_t                kernel_max[4];
	};
	union {
		struct protocol_version dr_ver;
		uint16_t                daemon_run[4];
	};
	union {
		struct protocol_version kr_ver;
		uint16_t                kernel_run[4];
	};
};
49   	
/* fence_result flags */
#define FR_FIPU			0x00000001	/* sender has fence_in_progress_unknown set */
#define FR_CLEAR_STARTUP	0x00000002	/* receiver should clear its startup_nodes entry */
#define FR_CLEAR_FIPU		0x00000004	/* receiver should clear fence_in_progress_unknown */

/* Body of DLM_MSG_FENCE_RESULT / DLM_MSG_FENCE_CLEAR messages.
   The unused pad keeps the wire format fixed-size for future extension. */
struct fence_result {
	uint32_t version;
	uint32_t flags;			/* FR_ flags above */
	uint32_t nodeid;		/* node the fencing result is about */
	uint32_t result;		/* fence agent result code */
	uint64_t fence_walltime;
	char unused[1000];
};
63   	
/* Per-node state tracked by this daemon for every node ever seen in the
   daemon cpg (list head: daemon_nodes). */
struct node_daemon {
	struct list_head list;
	int nodeid;
	int killed;			/* we asked the cluster to kick this node */
	int daemon_member;		/* currently a member of the daemon cpg */
	int left_reason;		/* CPG_REASON_* or REASON_STARTUP_FENCING */
	int recover_setup;
	int fence_in_progress_unknown;	/* node reported fipu set (see fipu comment below) */
	int need_fence_clear;
	int need_fencing;		/* node failed and has not been fenced yet */
	int delay_fencing;		/* fencing postponed (post_join_delay / other agent) */
	int stateful_merge;		/* rejoined with stateful (uncontrolled) dlm state */
	int fence_pid;			/* fence agent pid running for this node */
	int fence_pid_wait;		/* waiting for our own fence agent to finish */
	int fence_result_wait;		/* waiting for another actor's fence result */
	int fence_actor_done; /* for status/debug */
	int fence_actor_last; /* for status/debug */
	int fence_actors[MAX_NODES];		/* nodeids that saw the failure and may fence */
	int fence_actors_orig[MAX_NODES];	/* original actor set, restored for retries */

	struct protocol proto;
	struct fence_config fence_config;

	uint64_t daemon_add_time;
	uint64_t daemon_rem_time;
	uint64_t fail_walltime;
	uint64_t fail_monotime;
	uint64_t fence_walltime;
	uint64_t fence_monotime;	/* last successful fence time (fence_node_time) */
};
94   	
/* pseudo left_reason used when fencing is initiated by startup fencing
   rather than an observed cpg departure */
#define REASON_STARTUP_FENCING -1

/* daemon cpg connection handle and its poll fd */
static cpg_handle_t cpg_handle_daemon;
static int cpg_fd_daemon;
/* protocol versions this node supports and runs */
static struct protocol our_protocol;
/* every node ever seen in the daemon cpg (struct node_daemon) */
static struct list_head daemon_nodes;
/* nodes expected at startup that have not yet joined (struct node_daemon) */
static struct list_head startup_nodes;
/* membership lists saved from the most recent daemon cpg confchg */
static struct cpg_address daemon_member[MAX_NODES];
static struct cpg_address daemon_joined[MAX_NODES];
static struct cpg_address daemon_remove[MAX_NODES];
static int daemon_member_count;
static int daemon_joined_count;
static int daemon_remove_count;
/* set after a nodedown confchg until the matching ringid callback arrives */
static int daemon_ringid_wait;
static struct cpg_ring_id daemon_ringid;
/* pid of the fence agent currently running on this node (0 if none) */
static int daemon_fence_pid;
static uint32_t last_join_seq;
static uint32_t send_fipu_seq;
static int wait_clear_fipu;
/* starts set: other nodes may be fencing someone; must be cleared before
   lockspaces can start (see the long fipu comment below) */
static int fence_in_progress_unknown = 1;

/* fence agent pids that could not be reaped immediately */
#define MAX_ZOMBIES 16
static int zombie_pids[MAX_ZOMBIES];
static int zombie_count;

static int fence_result_pid;
static unsigned int fence_result_try;
static int stateful_merge_wait; /* cluster is stuck in waiting for manual intervention */

static void send_fence_result(int nodeid, int result, uint32_t flags, uint64_t walltime);
static void send_fence_clear(int nodeid, int result, uint32_t flags, uint64_t walltime);
126  	
127  	void log_config(const struct cpg_name *group_name,
128  			const struct cpg_address *member_list,
129  			size_t member_list_entries,
130  			const struct cpg_address *left_list,
131  			size_t left_list_entries,
132  			const struct cpg_address *joined_list,
133  			size_t joined_list_entries)
134  	{
135  		char m_buf[128];
136  		char j_buf[32];
137  		char l_buf[32];
138  		size_t i, len, pos;
139  		int ret;
140  	
141  		memset(m_buf, 0, sizeof(m_buf));
142  		memset(j_buf, 0, sizeof(j_buf));
143  		memset(l_buf, 0, sizeof(l_buf));
144  	
145  		len = sizeof(m_buf);
146  		pos = 0;
147  		for (i = 0; i < member_list_entries; i++) {
148  			ret = snprintf(m_buf + pos, len - pos, " %d",
149  				       member_list[i].nodeid);
150  			if (ret >= len - pos)
151  				break;
152  			pos += ret;
153  		}
154  	
155  		len = sizeof(j_buf);
156  		pos = 0;
157  		for (i = 0; i < joined_list_entries; i++) {
158  			ret = snprintf(j_buf + pos, len - pos, " %d",
159  				       joined_list[i].nodeid);
160  			if (ret >= len - pos)
161  				break;
162  			pos += ret;
163  		}
164  	
165  		len = sizeof(l_buf);
166  		pos = 0;
167  		for (i = 0; i < left_list_entries; i++) {
168  			ret = snprintf(l_buf + pos, len - pos, " %d",
169  				       left_list[i].nodeid);
170  			if (ret >= len - pos)
171  				break;
172  			pos += ret;
173  		}
174  	
175  		log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value,
176  			  member_list_entries, joined_list_entries, left_list_entries,
177  			  strlen(m_buf) ? m_buf : " 0", strlen(j_buf) ? j_buf : " 0",
178  			  strlen(l_buf) ? l_buf : " 0");
179  	}
180  	
181  	void log_ringid(const char *name,
182  	                struct cpg_ring_id *ringid,
183  	                const uint32_t *member_list,
184  	                size_t member_list_entries)
185  	{
186  		char m_buf[128];
187  		size_t i, len, pos;
188  		int ret;
189  	
190  		memset(m_buf, 0, sizeof(m_buf));
191  	
192  		len = sizeof(m_buf);
193  		pos = 0;
194  		for (i = 0; i < member_list_entries; i++) {
195  			ret = snprintf(m_buf + pos, len - pos, " %u",
196  				       member_list[i]);
197  			if (ret >= len - pos)
198  				break;
199  			pos += ret;
200  		}
201  	
202  		log_debug("%s ring %u:%llu %zu memb%s",
203  			  name, ringid->nodeid, (unsigned long long)ringid->seq,
204  			  member_list_entries, m_buf);
205  	}
206  	
207  	const char *reason_str(int reason)
208  	{
209  		switch (reason) {
210  		case REASON_STARTUP_FENCING:
211  			return "startup";
212  		case CPG_REASON_JOIN:
213  			return "join";
214  		case CPG_REASON_LEAVE:
215  			return "leave";
216  		case CPG_REASON_NODEDOWN:
217  			return "nodedown";
218  		case CPG_REASON_NODEUP:
219  			return "nodeup";
220  		case CPG_REASON_PROCDOWN:
221  			return "procdown";
222  		default:
223  			return "unknown";
224  		};
225  	}
226  	
227  	const char *msg_name(int type)
228  	{
229  		switch (type) {
230  		case DLM_MSG_PROTOCOL:
231  			return "protocol";
232  		case DLM_MSG_FENCE_RESULT:
233  			return "fence_result";
234  		case DLM_MSG_FENCE_CLEAR:
235  			return "fence_clear";
236  	
237  		case DLM_MSG_START:
238  			return "start";
239  		case DLM_MSG_PLOCK:
240  			return "plock";
241  		case DLM_MSG_PLOCK_OWN:
242  			return "plock_own";
243  		case DLM_MSG_PLOCK_DROP:
244  			return "plock_drop";
245  		case DLM_MSG_PLOCK_SYNC_LOCK:
246  			return "plock_sync_lock";
247  		case DLM_MSG_PLOCK_SYNC_WAITER:
248  			return "plock_sync_waiter";
249  		case DLM_MSG_PLOCKS_DATA:
250  			return "plocks_data";
251  		case DLM_MSG_PLOCKS_DONE:
252  			return "plocks_done";
253  		case DLM_MSG_DEADLK_CYCLE_START:
254  			return "deadlk_cycle_start";
255  		case DLM_MSG_DEADLK_CYCLE_END:
256  			return "deadlk_cycle_end";
257  		case DLM_MSG_DEADLK_CHECKPOINT_READY:
258  			return "deadlk_checkpoint_ready";
259  		case DLM_MSG_DEADLK_CANCEL_LOCK:
260  			return "deadlk_cancel_lock";
261  		default:
262  			return "unknown";
263  		}
264  	}
265  	
266  	static int _send_message(cpg_handle_t h, void *buf, int len, int type)
267  	{
268  		struct iovec iov;
269  		cs_error_t error;
270  		int retries = 0;
271  	
272  		iov.iov_base = buf;
273  		iov.iov_len = len;
274  	
275  	 retry:
276  		error = cpg_mcast_joined(h, CPG_TYPE_AGREED, &iov, 1);
277  		if (error == CS_ERR_TRY_AGAIN) {
278  			retries++;
279  			usleep(1000);
280  			if (!(retries % 100))
281  				log_error("cpg_mcast_joined retry %d %s",
282  					   retries, msg_name(type));
283  			goto retry;
284  		}
285  		if (error != CS_OK) {
286  			log_error("cpg_mcast_joined error %d handle %llx %s",
287  				  error, (unsigned long long)h, msg_name(type));
288  			return -1;
289  		}
290  	
291  		if (retries)
292  			log_debug("cpg_mcast_joined retried %d %s",
293  				  retries, msg_name(type));
294  	
295  		return 0;
296  	}
297  	
298  	/* header fields caller needs to set: type, to_nodeid, flags, msgdata */
299  	
/* Fill in the common header fields, convert the header to little-endian
   wire format, and multicast the message on the lockspace cpg.
   Caller sets type, to_nodeid, flags, msgdata (see comment above). */
void dlm_send_message(struct lockspace *ls, char *buf, int len)
{
	struct dlm_header *hd = (struct dlm_header *) buf;
	int type = hd->type;	/* save host-order type for logging before conversion */

	/* version carries the protocol this daemon is running */
	hd->version[0]  = cpu_to_le16(our_protocol.daemon_run[0]);
	hd->version[1]  = cpu_to_le16(our_protocol.daemon_run[1]);
	hd->version[2]  = cpu_to_le16(our_protocol.daemon_run[2]);
	hd->type	= cpu_to_le16(hd->type);
	hd->nodeid      = cpu_to_le32(our_nodeid);
	hd->to_nodeid   = cpu_to_le32(hd->to_nodeid);
	hd->global_id   = cpu_to_le32(ls->global_id);
	hd->flags       = cpu_to_le32(hd->flags);
	hd->msgdata     = cpu_to_le32(hd->msgdata);
	hd->msgdata2    = cpu_to_le32(hd->msgdata2);

	_send_message(ls->cpg_handle, buf, len, type);
}
318  	
/* Like dlm_send_message but sends on the daemon cpg; no lockspace, so
   global_id is not set.  Returns _send_message's result (0 or -1). */
int dlm_send_message_daemon(char *buf, int len)
{
	struct dlm_header *hd = (struct dlm_header *) buf;
	int type = hd->type;	/* save host-order type for logging before conversion */

	hd->version[0]  = cpu_to_le16(our_protocol.daemon_run[0]);
	hd->version[1]  = cpu_to_le16(our_protocol.daemon_run[1]);
	hd->version[2]  = cpu_to_le16(our_protocol.daemon_run[2]);
	hd->type	= cpu_to_le16(hd->type);
	hd->nodeid      = cpu_to_le32(our_nodeid);
	hd->to_nodeid   = cpu_to_le32(hd->to_nodeid);
	hd->flags       = cpu_to_le32(hd->flags);
	hd->msgdata     = cpu_to_le32(hd->msgdata);
	hd->msgdata2    = cpu_to_le32(hd->msgdata2);

	return _send_message(cpg_handle_daemon, buf, len, type);
}
336  	
/* Convert a received header from little-endian wire format to host
   byte order, in place.  Counterpart of the conversions done by
   dlm_send_message / dlm_send_message_daemon. */
void dlm_header_in(struct dlm_header *hd)
{
	hd->version[0]  = le16_to_cpu(hd->version[0]);
	hd->version[1]  = le16_to_cpu(hd->version[1]);
	hd->version[2]  = le16_to_cpu(hd->version[2]);
	hd->type        = le16_to_cpu(hd->type);
	hd->nodeid      = le32_to_cpu(hd->nodeid);
	hd->to_nodeid   = le32_to_cpu(hd->to_nodeid);
	hd->global_id   = le32_to_cpu(hd->global_id);
	hd->flags       = le32_to_cpu(hd->flags);
	hd->msgdata     = le32_to_cpu(hd->msgdata);
	hd->msgdata2    = le32_to_cpu(hd->msgdata2);
}
350  	
/* Convert a run_info to little-endian wire format, in place. */
static void run_info_out(struct run_info *info)
{
	info->dest_nodeid  = cpu_to_le32(info->dest_nodeid);
	info->start_nodeid = cpu_to_le32(info->start_nodeid);
	info->local_pid    = cpu_to_le32(info->local_pid);
	info->local_result = cpu_to_le32(info->local_result);
	info->need_replies = cpu_to_le32(info->need_replies);
	info->reply_count  = cpu_to_le32(info->reply_count);
	info->fail_count   = cpu_to_le32(info->fail_count);
	info->flags        = cpu_to_le32(info->flags);
}
362  	
/* Convert a received run_info from little-endian to host order, in place. */
static void run_info_in(struct run_info *info)
{
	info->dest_nodeid  = le32_to_cpu(info->dest_nodeid);
	info->start_nodeid = le32_to_cpu(info->start_nodeid);
	info->local_pid    = le32_to_cpu(info->local_pid);
	info->local_result = le32_to_cpu(info->local_result);
	info->need_replies = le32_to_cpu(info->need_replies);
	info->reply_count  = le32_to_cpu(info->reply_count);
	info->fail_count   = le32_to_cpu(info->fail_count);
	info->flags        = le32_to_cpu(info->flags);
}
374  	
/* Byte-swap a run_request for sending (only its embedded run_info needs it). */
static void run_request_out(struct run_request *req)
{
	run_info_out(&req->info);
}
379  	
/* Byte-swap a received run_request to host order. */
static void run_request_in(struct run_request *req)
{
	run_info_in(&req->info);
}
384  	
/* Byte-swap a run_reply for sending (only its embedded run_info needs it). */
static void run_reply_out(struct run_reply *rep)
{
	run_info_out(&rep->info);
}
389  	
/* Byte-swap a received run_reply to host order. */
static void run_reply_in(struct run_reply *rep)
{
	run_info_in(&rep->info);
}
394  	
/* Validate a received header: protocol major/minor must match what we
   are running, and the header's nodeid must match the cpg sender.
   NOTE(review): fields are compared in host order, so the caller is
   expected to have run dlm_header_in first.
   Returns 0 if acceptable, -1 to reject the message. */
int dlm_header_validate(struct dlm_header *hd, int nodeid)
{
	/* patch level (version[2]) is logged but not required to match */
	if (hd->version[0] != our_protocol.daemon_run[0] ||
	    hd->version[1] != our_protocol.daemon_run[1]) {
		log_error("reject message from %d version %u.%u.%u vs %u.%u.%u",
			  nodeid, hd->version[0], hd->version[1],
			  hd->version[2], our_protocol.daemon_run[0],
			  our_protocol.daemon_run[1],
			  our_protocol.daemon_run[2]);
		return -1;
	}

	if (hd->nodeid != nodeid) {
		log_error("bad msg nodeid %d %d", hd->nodeid, nodeid);
		return -1;
	}

	return 0;
}
414  	
/* Linear search of daemon_nodes for the given nodeid.
   Returns NULL if the node has never been seen. */
static struct node_daemon *get_node_daemon(int nodeid)
{
	struct node_daemon *node;

	list_for_each_entry(node, &daemon_nodes, list) {
		if (node->nodeid == nodeid)
			return node;
	}
	return NULL;
}
425  	
426  	static int nodes_need_fencing(void)
427  	{
428  		struct node_daemon *node;
429  	
430  		list_for_each_entry(node, &daemon_nodes, list) {
431  			if (node->need_fencing)
432  				return 1;
433  		}
434  		return 0;
435  	}
436  	
437  	static int nodeid_needs_fencing(int nodeid)
438  	{
439  		struct node_daemon *node;
440  	
441  		node = get_node_daemon(nodeid);
442  		if (!node) {
443  			log_error("nodeid_needs_fencing %d not found", nodeid);
444  			return 0;
445  		}
446  		return node->need_fencing;
447  	}
448  	
449  	static int all_daemon_members_fipu(void)
450  	{
451  		struct node_daemon *node;
452  	
453  		list_for_each_entry(node, &daemon_nodes, list) {
454  			if (!node->daemon_member)
455  				continue;
456  			if (!node->fence_in_progress_unknown)
457  				return 0;
458  		}
459  	
460  		list_for_each_entry(node, &daemon_nodes, list) {
461  			if (!node->daemon_member)
462  				continue;
463  			node->fence_in_progress_unknown = 0;
464  		}
465  	
466  		return 1;
467  	}
468  	
469  	int fence_node_time(int nodeid, uint64_t *last_fenced)
470  	{
471  		struct node_daemon *node;
472  	
473  		node = get_node_daemon(nodeid);
474  		if (!node)
475  			return -1;
476  	
477  		*last_fenced = node->fence_monotime;
478  		return 0;
479  	}
480  	
481  	int fence_in_progress(int *in_progress)
482  	{
483  		if (fence_in_progress_unknown) {
484  			*in_progress = 1;
485  		} else if (!list_empty(&startup_nodes)) {
486  			*in_progress = 2;
487  		} else if (nodes_need_fencing()) {
488  			*in_progress = 3;
489  		} else {
490  			*in_progress = 0;
491  		}
492  		return 0;
493  	}
494  	
495  	void add_startup_node(int nodeid)
496  	{
497  		struct node_daemon *node;
498  	
499  		node = malloc(sizeof(struct node_daemon));
500  		if (!node) {
501  			log_error("add_startup_node no mem");
502  			return;
503  		}
504  		memset(node, 0, sizeof(struct node_daemon));
505  		node->nodeid = nodeid;
506  		list_add_tail(&node->list, &startup_nodes);
507  	}
508  	
509  	static int clear_startup_node(int nodeid, int all)
510  	{
511  		struct node_daemon *node, *safe;
512  		int count = 0;
513  	
514  		list_for_each_entry_safe(node, safe, &startup_nodes, list) {
515  			if (all || node->nodeid == nodeid) {
516  				list_del(&node->list);
517  				free(node);
518  				count++;
519  			}
520  		}
521  		return count;
522  	}
523  	
524  	static struct node_daemon *add_node_daemon(int nodeid)
525  	{
526  		struct node_daemon *node;
527  		struct fence_config *fc;
528  		int rv;
529  	
530  		node = get_node_daemon(nodeid);
531  		if (node)
532  			return node;
533  	
534  		node = malloc(sizeof(struct node_daemon));
535  		if (!node) {
536  			log_error("add_node_daemon no mem");
537  			return NULL;
538  		}
539  		memset(node, 0, sizeof(struct node_daemon));
540  		node->nodeid = nodeid;
541  		list_add_tail(&node->list, &daemon_nodes);
542  	
543  		/* TODO: allow the config to be reread */
544  	
545  		fc = &node->fence_config;
546  		fc->nodeid = nodeid;
547  	
548  		/* explicit config file setting */
549  	
550  		rv = fence_config_init(fc, (unsigned int)nodeid, (char *)CONF_FILE_PATH);
551  		if (!rv)
552  			goto out;
553  	
554  		/* no config file setting, so use default */
555  	
556  		if (rv == -ENOENT) {
557  			fc->dev[0] = &fence_all_device;
558  			goto out;
559  		}
560  	
561  		log_error("fence config %d error %d", nodeid, rv);
562  	 out:
563  		return node;
564  	}
565  	
/* A clean daemon member is a node that has joined the daemon cpg
   from a "clean state", i.e. not a stateful merge.  It would not
   have joined the daemon cpg if it found uncontrolled dlm kernel
   state (check_uncontrolled_lockspaces).  We would not have
   accepted and saved its protocol in node->proto.daemon if it
   was a stateful merge. */
572  	
573  	static int is_clean_daemon_member(int nodeid)
574  	{
575  		struct node_daemon *node;
576  	
577  		node = get_node_daemon(nodeid);
578  		if (node && node->daemon_member && node->proto.daemon_max[0])
579  			return 1;
580  		return 0;
581  	}
582  	
583  	static int in_daemon_list(int nodeid, struct cpg_address *daemon_list, int count)
584  	{
585  		int i;
586  	
587  		for (i = 0; i < count; i++) {
588  			if (daemon_list[i].nodeid == nodeid)
589  				return 1;
590  		}
591  		return 0;
592  	}
593  	
594  	/* save in node->fence_actors[] any nodeid present when the node
595  	   failed which therefore saw it fail, knows it needs fencing, and
596  	   can request fencing for it if it becomes the low actor.  A node
597  	   added in the same change with the removed node does not qualify. */
598  	
599  	static int set_fence_actors(struct node_daemon *node, int all_memb)
600  	{
601  		int i, nodeid, count = 0, low = 0;
602  	
603  		memset(node->fence_actors, 0, sizeof(node->fence_actors));
604  		memset(node->fence_actors_orig, 0, sizeof(node->fence_actors_orig));
605  	
606  		for (i = 0; i < daemon_member_count; i++) {
607  			nodeid = daemon_member[i].nodeid;
608  	
609  			if (!all_memb && in_daemon_list(nodeid, daemon_joined, daemon_joined_count))
610  				continue;
611  	
612  			node->fence_actors[count++] = nodeid;
613  	
614  			if (!low || nodeid < low)
615  				low = nodeid;
616  		}
617  	
618  		/* keep a copy of the original set so they can be retried if all fail */
619  		memcpy(node->fence_actors_orig, node->fence_actors, sizeof(node->fence_actors));
620  	
621  		log_debug("set_fence_actors for %d low %d count %d",
622  			  node->nodeid, low, count);
623  		return low;
624  	}
625  	
626  	static int get_fence_actor(struct node_daemon *node)
627  	{
628  		int i, low, low_i;
629  	
630  	 retry:
631  		low = 0;
632  	
633  		for (i = 0; i < MAX_NODES; i++) {
634  			if (!node->fence_actors[i])
635  				continue;
636  	
637  			if (!low || node->fence_actors[i] < low) {
638  				low = node->fence_actors[i];
639  				low_i = i;
640  			}
641  		}
642  	
643  		if (low && !in_daemon_list(low, daemon_member, daemon_member_count)) {
644  			log_debug("get_fence_actor for %d low actor %d is gone",
645  				  node->nodeid, low);
646  	
647  			node->fence_actors[low_i] = 0;
648  			goto retry;
649  		}
650  	
651  		node->fence_actor_last = low;
652  	
653  		return low;
654  	}
655  	
656  	/* if an actor fails to fence, it will send that result, and others
657  	   will clear it from the actors, which will cause the next lowest
658  	   actor to try */
659  	
660  	static void clear_fence_actor(int nodeid, int actor)
661  	{
662  		struct node_daemon *node;
663  		int remaining = 0;
664  		int i;
665  	
666  		node = get_node_daemon(nodeid);
667  		if (!node)
668  			return;
669  	
670  		for (i = 0; i < MAX_NODES; i++) {
671  			if (node->fence_actors[i] == actor)
672  				node->fence_actors[i] = 0;
673  			else if (node->fence_actors[i])
674  				remaining++;
675  		}
676  	
677  		if (!remaining && opt(repeat_failed_fencing_ind)) {
678  			log_debug("clear_fence_actor %d restoring original actors to retry", actor);
679  			memcpy(node->fence_actors, node->fence_actors_orig, sizeof(node->fence_actors));
680  		}
681  	}
682  	
683  	static void clear_zombies(void)
684  	{
685  		int i, rv, result = 0;
686  	
687  		for (i = 0; i < MAX_ZOMBIES; i++) {
688  			if (!zombie_count)
689  				break;
690  			if (!zombie_pids[i])
691  				continue;
692  	
693  			rv = fence_result(-1, zombie_pids[i], &result);
694  			if (rv == -EAGAIN)
695  				continue;
696  	
697  			log_debug("cleared zombie %d rv %d result %d",
698  				  zombie_pids[i], rv, result);
699  	
700  			zombie_pids[i] = 0;
701  			zombie_count--;
702  		}
703  	}
704  	
705  	static void add_zombie(int pid)
706  	{
707  		int i;
708  	
709  		for (i = 0; i < MAX_ZOMBIES; i++) {
710  			if (!zombie_pids[i]) {
711  				zombie_pids[i] = pid;
712  				zombie_count++;
713  				return;
714  			}
715  		}
716  	}
717  	
/* Forcibly terminate a running fence agent: SIGKILL it, give it half a
   second to exit, then try to reap it.  If it still has not exited
   (-EAGAIN) it is parked on the zombie list for clear_zombies. */
static void fence_pid_cancel(int nodeid, int pid)
{
	int rv, result = 0;

	log_debug("fence_pid_cancel nodeid %d pid %d sigkill", nodeid, pid);

	kill(pid, SIGKILL);
	usleep(500000);

	rv = fence_result(nodeid, pid, &result);
	if (rv == -EAGAIN)
		add_zombie(pid);

	log_debug("fence_pid_cancel nodeid %d pid %d rv %d result %d",
		  nodeid, pid, rv, result);
}
734  	
735  	static void kick_stateful_merge_members(void)
736  	{
737  		struct node_daemon *node;
738  	
739  		list_for_each_entry(node, &daemon_nodes, list) {
740  			if (!node->killed && node->stateful_merge) {
741  				log_error("daemon node %d kill stateful merge member",
742  					  node->nodeid);
743  				kick_node_from_cluster(node->nodeid);
744  				node->killed = 1;
745  			}
746  		}
747  	}
748  	
749  	/*
750  	 * fence_in_progress_unknown (fipu)
751  	 *
752  	 * If current daemon members are fencing someone, and a new node
753  	 * joins, that new node needs to wait for the previous members to
754  	 * finish any fencing they're doing before it can start a lockspace.
755  	 *
756  	 * The previous members may be fencing the last node that was using
757  	 * the lockspace the new node is going to use, so if it doesn't wait,
758  	 * it could start using a lockspace with an unfenced user.
759  	 *
760  	 * So, the daemon starts with fence_in_progress_unknown set to
761  	 * indicate that other nodes may be fencing someone, and it won't
762  	 * start any lockspaces until it is clear.
763  	 *
764  	 * A node starts with fence_in_progress_unknown set and won't
765  	 * start any lockspaces until it's clear.
766  	 *
767  	 * When using startup_fencing:
768  	 *
769  	 * . When all nodes start up together, all have fipu set,
770  	 * and will go through startup fencing, which will eventually
771  	 * result in all nodes either being clean daemon members or fenced,
772  	 * so everyone will clear fipu by seeing that.
773  	 *
774  	 * . The more common case is when a new node joins other previously
775  	 * running nodes.  The new node needs to be told that the others
776  	 * have no outstanding fencing ops before it can clear fipu.
777  	 * A previous member does send_fence_clear(0) to a new node once
778  	 * all fencing is complete.  The two flags in send_fence_clear are
 * usually sent together but may sometimes be in separate messages:
780  	 * send_fence_clear(0, CLEAR_STARTUP) to clear startup_nodes right away
781  	 * send_fence_clear(0, CLEAR_FIPU) to clear fipu once all fencing is done
782  	 *
783  	 * When not using startup_fencing:
784  	 *
785  	 * . When all nodes start up together, all have fipu set, and all
786  	 * will be waiting to receive_fence_clear from a previous node
787  	 * in order to clear it.  The nodes need to detect this situation,
788  	 * and when they do, they will know that everyone is in startup,
789  	 * so there can be no pending fencing on a previous node, so all
790  	 * can clear fipu.  To detect this case, when a node starts up
791  	 * with !startup_fence, it sends a special send_fence_clear(-ENODATA, FIPU)
792  	 * message about itself to indicate it has fipu set and needs it cleared.
793  	 * After sending this, it checks to see if all present nodes have sent
794  	 * this same message about themselves.  If so, then this startup
 * case has been detected, and all will clear fipu.
796  	 *
797  	 * . New nodes that join after this startup initialization will be
798  	 * handled the same as when startup_fencing is set (above).
799  	 *
800  	 *
801  	 * startup_fencing
802  	 * ---------------
803  	 *
804  	 * case A
805  	 * all nodes start up,
806  	 * all have fipu set,
807  	 * all wait for startup_nodes to be empty, (joined or moved to need_fencing)
808  	 * all wait for no daemon_nodes to need_fencing, (joined or were fenced)
809  	 * all clear fipu
810  	 *
811  	 * later,
812  	 *
813  	 * case B
814  	 * new node starts,
815  	 * new node has fipu set,
816  	 * cur node sees need_fence_clear on new node
817  	 * cur node sees no pending fencing ops,
818  	 * cur node send_fence_clear(0) to new node,
819  	 * new node clears startup_nodes and fipu
820  	 *
821  	 * !startup_fencing
822  	 * ----------------
823  	 *
824  	 * case C
825  	 * all nodes start up,
826  	 * all have fipu set,
827  	 * all send_fence_clear(-ENODATA,FIPU),
828  	 * all receive_fence_clear(-ENODATA,FIPU) from everyone,
829  	 * all_daemon_members_fipu() is 1,
830  	 * all clear fipu
831  	 *
832  	 * later same as case B above
833  	 */
834  	
835  	static void daemon_fence_work(void)
836  	{
837  		struct node_daemon *node, *safe;
838  		int gone_count = 0, part_count = 0, merge_count = 0, clean_count = 0;
839  		int rv, nodeid, pid, need, low = 0, actor, result;
840  		int retry = 0;
841  		uint32_t flags;
842  	
843  		if (!daemon_fence_allow)
844  			return;
845  	
846  		if (daemon_ringid_wait) {
847  			/* We've seen a nodedown confchg callback, but not the
848  			   corresponding ringid callback. */
849  			log_retry(retry_fencing, "fence work wait for cpg ringid");
850  			retry = 1;
851  			goto out;
852  		}
853  	
854  		if (cluster_ringid_seq != daemon_ringid.seq) {
855  			/* wait for ringids to be in sync */
856  			log_retry(retry_fencing, "fence work wait for cluster ringid");
857  			retry = 1;
858  			goto out;
859  		}
860  	
861  		if (opt(enable_quorum_fencing_ind) && !cluster_quorate) {
862  			/* wait for quorum before doing any fencing, but if there
863  			   is none, send_fence_clear below can unblock new nodes */
864  			log_retry(retry_fencing, "fence work wait for quorum");
865  			retry = 1;
866  			goto out_fipu;
867  		}
868  	
869  		/*
870  		 * Count different types of nodes
871  		 * gone: node not a member
872  		 * part: member we've not received a proto message from
873  		 * merge: member we received a stateful proto message from
874  		 * clean: member we received a clean/new proto message from
875  		 *
876  		 * A node always views itself as a clean member, not a merge member.
877  		 */
878  	
879  		list_for_each_entry(node, &daemon_nodes, list) {
880  			if (!node->daemon_member) {
881  				gone_count++;
882  			} else {
883  				if (!low || node->nodeid < low)
884  					low = node->nodeid;
885  	
886  				if (node->stateful_merge)
887  					merge_count++;
888  				else if (!node->proto.daemon_max[0])
889  					part_count++;
890  				else
891  					clean_count++;
892  			}
893  		}
894  	
895  		/*
896  		 * Wait for stateful merged members to be removed before moving
897  		 * on to fencing.  Kill stateful merged members to clear them.
898  		 * This section is only relevant to non-two-node, even splits.
899  		 *
900  		 * With two node splits, they race to fence each other and
901  		 * whichever fences successfully then kills corosync on the other
902  		 * (in the case where corosync is still running on the fenced node).
903  		 *
904  		 * With an odd split, the partition that maintained quorum will
905  		 * kill stateful merged nodes when their proto message is received.
906  		 *
907  		 * With an even split, e.g. 2/2, we don't want both sets to
908  		 * be fencing each other right after merge, when both sides
909  		 * have quorum again and see the other side as statefully merged.
910  		 * So, delay fencing until the stateful nodes are cleared on one
911  		 * side (by way of the low nodeid killing stateful merged members).
912  		 *
913  		 * When there are 3 or more partitions that merge, none may see
914  		 * enough clean nodes, so the cluster would be stuck here waiting
915  		 * for someone to manually reset/restart enough nodes to produce
916  		 * sufficient clean nodes (>= merged).
917  		 */
918  	
919  		if (!cluster_two_node && merge_count) {
920  			log_retry(retry_fencing, "fence work wait to clear merge %d clean %d part %d gone %d",
921  				  merge_count, clean_count, part_count, gone_count);
922  	
923  			if ((clean_count >= merge_count) && !part_count && (low == our_nodeid))
924  				kick_stateful_merge_members();
925  			if ((clean_count < merge_count) && !part_count)
926  				stateful_merge_wait = 1;
927  	
928  			retry = 1;
929  			goto out;
930  		}
931  		if (stateful_merge_wait)
932  			stateful_merge_wait = 0;
933  	
934  		/*
935  		 * startup fencing
936  		 */
937  	
938  		list_for_each_entry_safe(node, safe, &startup_nodes, list) {
939  			if (is_clean_daemon_member(node->nodeid)) {
940  				log_debug("fence startup %d skip member", node->nodeid);
941  				list_del(&node->list);
942  				free(node);
943  				continue;
944  			}
945  	
946  			if (!opt(enable_startup_fencing_ind))
947  				continue;
948  	
949  			if (!fence_delay_begin) {
950  				log_debug("fence startup %d wait for initial delay", node->nodeid);
951  				continue;
952  			}
953  	
954  			if (monotime() - fence_delay_begin < opt(post_join_delay_ind)) {
955  				log_debug("fence startup %d delay %d from %llu",
956  					  node->nodeid, opt(post_join_delay_ind),
957  					  (unsigned long long)fence_delay_begin);
958  				retry = 1;
959  				continue;
960  			}
961  	
962  			/* clear this entry and create a daemon_nodes entry with
963  			   need_fencing and the fence loops below will handle it */
964  	
965  			nodeid = node->nodeid;
966  			list_del(&node->list);
967  			free(node);
968  	
969  			node = add_node_daemon(nodeid);
970  			if (!node) {
971  				log_debug("fence startup %d add failed", nodeid);
972  				continue;
973  			}
974  			if (node->need_fencing) {
975  				/* don't think this should happen? */
976  				log_error("fence startup %d already set", nodeid);
977  				continue;
978  			}
979  			node->need_fencing = 1;
980  			node->delay_fencing = 0;
981  			node->fence_monotime = 0;
982  			node->fence_walltime = 0;
983  			node->fence_actor_last = 0;
984  			node->fence_actor_done = 0;
985  			node->fence_pid_wait = 0;
986  			node->fence_pid = 0;
987  			node->fence_result_wait = 0;
988  			node->fence_config.pos = 0;
989  			node->left_reason = REASON_STARTUP_FENCING;
990  			node->fail_monotime = cluster_joined_monotime - 1;
991  			node->fail_walltime = cluster_joined_walltime - 1;
992  			low = set_fence_actors(node, 1);
993  	
994  			log_debug("fence startup nodeid %d act %d", node->nodeid, low);
995  		}
996  	
997  		/*
998  		 * request fencing
999  		 */
1000 	
1001 		list_for_each_entry(node, &daemon_nodes, list) {
1002 			if (!node->need_fencing)
1003 				continue;
1004 	
1005 			if (node->fence_pid_wait)
1006 				continue;
1007 	
1008 			if (node->fence_result_wait) {
1009 				log_debug("fence request %d result_wait", node->nodeid);
1010 				continue;
1011 			}
1012 	
1013 			if (is_clean_daemon_member(node->nodeid)) {
1014 				/*
1015 				 * node has rejoined in clean state
1016 				 */
1017 				log_debug("fence request %d skip for is_clean_daemon_member", node->nodeid);
1018 	
1019 				node->need_fencing = 0;
1020 				node->delay_fencing = 0;
1021 				node->fence_walltime = time(NULL);
1022 				node->fence_monotime = monotime();
1023 				node->fence_actor_done = node->nodeid;
1024 				continue;
1025 			}
1026 	
1027 			if (!opt(enable_concurrent_fencing_ind) && daemon_fence_pid) {
1028 				/* run one agent at a time in case they need the same switch */
1029 				log_retry(retry_fencing, "fence request %d delay for other pid %d",
1030 					  node->nodeid, daemon_fence_pid);
1031 				node->delay_fencing = 1;
1032 				retry = 1;
1033 				continue;
1034 			}
1035 	
1036 			/* use post_join_delay to avoid fencing a node in the short
1037 			   time between it joining the cluster (giving cluster quorum)
1038 			   and joining the daemon cpg, which allows it to bypass fencing */
1039 	
1040 			if (monotime() - fence_delay_begin < opt(post_join_delay_ind)) {
1041 				log_debug("fence request %d delay %d from %llu",
1042 					  node->nodeid, opt(post_join_delay_ind),
1043 					  (unsigned long long)fence_delay_begin);
1044 				node->delay_fencing = 1;
1045 				retry = 1;
1046 				continue;
1047 			}
1048 			node->delay_fencing = 0;
1049 	
1050 			/* get_fence_actor picks the low nodeid that existed
1051 			   when node failed and is still around.  if the current
1052 			   actor fails, get_fence_actor will not find it in the
1053 			   members list, will clear it, and return the next actor */
1054 	
1055 			actor = get_fence_actor(node);
1056 	
1057 			if (!actor) {
1058 				log_error("fence request %d no actor", node->nodeid);
1059 				continue;
1060 			}
1061 	
1062 			if (actor != our_nodeid) {
1063 				log_debug("fence request %d defer to %d",
1064 					  node->nodeid, actor);
1065 				continue;
1066 			}
1067 	
1068 			log_debug("fence request %d pos %d",
1069 				  node->nodeid, node->fence_config.pos);
1070 	
1071 			rv = fence_request(node->nodeid,
1072 					   node->fail_walltime,
1073 					   node->fail_monotime,
1074 					   &node->fence_config,
1075 					   node->left_reason,
1076 					   &pid);
1077 			if (rv < 0) {
1078 				send_fence_result(node->nodeid, rv, 0, time(NULL));
1079 				node->fence_result_wait = 1;
1080 				continue;
1081 			}
1082 	
1083 			node->fence_pid_wait = 1;
1084 			node->fence_pid = pid;
1085 			daemon_fence_pid = pid;
1086 		}
1087 	
1088 		/*
1089 		 * check outstanding fence requests
1090 		 */
1091 	
1092 		list_for_each_entry(node, &daemon_nodes, list) {
1093 			if (!node->need_fencing)
1094 				continue;
1095 	
1096 			if (node->delay_fencing)
1097 				continue;
1098 	
1099 			if (node->fence_result_wait) {
1100 				log_debug("fence wait %d result_wait", node->nodeid);
1101 				continue;
1102 			}
1103 	
1104 			if (!node->fence_pid_wait) {
1105 				/* another node is the actor */
1106 				log_debug("fence wait %d for done", node->nodeid);
1107 				continue;
1108 			}
1109 	
1110 			if (!node->fence_pid) {
1111 				/* shouldn't happen */
1112 				log_error("fence wait %d zero pid", node->nodeid);
1113 				node->fence_pid_wait = 0;
1114 				continue;
1115 			}
1116 	
1117 			nodeid = node->nodeid;
1118 			pid = node->fence_pid;
1119 	
1120 			if (is_clean_daemon_member(nodeid)) {
1121 				/*
1122 				 * node has rejoined in clean state so we can
1123 				 * abort outstanding fence op for it.  all nodes
1124 				 * will see and do this, so we don't need to send
1125 				 * a fence result.
1126 				 */
1127 				log_debug("fence wait %d pid %d skip for is_clean_daemon_member", nodeid, pid);
1128 	
1129 				node->need_fencing = 0;
1130 				node->delay_fencing = 0;
1131 				node->fence_walltime = time(NULL);
1132 				node->fence_monotime = monotime();
1133 				node->fence_actor_done = nodeid;
1134 	
1135 				node->fence_pid_wait = 0;
1136 				node->fence_pid = 0;
1137 				daemon_fence_pid = 0;
1138 	
1139 				fence_pid_cancel(nodeid, pid);
1140 				continue;
1141 			}
1142 	
1143 			retry = 1;
1144 	
1145 			rv = fence_result(nodeid, pid, &result);
1146 			if (rv == -EAGAIN) {
1147 				/* agent pid is still running */
1148 	
1149 				if (fence_result_pid != pid) {
1150 					fence_result_try = 0;
1151 					fence_result_pid = pid;
1152 				}
1153 				fence_result_try++;
1154 	
1155 				log_retry(fence_result_try, "fence wait %d pid %d running", nodeid, pid);
1156 				continue;
1157 			}
1158 	
1159 			node->fence_pid_wait = 0;
1160 			node->fence_pid = 0;
1161 			daemon_fence_pid = 0;
1162 	
1163 			if (rv < 0) {
1164 				/* shouldn't happen */
1165 				log_error("fence wait %d pid %d error %d", nodeid, pid, rv);
1166 				continue;
1167 			}
1168 	
1169 			log_debug("fence wait %d pid %d result %d", nodeid, pid, result);
1170 	
1171 			if (!result) {
1172 				/* agent exit 0, if there's another agent to run in
1173 				   parallel, set it to run next, otherwise success */
1174 	
1175 				rv = fence_config_next_parallel(&node->fence_config);
1176 				if (rv < 0) {
1177 					send_fence_result(nodeid, 0, 0, time(NULL));
1178 					node->fence_result_wait = 1;
1179 				}
1180 			} else {
1181 				/* agent exit 1, if there's another agent to run at
1182 				   next priority, set it to run next, otherwise fail */
1183 	
1184 				rv = fence_config_next_priority(&node->fence_config);
1185 				if (rv < 0) {
1186 					send_fence_result(nodeid, result, 0, time(NULL));
1187 					node->fence_result_wait = 1;
1188 				}
1189 			}
1190 		}
1191 	
1192 		/*
1193 		 * clear fence_in_progress_unknown
1194 		 */
1195 	 out_fipu:
1196 		if (opt(enable_startup_fencing_ind) &&
1197 		    fence_in_progress_unknown &&
1198 		    list_empty(&startup_nodes) &&
1199 		    !wait_clear_fipu &&
1200 		    !nodes_need_fencing()) {
1201 			/*
1202 			 * case A in comment above
1203 			 * all nodes are starting and have fipu set, they all do
1204 			 * startup fencing, and eventually see unknown nodes become
1205 			 * members or get fenced, so all clear fipu for themselves.
1206 			 */
1207 			fence_in_progress_unknown = 0;
1208 			log_debug("fence_in_progress_unknown 0 startup");
1209 		}
1210 	
1211 		if (!fence_in_progress_unknown) {
1212 			/*
1213 			 * case B in comment above
1214 			 * some cur nodes have fipu clear, new nodes have fipu set.
1215 			 * A current node needs to send_fence_clear to the new nodes
1216 			 * once all fencing is done so they clear fipu.
1217 			 */
1218 			low = 0;
1219 			need = 0;
1220 	
1221 			list_for_each_entry(node, &daemon_nodes, list) {
1222 				if (node->need_fencing)
1223 					need++;
1224 				if (!node->daemon_member || node->need_fence_clear)
1225 					continue;
1226 				if (!low || node->nodeid < low)
1227 					low = node->nodeid;
1228 			}
1229 	
1230 			list_for_each_entry(node, &daemon_nodes, list) {
1231 				if (!node->daemon_member || !node->need_fence_clear)
1232 					continue;
1233 				if (node->nodeid == our_nodeid) {
1234 					node->need_fence_clear = 0;
1235 					continue;
1236 				}
1237 				if (low != our_nodeid)
1238 					continue;
1239 	
1240 				flags = 0;
1241 	
1242 				if (node->need_fence_clear & FR_CLEAR_STARTUP) {
1243 					flags |= FR_CLEAR_STARTUP;
1244 					node->need_fence_clear &= ~FR_CLEAR_STARTUP;
1245 				}
1246 	
1247 				if ((node->need_fence_clear & FR_CLEAR_FIPU) && !need) {
1248 					flags |= FR_CLEAR_FIPU;
1249 					node->need_fence_clear &= ~FR_CLEAR_FIPU;
1250 				}
1251 	
1252 				if (!flags)
1253 					continue;
1254 	
1255 				send_fence_clear(node->nodeid, 0, flags, 0);
1256 			}
1257 		}
1258 	
1259 		if (!opt(enable_startup_fencing_ind) && fence_in_progress_unknown) {
1260 			/*
1261 			 * case C in comment above
1262 			 * all nodes are starting and have fipu set.  All expect a
1263 			 * previous node to send_fence_clear so they can clear fipu.
1264 			 * But there are no previous nodes. They need to detect this
1265 			 * condition.  Each node does send_fence_clear(ENODATA,FIPU).
1266 			 * When all have received this from all, condition is
1267 			 * detected and all clear fipu.
1268 			 */
1269 			if (all_daemon_members_fipu()) {
1270 				fence_in_progress_unknown = 0;
1271 				log_debug("fence_in_progress_unknown 0 all_fipu");
1272 			} else if (last_join_seq > send_fipu_seq) {
1273 				/* the seq numbers keep us from spamming this msg */
1274 				send_fence_clear(our_nodeid, -ENODATA, FR_FIPU, 0);
1275 				log_debug("send_fence_clear %d fipu", our_nodeid);
1276 				send_fipu_seq = last_join_seq;
1277 			}
1278 		}
1279 	
1280 		/*
1281 		 * clean up a zombie pid from an agent we killed
1282 		 */
1283 	
1284 		if (zombie_count)
1285 			clear_zombies();
1286 	
1287 		/*
1288 		 * setting retry_fencing will cause the main daemon poll loop
1289 		 * to timeout in 1 second and call this function again.
1290 		 */
1291 	 out:
1292 		if (retry)
1293 			retry_fencing++;
1294 		else
1295 			retry_fencing = 0;
1296 	}
1297 	
/* Entry point called from the main daemon loop when fencing-related state
   (membership, quorum, agent results) may have changed; all the work is
   done by daemon_fence_work(). */
void process_fencing_changes(void)
{
	daemon_fence_work();
}
1302 	
1303 	static void receive_fence_clear(struct dlm_header *hd, int len)
1304 	{
1305 		struct fence_result *fr;
1306 		struct node_daemon *node;
1307 		int count;
1308 	
1309 		fr = (struct fence_result *)((char *)hd + sizeof(struct dlm_header));
1310 	
1311 		fr->flags          = le32_to_cpu(fr->flags);
1312 		fr->nodeid         = le32_to_cpu(fr->nodeid);
1313 		fr->result         = le32_to_cpu(fr->result);
1314 		fr->fence_walltime = le64_to_cpu(fr->fence_walltime);
1315 	
1316 		if (len < sizeof(struct dlm_header) + sizeof(struct fence_result)) {
1317 			log_error("receive_fence_clear invalid len %d from %d",
1318 				  len, hd->nodeid);
1319 			return;
1320 		}
1321 	
1322 		node = get_node_daemon(fr->nodeid);
1323 		if (!node) {
1324 			log_error("receive_fence_clear from %d no daemon node %d",
1325 				  hd->nodeid, fr->nodeid);
1326 			return;
1327 		}
1328 	
1329 		log_debug("receive_fence_clear from %d for %d result %d flags %x",
1330 			  hd->nodeid, fr->nodeid, fr->result, fr->flags);
1331 	
1332 		/*
1333 		 * A node sends this message about itself indicating that it's in
1334 		 * startup with fipu set.  The only time we care about node->fipu
1335 		 * is when all nodes are fipu in startup. node->need_fence_clear
1336 		 * and node->fipu are not related, they address different cases.
1337 		 */
1338 		if ((fr->result == -ENODATA) && (fr->flags & FR_FIPU)) {
1339 			if (!fence_in_progress_unknown)
1340 				return;
1341 	
1342 			node->fence_in_progress_unknown = 1;
1343 			return;
1344 		}
1345 	
1346 		/*
1347 		 * An previous member sends this to new members to tell them that
1348 		 * they can clear startup_nodes and clear fipu.  These two flags
1349 		 * may come in separate messages if there is a pending fencing op
1350 		 * when the new member joins (CLEAR_STARTUP will come right away,
1351 		 * but CLEAR_FIPU will come once the fencing op is done.)
1352 		 *
1353 		 * We need wait_clear_fipu after emptying startup_nodes to avoid
1354 		 * thinking we've finished startup fencing in case A below, and
1355 		 * clearing fipu ourselves.
1356 		 */
1357 		if (!fr->result && (node->nodeid == our_nodeid)) {
1358 			if ((fr->flags & FR_CLEAR_STARTUP) && !list_empty(&startup_nodes)) {
1359 				count = clear_startup_node(0, 1);
1360 				log_debug("clear_startup_nodes %d", count);
1361 				wait_clear_fipu = 1;
1362 			}
1363 	
1364 			if ((fr->flags & FR_CLEAR_FIPU) && fence_in_progress_unknown) {
1365 				fence_in_progress_unknown = 0;
1366 				log_debug("fence_in_progress_unknown 0 recv");
1367 				wait_clear_fipu = 0;
1368 			}
1369 		}
1370 	
1371 		/* this node doesn't need these flags any more */
1372 		if (!fr->result) {
1373 			if (fr->flags & FR_CLEAR_STARTUP)
1374 				node->need_fence_clear &= ~FR_CLEAR_STARTUP;
1375 			if (fr->flags & FR_CLEAR_FIPU)
1376 				node->need_fence_clear &= ~FR_CLEAR_FIPU;
1377 		}
1378 	}
1379 	
1380 	static void send_fence_clear(int nodeid, int result, uint32_t flags, uint64_t walltime)
1381 	{
1382 		struct dlm_header *hd;
1383 		struct fence_result *fr;
1384 		char *buf;
1385 		int len;
1386 	
1387 		len = sizeof(struct dlm_header) + sizeof(struct fence_result);
1388 		buf = malloc(len);
1389 		if (!buf) {
1390 			log_error("send_fence_clear no mem %d", len);
1391 			return;
1392 		}
1393 		memset(buf, 0, len);
1394 	
1395 		hd = (struct dlm_header *)buf;
1396 		fr = (struct fence_result *)(buf + sizeof(*hd));
1397 	
1398 		hd->type = cpu_to_le16(DLM_MSG_FENCE_CLEAR);
1399 		hd->nodeid = cpu_to_le32(our_nodeid);
1400 	
1401 		fr->flags          = cpu_to_le32(flags);
1402 		fr->nodeid         = cpu_to_le32(nodeid);
1403 		fr->result         = cpu_to_le32(result);
1404 		fr->fence_walltime = cpu_to_le64(walltime);
1405 	
1406 		_send_message(cpg_handle_daemon, buf, len, DLM_MSG_FENCE_CLEAR);
1407 	}
1408 	
1409 	static void receive_fence_result(struct dlm_header *hd, int len)
1410 	{
1411 		struct fence_result *fr;
1412 		struct node_daemon *node;
1413 		uint64_t now;
1414 		int count;
1415 	
1416 		fr = (struct fence_result *)((char *)hd + sizeof(struct dlm_header));
1417 	
1418 		fr->flags          = le32_to_cpu(fr->flags);
1419 		fr->nodeid         = le32_to_cpu(fr->nodeid);
1420 		fr->result         = le32_to_cpu(fr->result);
1421 		fr->fence_walltime = le64_to_cpu(fr->fence_walltime);
1422 	
1423 		if (len < sizeof(struct dlm_header) + sizeof(struct fence_result)) {
1424 			log_error("receive_fence_result invalid len %d from %d",
1425 				  len, hd->nodeid);
1426 			return;
1427 		}
1428 	
1429 		count = clear_startup_node(fr->nodeid, 0);
1430 		if (count) {
1431 			log_debug("receive_fence_result %d from %d clear startup",
1432 				  fr->nodeid, hd->nodeid);
1433 			return;
1434 		}
1435 	
1436 		node = get_node_daemon(fr->nodeid);
1437 		if (!node) {
1438 			log_error("receive_fence_result %d from %d result %d no daemon node",
1439 				  fr->nodeid, hd->nodeid, fr->result);
1440 			return;
1441 		}
1442 	
1443 		if (!node->need_fencing) {
1444 			/* should never happen ... will happen if a manual fence_ack is
1445 			   done for a node that doesn't need it */
1446 			log_error("receive_fence_result %d from %d result %d no need_fencing",
1447 			  	  fr->nodeid, hd->nodeid, fr->result);
1448 			return;
1449 		}
1450 	
1451 		if ((hd->nodeid == our_nodeid) && !node->fence_result_wait && (fr->result != -ECANCELED)) {
1452 			/* should never happen */
1453 			log_error("receive_fence_result %d from %d result %d no fence_result_wait",
1454 				  fr->nodeid, hd->nodeid, fr->result);
1455 			/* should we ignore and return here? */
1456 		}
1457 	
1458 		if (node->daemon_member &&
1459 		    (!fr->result || (fr->result == -ECANCELED))) {
1460 	
1461 			/*
1462 			 * The node was successfully fenced, but is still a member.
1463 			 * This will happen when there is a partition, storage fencing
1464 			 * is started, a merge causes the node to become a member
1465 			 * again, and storage fencing completes successfully.  If we
1466 			 * received a proto message from the node after the merge, then
1467 			 * we will have detected a stateful merge, and we may have
1468 			 * already killed it.
1469 			 */
1470 	
1471 			log_error("receive_fence_result %d from %d result %d node is daemon_member",
1472 				  fr->nodeid, hd->nodeid, fr->result);
1473 	
1474 			kick_node_from_cluster(fr->nodeid);
1475 		}
1476 	
1477 		if ((hd->nodeid == our_nodeid) && (fr->result != -ECANCELED))
1478 			node->fence_result_wait = 0;
1479 	
1480 		now = monotime();
1481 	
1482 		log_error("fence status %d receive %d from %d walltime %llu local %llu",
1483 			  fr->nodeid, fr->result, hd->nodeid,
1484 			  (unsigned long long)fr->fence_walltime,
1485 			  (unsigned long long)now);
1486 	
1487 		if (!fr->result || (fr->result == -ECANCELED)) {
1488 			node->need_fencing = 0;
1489 			node->delay_fencing = 0;
1490 			node->fence_walltime = fr->fence_walltime;
1491 			node->fence_monotime = now;
1492 			node->fence_actor_done = hd->nodeid;
1493 		} else {
1494 			/* causes the next lowest nodeid to request fencing */
1495 			clear_fence_actor(fr->nodeid, hd->nodeid);
1496 		}
1497 	
1498 		if ((fr->result == -ECANCELED) && node->fence_pid_wait && node->fence_pid) {
1499 			fence_pid_cancel(node->nodeid, node->fence_pid);
1500 	
1501 			node->fence_pid_wait = 0;
1502 			node->fence_pid = 0;
1503 			daemon_fence_pid = 0;
1504 		}
1505 	}
1506 	
1507 	static void send_fence_result(int nodeid, int result, uint32_t flags, uint64_t walltime)
1508 	{
1509 		struct dlm_header *hd;
1510 		struct fence_result *fr;
1511 		char *buf;
1512 		int len;
1513 	
1514 		len = sizeof(struct dlm_header) + sizeof(struct fence_result);
1515 		buf = malloc(len);
1516 		if (!buf) {
1517 			log_error("send_fence_result no mem %d", len);
1518 			return;
1519 		}
1520 		memset(buf, 0, len);
1521 	
1522 		hd = (struct dlm_header *)buf;
1523 		fr = (struct fence_result *)(buf + sizeof(*hd));
1524 	
1525 		hd->type = cpu_to_le16(DLM_MSG_FENCE_RESULT);
1526 		hd->nodeid = cpu_to_le32(our_nodeid);
1527 	
1528 		fr->flags          = cpu_to_le32(flags);
1529 		fr->nodeid         = cpu_to_le32(nodeid);
1530 		fr->result         = cpu_to_le32(result);
1531 		fr->fence_walltime = cpu_to_le64(walltime);
1532 	
1533 		_send_message(cpg_handle_daemon, buf, len, DLM_MSG_FENCE_RESULT);
1534 	}
1535 	
/* Manual fence acknowledgment (dlm_tool fence_ack): broadcast -ECANCELED
   for nodeid so all nodes treat its fencing as complete and cancel any
   running agent for it (see receive_fence_result). */
void fence_ack_node(int nodeid)
{
	send_fence_result(nodeid, -ECANCELED, 0, time(NULL));
}
1540 	
/* Mark our running daemon protocol version as stateful; other nodes use
   this flag in our proto messages to detect a stateful merge after a
   partition (see receive_protocol). */
void set_protocol_stateful(void)
{
	our_protocol.dr_ver.flags |= PV_STATEFUL;
}
1545 	
1546 	static void pv_in(struct protocol_version *pv)
1547 	{
1548 		pv->major = le16_to_cpu(pv->major);
1549 		pv->minor = le16_to_cpu(pv->minor);
1550 		pv->patch = le16_to_cpu(pv->patch);
1551 		pv->flags = le16_to_cpu(pv->flags);
1552 	}
1553 	
1554 	static void pv_out(struct protocol_version *pv)
1555 	{
1556 		pv->major = cpu_to_le16(pv->major);
1557 		pv->minor = cpu_to_le16(pv->minor);
1558 		pv->patch = cpu_to_le16(pv->patch);
1559 		pv->flags = cpu_to_le16(pv->flags);
1560 	}
1561 	
1562 	static void protocol_in(struct protocol *proto)
1563 	{
1564 		pv_in(&proto->dm_ver);
1565 		pv_in(&proto->km_ver);
1566 		pv_in(&proto->dr_ver);
1567 		pv_in(&proto->kr_ver);
1568 	}
1569 	
1570 	static void protocol_out(struct protocol *proto)
1571 	{
1572 		pv_out(&proto->dm_ver);
1573 		pv_out(&proto->km_ver);
1574 		pv_out(&proto->dr_ver);
1575 		pv_out(&proto->kr_ver);
1576 	}
1577 	
1578 	/* go through member list saved in last confchg, see if we have received a
1579 	   proto message from each */
1580 	
1581 	static int all_protocol_messages(void)
1582 	{
1583 		struct node_daemon *node;
1584 		int i;
1585 	
1586 		if (!daemon_member_count)
1587 			return 0;
1588 	
1589 		for (i = 0; i < daemon_member_count; i++) {
1590 			node = get_node_daemon(daemon_member[i].nodeid);
1591 			if (!node) {
1592 				log_error("all_protocol_messages no node %d",
1593 					  daemon_member[i].nodeid);
1594 				return 0;
1595 			}
1596 	
1597 			if (!node->proto.daemon_max[0])
1598 				return 0;
1599 		}
1600 		return 1;
1601 	}
1602 	
1603 	static int pick_min_protocol(struct protocol *proto)
1604 	{
1605 		uint16_t mind[4];
1606 		uint16_t mink[4];
1607 		struct node_daemon *node;
1608 		int i;
1609 	
1610 		memset(&mind, 0, sizeof(mind));
1611 		memset(&mink, 0, sizeof(mink));
1612 	
1613 		/* first choose the minimum major */
1614 	
1615 		for (i = 0; i < daemon_member_count; i++) {
1616 			node = get_node_daemon(daemon_member[i].nodeid);
1617 			if (!node) {
1618 				log_error("pick_min_protocol no node %d",
1619 					  daemon_member[i].nodeid);
1620 				return -1;
1621 			}
1622 	
1623 			if (!mind[0] || node->proto.daemon_max[0] < mind[0])
1624 				mind[0] = node->proto.daemon_max[0];
1625 	
1626 			if (!mink[0] || node->proto.kernel_max[0] < mink[0])
1627 				mink[0] = node->proto.kernel_max[0];
1628 		}
1629 	
1630 		if (!mind[0] || !mink[0]) {
1631 			log_error("pick_min_protocol zero major number");
1632 			return -1;
1633 		}
1634 	
1635 		/* second pick the minimum minor with the chosen major */
1636 	
1637 		for (i = 0; i < daemon_member_count; i++) {
1638 			node = get_node_daemon(daemon_member[i].nodeid);
1639 			if (!node)
1640 				continue;
1641 	
1642 			if (mind[0] == node->proto.daemon_max[0]) {
1643 				if (!mind[1] || node->proto.daemon_max[1] < mind[1])
1644 					mind[1] = node->proto.daemon_max[1];
1645 			}
1646 	
1647 			if (mink[0] == node->proto.kernel_max[0]) {
1648 				if (!mink[1] || node->proto.kernel_max[1] < mink[1])
1649 					mink[1] = node->proto.kernel_max[1];
1650 			}
1651 		}
1652 	
1653 		if (!mind[1] || !mink[1]) {
1654 			log_error("pick_min_protocol zero minor number");
1655 			return -1;
1656 		}
1657 	
1658 		/* third pick the minimum patch with the chosen major.minor */
1659 	
1660 		for (i = 0; i < daemon_member_count; i++) {
1661 			node = get_node_daemon(daemon_member[i].nodeid);
1662 			if (!node)
1663 				continue;
1664 	
1665 			if (mind[0] == node->proto.daemon_max[0] &&
1666 			    mind[1] == node->proto.daemon_max[1]) {
1667 				if (!mind[2] || node->proto.daemon_max[2] < mind[2])
1668 					mind[2] = node->proto.daemon_max[2];
1669 			}
1670 	
1671 			if (mink[0] == node->proto.kernel_max[0] &&
1672 			    mink[1] == node->proto.kernel_max[1]) {
1673 				if (!mink[2] || node->proto.kernel_max[2] < mink[2])
1674 					mink[2] = node->proto.kernel_max[2];
1675 			}
1676 		}
1677 	
1678 		if (!mind[2] || !mink[2]) {
1679 			log_error("pick_min_protocol zero patch number");
1680 			return -1;
1681 		}
1682 	
1683 		memcpy(&proto->daemon_run, &mind, sizeof(mind));
1684 		memcpy(&proto->kernel_run, &mink, sizeof(mink));
1685 		return 0;
1686 	}
1687 	
1688 	static void receive_protocol(struct dlm_header *hd, int len)
1689 	{
1690 		struct protocol *p;
1691 		struct node_daemon *node;
1692 		int new = 0;
1693 	
1694 		p = (struct protocol *)((char *)hd + sizeof(struct dlm_header));
1695 		protocol_in(p);
1696 	
1697 		if (len < sizeof(struct dlm_header) + sizeof(struct protocol)) {
1698 			log_error("receive_protocol invalid len %d from %d",
1699 				  len, hd->nodeid);
1700 			return;
1701 		}
1702 	
1703 		/* zero is an invalid version value */
1704 	
1705 		if (!p->daemon_max[0] || !p->daemon_max[1] || !p->daemon_max[2] ||
1706 		    !p->kernel_max[0] || !p->kernel_max[1] || !p->kernel_max[2]) {
1707 			log_error("receive_protocol invalid max value from %d "
1708 				  "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid,
1709 				  p->daemon_max[0], p->daemon_max[1], p->daemon_max[2],
1710 				  p->kernel_max[0], p->kernel_max[1], p->kernel_max[2]);
1711 			return;
1712 		}
1713 	
1714 		/* the run values will be zero until a version is set, after
1715 		   which none of the run values can be zero */
1716 	
1717 		if (p->daemon_run[0] && (!p->daemon_run[1] || !p->daemon_run[2] ||
1718 		    !p->kernel_run[0] || !p->kernel_run[1] || !p->kernel_run[2])) {
1719 			log_error("receive_protocol invalid run value from %d "
1720 				  "daemon %u.%u.%u kernel %u.%u.%u", hd->nodeid,
1721 				  p->daemon_run[0], p->daemon_run[1], p->daemon_run[2],
1722 				  p->kernel_run[0], p->kernel_run[1], p->kernel_run[2]);
1723 			return;
1724 		}
1725 	
1726 		/* save this node's proto so we can tell when we've got all, and
1727 		   use it to select a minimum protocol from all */
1728 	
1729 		node = get_node_daemon(hd->nodeid);
1730 		if (!node) {
1731 			log_error("receive_protocol no node %d", hd->nodeid);
1732 			return;
1733 		}
1734 	
1735 		if (!node->daemon_member) {
1736 			log_error("receive_protocol node %d not member", hd->nodeid);
1737 			return;
1738 		}
1739 	
1740 		log_debug("receive_protocol %d max %u.%u.%u.%x run %u.%u.%u.%x",
1741 			  hd->nodeid,
1742 			  p->daemon_max[0], p->daemon_max[1],
1743 			  p->daemon_max[2], p->daemon_max[3],
1744 			  p->daemon_run[0], p->daemon_run[1],
1745 			  p->daemon_run[2], p->daemon_run[3]);
1746 	
1747 		if (memcmp(&node->proto, p, sizeof(struct protocol))) {
1748 			log_debug("daemon node %d prot max %u.%u.%u.%x run %u.%u.%u.%x",
1749 				  hd->nodeid,
1750 				  node->proto.daemon_max[0], node->proto.daemon_max[1],
1751 				  node->proto.daemon_max[2], node->proto.daemon_max[3],
1752 				  node->proto.daemon_run[0], node->proto.daemon_run[1],
1753 				  node->proto.daemon_run[2], node->proto.daemon_run[3]);
1754 			new = 1;
1755 		}
1756 	
1757 		/* checking zero node->daemon_max[0] is a way to tell if we've received
1758 		   an acceptable (non-stateful) proto message from the node since we
1759 		   saw it join the daemon cpg */
1760 	
1761 		if (hd->nodeid != our_nodeid &&
1762 		    !node->proto.daemon_max[0] &&
1763 		    (p->dr_ver.flags & PV_STATEFUL) &&
1764 		    (our_protocol.dr_ver.flags & PV_STATEFUL)) {
1765 	
1766 			log_error("daemon node %d stateful merge", hd->nodeid);
1767 			log_debug("daemon node %d join %llu left %llu local quorum %llu killed %d",
1768 				  hd->nodeid,
1769 				  (unsigned long long)node->daemon_add_time,
1770 				  (unsigned long long)node->daemon_rem_time,
1771 				  (unsigned long long)cluster_quorate_monotime,
1772 				  node->killed);
1773 	
1774 			node->stateful_merge = 1;
1775 	
1776 			if (cluster_quorate && node->daemon_rem_time &&
1777 			    cluster_quorate_monotime < node->daemon_rem_time) {
1778 				if (!node->killed) {
1779 					if (cluster_two_node) {
1780 						/*
1781 						 * When there are two nodes and two_node mode
1782 						 * is used, both will have quorum throughout
1783 						 * the partition and subsequent stateful merge.
1784 						 *
1785 						 * - both will race to fence each other in
1786 						 *   response to the partition
1787 						 *
1788 						 * - both can attempt to kill the cluster
1789 						 *   on the other in response to the stateful
1790 						 *   merge here
1791 						 *
1792 						 * - we don't want both nodes to kill the cluster
1793 						 *   on the other, which can happen if the merge
1794 						 *   occurs before power fencing is successful,
1795 						 *   or can happen before/during/after storage
1796 						 *   fencing
1797 						 *
1798 						 * - if nodeA successfully fences nodeB (due
1799 						 *   to the partition), we want nodeA to kill
1800 						 *   the cluster on nodeB in response to the
1801 						 *   merge (we don't want nodeB to kill nodeA
1802 						 *   in response to the merge).
1803 						 *
1804 						 * So, a node that has successfully fenced the
1805 						 * other will kill the cluster on it. If fencing
1806 						 * is still running, we wait until it's
1807 						 * successfull to kill the cluster on the node
1808 						 * being fenced.
1809 						 */
1810 						if (nodeid_needs_fencing(hd->nodeid)) {
1811 							/* when fencing completes successfully,
1812 							   we'll see the node is a daemon member
1813 							   and kill it */
1814 							log_debug("daemon node %d delay kill for stateful merge", hd->nodeid);
1815 						} else {
1816 							log_error("daemon node %d kill due to stateful merge", hd->nodeid);
1817 							kick_node_from_cluster(hd->nodeid);
1818 						}
1819 					} else {
1820 						log_error("daemon node %d kill due to stateful merge", hd->nodeid);
1821 						kick_node_from_cluster(hd->nodeid);
1822 					}
1823 				}
1824 				node->killed = 1;
1825 			}
1826 	
1827 			/* don't save p->proto into node->proto; we need to come
1828 			   through here based on zero daemon_max[0] for other proto
1829 			   messages like this one from the same node */
1830 	
1831 			return;
1832 		}
1833 	
1834 		if (new) {
1835 			memcpy(&node->proto, p, sizeof(struct protocol));
1836 	
1837 			log_debug("daemon node %d save max %u.%u.%u.%x run %u.%u.%u.%x",
1838 				  node->nodeid,
1839 				  node->proto.daemon_max[0], node->proto.daemon_max[1],
1840 				  node->proto.daemon_max[2], node->proto.daemon_max[3],
1841 				  node->proto.daemon_run[0], node->proto.daemon_run[1],
1842 				  node->proto.daemon_run[2], node->proto.daemon_run[3]);
1843 		}
1844 	
1845 		/* if we have zero run values, and this msg has non-zero run values,
1846 		   then adopt them as ours; otherwise save this proto message */
1847 	
1848 		if (our_protocol.daemon_run[0])
1849 			return;
1850 	
1851 		if (p->daemon_run[0]) {
1852 			our_protocol.daemon_run[0] = p->daemon_run[0];
1853 			our_protocol.daemon_run[1] = p->daemon_run[1];
1854 			our_protocol.daemon_run[2] = p->daemon_run[2];
1855 	
1856 			our_protocol.kernel_run[0] = p->kernel_run[0];
1857 			our_protocol.kernel_run[1] = p->kernel_run[1];
1858 			our_protocol.kernel_run[2] = p->kernel_run[2];
1859 	
1860 			log_debug("run protocol from nodeid %d", hd->nodeid);
1861 		}
1862 	}
1863 	
1864 	static void send_protocol(struct protocol *proto)
1865 	{
1866 		struct dlm_header *hd;
1867 		struct protocol *pr;
1868 		char *buf;
1869 		int len;
1870 	
1871 		len = sizeof(struct dlm_header) + sizeof(struct protocol);
1872 		buf = malloc(len);
1873 		if (!buf) {
1874 			log_error("send_protocol no mem %d", len);
1875 			return;
1876 		}
1877 		memset(buf, 0, len);
1878 	
1879 		hd = (struct dlm_header *)buf;
1880 		pr = (struct protocol *)(buf + sizeof(*hd));
1881 	
1882 		hd->type = cpu_to_le16(DLM_MSG_PROTOCOL);
1883 		hd->nodeid = cpu_to_le32(our_nodeid);
1884 	
1885 		memcpy(pr, proto, sizeof(struct protocol));
1886 		protocol_out(pr);
1887 	
1888 		_send_message(cpg_handle_daemon, buf, len, DLM_MSG_PROTOCOL);
1889 		free(buf);
1890 	}
1891 	
/*
 * Establish the run protocol with all current daemon cpg members.
 * Blocks, polling only the daemon cpg fd, until our_protocol.daemon_run
 * is set (by receive_protocol adopting run values): once we have max
 * versions from every member we propose the member-wide minimum.
 * Returns 0 on success, -1 on error or daemon shutdown.
 */
int set_protocol(void)
{
	struct protocol proto;
	struct pollfd pollfd;
	cs_error_t error;
	int sent_proposal = 0;
	int rv;

	memset(&pollfd, 0, sizeof(pollfd));
	pollfd.fd = cpg_fd_daemon;
	pollfd.events = POLLIN;

	/* loop until receive_protocol() fills in our_protocol.daemon_run */
	while (1) {
		if (our_protocol.daemon_run[0])
			break;

		if (!sent_proposal && all_protocol_messages()) {
			/* propose a protocol; look through info from all
			   nodes and pick the min for both daemon and kernel,
			   and propose that */

			sent_proposal = 1;

			/* copy our max values */
			memcpy(&proto, &our_protocol, sizeof(struct protocol));

			rv = pick_min_protocol(&proto);
			if (rv < 0)
				return rv;

			log_debug("set_protocol member_count %d propose "
				  "daemon %u.%u.%u kernel %u.%u.%u",
				  daemon_member_count,
				  proto.daemon_run[0], proto.daemon_run[1],
				  proto.daemon_run[2], proto.kernel_run[0],
				  proto.kernel_run[1], proto.kernel_run[2]);

			send_protocol(&proto);
		}

		/* only process messages/events from daemon cpg until protocol
		   is established */

		rv = poll(&pollfd, 1, -1);
		if (rv == -1 && errno == EINTR) {
			/* interrupted by a signal; bail out if shutting down */
			if (daemon_quit)
				return -1;
			continue;
		}
		if (rv < 0) {
			log_error("set_protocol poll errno %d", errno);
			return -1;
		}

		if (pollfd.revents & POLLIN) {
			/*
			 * don't use process_cpg_daemon() because we only want
			 * to dispatch one thing at a time, and only want to
			 * handle protocol related things here.
			 */

			error = cpg_dispatch(cpg_handle_daemon, CS_DISPATCH_ONE);
			if (error != CS_OK && error != CS_ERR_BAD_HANDLE)
				log_error("daemon cpg_dispatch one error %d", error);
		}
		if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
			log_error("set_protocol poll revents %u",
				  pollfd.revents);
			return -1;
		}
	}

	/* run major must equal our max major, and run minor cannot exceed
	   our max minor, or we can't participate */

	if (our_protocol.daemon_run[0] != our_protocol.daemon_max[0] ||
	    our_protocol.daemon_run[1] > our_protocol.daemon_max[1]) {
		log_error("incompatible daemon protocol run %u.%u.%u max %u.%u.%u",
			our_protocol.daemon_run[0],
			our_protocol.daemon_run[1],
			our_protocol.daemon_run[2],
			our_protocol.daemon_max[0],
			our_protocol.daemon_max[1],
			our_protocol.daemon_max[2]);
		return -1;
	}

	if (our_protocol.kernel_run[0] != our_protocol.kernel_max[0] ||
	    our_protocol.kernel_run[1] > our_protocol.kernel_max[1]) {
		log_error("incompatible kernel protocol run %u.%u.%u max %u.%u.%u",
			our_protocol.kernel_run[0],
			our_protocol.kernel_run[1],
			our_protocol.kernel_run[2],
			our_protocol.kernel_max[0],
			our_protocol.kernel_max[1],
			our_protocol.kernel_max[2]);
		return -1;
	}

	log_debug("daemon run %u.%u.%u max %u.%u.%u "
		  "kernel run %u.%u.%u max %u.%u.%u",
		  our_protocol.daemon_run[0],
		  our_protocol.daemon_run[1],
		  our_protocol.daemon_run[2],
		  our_protocol.daemon_max[0],
		  our_protocol.daemon_max[1],
		  our_protocol.daemon_max[2],
		  our_protocol.kernel_run[0],
		  our_protocol.kernel_run[1],
		  our_protocol.kernel_run[2],
		  our_protocol.kernel_max[0],
		  our_protocol.kernel_max[1],
		  our_protocol.kernel_max[2]);

	/* announce the established run protocol to the cpg */
	send_protocol(&our_protocol);
	return 0;
}
2006 	
2007 	static void deliver_cb_daemon(cpg_handle_t handle,
2008 				      const struct cpg_name *group_name,
2009 				      uint32_t nodeid, uint32_t pid,
2010 				      void *data, size_t len)
2011 	{
2012 		struct dlm_header *hd;
2013 	
2014 		if (len < sizeof(*hd)) {
2015 			log_error("deliver_cb short message %zd", len);
2016 			return;
2017 		}
2018 	
2019 		hd = (struct dlm_header *)data;
2020 		dlm_header_in(hd);
2021 	
2022 		if (!daemon_fence_allow && hd->type != DLM_MSG_PROTOCOL) {
2023 			/* don't think this will happen; if it does we may
2024 			   need to verify that it's correct to ignore these
2025 			   messages instead of saving them to process after
2026 			   allow is set */
2027 			log_debug("deliver_cb_daemon ignore non proto msg %d", hd->type);
2028 			return;
2029 		}
2030 	
2031 		switch (hd->type) {
2032 		case DLM_MSG_PROTOCOL:
2033 			receive_protocol(hd, len);
2034 			break;
2035 		case DLM_MSG_FENCE_RESULT:
2036 			receive_fence_result(hd, len);
2037 			break;
2038 		case DLM_MSG_FENCE_CLEAR:
2039 			receive_fence_clear(hd, len);
2040 			break;
2041 		case DLM_MSG_RUN_REQUEST:
2042 			receive_run_request(hd, len);
2043 			break;
2044 		case DLM_MSG_RUN_REPLY:
2045 			receive_run_reply(hd, len);
2046 			break;
2047 		default:
2048 			log_error("deliver_cb_daemon unknown msg type %d", hd->type);
2049 		}
2050 	
2051 		daemon_fence_work();
2052 	}
2053 	
/*
 * Handle a run reply received on the daemon cpg.  Only the node that
 * started the run keeps track of results: it matches the sender against
 * the run's node_results table, records the sender's local result, and
 * updates the reply/failure counters.  Always returns 0.
 */
int receive_run_reply(struct dlm_header *hd, int len)
{
	struct run_reply *rep = (struct run_reply *)hd;
	struct run *run;
	int i;

	run_reply_in(rep);

	log_debug("receive_run_reply %s from %d result %d",
		  rep->uuid, hd->nodeid, rep->info.local_result);

	if (!opt(enable_helper_ind)) {
		log_debug("receive_run_reply %s helper not enabled", rep->uuid);
		return 0;
	}

	/* the uuid identifies the run op this reply belongs to */
	run = find_run(rep->uuid);
	if (!run) {
		log_debug("receive_run_reply no uuid %s", rep->uuid);
		return 0;
	}

	/*
	 * Only the starting node keeps track of results.
	 */
	if (run->info.start_nodeid != our_nodeid)
		return 0;

	/* a malformed reply still counts toward reply accounting so the
	   starting node does not wait forever for this sender */
	if (len != sizeof(struct run_reply)) {
		log_debug("receive_run_reply %s bad len %d expect %zu",
			  rep->uuid, len, sizeof(struct run_reply));
		run->info.reply_count++;
		run->info.need_replies--;
		return 0;
	}

	/* record the sender's result in its node_results slot */
	for (i = 0; i < run->node_count; i++) {
		if (run->node_results[i].nodeid != hd->nodeid)
			continue;

		/* shouldn't happen? duplicate reply from the same node */
		if (run->node_results[i].replied)
			break;

		run->node_results[i].result = rep->info.local_result;
		run->node_results[i].replied = 1;

		/* non-zero local_result means the command failed on that node */
		if (rep->info.local_result)
			run->info.fail_count++;

		run->info.reply_count++;
		run->info.need_replies--;

		/*
		log_debug("run reply_count %d need_replies %d fail_count %d",
			  run->info.reply_count, run->info.need_replies, run->info.fail_count);
		*/
		break;
	}

	return 0;
}
2116 	
2117 	int receive_run_request(struct dlm_header *hd, int len)
2118 	{
2119 		struct run_request *req = (struct run_request *)hd;
2120 		struct run *run = NULL;
2121 	
2122 		run_request_in(req);
2123 	
2124 		log_debug("receive_run_request %s from %d", req->uuid, hd->nodeid);
2125 	
2126 		if (len != sizeof(struct run_request)) {
2127 			log_debug("receive_run_request %s bad len %d", req->uuid, len);
2128 			/* todo: send reply with failed */
2129 			return 0;
2130 		}
2131 	
2132 		if (req->info.dest_nodeid && (req->info.dest_nodeid != our_nodeid))
2133 			return 0;
2134 	
2135 		if (req->info.start_nodeid == our_nodeid) {
2136 			if (!(req->info.flags & DLMC_FLAG_RUN_START_NODE_RECV)) {
2137 				log_debug("receive_run_request ignore self");
2138 				return 0;
2139 			}
2140 	
2141 			if (!opt(enable_helper_ind)) {
2142 				log_debug("receive_run_request %s helper not enabled", req->uuid);
2143 				return 0;
2144 			}
2145 	
2146 			run = find_run(req->uuid);
2147 			if (!run) {
2148 				log_debug("receive_run_request from self no uuid %s", req->uuid);
2149 				return 0;
2150 			}
2151 	
2152 			log_debug("receive_run_request %s to helper", req->uuid);
2153 	
2154 			send_helper_run_request(req);
2155 			return 0;
2156 		}
2157 	
2158 		if (!opt(enable_helper_ind) && run) {
2159 			log_debug("receive_run_request %s helper not enabled", req->uuid);
2160 			run->info.reply_count++;
2161 			run->info.need_replies--;
2162 			/* todo: send reply with failed */
2163 			return 0;
2164 		}
2165 	
2166 		if (!(run = malloc(sizeof(struct run)))) {
2167 			log_error("receive_run_request %s no mem", req->uuid);
2168 			/* todo: send reply with failed */
2169 			return 0;
2170 		}
2171 	
2172 		memset(run, 0, sizeof(struct run));
2173 	
2174 		memcpy(run->uuid, req->uuid, RUN_UUID_LEN);
2175 		memcpy(run->command, req->command, RUN_COMMAND_LEN);
2176 		run->info.start_nodeid = req->info.start_nodeid;
2177 		run->info.dest_nodeid = req->info.dest_nodeid;
2178 		run->info.flags = req->info.flags;
2179 	
2180 		list_add(&run->list, &run_ops);
2181 	
2182 		log_error("run request %s %.128s", run->uuid, run->command);
2183 	
2184 		log_debug("receive_run_request %s to helper", req->uuid);
2185 	
2186 		send_helper_run_request(req);
2187 		/* todo: if no helper, send reply with failed */
2188 	
2189 		return 0;
2190 	}
2191 	
2192 	int send_run_request(struct run *run, struct run_request *req)
2193 	{
2194 		struct node_daemon *node;
2195 		int i = 0;
2196 		int rv;
2197 	
2198 		list_for_each_entry(node, &daemon_nodes, list) {
2199 			if (!node->daemon_member)
2200 				continue;
2201 	
2202 			/*
2203 			 * When this starting node does not run the command,
2204 			 * there is no reply for our nodeid.
2205 			 */
2206 			if ((node->nodeid == our_nodeid) &&
2207 			    (run->info.flags & DLMC_FLAG_RUN_START_NODE_NONE))
2208 					continue;
2209 	
2210 			/*
2211 			 * The command is only run on one specific node, and
2212 			 * only a reply from that node is needed.
2213 			 */
2214 			if (run->info.dest_nodeid && (node->nodeid != run->info.dest_nodeid))
2215 				continue;
2216 	
2217 			run->node_count++;
2218 			run->node_results[i].nodeid = node->nodeid;
2219 			i++;
2220 		}
2221 	
2222 		run->info.need_replies = run->node_count;
2223 	
2224 		log_debug("send_run_request %s for %d nodes", req->uuid, run->node_count);
2225 	
2226 		run_request_out(req);
2227 	
2228 		rv = dlm_send_message_daemon((char *)req, sizeof(struct run_request));
2229 	
2230 		return rv;
2231 	}
2232 	
2233 	int send_run_reply(struct run *run, struct run_reply *rep)
2234 	{
2235 		int rv;
2236 	
2237 		log_debug("send_run_reply %s result %d", rep->uuid, rep->info.local_result);
2238 	
2239 		run_reply_out(rep);
2240 	
2241 		rv = dlm_send_message_daemon((char *)rep, sizeof(struct run_reply));
2242 	
2243 		/*
2244 		 * If we are not the starting node, clear the run operation.
2245 		 */
2246 		if (rep->info.start_nodeid != our_nodeid)
2247 			clear_run(run);
2248 	
2249 		return rv;
2250 	}
2251 	
/*
 * Daemon cpg configuration change callback.  Records the new member,
 * joined, and left lists in the daemon_* globals, updates per-node
 * daemon_member state, and sets need_fencing for nodes that left due to
 * NODEDOWN/PROCDOWN (when fencing is enabled).  Ends by running any
 * pending fence work.
 */
static void confchg_cb_daemon(cpg_handle_t handle,
			      const struct cpg_name *group_name,
			      const struct cpg_address *member_list,
			      size_t member_list_entries,
			      const struct cpg_address *left_list,
			      size_t left_list_entries,
			      const struct cpg_address *joined_list,
			      size_t joined_list_entries)
{
	struct node_daemon *node;
	uint64_t now, now_wall;
	int nodedown = 0, procdown = 0, leave = 0;
	int check_joined_count = 0, check_remove_count = 0, check_member_count = 0;
	int we_joined = 0;
	int i, reason, low;

	now = monotime();
	now_wall = time(NULL);

	log_config(group_name, member_list, member_list_entries,
		   left_list, left_list_entries,
		   joined_list, joined_list_entries);

	/* snapshot the new membership into the daemon_* globals */
	memset(&daemon_member, 0, sizeof(daemon_member));
	daemon_member_count = member_list_entries;

	for (i = 0; i < member_list_entries; i++) {
		daemon_member[i] = member_list[i];
		/* add struct for nodes we've not seen before */
		add_node_daemon(member_list[i].nodeid);
	}

	memset(&daemon_joined, 0, sizeof(daemon_joined));
	daemon_joined_count = joined_list_entries;

	for (i = 0; i < joined_list_entries; i++) {
		daemon_joined[i] = joined_list[i];
		if (joined_list[i].nodeid == our_nodeid)
			we_joined = 1;
	}

	memset(&daemon_remove, 0, sizeof(daemon_remove));
	daemon_remove_count = left_list_entries;

	/* tally why nodes left: node failure, process failure, clean leave */
	for (i = 0; i < left_list_entries; i++) {
		daemon_remove[i] = left_list[i];

		if (left_list[i].reason == CPG_REASON_NODEDOWN)
			nodedown++;
		else if (left_list[i].reason == CPG_REASON_PROCDOWN)
			procdown++;
		else if (left_list[i].reason == CPG_REASON_LEAVE)
			leave++;
	}

	if (nodedown || procdown || leave)
		log_debug("%s left reason nodedown %d procdown %d leave %d",
			  group_name->value, nodedown, procdown, leave);

	/* a node failure means we should wait for a new totem ring id */
	if (nodedown)
		daemon_ringid_wait = 1;

	/* new members need to learn our protocol */
	if (joined_list_entries)
		send_protocol(&our_protocol);

	/* reconcile per-node daemon_member state with the new member list */
	list_for_each_entry(node, &daemon_nodes, list) {
		if (in_daemon_list(node->nodeid, daemon_member, daemon_member_count)) {
			if (node->daemon_member)
				continue;

			check_joined_count++;

			/* node joined daemon cpg */
			node->daemon_member = 1;
			node->daemon_add_time = now;

			fence_delay_begin = now;
			last_join_seq++;

			/* a joining node shows prev members in joined list */
			if (!we_joined)
				node->need_fence_clear = FR_CLEAR_STARTUP|FR_CLEAR_FIPU;

			if (node->need_fencing) {
				/* need_fencing will be cleared if we accept a
				   valid proto from it (is_clean_daemon_member) */
				log_error("daemon joined %d needs fencing", node->nodeid);
			} else {
				log_debug("daemon joined %d", node->nodeid);
			}
		} else {
			if (!node->daemon_member)
				continue;

			check_remove_count++;

			/* node left daemon cpg */
			node->daemon_member = 0;
			node->daemon_rem_time = now;
			node->killed = 0;
			node->stateful_merge = 0;

			/* If we never accepted a valid proto from this node,
			   then it never fully joined and there's no need to
			   recover it.  Similary, node_history_lockspace_fail
			   only sets need_fencing in the lockspace if
			   node->start_time was non-zero. */

			if (node->proto.daemon_max[0]) {
				/* tell loop below to look at this node */
				node->recover_setup = 1;
			} else {
				log_debug("daemon remove %d no proto skip recover", node->nodeid);
			}

			memset(&node->proto, 0, sizeof(struct protocol));
		}
	}

	list_for_each_entry(node, &daemon_nodes, list) {
		if (node->daemon_member)
			check_member_count++;
	}

	/* when we join, all previous members look like they are joining */
	if (!we_joined &&
	    (daemon_joined_count != check_joined_count ||
	     daemon_remove_count != check_remove_count ||
	     daemon_member_count != check_member_count)) {
		log_error("daemon counts joined %d check %d remove %d check %d member %d check %d",
			  daemon_joined_count, check_joined_count,
			  daemon_remove_count, check_remove_count,
			  daemon_member_count, check_member_count);
	}

	/* set up recovery work for nodes that just failed (recover_setup set above) */

	list_for_each_entry(node, &daemon_nodes, list) {
		if (!node->recover_setup)
			continue;

		node->recover_setup = 0;
		reason = 0;
		low = 0;

		if (!opt(enable_fencing_ind))
			continue;

		if (node->need_fencing) {
			log_error("daemon remove %d already needs fencing", node->nodeid);
			continue;
		}

		/* find why this node left */
		for (i = 0; i < left_list_entries; i++) {
			if (left_list[i].nodeid != node->nodeid)
				continue;
			reason = left_list[i].reason;
			break;
		}

		/* only an unclean exit (node or process failure) needs fencing */
		if (reason == CPG_REASON_NODEDOWN || reason == CPG_REASON_PROCDOWN) {
			if (node->fence_pid_wait || node->fence_pid) {
				/* sanity check, should never happen */
				log_error("daemon remove %d pid_wait %d pid %d",
					  node->nodeid, node->fence_pid_wait, node->fence_pid);
			}

			/* reset all fencing state and mark the failure time */
			node->need_fencing = 1;
			node->delay_fencing = 0;
			node->fence_monotime = 0;
			node->fence_walltime = 0;
			node->fence_actor_last = 0;
			node->fence_actor_done = 0;
			node->fence_pid_wait = 0;
			node->fence_pid = 0;
			node->fence_result_wait = 0;
			node->fence_config.pos = 0;
			node->left_reason = reason;
			node->fail_monotime = now;
			node->fail_walltime = now_wall;
			low = set_fence_actors(node, 0);
		}

		log_debug("daemon remove %d %s need_fencing %d low %d",
			  node->nodeid, reason_str(reason), node->need_fencing, low);
	}

	daemon_fence_work();
}
2441 	
2442 	static void totem_cb_daemon(cpg_handle_t handle,
2443 	                            struct cpg_ring_id ring_id,
2444 	                            uint32_t member_list_entries,
2445 	                            const uint32_t *member_list)
2446 	{
2447 		daemon_ringid.nodeid = ring_id.nodeid;
2448 		daemon_ringid.seq = ring_id.seq;
2449 		daemon_ringid_wait = 0;
2450 	
2451 		log_ringid("dlm:controld", &ring_id, member_list, member_list_entries);
2452 	
2453 		daemon_fence_work();
2454 	}
2455 	
/* cpg model v1 callback table for the daemon cpg; the flag requests
   delivery of the initial totem configuration (totem_cb_daemon). */
static cpg_model_v1_data_t cpg_callbacks_daemon = {
	.cpg_deliver_fn = deliver_cb_daemon,
	.cpg_confchg_fn = confchg_cb_daemon,
	.cpg_totem_confchg_fn = totem_cb_daemon,
	.flags = CPG_MODEL_V1_DELIVER_INITIAL_TOTEM_CONF,
};
2462 	
2463 	void process_cpg_daemon(int ci)
2464 	{
2465 		cs_error_t error;
2466 	
2467 		error = cpg_dispatch(cpg_handle_daemon, CS_DISPATCH_ALL);
2468 		if (error != CS_OK && error != CS_ERR_BAD_HANDLE)
2469 			log_error("daemon cpg_dispatch error %d", error);
2470 	}
2471 	
2472 	int setup_cpg_daemon(void)
2473 	{
2474 		cs_error_t error;
2475 		struct cpg_name name;
2476 		int i = 0;
2477 	
2478 		/* daemon 1.1.1 was cluster3/STABLE3/RHEL6 which is incompatible
2479 		   with cluster4/RHEL7 */ 
2480 	
2481 		memset(&our_protocol, 0, sizeof(our_protocol));
2482 	
2483 		if (opt(enable_fscontrol_ind))
2484 			our_protocol.daemon_max[0] = 2;
2485 		else
2486 			our_protocol.daemon_max[0] = 3;
2487 	
2488 		our_protocol.daemon_max[1] = 1;
2489 		our_protocol.daemon_max[2] = 1;
2490 		our_protocol.kernel_max[0] = 1;
2491 		our_protocol.kernel_max[1] = 1;
2492 		our_protocol.kernel_max[2] = 1;
2493 	
2494 		error = cpg_model_initialize(&cpg_handle_daemon, CPG_MODEL_V1,
2495 					     (cpg_model_data_t *)&cpg_callbacks_daemon,
2496 					     NULL);
2497 		if (error != CS_OK) {
2498 			log_error("daemon cpg_initialize error %d", error);
2499 			return -1;
2500 		}
2501 	
2502 		cpg_fd_get(cpg_handle_daemon, &cpg_fd_daemon);
2503 	
2504 		memset(&name, 0, sizeof(name));
2505 		sprintf(name.value, "dlm:controld");
2506 		name.length = strlen(name.value) + 1;
2507 	
2508 		log_debug("cpg_join %s ...", name.value);
2509 	 retry:
2510 		error = cpg_join(cpg_handle_daemon, &name);
2511 		if (error == CS_ERR_TRY_AGAIN) {
2512 			sleep(1);
2513 			if (!(++i % 10))
2514 				log_error("daemon cpg_join error retrying");
2515 			goto retry;
2516 		}
2517 		if (error != CS_OK) {
2518 			log_error("daemon cpg_join error %d", error);
2519 			goto fail;
2520 		}
2521 	
2522 		log_debug("setup_cpg_daemon %d", cpg_fd_daemon);
2523 		return cpg_fd_daemon;
2524 	
2525 	 fail:
2526 		cpg_finalize(cpg_handle_daemon);
2527 		return -1;
2528 	}
2529 	
2530 	static void stop_lockspaces(void)
2531 	{
2532 		struct lockspace *ls;
2533 	
2534 		list_for_each_entry(ls, &lockspaces, list) {
2535 			cpg_stop_kernel(ls);
2536 		}
2537 	}
2538 	
/*
 * Leave the daemon cpg (retrying while corosync is busy, skipped when the
 * cluster is already down), then stop kernel lock activity and finalize
 * the per-lockspace and daemon cpg handles.
 */
void close_cpg_daemon(void)
{
	struct lockspace *ls;
	cs_error_t error;
	struct cpg_name name;
	int i = 0;

	/* no cpg connection was ever made; just stop the kernel side */
	if (!cpg_handle_daemon) {
		stop_lockspaces();
		return;
	}

	/* cluster is gone, a cpg_leave would be pointless */
	if (cluster_down)
		goto fin;

	memset(&name, 0, sizeof(name));
	sprintf(name.value, "dlm:controld");
	name.length = strlen(name.value) + 1;

	log_debug("cpg_leave %s ...", name.value);
 retry:
	error = cpg_leave(cpg_handle_daemon, &name);
	if (error == CS_ERR_TRY_AGAIN) {
		/* corosync is busy; retry once a second, logging every 10th */
		sleep(1);
		if (!(++i % 10))
			log_error("daemon cpg_leave error retrying");
		goto retry;
	}
	if (error != CS_OK)
		log_error("daemon cpg_leave error %d", error);
 fin:
	list_for_each_entry(ls, &lockspaces, list) {
		/* stop kernel ls lock activity before configfs cleanup */
		cpg_stop_kernel(ls);
		if (ls->cpg_handle)
			cpg_finalize(ls->cpg_handle);
	}
	cpg_finalize(cpg_handle_daemon);
}
2578 	
2579 	void init_daemon(void)
2580 	{
2581 		INIT_LIST_HEAD(&daemon_nodes);
2582 		INIT_LIST_HEAD(&startup_nodes);
2583 	
2584 	}
2585 	
/*
 * Format one node's daemon state as a space-separated key=value string
 * into str (caller provides a DLMC_STATE_MAXSTR buffer).  Returns the
 * string length including the terminating NUL.
 */
static int print_state_daemon_node(struct node_daemon *node, char *str)
{
	snprintf(str, DLMC_STATE_MAXSTR-1,
		 "member=%d "
		 "killed=%d "
		 "left_reason=%s "
		 "need_fencing=%d "
		 "delay_fencing=%d "
		 "fence_pid=%d "
		 "fence_pid_wait=%d "
		 "fence_result_wait=%d "
		 "fence_actor_last=%d "
		 "fence_actor_done=%d "
		 "add_time=%llu "
		 "rem_time=%llu "
		 "fail_walltime=%llu "
		 "fail_monotime=%llu "
		 "fence_walltime=%llu "
		 "fence_monotime=%llu ",
		 node->daemon_member,
		 node->killed,
		 reason_str(node->left_reason),
		 node->need_fencing,
		 node->delay_fencing,
		 node->fence_pid,
		 node->fence_pid_wait,
		 node->fence_result_wait,
		 node->fence_actor_last,
		 node->fence_actor_done,
		 (unsigned long long)node->daemon_add_time,
		 (unsigned long long)node->daemon_rem_time,
		 (unsigned long long)node->fail_walltime,
		 (unsigned long long)node->fail_monotime,
		 (unsigned long long)node->fence_walltime,
		 (unsigned long long)node->fence_monotime);

	return strlen(str) + 1;
}
2624 	
2625 	void send_state_daemon_nodes(int fd)
2626 	{
2627 		struct node_daemon *node;
2628 		struct dlmc_state st;
2629 		char str[DLMC_STATE_MAXSTR];
2630 		int str_len;
2631 	
2632 		list_for_each_entry(node, &daemon_nodes, list) {
2633 			memset(&st, 0, sizeof(st));
2634 			st.type = DLMC_STATE_DAEMON_NODE;
2635 			st.nodeid = node->nodeid;
2636 	
2637 			memset(str, 0, sizeof(str));
2638 			str_len = print_state_daemon_node(node, str);
2639 	
2640 			st.str_len = str_len;
2641 	
2642 			send(fd, &st, sizeof(st), MSG_NOSIGNAL);
2643 			if (str_len)
2644 				send(fd, str, str_len, MSG_NOSIGNAL);
2645 		}
2646 	}
2647 	
2648 	void send_state_startup_nodes(int fd)
2649 	{
2650 		struct node_daemon *node;
2651 		struct dlmc_state st;
2652 		char str[DLMC_STATE_MAXSTR];
2653 		int str_len;
2654 	
2655 		list_for_each_entry(node, &startup_nodes, list) {
2656 			memset(&st, 0, sizeof(st));
2657 			st.type = DLMC_STATE_STARTUP_NODE;
2658 			st.nodeid = node->nodeid;
2659 	
2660 			memset(str, 0, sizeof(str));
2661 			str_len = print_state_daemon_node(node, str);
2662 	
2663 			st.str_len = str_len;
2664 	
2665 			send(fd, &st, sizeof(st), MSG_NOSIGNAL);
2666 			if (str_len)
2667 				send(fd, str, str_len, MSG_NOSIGNAL);
2668 		}
2669 	}
2670 	
/*
 * Format global daemon state as a space-separated key=value string into
 * str (caller provides a DLMC_STATE_MAXSTR buffer).  Returns the string
 * length including the terminating NUL.
 */
static int print_state_daemon(char *str)
{
	snprintf(str, DLMC_STATE_MAXSTR-1,
		 "member_count=%d "
		 "joined_count=%d "
		 "remove_count=%d "
		 "daemon_ringid=%llu "
		 "cluster_ringid=%llu "
		 "quorate=%d "
		 "fence_pid=%d "
		 "fence_in_progress_unknown=%d "
		 "zombie_count=%d "
		 "monotime=%llu "
		 "stateful_merge_wait=%d ",
		 daemon_member_count,
		 daemon_joined_count,
		 daemon_remove_count,
		 (unsigned long long)daemon_ringid.seq,
		 (unsigned long long)cluster_ringid_seq,
		 cluster_quorate,
		 daemon_fence_pid,
		 fence_in_progress_unknown,
		 zombie_count,
		 (unsigned long long)monotime(),
		 stateful_merge_wait);

	return strlen(str) + 1;
}
2699 	
2700 	void send_state_daemon(int fd)
2701 	{
2702 		struct dlmc_state st;
2703 		char str[DLMC_STATE_MAXSTR];
2704 		int str_len;
2705 	
2706 		memset(&st, 0, sizeof(st));
2707 		st.type = DLMC_STATE_DAEMON;
2708 		st.nodeid = our_nodeid;
2709 	
2710 		memset(str, 0, sizeof(str));
2711 		str_len = print_state_daemon(str);
2712 	
2713 		st.str_len = str_len;
2714 	
2715 		send(fd, &st, sizeof(st), MSG_NOSIGNAL);
(1) Event cond_true: Condition "str_len", taking true branch.
2716 		if (str_len)
(2) Event check_return: Calling "send(fd, str, str_len, MSG_NOSIGNAL)" without checking return value. This library function may fail and return an error code.
2717 			send(fd, str, str_len, MSG_NOSIGNAL);
2718 	}
2719 	
2720