1 #!@PYTHON@ -tt
2
3 import sys, re
4 import json
5 import atexit
6 import logging
7 import time
8 import requests
9
10 sys.path.append("@FENCEAGENTSLIBDIR@")
11
12 from fencing import *
13 from fencing import (
14 run_delay,
15 fail,
16 fail_usage,
17 EC_STATUS,
18 EC_GENERIC_ERROR,
19 SyslogLibHandler
20 )
21
22 try:
23 import boto3
24 from botocore.exceptions import ConnectionError, ClientError, EndpointConnectionError, NoRegionError
25 except ImportError:
26 pass
27
# Logging setup: the root logger emits INFO and above through the
# SyslogLibHandler imported from the fencing library.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.propagate = False
logger.addHandler(SyslogLibHandler())

# Records from botocore's vendored packages are not propagated upwards.
logging.getLogger('botocore.vendored').propagate = False
34
35 # DESIGN HEURISTIC:
36 # The code follows a clear separation of concerns pattern:
37 # - get_power_status: Should ONLY contain logic to READ the current state, never modify it
38 # - set_power_status: Should contain the logic to CHANGE the state
39 # This separation ensures that status checking operations are non-destructive and
40 # state changes are explicitly handled in the appropriate function.
41
# Translation table from EC2 instance state names to the power state the
# agent reports. Only "running" and "stopped" are definitive; every
# transitional or terminal state is reported as "unknown".
status = {"running": "on", "stopped": "off"}
status.update(
    dict.fromkeys(("pending", "stopping", "shutting-down", "terminated"), "unknown")
)
50
def check_sg_modifications(ec2_client, instance_id, options):
    """Check if security groups have been modified according to the specified options.

    The groups named by the comma-separated ``--secg`` option are compared
    with the groups currently attached to every network interface:

    * default (remove) mode: none of the listed groups may still be attached;
    * ``--invert-sg-removal`` (keep-only) mode: each interface must carry
      exactly the listed groups.

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance
        options: Dictionary containing the fencing options

    Returns:
        bool: True if all interfaces have been properly modified, False otherwise
              (including when ``--secg`` is empty or the lookup fails)
    """
    try:
        _, _, interfaces = get_instance_details(ec2_client, instance_id)

        # Strip whitespace and drop empty entries so values such as
        # "sg-1, sg-2" or "," do not produce IDs that can never match and
        # thereby make the instance look fenced.
        sg_to_remove = [
            sg.strip()
            for sg in (options.get("--secg") or "").split(",")
            if sg.strip()
        ]
        if not sg_to_remove:
            return False

        keep_only = "--invert-sg-removal" in options
        wanted_sorted = sorted(sg_to_remove)

        # NOTE: an instance reporting no interfaces at all passes this loop
        # trivially and is treated as fenced (same as before).
        for interface in interfaces:
            current_sgs = interface["SecurityGroups"]
            if keep_only:
                # In keep_only mode, the interface must have only the specified groups
                if sorted(current_sgs) != wanted_sorted:
                    logger.debug(
                        "Interface %s still has different security groups",
                        interface["NetworkInterfaceId"],
                    )
                    return False
            elif any(sg in current_sgs for sg in sg_to_remove):
                # In remove mode, none of the specified groups may remain
                logger.debug(
                    "Interface %s still has security groups that should be removed",
                    interface["NetworkInterfaceId"],
                )
                return False

        logger.debug("All interfaces have had their security groups successfully modified - considering instance fenced")
        return True
    except Exception as e:
        # Best effort: a failed lookup is reported as "not modified".
        logger.debug("Failed to check security group modifications: %s", e)
    return False
90
def is_instance_fenced(ec2_client, instance_id, options):
    """
    Determine if an instance is currently fenced based on security groups and tags.
    This is a helper function for get_power_status that focuses on the actual state determination.

    The instance counts as fenced when either
    1. check_sg_modifications() reports the security groups as modified, or
    2. a ``lastfence`` tag exists and at least one ``Original_SG_Backup_*``
       tag carries the same timestamp in its ``t`` field.

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance
        options: Dictionary containing the fencing options

    Returns:
        bool: True if the instance is fenced, False otherwise (also on any error)
    """
    try:
        # Check if security groups have been modified according to options
        if check_sg_modifications(ec2_client, instance_id, options):
            logger.debug("Security groups have been modified according to options - instance is considered fenced")
            return True

        # Get the lastfence tag
        lastfence_response = ec2_client.describe_tags(
            Filters=[
                {"Name": "resource-id", "Values": [instance_id]},
                {"Name": "key", "Values": ["lastfence"]}
            ]
        )

        # If no lastfence tag exists, instance is not fenced
        if not lastfence_response["Tags"]:
            logger.debug("No lastfence tag found for instance %s - instance is not fenced", instance_id)
            return False

        lastfence_timestamp = str(lastfence_response["Tags"][0]["Value"])

        # Check for backup tags with pattern Original_SG_Backup_{instance_id}_*
        response = ec2_client.describe_tags(
            Filters=[
                {"Name": "resource-id", "Values": [instance_id]},
                {"Name": "key", "Values": [f"Original_SG_Backup_{instance_id}*"]}
            ]
        )

        # If no backup tags exist, instance is not fenced
        if not response["Tags"]:
            logger.debug("No backup tags found for instance %s - instance is not fenced", instance_id)
            return False

        # Loop through backup tags to find matching timestamp
        for tag in response["Tags"]:
            try:
                backup_data = json.loads(tag["Value"])
            except (json.JSONDecodeError, KeyError, TypeError) as e:
                logger.error(f"Failed to parse backup data for tag {tag['Key']}: {str(e)}")
                continue

            # A value that is valid JSON but not an object (e.g. "123") has no
            # .get(); skip it instead of letting it abort the whole scan.
            if not isinstance(backup_data, dict):
                logger.error(f"Failed to parse backup data for tag {tag['Key']}: not a JSON object")
                continue

            backup_timestamp = backup_data.get("t")  # Using shortened timestamp field
            if not backup_timestamp:
                logger.debug("No timestamp found in backup data for tag %s", tag["Key"])
                continue

            # Validate timestamps match
            if str(backup_timestamp) == lastfence_timestamp:
                logger.debug("Found matching backup tag %s - instance is fenced", tag["Key"])
                return True

        logger.debug("No backup tags with matching timestamp found - instance is not fenced")
        return False

    except Exception as e:
        logger.error(f"Error checking if instance is fenced: {str(e)}")
        return False
163
def handle_interface_options_with_ignore_instance_state(ec2_client, instance_id, state, options):
    """
    Translate the security-group match result into a power state for the
    interface-option / --ignore-instance-state case of get_power_status.

    The result of check_interface_sg_match() is interpreted relative to the
    requested action:

    * action "on" (unfencing): match -> "on", mismatch -> "off"
    * any other action (treated as fencing): match -> "off", mismatch -> "on"

    NOTE(review): actions other than "on"/"off" also take the fencing
    branch - confirm this is intended.

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance
        state: The current state of the instance (not used by this function)
        options: Dictionary containing the fencing options

    Returns:
        str: "on" or "off" as described above
    """
    logger.debug("Interface security group options detected with --ignore-instance-state")
    requested_action = options.get("--action", "on")

    # Do the interfaces currently carry the groups the action is aiming for?
    matches = check_interface_sg_match(ec2_client, instance_id, options)

    if requested_action == "on":
        # Unfencing: desired groups in place means the node is usable again.
        if not matches:
            logger.debug("Action=on: Security groups don't match desired state - instance is considered 'off'")
            return "off"
        logger.debug("Action=on: All interfaces have the desired security groups - instance is considered 'on'")
        return "on"

    # Fencing: desired (fenced) groups in place means the node is isolated.
    if not matches:
        logger.debug("Action=off: Security groups don't match desired state - instance is considered 'on'")
        return "on"
    logger.debug("Action=off: All interfaces have the desired security groups - instance is considered 'off'")
    return "off"
202
203
def check_interface_sg_match(ec2_client, instance_id, options):
    """
    Check if the current security groups match the desired security groups based on the action.

    For action=off (fencing): checks against the --secg option (remove mode,
    or keep-only mode when --invert-sg-removal is set). If --secg is empty
    the interface options check below is used instead.
    For any other action (unfencing): checks each interface against its
    interface{i}-sg / --interface{i}-sg option; interfaces without an option
    are ignored, but at least one option must be present.

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance
        options: Dictionary containing the fencing options

    Returns:
        bool: True if all interfaces match their desired security groups, False otherwise
              (also on any error)
    """
    try:
        _, _, interfaces = get_instance_details(ec2_client, instance_id)
        action = options.get("--action", "on")

        # For action=off (fencing), check against --secg option
        if action == "off":
            # Strip whitespace and drop empty entries so "sg-1, sg-2" or ","
            # cannot yield IDs that never match a real security group.
            sg_to_remove = [
                sg.strip()
                for sg in (options.get("--secg") or "").split(",")
                if sg.strip()
            ]
            if sg_to_remove:
                keep_only = "--invert-sg-removal" in options
                wanted_sorted = sorted(sg_to_remove)
                for interface in interfaces:
                    current_sgs = interface["SecurityGroups"]
                    if keep_only:
                        # In keep_only mode, the interface must have only the specified groups
                        if sorted(current_sgs) != wanted_sorted:
                            logger.debug(f"Interface {interface['NetworkInterfaceId']} still has different security groups")
                            return False
                    elif any(sg in current_sgs for sg in sg_to_remove):
                        # In remove mode, none of the specified groups may remain
                        logger.debug(f"Interface {interface['NetworkInterfaceId']} still has security groups that should be removed")
                        return False
                # Every interface is in the fenced configuration
                return True

            # If no --secg option, fall back to interface options check
            logger.debug("No --secg option for fencing, falling back to interface options check")

        # For action=on (unfencing) or if no --secg option for fencing, check against interface options
        any_interface_option = False
        for idx, interface in enumerate(interfaces):
            # The option may be stored with or without the leading dashes.
            raw_value = options.get(f"interface{idx}-sg") or options.get(f"--interface{idx}-sg")
            if not raw_value:
                continue

            any_interface_option = True
            desired_sgs = [sg.strip() for sg in raw_value.split(",") if sg.strip()]
            if sorted(interface["SecurityGroups"]) != sorted(desired_sgs):
                logger.debug(f"Interface {interface['NetworkInterfaceId']} security groups don't match desired state")
                return False

        # A match only counts when at least one interface option was given.
        return any_interface_option
    except Exception as e:
        logger.error(f"Error checking interface security groups: {str(e)}")
        return False
277
def get_power_status(conn, options):
    """
    Report the power status of the instance named by options["--plug"].
    Read-only: nothing is modified here.

    When any interface{i}-sg / --interface{i}-sg option (i in 0..15) is set
    together with --ignore-instance-state, the answer comes from
    handle_interface_options_with_ignore_instance_state(). Otherwise the
    answer is "off" when is_instance_fenced() is true and "on" when not.

    Args:
        conn: The boto3 EC2 resource connection
        options: Dictionary containing the fencing options

    Returns:
        str: "on" or "off". Errors are routed to fail_usage()/fail(), which
             come from the fencing library (presumably terminating the agent).
    """
    logger.debug("Starting status operation")
    try:
        instance_id = options["--plug"]
        ec2_client = conn.meta.client

        # Look up the instance state first; failures here are logged and the
        # standard fenced/not-fenced evaluation below is used instead.
        try:
            state, _, _ = get_instance_details(ec2_client, instance_id)
            logger.debug(f"Current instance state: {state}")

            # Is any per-interface security group option configured?
            interface_sg_present = any(
                options.get(f"--interface{i}-sg") or options.get(f"interface{i}-sg")
                for i in range(16)
            )

            # Special handling for interface options with --ignore-instance-state
            if interface_sg_present and "--ignore-instance-state" in options:
                return handle_interface_options_with_ignore_instance_state(ec2_client, instance_id, state, options)
        except Exception as e:
            logger.error(f"Error checking instance state: {e}")

        # Standard fencing: the fenced check decides the answer
        if not is_instance_fenced(ec2_client, instance_id, options):
            logger.debug(f"Instance {instance_id} is not fenced - returning 'on'")
            return "on"

        logger.debug(f"Instance {instance_id} is fenced - returning 'off'")
        return "off"

    except ClientError:
        fail_usage("Failed: Incorrect Access Key or Secret Key.")
    except EndpointConnectionError:
        fail_usage("Failed: Incorrect Region.")
    except IndexError:
        fail(EC_STATUS)
    except Exception as e:
        logger.error("Failed to get power status: %s", e)
        fail(EC_STATUS)
331
332 # Retrieve instance ID for self-check
def get_instance_id(timeout=5):
    """Retrieve the instance ID of the current EC2 instance.

    Requests a session token from the instance metadata service and then
    uses it to read ``meta-data/instance-id``.

    Args:
        timeout: Seconds to wait for each metadata request. Without a
            timeout ``requests`` may block indefinitely when the metadata
            endpoint is unreachable.

    Returns:
        str: The instance ID, or None if it could not be retrieved
    """
    # NOTE: the metadata service is addressed over plain HTTP on the
    # link-local address; an https:// URL is not expected to work here
    # (see the AWS instance metadata documentation).
    metadata_base = "http://169.254.169.254/latest"
    try:
        token_response = requests.put(
            f"{metadata_base}/api/token",
            headers={"X-aws-ec2-metadata-token-ttl-seconds": "21600"},
            timeout=timeout,
        )
        # Reject HTTP error responses instead of using their body as a token.
        token_response.raise_for_status()
        token = token_response.content.decode("UTF-8")

        id_response = requests.get(
            f"{metadata_base}/meta-data/instance-id",
            headers={"X-aws-ec2-metadata-token": token},
            timeout=timeout,
        )
        # Likewise, never return an error page as the instance ID.
        id_response.raise_for_status()
        return id_response.content.decode("UTF-8")
    except Exception as err:
        logger.error("Failed to retrieve instance ID for self-check: %s", err)
        return None
348
349
350 # Retrieve instance details
def get_instance_details(ec2_client, instance_id):
    """Look up an instance and return its state, VPC and interface summary.

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance

    Returns:
        tuple: (state name, VPC ID, interfaces) where interfaces is a list of
               {"NetworkInterfaceId": ..., "SecurityGroups": [group IDs]}.
               Interfaces missing an expected key are logged and left out.

    Raises:
        Any exception hit during the lookup is logged and re-raised.
    """
    try:
        reply = ec2_client.describe_instances(InstanceIds=[instance_id])
        details = reply["Reservations"][0]["Instances"][0]

        instance_state = details["State"]["Name"]
        vpc_id = details["VpcId"]
        raw_interfaces = details["NetworkInterfaces"]

        interfaces = []
        for eni in raw_interfaces:
            try:
                summary = {
                    "NetworkInterfaceId": eni["NetworkInterfaceId"],
                    "SecurityGroups": [group["GroupId"] for group in eni["Groups"]],
                }
            except KeyError as e:
                logger.error(f"Malformed interface data: {str(e)}")
            else:
                interfaces.append(summary)

        return instance_state, vpc_id, interfaces

    except ClientError as e:
        logger.error(f"AWS API error while retrieving instance details: {str(e)}")
        raise
    except IndexError as e:
        logger.error(f"Instance {instance_id} not found or no instances returned: {str(e)}")
        raise
    except KeyError as e:
        logger.error(f"Unexpected response format from AWS API: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error while retrieving instance details: {str(e)}")
        raise
388
389 # Check if we are the self-fencing node
def get_self_power_status(conn, instance_id):
    """Report whether the given (own) instance is in the "running" state.

    Args:
        conn: The boto3 EC2 resource connection
        instance_id: The ID of the instance to look up

    Returns:
        str: "ok" when the state is "running", "alert" for any other state,
             "fail" when the lookup returns no instance.
    """
    try:
        matches = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
        state = list(matches)[0].state["Name"]

        if state != "running":
            logger.debug(f"Captured my ({instance_id}) state it is {state.upper()} - returning Alert - Unable to fence other nodes")
            return "alert"

        logger.debug(f"Captured my ({instance_id}) state and it {state.upper()} - returning OK - Proceeding with fencing")
        return "ok"

    except ClientError:
        fail_usage("Failed: Incorrect Access Key or Secret Key.")
    except EndpointConnectionError:
        fail_usage("Failed: Incorrect Region.")
    except IndexError:
        # Empty result list: the instance was not found
        return "fail"
407
408 # Create backup tags for each network interface
# Upper bound for the JSON text stored in one backup tag value.
# NOTE(review): EC2 tag values are limited to 256 characters; 254 presumably
# leaves headroom for the chunk-count field growing beyond one digit - confirm.
_BACKUP_TAG_MAX_VALUE_LEN = 254


def _backup_tag_payload(interface_id, sg_chunk, chunk_idx, total_chunks, timestamp):
    """Build the compact backup structure that is serialised into a tag value."""
    return {
        "n": {                      # network interface
            "i": interface_id,      # interface ID
            "s": sg_chunk,          # security group IDs with the 'sg-' prefix stripped
            "c": {                  # chunk info
                "i": chunk_idx,     # index of this chunk
                "t": total_chunks   # total number of chunks for the interface
            }
        },
        "t": timestamp              # fence timestamp
    }


def _chunk_sgs_for_backup_tag(interface_id, stripped_sgs, timestamp, max_len=_BACKUP_TAG_MAX_VALUE_LEN):
    """Split security group IDs into chunks whose serialised payload fits in max_len."""
    sg_chunks = []
    current_chunk = []

    for sg in stripped_sgs:
        candidate = current_chunk + [sg]
        # Probe with a placeholder total of 1; the real total is only known
        # once all chunks have been formed.
        probe = _backup_tag_payload(interface_id, candidate, len(sg_chunks), 1, timestamp)

        if len(json.dumps(probe)) <= max_len:
            current_chunk = candidate
        elif current_chunk:
            # Current chunk is full: close it and start a new one with this SG
            sg_chunks.append(current_chunk)
            current_chunk = [sg]
        else:
            # Edge case: a single SG exceeds the limit (shouldn't happen with normal SG IDs)
            logger.warning(f"Security group ID {sg} is unusually long")
            sg_chunks.append([sg])

    # Add the last chunk if it has any items
    if current_chunk:
        sg_chunks.append(current_chunk)
    return sg_chunks


def _verify_backup_tag(ec2_client, instance_id, tag_key, tag_value):
    """Read the tag back and raise if it is missing or its value differs."""
    response = ec2_client.describe_tags(
        Filters=[
            {"Name": "resource-id", "Values": [instance_id]},
            {"Name": "key", "Values": [tag_key]}
        ]
    )

    if not response["Tags"]:
        logger.error(f"Failed to verify creation of backup tag '{tag_key}' for instance {instance_id}")
        raise Exception("Backup tag creation could not be verified")

    if response["Tags"][0]["Value"] != tag_value:
        logger.error(f"Created tag value does not match expected value for instance {instance_id}")
        raise Exception("Backup tag value mismatch")


def create_backup_tag(ec2_client, instance_id, interfaces, timestamp):
    """Create tags on the instance to backup original security groups for each network interface.

    One tag is written per interface; if the security group list is too long
    for a single tag value it is split across several tags (chunks). Each tag
    is read back after creation to verify it. Interfaces without any security
    groups produce no tag.

    Tag key:   Original_SG_Backup_{instance_id}_{timestamp}_{interface number}_{chunk index}
    Tag value: JSON built by _backup_tag_payload()

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance
        interfaces: List of dicts with "NetworkInterfaceId" and "SecurityGroups"
        timestamp: Timestamp stored in every tag and used in the tag key

    Raises:
        ClientError: If AWS API calls fail
        Exception: If a tag cannot be verified or another error occurs
    """
    try:
        # Interface numbering in the tag key starts at 1
        for idx, interface in enumerate(interfaces, 1):
            interface_id = interface["NetworkInterfaceId"]

            # Strip the 'sg-' prefix to save space in the tag value
            stripped_sgs = [
                sg[3:] if sg.startswith('sg-') else sg
                for sg in interface["SecurityGroups"]
            ]
            sg_chunks = _chunk_sgs_for_backup_tag(interface_id, stripped_sgs, timestamp)

            for chunk_idx, sg_chunk in enumerate(sg_chunks):
                sg_backup = _backup_tag_payload(interface_id, sg_chunk, chunk_idx, len(sg_chunks), timestamp)
                tag_value = json.dumps(sg_backup)
                tag_key = f"Original_SG_Backup_{instance_id}_{timestamp}_{idx}_{chunk_idx}"

                # Create the tag, then verify it was stored as written
                ec2_client.create_tags(
                    Resources=[instance_id],
                    Tags=[{"Key": tag_key, "Value": tag_value}],
                )
                _verify_backup_tag(ec2_client, instance_id, tag_key, tag_value)

                logger.info(f"Backup tag '{tag_key}' chunk {chunk_idx + 1}/{len(sg_chunks)} created and verified for interface {interface_id}.")
    except ClientError as e:
        logger.error(f"AWS API error while creating/verifying backup tag: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error while creating/verifying backup tag: {str(e)}")
        raise
507
508
def modify_security_groups(ec2_client, instance_id, sg_list, timestamp, mode="remove", options=None, skip_tags=False):
    """
    Modifies security groups on network interfaces based on the specified mode.
    In 'remove' mode: Removes all SGs in sg_list from each interface
    In 'keep_only' mode: Keeps only the SGs in sg_list and removes all others

    Unless skip_tags is set, backup tags and the lastfence tag are written
    before any interface is changed. The function sleeps 5 seconds before
    returning to let changes propagate.

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance
        sg_list: List of security group IDs to remove or keep
        timestamp: Unix timestamp for backup tag
        mode: Either "remove" or "keep_only" to determine operation mode
        options: Dictionary containing the fencing options (may be None);
                 only "--ignore-tag-write-failure" is consulted here
        skip_tags: If True, skip creating backup tags and lastfence tag

    Raises:
        ClientError: If AWS API calls fail
        Exception: For other unexpected errors, including when the requested
                   groups are present but no interface could be modified
    """
    # options defaults to None; normalise it so the membership test below
    # cannot raise TypeError while a tag failure is being handled.
    if options is None:
        options = {}
    ignore_tag_failure = "--ignore-tag-write-failure" in options

    try:
        # Get instance details
        _, _, interfaces = get_instance_details(ec2_client, instance_id)

        # Create a backup tag before making any changes (unless skip_tags is True)
        if skip_tags:
            logger.info("Skipping tag creation as interface options are specified")
        else:
            try:
                create_backup_tag(ec2_client, instance_id, interfaces, timestamp)
            except Exception as e:
                if not ignore_tag_failure:
                    logger.error(f"Failed to create backup tag: {str(e)}")
                    raise
                logger.warning(f"Failed to create backup tag but continuing due to --ignore-tag-write-failure: {str(e)}")
                logger.info("Will rely on security group state for fencing status")
            else:
                # Only attempted when the backup tags were written. Handled
                # separately so a lastfence failure is not also reported as a
                # backup tag failure.
                try:
                    set_lastfence_tag(ec2_client, instance_id, timestamp)
                except Exception as e:
                    if not ignore_tag_failure:
                        logger.error(f"Failed to set lastfence tag: {str(e)}")
                        raise
                    logger.warning(f"Failed to set lastfence tag but continuing due to --ignore-tag-write-failure: {str(e)}")
                    logger.info("Will rely on security group state for fencing status")

        changed_any = False
        for interface in interfaces:
            try:
                original_sgs = interface["SecurityGroups"]

                if mode == "remove":
                    # Exclude any SGs that are in sg_list
                    updated_sgs = [sg for sg in original_sgs if sg not in sg_list]
                    operation_desc = f"removing {sg_list}"
                else:  # keep_only mode
                    # Set interface to only use the specified security groups
                    updated_sgs = list(sg_list)
                    operation_desc = f"keeping only {sg_list}"

                # Skip if we'd end up with zero SGs (only in remove mode)
                if mode == "remove" and not updated_sgs:
                    logger.info(
                        f"Skipping interface {interface['NetworkInterfaceId']}: "
                        f"removal of {sg_list} would leave 0 SGs."
                    )
                    continue

                # Skip if no changes needed
                if updated_sgs == original_sgs:
                    continue

                logger.info(
                    f"Updating interface {interface['NetworkInterfaceId']} from {original_sgs} "
                    f"to {updated_sgs} ({operation_desc})"
                )

                try:
                    ec2_client.modify_network_interface_attribute(
                        NetworkInterfaceId=interface["NetworkInterfaceId"],
                        Groups=updated_sgs
                    )
                    changed_any = True
                except ClientError as e:
                    # A failure on one interface does not stop the others
                    logger.error(
                        f"Failed to modify security groups for interface "
                        f"{interface['NetworkInterfaceId']}: {str(e)}"
                    )
                    continue

            except KeyError as e:
                logger.error(f"Malformed interface data: {str(e)}")
                continue

        # If we didn't modify anything, check if it's because the SGs were already in the desired state
        if not changed_any:
            if mode == "remove":
                # Work remains if any of the SGs to remove is still attached
                sg_found = any(
                    any(sg in interface["SecurityGroups"] for sg in sg_list)
                    for interface in interfaces
                )
            else:  # keep_only mode
                # Work remains if any interface does not have exactly these SGs
                wanted_sorted = sorted(sg_list)
                sg_found = any(
                    sorted(interface["SecurityGroups"]) != wanted_sorted
                    for interface in interfaces
                )

            if sg_found:
                # SGs were found but couldn't be modified - this is an error
                if mode == "remove":
                    error_msg = f"Security Groups {sg_list} found but could not be removed from interfaces. Removal may have left 0 SGs."
                else:
                    error_msg = f"Security Groups {sg_list} could not be set on interfaces. No changes made."
                logger.error(error_msg)
                raise Exception("Failed to modify security groups: " + error_msg)

            # SGs were not found - this is actually success (already in desired state)
            if mode == "remove":
                logger.info(f"Security Groups {sg_list} not found on any interface. Instance is already fenced.")
            else:
                logger.info("Interfaces already have the desired security groups. No changes needed.")

        # Wait a bit for changes to propagate
        time.sleep(5)

    except ClientError as e:
        logger.error(f"AWS API error: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        raise
643
def _collect_sg_backups(backup_tags, lastfence_timestamp):
    """Parse backup tags and rebuild the original security groups per interface.

    Returns a tuple (backup_sg_map, matching_tag_keys):
    - backup_sg_map maps interface ID -> full list of 'sg-' prefixed group IDs,
      only for interfaces whose chunks are all present;
    - matching_tag_keys lists the keys of all tags whose timestamp equals
      lastfence_timestamp (these are the tags to clean up after a restore).
    """
    wanted = str(lastfence_timestamp)
    chunks_by_interface = {}
    backup_sg_map = {}
    matching_tag_keys = []

    for tag in backup_tags:
        try:
            backup_data = json.loads(tag["Value"])
            if not isinstance(backup_data, dict):
                # Valid JSON but not an object: skip this tag instead of aborting the restore
                raise TypeError("backup tag value is not a JSON object")

            backup_timestamp = backup_data.get("t")  # Using shortened timestamp field
            if str(backup_timestamp) == wanted:
                matching_tag_keys.append(tag["Key"])
            if not backup_timestamp or str(backup_timestamp) != wanted:
                continue

            logger.info(f"Found matching backup tag {tag['Key']}")
            interface_data = backup_data.get("n")  # Using shortened NetworkInterface field
            if not interface_data or "i" not in interface_data:
                continue

            interface_id = interface_data["i"]
            chunk_info = interface_data.get("c", {})
            chunk_index = chunk_info.get("i", 0)
            total_chunks = chunk_info.get("t", 1)

            # Record this chunk's security groups
            chunks = chunks_by_interface.setdefault(interface_id, {})
            chunks[chunk_index] = interface_data["s"]

            # Once all chunks for the interface are present, combine them in
            # index order and restore the 'sg-' prefix
            if len(chunks) == total_chunks:
                combined_sgs = []
                for i in range(total_chunks):
                    combined_sgs.extend(
                        sg if sg.startswith('sg-') else 'sg-' + sg
                        for sg in chunks[i]
                    )
                backup_sg_map[interface_id] = combined_sgs

        except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
            logger.error(f"Failed to parse backup data for tag {tag['Key']}: {str(e)}")
            continue

    return backup_sg_map, matching_tag_keys


def restore_security_groups(ec2_client, instance_id):
    """
    Restores the original security groups from backup tags to each network interface.
    Each network interface's original security groups are stored in one or
    more backup tags. All backup tags share the same timestamp as the
    lastfence tag for validation.

    The process:
    1. Get lastfence tag timestamp
    2. Find all backup tags with matching timestamp
    3. Create a map of interface IDs to their original security groups
    4. Restore each interface's security groups from the map
    5. Clean up matching backup tags and lastfence tag - only when no
       interface failed to restore, so a failed interface can be retried
       from its backup

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance

    Raises:
        ClientError: If AWS API calls fail
        Exception: For other unexpected errors
        SystemExit: If required tags are missing or no changes were made
    """
    try:
        # Get the lastfence tag first
        lastfence_response = ec2_client.describe_tags(
            Filters=[
                {"Name": "resource-id", "Values": [instance_id]},
                {"Name": "key", "Values": ["lastfence"]}
            ]
        )

        if not lastfence_response["Tags"]:
            logger.error(f"No lastfence tag found for instance {instance_id}")
            sys.exit(EC_GENERIC_ERROR)

        lastfence_timestamp = lastfence_response["Tags"][0]["Value"]

        # Get all backup tags for this instance
        backup_response = ec2_client.describe_tags(
            Filters=[
                {"Name": "resource-id", "Values": [instance_id]},
                {"Name": "key", "Values": [f"Original_SG_Backup_{instance_id}*"]}
            ]
        )

        if not backup_response["Tags"]:
            logger.error(f"No backup tags found for instance {instance_id}")
            sys.exit(EC_GENERIC_ERROR)

        # Find and combine backup tags with matching timestamp
        backup_sg_map, matching_tag_keys = _collect_sg_backups(
            backup_response["Tags"], lastfence_timestamp
        )

        if not backup_sg_map:
            logger.error("No complete backup data found with matching timestamp")
            sys.exit(EC_GENERIC_ERROR)

        # Get current interfaces
        _, _, current_interfaces = get_instance_details(ec2_client, instance_id)

        changed_any = False
        failed_interfaces = []
        for interface in current_interfaces:
            try:
                interface_id = interface["NetworkInterfaceId"]
                if interface_id not in backup_sg_map:
                    logger.warning(
                        f"No backup data found for interface {interface_id}. Skipping."
                    )
                    continue

                original_sgs = backup_sg_map[interface_id]
                current_sgs = interface["SecurityGroups"]

                if original_sgs == current_sgs:
                    logger.info(
                        f"Interface {interface_id} already has original security groups. Skipping."
                    )
                    continue

                logger.info(
                    f"Restoring interface {interface_id} from {current_sgs} "
                    f"to original security groups {original_sgs}"
                )

                try:
                    ec2_client.modify_network_interface_attribute(
                        NetworkInterfaceId=interface_id,
                        Groups=original_sgs
                    )
                    changed_any = True
                except ClientError as e:
                    logger.error(
                        f"Failed to restore security groups for interface "
                        f"{interface_id}: {str(e)}"
                    )
                    failed_interfaces.append(interface_id)
                    continue

            except KeyError as e:
                logger.error(f"Malformed interface data: {str(e)}")
                continue

        if not changed_any:
            logger.error("No security groups were restored. All interfaces skipped.")
            sys.exit(EC_GENERIC_ERROR)

        # Wait for changes to propagate
        time.sleep(5)

        # Deleting the tags after a partial restore would discard the only
        # record of the failed interfaces' original groups, so keep them.
        if failed_interfaces:
            logger.warning(
                f"Keeping backup and lastfence tags on instance {instance_id}: "
                f"restore failed for interfaces {failed_interfaces}"
            )
            return

        # Clean up only the matching backup tags and lastfence tag after successful restore
        try:
            tags_to_delete = [{"Key": "lastfence"}]
            tags_to_delete.extend({"Key": key} for key in matching_tag_keys)

            if len(tags_to_delete) > 1:  # More than just the lastfence tag
                ec2_client.delete_tags(
                    Resources=[instance_id],
                    Tags=tags_to_delete
                )
                logger.info(f"Removed matching backup tags {matching_tag_keys} and lastfence tag from instance {instance_id}")
        except ClientError as e:
            logger.warning(f"Failed to remove tags: {str(e)}")
            # Continue since the restore operation was successful

    except ClientError as e:
        logger.error(f"AWS API error: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        raise
830
831 # Shutdown instance
def shutdown_instance(ec2_client, instance_id):
    """Request a forced stop of the instance and return without waiting.

    Only the stop request is issued here; the resulting state transition is
    not polled. Any failure is logged and handed to fail_usage().

    Args:
        ec2_client: The boto3 EC2 client
        instance_id: The ID of the EC2 instance to stop
    """
    try:
        logger.info(f"Initiating shutdown for instance {instance_id}...")
        ec2_client.stop_instances(Force=True, InstanceIds=[instance_id])
        logger.info(f"Shutdown initiated for instance {instance_id}. Status checking will be handled by get_power_status.")
    except ClientError as api_error:
        reason = str(api_error)
        logger.error(f"AWS API error during instance shutdown: {reason}")
        fail_usage(f"Failed to shutdown instance: {reason}")
    except Exception as error:
        reason = str(error)
        logger.error(f"Unexpected error during instance shutdown: {reason}")
        fail_usage(f"Failed to shutdown instance due to unexpected error: {reason}")
844
845
# List instances and their power status
def get_nodes_list(conn, options):
    """Get list of nodes and their status.

    Args:
        conn: The boto3 EC2 resource connection
        options: Dictionary containing the fencing options. An optional
                 "--filter" entry of the form "key=value" restricts the
                 listing; everything after the first "=" is the value.

    Returns:
        dict: instance ID -> (Name tag value or "", power status), where the
              status comes from the module-level ``status`` table and is
              "unknown" for states not listed there. On error the entries
              collected so far (possibly none) are returned.
    """
    logger.debug("Starting monitor operation")
    result = {}
    try:
        filters = []
        if "--filter" in options:
            # partition() splits on the first "=" only, so values that
            # themselves contain "=" are kept intact.
            filter_key, separator, filter_value = options["--filter"].partition("=")
            if not separator:
                raise ValueError("--filter must be given as key=value")
            filters = [{"Name": filter_key.strip(), "Values": [filter_value.strip()]}]
            logger.debug("Filter: {}".format(filters))

        for instance in conn.instances.filter(Filters=filters):
            instance_name = ""
            for tag in instance.tags or []:
                if tag.get("Key") == "Name":
                    instance_name = tag["Value"]
            try:
                result[instance.id] = (instance_name, status[instance.state["Name"]])
            except KeyError:
                # State name missing from the status table
                if options.get("--original-action") == "list-status":
                    logger.error("Unknown status \"{}\" returned for {} ({})".format(instance.state["Name"], instance.id, instance_name))
                result[instance.id] = (instance_name, "unknown")
    except Exception as e:
        logger.error("Failed to get node list: %s", e)
    return result
872
def set_lastfence_tag(ec2_client, instance_id, timestamp):
    """Write the ``lastfence`` tag on the instance.

    Args:
        ec2_client: boto3 EC2 client used to call ``create_tags``.
        instance_id: ID of the EC2 instance to tag.
        timestamp: Value stored in the tag (converted with ``str``).

    Raises:
        Exception: any error from the tagging call is logged and re-raised.
    """
    try:
        lastfence_tag = {"Key": "lastfence", "Value": str(timestamp)}
        ec2_client.create_tags(Tags=[lastfence_tag], Resources=[instance_id])
        logger.info(f"Set lastfence tag with timestamp {timestamp} on instance {instance_id}")
    except Exception as tag_error:
        logger.error(f"Failed to set lastfence tag: {str(tag_error)}")
        raise
884
def _interface_sg_option(options, idx):
    """Return ``(key, value)`` of the SG option configured for interface *idx*.

    Both spellings are accepted, ``interface{idx}-sg`` first and then
    ``--interface{idx}-sg``. A missing or empty value counts as "not
    configured", in which case ``(None, None)`` is returned.
    """
    for key in (f"interface{idx}-sg", f"--interface{idx}-sg"):
        value = options.get(key)
        if value:
            return key, value
    return None, None


def _parse_sg_list(raw_value):
    """Split a comma-separated SG option into a list of non-empty, stripped IDs."""
    return [sg.strip() for sg in raw_value.split(",") if sg.strip()]


def _apply_interface_sgs(ec2_client, eni_id, sg_list, max_retries=3, retry_delay=2):
    """Set *sg_list* on one network interface and verify it, with retries.

    Each attempt re-issues the modification, waits ``retry_delay`` seconds and
    reads the interface back. Returns True as soon as the read-back groups
    match *sg_list*, False once ``max_retries`` attempts are exhausted.
    """
    for attempt in range(1, max_retries + 1):
        try:
            ec2_client.modify_network_interface_attribute(
                NetworkInterfaceId=eni_id,
                Groups=sg_list
            )

            # Give the change a moment before reading it back
            logger.info(f"Waiting for security group changes to propagate for interface {eni_id} (attempt {attempt}/{max_retries})")
            time.sleep(retry_delay)

            response = ec2_client.describe_network_interfaces(
                NetworkInterfaceIds=[eni_id]
            )

            described = response.get("NetworkInterfaces")
            if not described:
                logger.warning(f"Could not verify security group changes - no interface data returned (attempt {attempt}/{max_retries})")
                backoff = retry_delay
            else:
                updated_sgs = [sg["GroupId"] for sg in described[0].get("Groups", [])]
                if sorted(updated_sgs) == sorted(sg_list):
                    logger.info(f"Successfully verified security group changes for interface {eni_id}")
                    return True
                logger.warning(
                    f"Security group changes not fully applied for interface {eni_id} "
                    f"(attempt {attempt}/{max_retries}). Expected: {sorted(sg_list)}, Got: {sorted(updated_sgs)}"
                )
                backoff = retry_delay * attempt  # linear backoff
        except ClientError as e:
            logger.error(
                f"Failed to set security groups for interface "
                f"{eni_id} (attempt {attempt}/{max_retries}): {str(e)}"
            )
            backoff = retry_delay * attempt  # linear backoff

        # Sleeping after the last attempt would only delay the failure report
        if attempt < max_retries:
            time.sleep(backoff)

    return False


def restore_security_groups_from_options(ec2_client, instance_id, options):
    """
    Set security groups for each interface using the interface{i}-sg options.
    Bypasses tag logic entirely.

    The interface{i}-sg option names a network interface (by its position in
    the list returned by get_instance_details) and the list of AWS security
    groups to apply to it. Interfaces without an option are left unchanged.

    IMPORTANT: When action=on, all interfaces must have corresponding
    interface{i}-sg options. If any interface is missing one, the problem is
    reported through fail_usage.

    Up to 16 interfaces per EC2 node can be configured (i from 0 to 15).

    Args:
        ec2_client: boto3 EC2 client.
        instance_id: ID of the EC2 instance whose interfaces are updated.
        options: Fencing options holding the interface{i}-sg values and
            "--action".

    Returns:
        bool: True if at least one interface was modified and every requested
        change was applied and passed final verification. False if nothing was
        modified, if any interface could not be updated after all retries, or
        if final verification failed.

    Raises:
        Exception: any unexpected error is logged and re-raised.
    """
    try:
        logger.info(f"Using direct interface security group options for instance {instance_id} (bypassing all tag logic)")

        # Get current interfaces
        _, _, interfaces = get_instance_details(ec2_client, instance_id)

        # First, log all interfaces and their current security groups
        logger.info(f"Instance {instance_id} has {len(interfaces)} network interfaces:")
        for idx, interface in enumerate(interfaces):
            logger.info(f" Interface {idx}: {interface['NetworkInterfaceId']} with SGs: {interface['SecurityGroups']}")

        # Log which interface options were supplied
        configured = [_interface_sg_option(options, i) for i in range(16)]
        configured = [(key, value) for key, value in configured if key is not None]
        logger.info(f"Found {len(configured)} interface options:")
        for key, value in configured:
            logger.info(f" {key}={value}")

        # When action=on, check that all interfaces have corresponding options
        if options.get("--action") == "on":
            missing_interfaces = [
                (idx, interface["NetworkInterfaceId"])
                for idx, interface in enumerate(interfaces)
                if _interface_sg_option(options, idx)[0] is None
            ]
            if missing_interfaces:
                error_lines = [
                    "ERROR: When action=on, all interfaces must have corresponding interface options.",
                    "The following interfaces are missing options:",
                ]
                error_lines.extend(
                    f" Interface {idx}: {interface_id}" for idx, interface_id in missing_interfaces
                )
                error_lines.append(
                    "Please define security groups for all interfaces using the interface{i}-sg option."
                )
                error_msg = "\n".join(error_lines)

                logger.error(error_msg)
                fail_usage(error_msg)

        applied_sgs = {}        # interface id -> SG list successfully applied
        failed_interfaces = []  # interfaces that could not be updated

        # Map interface index to network interface
        for idx, interface in enumerate(interfaces):
            eni_id = interface["NetworkInterfaceId"]
            opt_key, raw_value = _interface_sg_option(options, idx)
            if opt_key is None:
                logger.info(f"No interface option found for interface {idx}: {eni_id} - leaving unchanged")
                continue

            sg_list = _parse_sg_list(raw_value)
            logger.info(f"Found {opt_key}={raw_value} for interface {eni_id}")

            if not sg_list:
                logger.warning(f"Empty security group list for interface {eni_id} - skipping")
                continue

            current_sgs = interface["SecurityGroups"]
            if sorted(current_sgs) == sorted(sg_list):
                logger.info(f"Interface {eni_id} already has desired SGs {sg_list}, skipping.")
                continue

            logger.info(f"Setting interface {eni_id} SGs from {current_sgs} to {sg_list} (bypassing tag logic)")

            max_retries = 3
            if _apply_interface_sgs(ec2_client, eni_id, sg_list, max_retries=max_retries, retry_delay=2):
                applied_sgs[eni_id] = sg_list
            else:
                logger.error(f"Failed to apply security group changes to interface {eni_id} after {max_retries} attempts")
                failed_interfaces.append(eni_id)

        # A partially applied configuration must not be reported as success:
        # the caller treats the return value as the outcome of the operation.
        if failed_interfaces:
            logger.error(
                f"Security group changes could not be applied to interfaces {failed_interfaces} "
                f"of instance {instance_id}"
            )
            return False

        if not applied_sgs:
            logger.warning(f"No security groups were modified for instance {instance_id} using interface options")
            return False

        logger.info(f"Successfully modified security groups for instance {instance_id} using interface options")
        logger.info(f"Modified interfaces: {list(applied_sgs)}")

        # Final verification of all modified interfaces
        logger.info("Performing final verification of all modified interfaces")
        time.sleep(5)  # Allow time for AWS to fully commit all changes

        # Get updated interface information
        _, _, updated_interfaces = get_instance_details(ec2_client, instance_id)

        verification_failed = False
        for interface in updated_interfaces:
            eni_id = interface["NetworkInterfaceId"]
            expected_sgs = applied_sgs.get(eni_id)
            if expected_sgs is None:
                # Not one of the interfaces modified above
                continue

            current_sgs = interface["SecurityGroups"]
            if sorted(current_sgs) != sorted(expected_sgs):
                logger.error(
                    f"Final verification failed for interface {eni_id}: "
                    f"Expected SGs {sorted(expected_sgs)}, but found {sorted(current_sgs)}"
                )
                verification_failed = True
            else:
                logger.info(f"Final verification successful for interface {eni_id}")

        if verification_failed:
            logger.error("Some interfaces failed final verification - security group changes may not be fully committed")
            return False

        logger.info("All security group changes successfully verified and committed")
        return True
    except Exception as e:
        logger.error(f"Error in restore_security_groups_from_options: {str(e)}")
        raise
1079
def set_power_status(conn, options):
    """
    Apply the requested fence action to the instance named by ``--plug``.
    This is the only place where state is CHANGED; reading state is left to
    get_power_status.

    ON:  security groups are set from the interface{i}-sg options when any is
         present, otherwise restored from backup tags (unless
         --unfence-ignore-restore is given).
    OFF: with --secg the listed groups are removed (or, with
         --invert-sg-removal, everything except them); with only interface
         options those are applied; with neither, nothing is changed.

    Args:
        conn: The boto3 EC2 resource connection
        options: Dictionary containing the fencing options

    Returns:
        bool: True when the function runs to completion. Failures are reported
        through fail / fail_usage from the fencing library (presumably these
        terminate the agent - confirm there).
    """
    timestamp = int(time.time())  # Unix timestamp
    ec2_client = conn.meta.client
    instance_id = options["--plug"]
    secg_option = options.get("--secg")
    sg_to_remove = secg_option.split(",") if secg_option else []

    # Refuse to fence ourselves unless the race check is explicitly skipped
    if "--skip-race-check" not in options:
        if get_instance_id() == instance_id:
            fail_usage("Self-fencing detected. Exiting.")

    try:
        instance_state, _, _ = get_instance_details(ec2_client, instance_id)

        # A non-running instance is only acceptable for 'off' when the caller
        # asked to ignore the instance state
        if instance_state != "running":
            if "--ignore-instance-state" in options:
                logger.info(f"Instance {instance_id} is in '{instance_state}' state but --ignore-instance-state is set, proceeding with fencing")
            elif options["--action"] == "off":
                fail_usage(f"Instance {instance_id} is not running. Exiting.")

        # Interface options may be stored with or without the -- prefix
        interface_sg_present = any(
            options.get(f"--interface{i}-sg") or options.get(f"interface{i}-sg")
            for i in range(16)
        )

        action = options["--action"]

        if action == "on":
            logger.info(f"Executing ON action for instance {instance_id}")

            if interface_sg_present:
                # Interface options win over the tag-based restore
                logger.info("Using interface options to set security groups")
                restore_security_groups_from_options(ec2_client, instance_id, options)
            elif "--unfence-ignore-restore" in options:
                logger.info("Skipping security group restoration as --unfence-ignore-restore is set")
            else:
                logger.info("Restoring security groups from backup tags")
                restore_security_groups(ec2_client, instance_id)

        elif action == "off":
            logger.info(f"Executing OFF action for instance {instance_id}")

            if sg_to_remove:
                logger.info(f"Using --secg option to modify security groups: {sg_to_remove}")
                if "--invert-sg-removal" in options:
                    mode = "keep_only"
                else:
                    mode = "remove"

                try:
                    # Tags are skipped when interface options are present
                    modify_security_groups(ec2_client, instance_id, sg_to_remove, timestamp, mode, options, skip_tags=interface_sg_present)

                    # Optionally power the instance off as well
                    if "--onfence-poweroff" in options:
                        logger.info("--onfence-poweroff is set, initiating instance shutdown")
                        shutdown_instance(ec2_client, instance_id)

                except ClientError as api_error:
                    logger.error("AWS API error: %s", api_error)
                    fail_usage(str(api_error))
                except Exception as fence_error:
                    if "--ignore-tag-write-failure" in options:
                        # Tag failures are tolerated; a failed security group
                        # modification is not
                        if "Failed to modify security groups" in str(fence_error):
                            logger.error("Failed to modify security groups: %s", fence_error)
                            fail(EC_STATUS)
                        else:
                            logger.warning("Ignoring error due to ignore-tag-write-failure: %s", fence_error)
                    else:
                        logger.error("Failed to set power status: %s", fence_error)
                        fail(EC_STATUS)

            elif interface_sg_present:
                logger.info("No --secg option provided with interface options")

                if "--ignore-instance-state" in options:
                    # Apply unconditionally, without comparing current state first
                    logger.info("--ignore-instance-state flag detected with interface options - applying interface options regardless of instance state")
                    must_apply = True
                else:
                    # Only apply when the current groups differ from the desired ones
                    must_apply = not check_interface_sg_match(ec2_client, instance_id, options)
                    if must_apply:
                        logger.info("Current security groups don't match desired state, applying interface options")

                if must_apply:
                    success = restore_security_groups_from_options(ec2_client, instance_id, options)
                    if not success:
                        logger.error("Failed to apply interface security group options")
                        fail(EC_STATUS)
                else:
                    logger.info("Current security groups already match desired state, no changes needed")

            else:
                logger.warning("No --secg option or interface options provided for OFF action, no changes will be made")

    except Exception as e:
        logger.error("Unexpected error in set_power_status: %s", e)
        fail(EC_STATUS)

    # Explicitly return True to indicate success
    return True
1199
1200
1201 # Define fencing agent options
def define_new_opts():
    """Register this agent's command-line options in the shared ``all_opt`` table.

    ``all_opt`` is not defined in this function; it is presumably supplied by
    the fencing library's star import (confirm there). The help and shortdesc
    of the existing ``port`` entry are overridden, and one entry is added per
    agent-specific option, including ``interface{i}-sg`` for i in 0..15.
    """
    all_opt["port"]["help"] = "-n, --plug=[id] AWS Instance ID to perform action on "
    all_opt["port"]["shortdesc"] = "AWS Instance ID to perform action on "

    # Options for static interface security group restoration
    # Up to 16 interfaces per EC2 node
    for i in range(16):
        all_opt[f"interface{i}-sg"] = {
            "getopt": ":",
            "longopt": f"interface{i}-sg",
            "help": f"--interface{i}-sg=[sg1,sg2,...] Comma-separated list of Security Groups to restore for interface {i} (bypasses tag logic)",
            "shortdesc": f"Security Groups to restore for interface {i} (bypasses tag logic)",
            "required": "0",
            "order": 13 + i,
        }

    all_opt["region"] = {
        "getopt": "r:",
        "longopt": "region",
        "help": "-r, --region=[region] AWS region (e.g., us-east-1)",
        "shortdesc": "AWS Region.",
        "required": "0",
        "order": 1,
    }
    all_opt["access_key"] = {
        "getopt": "a:",
        "longopt": "access-key",
        "help": "-a, --access-key=[key] AWS access key.",
        "shortdesc": "AWS Access Key.",
        "required": "0",
        "order": 2,
    }
    all_opt["secret_key"] = {
        "getopt": "s:",
        "longopt": "secret-key",
        "help": "-s, --secret-key=[key] AWS secret key.",
        "shortdesc": "AWS Secret Key.",
        "required": "0",
        "order": 3,
    }
    all_opt["secg"] = {
        "getopt": ":",
        "longopt": "secg",
        "help": "--secg=[sg1,sg2,...] Comma-separated list of Security Groups to remove.",
        "shortdesc": "Security Groups to remove.",
        "required": "0",
        "order": 4,
    }
    all_opt["skip_race_check"] = {
        "getopt": "",
        "longopt": "skip-race-check",
        "help": "--skip-race-check Skip race condition check.",
        "shortdesc": "Skip race condition check.",
        "required": "0",
        "order": 6,
    }
    all_opt["invert-sg-removal"] = {
        "getopt": "",
        "longopt": "invert-sg-removal",
        "help": "--invert-sg-removal Remove all security groups except the specified one(s).",
        "shortdesc": "Remove all security groups except specified..",
        "required": "0",
        "order": 7,
    }
    # Unfencing is the "on" action; the description previously carried the
    # text copied from invert-sg-removal.
    all_opt["unfence-ignore-restore"] = {
        "getopt": "",
        "longopt": "unfence-ignore-restore",
        "help": "--unfence-ignore-restore Do not restore security groups from tag when unfencing (on).",
        "shortdesc": "Do not restore security groups from tag when unfencing.",
        "required": "0",
        "order": 8,
    }
    all_opt["filter"] = {
        "getopt": ":",
        "longopt": "filter",
        "help": "--filter=[key=value] Filter (e.g. vpc-id=[vpc-XXYYZZAA])",
        "shortdesc": "Filter for list-action",
        "required": "0",
        "order": 9
    }
    all_opt["boto3_debug"] = {
        "getopt": "b:",
        "longopt": "boto3_debug",
        "help": "-b, --boto3_debug=[option] Boto3 and Botocore library debug logging",
        "shortdesc": "Boto Lib debug",
        "required": "0",
        "default": "False",
        "order": 10
    }
    all_opt["onfence-poweroff"] = {
        "getopt": "",
        "longopt": "onfence-poweroff",
        "help": "--onfence-poweroff Power off the machine async upon fence (this is a network fencing agent...)",
        "shortdesc": "Power off the machine async..",
        "required": "0",
        "order": 11
    }
    all_opt["ignore-tag-write-failure"] = {
        "getopt": "",
        "longopt": "ignore-tag-write-failure",
        "help": "--ignore-tag-write-failure Continue to fence even if backup tag fails. This ensures prioritization of fencing over AWS backplane access",
        "shortdesc": "Continue to fence even if backup tag fails..",
        "required": "0",
        "order": 12
    }
    all_opt["ignore-instance-state"] = {
        "getopt": "",
        "longopt": "ignore-instance-state",
        "help": "--ignore-instance-state Fence regardless of what AWS returns re the power state of the instance, (this is a network fencing agent...)",
        "shortdesc": "Fence regardless of AWS state",
        "required": "0",
        "order": 13
    }
1315
1316
def main():
    """Entry point of the fence agent.

    Registers the agent options, parses the input, prints the docs when
    requested, configures logging, opens the boto3 EC2 resource and hands
    control to the fencing library's ``fence_action`` with this module's
    set/get/list callbacks. The process exits with the value ``fence_action``
    returns.
    """
    conn = None

    device_opt = [
        "no_password",
        "region",
        "access_key",
        "secret_key",
        "secg",
        "port",
        "skip_race_check",
        "invert-sg-removal",
        "unfence-ignore-restore",
        "filter",
        "boto3_debug",
        "onfence-poweroff",
        "ignore-tag-write-failure",
        "ignore-instance-state"
    ]

    # Add interface{i}-sg options to device_opt
    device_opt.extend(f"interface{i}-sg" for i in range(16))

    atexit.register(atexit_handler)

    define_new_opts()

    try:
        processed_input = process_input(device_opt)
        options = check_input(device_opt, processed_input)
    except Exception as e:
        logger.error(f"Failed to process input options: {str(e)}")
        sys.exit(EC_GENERIC_ERROR)

    run_delay(options)

    docs = {
        "shortdesc": "Fence agent for AWS (Amazon Web Services) Net",
        "longdesc": (
            "fence_aws_vpc is a Network and Power Fencing agent for AWS VPC that works by "
            "manipulating security groups. It uses the boto3 library to connect to AWS.\n\n"
            "boto3 can be configured with AWS CLI or by creating ~/.aws/credentials.\n"
            "For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
            " "
            "NOTE: If onfence-poweroff is set, the agent won't be able to power on the node again, it will have to be powered on manually or with other automation."
        ),
        "vendorurl": "http://www.amazon.com"
    }
    show_docs(options, docs)

    # This agent treats a reboot request as a plain "off"
    if options.get("--action", "") == "reboot":
        options["--action"] = "off"

    # Configure logging
    if "--debug-file" in options:
        # Iterate over a copy: removeHandler mutates logger.handlers, and
        # removing while iterating the live list skips the following handler.
        for handler in list(logger.handlers):
            if isinstance(handler, logging.FileHandler):
                logger.removeHandler(handler)
        lh = logging.FileHandler(options["--debug-file"])
        logger.addHandler(lh)
        lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        lh.setFormatter(lhf)
        lh.setLevel(logging.DEBUG)

    # Configure boto3 logging
    if options.get("--boto3_debug", "").lower() not in ["1", "yes", "on", "true"]:
        boto3.set_stream_logger('boto3', logging.INFO)
        boto3.set_stream_logger('botocore', logging.CRITICAL)
        logging.getLogger('botocore').propagate = False
        logging.getLogger('boto3').propagate = False
    else:
        # Debugging requested: send boto3/botocore records to a dedicated file
        log_format = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
        logging.getLogger('botocore').propagate = False
        logging.getLogger('boto3').propagate = False
        fdh = logging.FileHandler('/var/log/fence_aws_vpc_boto3.log')
        fdh.setFormatter(log_format)
        logging.getLogger('boto3').addHandler(fdh)
        logging.getLogger('botocore').addHandler(fdh)
        logging.debug("Boto debug level is %s and sending debug info to /var/log/fence_aws_vpc_boto3.log",
                      options.get("--boto3_debug"))

    # Establish AWS connection
    region = options.get("--region")
    access_key = options.get("--access-key")
    secret_key = options.get("--secret-key")

    try:
        conn = boto3.resource(
            "ec2",
            region_name=region,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        )
    except Exception as e:
        # Actions that only describe the agent must work without a connection
        if options.get("--action", "") not in ["metadata", "manpage", "validate-all"]:
            fail_usage("Failed: Unable to connect to AWS: " + str(e))

    # Operate the fencing device using the fence library's fence_action
    result = fence_action(conn, options, set_power_status, get_power_status, get_nodes_list)
    sys.exit(result)
1420
1421
# Run the agent only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
1424
1425