Skip to content

Commit fff7e47

Browse files
authored
Azure enhancements (#8)
* Enhance Azure discovery: parallel subscriptions, checkpointing, resume, and retry logic
  - Add parallel processing for multiple Azure subscriptions (--subscription-workers)
  - Implement checkpointing to save progress every 50 subscriptions or 15 minutes
  - Add resume functionality to continue from the last checkpoint
  - Include retry logic with exponential backoff for failed API calls
  - Update README with new features and usage examples
  - Improve consistency and reliability for large Azure environments
* Remove legacy resource_count output functionality
  - Remove the --include-counts argument from all providers
  - Remove legacy resource_count file generation code
  - Update README to remove references to legacy outputs
  - Streamline output to focus on Universal DDI licensing files
1 parent c6c912e commit fff7e47

File tree

14 files changed

+1288
-475
lines changed

14 files changed

+1288
-475
lines changed

README.md

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ A Python tool that discovers and counts DDI Objects and Active IPs across AWS, A
2525
- **Active IP Tracking**: Identify IP addresses assigned to running instances and services
2626
- **Flexible Output**: Support for JSON, CSV, and TXT output formats
2727
- **Parallel Processing**: Configurable worker threads for improved performance
28+
- **Checkpointing & Resume**: Save progress and resume interrupted discoveries (Azure)
29+
- **Retry Logic**: Automatic retries for failed API calls to ensure consistency
2830
- **Modular Design**: Clean separation between cloud providers and shared utilities
2931

3032
## Quick Start
@@ -60,11 +62,8 @@ source venv/bin/activate # macOS/Linux
6062

6163
# Run discovery (licensing outputs are generated by default)
6264
python main.py aws
63-
python main.py azure
65+
python main.py azure --subscription-workers 8 # For large Azure environments
6466
python main.py gcp
65-
66-
# Optional: include legacy resource_count files
67-
python main.py aws --include-counts
6867
```
6968

7069
## Installation
@@ -175,6 +174,12 @@ python main.py {provider} [options]
175174
**Options:**
176175
- `--format {json,csv,txt}` - Output format (default: txt)
177176
- `--workers <number>` - Number of parallel workers (default: 8)
177+
- `--subscription-workers <number>` - Parallel subscriptions for Azure (default: 4)
178+
- `--retry-attempts <number>` - Retry attempts for failed API calls (default: 3)
179+
- `--no-checkpoint` - Disable checkpointing and resume
180+
- `--resume` - Auto-resume from checkpoint without prompt
181+
- `--checkpoint-file <path>` - Custom checkpoint file path
182+
- `--checkpoint-interval <number>` - Save checkpoint every N subscriptions (default: 50)
178183
- `--full` - Save detailed resource data (default: summary only)
179184

180185
### Examples
@@ -186,8 +191,13 @@ python main.py aws
186191
# High-performance discovery
187192
python main.py aws --workers 12
188193

189-
# Include legacy count file
190-
python main.py azure --include-counts
194+
# Azure with parallel subscriptions and retries
195+
python main.py azure --subscription-workers 8 --retry-attempts 5
196+
197+
# Resume interrupted discovery
198+
python main.py azure --resume
199+
200+
191201
```
192202

193203

@@ -202,7 +212,6 @@ Generated in the `output/` directory:
202212
- `{provider}_universal_ddi_licensing_{timestamp}.txt` - Human-readable summary
203213
- `{provider}_universal_ddi_proof_{timestamp}.json` - Audit manifest (scope, regions, hashes)
204214
- `{provider}_unknown_resources_{timestamp}.json` - Only when unknown types exist
205-
- Optional legacy: `*_resource_count_*.{format}` via `--include-counts`
206215

207216
### Output Structure
208217

aws_discovery/aws_discovery.py

Lines changed: 151 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ def _discover_region(self, region: str) -> List[Dict]:
140140
load_balancers = self._discover_load_balancers(region)
141141
region_resources.extend(load_balancers)
142142

143+
# Discover allocated Elastic IPs (including unattached)
144+
elastic_ips = self._discover_elastic_ips(region)
145+
region_resources.extend(elastic_ips)
146+
143147
except Exception as e:
144148
self.logger.error(f"Error discovering region {region}: {e}")
145149

@@ -168,6 +172,14 @@ def _discover_ec2_instances(self, region: str) -> List[Dict]:
168172
private_ip = instance.get("PrivateIpAddress")
169173
public_ip = instance.get("PublicIpAddress")
170174

175+
# IPv6 addresses live on the network interfaces
176+
ipv6_ips = []
177+
for nic in instance.get("NetworkInterfaces", []) or []:
178+
for entry in nic.get("Ipv6Addresses", []) or []:
179+
ipv6 = entry.get("Ipv6Address")
180+
if ipv6:
181+
ipv6_ips.append(ipv6)
182+
171183
# Get tags
172184
tags = get_resource_tags(instance.get("Tags", []))
173185

@@ -182,6 +194,7 @@ def _discover_ec2_instances(self, region: str) -> List[Dict]:
182194
"state": instance_state,
183195
"private_ip": private_ip,
184196
"public_ip": public_ip,
197+
"ipv6_ips": ipv6_ips,
185198
"vpc_id": instance.get("VpcId"),
186199
"subnet_id": instance.get("SubnetId"),
187200
"launch_time": instance.get("LaunchTime"),
@@ -288,6 +301,7 @@ def _discover_subnets(self, region: str) -> List[Dict]:
288301
details = {
289302
"subnet_id": subnet_id,
290303
"cidr_block": cidr_block,
304+
"Ipv6CidrBlockAssociationSet": subnet.get("Ipv6CidrBlockAssociationSet", []),
291305
"state": state,
292306
"vpc_id": vpc_id,
293307
"availability_zone": availability_zone,
@@ -381,55 +395,102 @@ def _discover_load_balancers(self, region: str) -> List[Dict]:
381395
try:
382396
elb = self.clients[region]["elb"]
383397

384-
response = elb.describe_load_balancers()
385-
for lb in response.get("LoadBalancerDescriptions", []):
386-
lb_name = lb.get("LoadBalancerName")
387-
if not lb_name:
398+
paginator = elb.get_paginator("describe_load_balancers")
399+
for response in paginator.paginate():
400+
for lb in response.get("LoadBalancerDescriptions", []):
401+
lb_name = lb.get("LoadBalancerName")
402+
if not lb_name:
403+
continue
404+
405+
# Get load balancer details
406+
dns_name = lb.get("DNSName")
407+
state = "active" if dns_name else "inactive"
408+
409+
# Get tags
410+
try:
411+
tags_response = elb.describe_tags(LoadBalancerNames=[lb_name])
412+
lb_tags = {}
413+
if tags_response.get("TagDescriptions"):
414+
lb_tags = get_resource_tags(tags_response["TagDescriptions"][0].get("Tags", []))
415+
except Exception as e:
416+
self.logger.warning(f"Could not describe tags for {lb_name}: {e}")
417+
lb_tags = {}
418+
419+
# Determine if Management Token is required
420+
is_managed = self._is_managed_service(lb_tags)
421+
requires_token = not is_managed
422+
423+
# Create resource details
424+
details = {
425+
"load_balancer_name": lb_name,
426+
"dns_name": dns_name,
427+
"state": state,
428+
"vpc_id": lb.get("VPCId"),
429+
"availability_zones": lb.get("AvailabilityZones", []),
430+
"security_groups": lb.get("SecurityGroups", []),
431+
}
432+
433+
# Format resource
434+
formatted_resource = self._format_resource(
435+
resource_data=details,
436+
resource_type="classic-load-balancer",
437+
region=region,
438+
name=lb_name,
439+
requires_management_token=requires_token,
440+
state=state,
441+
tags=lb_tags,
442+
)
443+
444+
resources.append(formatted_resource)
445+
446+
except Exception as e:
447+
self.logger.warning(f"Error discovering Classic LB in {region}: {e}")
448+
449+
return resources
450+
451+
def _discover_elastic_ips(self, region: str) -> List[Dict]:
452+
"""Discover allocated Elastic IPs (including unattached) in a region."""
453+
resources: List[Dict] = []
454+
try:
455+
ec2 = self.clients[region]["ec2"]
456+
resp = ec2.describe_addresses()
457+
for addr in resp.get("Addresses", []):
458+
public_ip = addr.get("PublicIp")
459+
allocation_id = addr.get("AllocationId") or public_ip
460+
if not allocation_id:
388461
continue
389462

390-
# Get load balancer details
391-
dns_name = lb.get("DNSName")
392-
state = "active" if dns_name else "inactive"
393-
394-
# Get tags
395-
try:
396-
tags_response = elb.describe_tags(LoadBalancerNames=[lb_name])
397-
lb_tags = {}
398-
if tags_response.get("TagDescriptions"):
399-
lb_tags = get_resource_tags(tags_response["TagDescriptions"][0].get("Tags", []))
400-
except Exception as e:
401-
self.logger.warning(f"Could not describe tags for {lb_name}: {e}")
402-
lb_tags = {}
403-
404-
# Determine if Management Token is required
405-
is_managed = self._is_managed_service(lb_tags)
406-
requires_token = not is_managed
407-
408-
# Create resource details
463+
tags = get_resource_tags(addr.get("Tags", []))
464+
465+
# Elastic IPs are address allocations (attached or not). We record the address under
466+
# the "elastic_ip" key so they show up clearly in active IP breakdowns.
409467
details = {
410-
"load_balancer_name": lb_name,
411-
"dns_name": dns_name,
412-
"state": state,
413-
"vpc_id": lb.get("VPCId"),
414-
"availability_zones": lb.get("AvailabilityZones", []),
415-
"security_groups": lb.get("SecurityGroups", []),
468+
"allocation_id": addr.get("AllocationId"),
469+
"association_id": addr.get("AssociationId"),
470+
"domain": addr.get("Domain"),
471+
"instance_id": addr.get("InstanceId"),
472+
"network_interface_id": addr.get("NetworkInterfaceId"),
473+
"private_ip": addr.get("PrivateIpAddress"),
474+
"elastic_ip": public_ip,
475+
"public_ipv4_pool": addr.get("PublicIpv4Pool"),
416476
}
417477

418-
# Format resource
419-
formatted_resource = self._format_resource(
420-
resource_data=details,
421-
resource_type="classic-load-balancer",
422-
region=region,
423-
name=lb_name,
424-
requires_management_token=requires_token,
425-
state=state,
426-
tags=lb_tags,
427-
)
478+
state = "associated" if addr.get("AssociationId") or addr.get("InstanceId") else "unassociated"
428479

429-
resources.append(formatted_resource)
480+
resources.append(
481+
self._format_resource(
482+
resource_data=details,
483+
resource_type="elastic-ip",
484+
region=region,
485+
name=str(allocation_id),
486+
requires_management_token=True,
487+
state=state,
488+
tags=tags,
489+
)
490+
)
430491

431492
except Exception as e:
432-
self.logger.warning(f"Error discovering Classic LB in {region}: {e}")
493+
self.logger.warning(f"Error discovering Elastic IPs in {region}: {e}")
433494

434495
return resources
435496

@@ -438,53 +499,55 @@ def _discover_route53_zones_and_records(self) -> List[Dict]:
438499
resources = []
439500
try:
440501
route53 = get_aws_client("route53", "us-east-1", self.aws_config)
441-
zones_resp = route53.list_hosted_zones()
442-
for zone in zones_resp.get("HostedZones", []):
443-
zone_id = zone["Id"].split("/")[-1]
444-
zone_name = zone["Name"].rstrip(".")
445-
is_private = zone.get("Config", {}).get("PrivateZone", False)
446-
447-
# Add the zone as a resource
448-
zone_resource = self._format_resource(
449-
resource_data={
450-
"zone_id": zone_id,
451-
"zone_name": zone_name,
452-
"private": is_private,
453-
"record_set_count": zone.get("ResourceRecordSetCount", 0),
454-
},
455-
resource_type="route53-zone",
456-
region="global",
457-
name=zone_name,
458-
requires_management_token=True,
459-
state="private" if is_private else "public",
460-
tags={},
461-
)
462-
resources.append(zone_resource)
463-
464-
# List all records in the zone
465-
paginator = route53.get_paginator("list_resource_record_sets")
466-
for page in paginator.paginate(HostedZoneId=zone["Id"]):
467-
for record in page.get("ResourceRecordSets", []):
468-
record_type = record.get("Type")
469-
record_name = record.get("Name", "").rstrip(".")
470-
471-
record_resource = self._format_resource(
472-
resource_data={
473-
"zone_id": zone_id,
474-
"zone_name": zone_name,
475-
"record_type": record_type,
476-
"record_name": record_name,
477-
"ttl": record.get("TTL"),
478-
"resource_records": record.get("ResourceRecords", []),
479-
},
480-
resource_type="route53-record",
481-
region="global",
482-
name=record_name,
483-
requires_management_token=True,
484-
state=record_type,
485-
tags={},
486-
)
487-
resources.append(record_resource)
502+
503+
zones_paginator = route53.get_paginator("list_hosted_zones")
504+
for zones_resp in zones_paginator.paginate():
505+
for zone in zones_resp.get("HostedZones", []):
506+
zone_id = zone["Id"].split("/")[-1]
507+
zone_name = zone["Name"].rstrip(".")
508+
is_private = zone.get("Config", {}).get("PrivateZone", False)
509+
510+
# Add the zone as a resource
511+
zone_resource = self._format_resource(
512+
resource_data={
513+
"zone_id": zone_id,
514+
"zone_name": zone_name,
515+
"private": is_private,
516+
"record_set_count": zone.get("ResourceRecordSetCount", 0),
517+
},
518+
resource_type="route53-zone",
519+
region="global",
520+
name=zone_name,
521+
requires_management_token=True,
522+
state="private" if is_private else "public",
523+
tags={},
524+
)
525+
resources.append(zone_resource)
526+
527+
# List all records in the zone
528+
paginator = route53.get_paginator("list_resource_record_sets")
529+
for page in paginator.paginate(HostedZoneId=zone["Id"]):
530+
for record in page.get("ResourceRecordSets", []):
531+
record_type = record.get("Type")
532+
record_name = record.get("Name", "").rstrip(".")
533+
534+
record_resource = self._format_resource(
535+
resource_data={
536+
"zone_id": zone_id,
537+
"zone_name": zone_name,
538+
"record_type": record_type,
539+
"record_name": record_name,
540+
"ttl": record.get("TTL"),
541+
"resource_records": record.get("ResourceRecords", []),
542+
},
543+
resource_type="route53-record",
544+
region="global",
545+
name=record_name,
546+
requires_management_token=True,
547+
state=record_type,
548+
tags={},
549+
)
550+
resources.append(record_resource)
488551

489552
except Exception as e:
490553
self.logger.error(f"Error discovering Route 53 zones/records: {e}")

aws_discovery/discover.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,7 @@ def main(args=None):
8585
action="store_true",
8686
help="Save/export full resource/object data (default: only summary and token calculation)",
8787
)
88-
parser.add_argument(
89-
"--include-counts",
90-
action="store_true",
91-
help="Also write legacy resource_count files alongside licensing outputs",
92-
)
88+
9389
args = parser.parse_args()
9490

9591
print("AWS Cloud Discovery for Management Token Calculation")
@@ -204,24 +200,7 @@ def main(args=None):
204200
print("Results saved to:")
205201
for file_type, filepath in saved_files.items():
206202
print(f" {file_type}: {filepath}")
207-
else:
208-
# Save only legacy count file if requested
209-
if args.include_counts:
210-
output_dir = config.output_directory
211-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
212-
from shared.output_utils import save_resource_count_results
213-
214-
summary_files = save_resource_count_results(
215-
count_results,
216-
output_dir,
217-
args.format,
218-
timestamp,
219-
"aws",
220-
extra_info={"accounts": scanned_accounts},
221-
)
222-
print(f"Summary saved to: {summary_files['resource_count']}")
223-
else:
224-
print("Skipping legacy resource_count output (use --include-counts to enable)")
203+
225204

226205
print("\nDiscovery completed successfully!")
227206

0 commit comments

Comments
 (0)