Skip to content

Commit c4b2ad1

Browse files
Stefan and claude
authored and committed
perf: optimize licensing calculator, resource counter, and consolidate _is_managed_service
- Module-level frozensets for DDI/asset type lookups (eliminate per-call set reconstruction)
- Single-pass provider grouping in _get_provider_breakdown (eliminate double _determine_provider scan)
- Module-level _IP_KEY_MAP constant in resource_counter (eliminate per-resource dict rebuild)
- Consolidate _is_managed_service into data-driven base class (3 copies -> 1)
- Proof manifest single write (serialize -> hash -> write, not write -> read -> rewrite)
- Single _scan_timestamp per discovery run (cached in __init__)
- Remove duplicate timestamp creation in all 3 provider discover.py files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 71f66c4 commit c4b2ad1

File tree

9 files changed

+120
-196
lines changed

9 files changed

+120
-196
lines changed

aws_discovery/aws_discovery.py

Lines changed: 12 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@
3535
class AWSDiscovery(BaseDiscovery):
3636
"""AWS Cloud Discovery implementation."""
3737

38+
_managed_key_prefixes = (
39+
"aws:ecs:",
40+
"aws:eks:",
41+
"eks.amazonaws.com/",
42+
"lambda:",
43+
"aws:lambda:",
44+
"elasticmapreduce:",
45+
"aws:elasticmapreduce:",
46+
)
47+
_managed_key_exact = frozenset({"managed-by", "managed_by", "aws-managed"})
48+
_managed_value_exact = frozenset({"aws-managed", "ecs", "lambda", "eks"})
49+
3850
def __init__(self, config: AWSConfig):
3951
"""
4052
Initialize AWS discovery.
@@ -752,41 +764,6 @@ def _discover_route53_zones_and_records(self) -> List[Dict]:
752764

753765
return resources
754766

755-
def _is_managed_service(self, tags: Dict[str, str]) -> bool:
756-
"""Check if a resource is a managed service (Management Token-free).
757-
758-
Detects resources created/managed by AWS platform services (ECS tasks,
759-
Lambda ENIs, EKS system pods, etc.). Avoids false positives from generic
760-
aws:cloudformation:* or aws:autoscaling:* auto-tags.
761-
"""
762-
if not tags:
763-
return False
764-
765-
# Specific tag key prefixes that indicate AWS-managed resources
766-
managed_key_prefixes = (
767-
"aws:ecs:",
768-
"aws:eks:",
769-
"eks.amazonaws.com/",
770-
"lambda:",
771-
"aws:lambda:",
772-
"elasticmapreduce:",
773-
"aws:elasticmapreduce:",
774-
)
775-
managed_key_exact = {"managed-by", "managed_by", "aws-managed"}
776-
777-
for key, value in tags.items():
778-
key_lower = key.lower()
779-
value_lower = value.lower()
780-
781-
if key_lower in managed_key_exact:
782-
return True
783-
if any(key_lower.startswith(prefix) for prefix in managed_key_prefixes):
784-
return True
785-
if value_lower in ("aws-managed", "ecs", "lambda", "eks"):
786-
return True
787-
788-
return False
789-
790767
def get_management_token_free_assets(self) -> List[Dict]:
791768
"""
792769
Get list of Management Token-free assets.

aws_discovery/discover.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,6 @@ def main(args=None):
157157
calculator = UniversalDDILicensingCalculator()
158158
licensing_results = calculator.calculate_from_discovery_results(native_objects, provider="aws")
159159

160-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
161-
162160
# Export CSV for Sales Engineers
163161
csv_file = os.path.join("output", f"aws_universal_ddi_licensing_{timestamp}.csv")
164162
calculator.export_csv(csv_file, provider="aws")

azure_discovery/azure_discovery.py

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ def make_retry_policy(sub_name: str, print_lock: threading.Lock) -> VisibleRetry
7979
class AzureDiscovery(BaseDiscovery):
8080
"""Azure Cloud Discovery implementation."""
8181

82+
_managed_key_prefixes = ("aks-managed-", "k8s-azure-", "ms-resource-usage:")
83+
_managed_key_exact = frozenset({"managed-by", "managed_by", "azure-managed"})
84+
_managed_value_exact = frozenset({"azure-managed", "aks", "appservice", "azure-functions"})
85+
8286
def __init__(
8387
self,
8488
config: AzureConfig,
@@ -999,37 +1003,6 @@ def _discover_azure_dns_zones_and_records(self) -> List[Dict]:
9991003

10001004
return resources
10011005

1002-
def _is_managed_service(self, tags: Dict[str, str]) -> bool:
1003-
"""Check if a resource is a managed service (Management Token-free).
1004-
1005-
Detects resources created/managed by Azure platform services (AKS system
1006-
pools, App Service infra, etc.). Avoids false positives from generic tags
1007-
that happen to contain 'azure' or 'service'.
1008-
"""
1009-
if not tags:
1010-
return False
1011-
1012-
# Specific tag key prefixes that indicate Azure-managed resources
1013-
managed_key_prefixes = (
1014-
"aks-managed-",
1015-
"k8s-azure-",
1016-
"ms-resource-usage:",
1017-
)
1018-
managed_key_exact = {"managed-by", "managed_by", "azure-managed"}
1019-
1020-
for key, value in tags.items():
1021-
key_lower = key.lower()
1022-
value_lower = value.lower()
1023-
1024-
if key_lower in managed_key_exact:
1025-
return True
1026-
if any(key_lower.startswith(prefix) for prefix in managed_key_prefixes):
1027-
return True
1028-
if value_lower in ("azure-managed", "aks", "appservice", "azure-functions"):
1029-
return True
1030-
1031-
return False
1032-
10331006
def get_scanned_subscription_ids(self) -> list:
10341007
"""Return the Azure Subscription ID(s) scanned."""
10351008
return [self.subscription_id] if self.subscription_id else []

azure_discovery/discover.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,6 @@ def discover_subscription(sub_id):
405405
calculator = UniversalDDILicensingCalculator()
406406
calculator.calculate_from_discovery_results(all_native_objects, provider="azure")
407407

408-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
409-
410408
# Export CSV for Sales Engineers
411409
csv_file = os.path.join("output", f"azure_universal_ddi_licensing_{timestamp}.csv")
412410
calculator.export_csv(csv_file, provider="azure")

gcp_discovery/discover.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,6 @@ def discover_project(project_info):
253253
calculator = UniversalDDILicensingCalculator()
254254
calculator.calculate_from_discovery_results(all_native_objects, provider="gcp")
255255

256-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
257-
258256
# Export CSV for Sales Engineers
259257
csv_file = os.path.join("output", f"gcp_universal_ddi_licensing_{timestamp}.csv")
260258
calculator.export_csv(csv_file, provider="gcp")

gcp_discovery/gcp_discovery.py

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
class GCPDiscovery(BaseDiscovery):
3434
"""GCP Cloud Discovery implementation."""
3535

36+
_managed_key_prefixes = ("goog-managed-by", "gke-managed", "cloud-run", "cloud-functions")
37+
_managed_key_exact = frozenset({"managed-by", "managed_by", "google-managed"})
38+
_managed_value_exact = frozenset({"google-managed", "gke", "cloud-run", "cloud-functions"})
39+
3640
def __init__(self, config: GCPConfig, shared_compute_clients: Optional[dict] = None):
3741
"""
3842
Initialize GCP discovery.
@@ -794,38 +798,6 @@ def _discover_dns_records(self, zone) -> List[Dict]:
794798

795799
return resources
796800

797-
def _is_managed_service(self, labels: Dict[str, str]) -> bool:
798-
"""Check if a resource is a managed service (Management Token-free).
799-
800-
Detects resources created/managed by GCP platform services (GKE system
801-
pods, Cloud Run infra, Cloud Functions, etc.). Uses specific key prefixes
802-
and exact value matches to avoid false positives.
803-
"""
804-
if not labels:
805-
return False
806-
807-
# Specific label key prefixes that indicate GCP-managed resources
808-
managed_key_prefixes = (
809-
"goog-managed-by",
810-
"gke-managed",
811-
"cloud-run",
812-
"cloud-functions",
813-
)
814-
managed_key_exact = {"managed-by", "managed_by", "google-managed"}
815-
816-
for key, value in labels.items():
817-
key_lower = key.lower()
818-
value_lower = value.lower()
819-
820-
if key_lower in managed_key_exact:
821-
return True
822-
if any(key_lower.startswith(prefix) for prefix in managed_key_prefixes):
823-
return True
824-
if value_lower in ("google-managed", "gke", "cloud-run", "cloud-functions"):
825-
return True
826-
827-
return False
828-
829801
def get_scanned_project_ids(self) -> list:
830802
"""Return the GCP Project ID(s) scanned."""
831803
return [self.project_id] if self.project_id else []

shared/base_discovery.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ class DiscoveryConfig:
2121
class BaseDiscovery(ABC):
2222
"""Base class for cloud discovery implementations."""
2323

24+
# Subclasses override these to customize managed-service detection.
25+
# The base _is_managed_service checks key exact, key prefix, and value exact.
26+
_managed_key_prefixes: tuple = ()
27+
_managed_key_exact: frozenset = frozenset({"managed-by", "managed_by"})
28+
_managed_value_exact: frozenset = frozenset()
29+
2430
def __init__(self, config: DiscoveryConfig):
2531
"""
2632
Initialize the base discovery class.
@@ -31,6 +37,7 @@ def __init__(self, config: DiscoveryConfig):
3137
self.config = config
3238
self._discovered_resources: Optional[List[Dict]] = None
3339
self.resource_counter = ResourceCounter(config.provider)
40+
self._scan_timestamp: str = datetime.now().isoformat()
3441

3542
logging.basicConfig(level=logging.WARNING)
3643
self.logger = logging.getLogger(self.__class__.__name__)
@@ -147,18 +154,27 @@ def _format_resource(
147154
"requires_management_token": requires_management_token,
148155
"tags": tags or {},
149156
"details": resource_data,
150-
"discovered_at": datetime.now().isoformat(),
157+
"discovered_at": self._scan_timestamp,
151158
}
152159

153160
def _is_managed_service(self, tags: Dict[str, str]) -> bool:
154-
"""Base managed-service check. Provider subclasses override with specific indicators."""
161+
"""Data-driven managed-service check using class-level indicator sets.
162+
163+
Subclasses customize detection by setting _managed_key_prefixes,
164+
_managed_key_exact, and _managed_value_exact class attributes.
165+
"""
155166
if not tags:
156167
return False
157168

158-
managed_key_exact = {"managed-by", "managed_by"}
159169
for key, value in tags.items():
160170
key_lower = key.lower()
161-
if key_lower in managed_key_exact:
171+
value_lower = value.lower() if isinstance(value, str) else ""
172+
173+
if key_lower in self._managed_key_exact:
174+
return True
175+
if any(key_lower.startswith(prefix) for prefix in self._managed_key_prefixes):
176+
return True
177+
if value_lower in self._managed_value_exact:
162178
return True
163179

164180
return False

0 commit comments

Comments
 (0)