Skip to content

Commit ab8f684

Browse files
authored
Merge pull request #35375 from dims/updates-to-audit_log_parser-to-better-find-discrepencies
updates to audit_log_parser.py to better find discrepancies
2 parents 2fb3c47 + d088854 commit ab8f684

File tree

1 file changed

+140
-11
lines changed

1 file changed

+140
-11
lines changed

experiment/audit/audit_log_parser.py

Lines changed: 140 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,42 @@
3535
from pathlib import Path
3636
import argparse
3737
import time
38+
def load_ineligible_endpoints(ineligible_endpoints_url=None):
    """
    Load the list of ineligible endpoints from a URL or a local file.

    If the argument names an existing local file, that file is read directly;
    otherwise the value is handed to ``urllib.request.urlopen``. The content is
    scanned line by line for ``- endpoint: <operationId>`` entries; every other
    line is ignored.

    Loading is best-effort: any failure (network error, unreadable file,
    unsupported URL scheme, ...) prints a warning and yields an empty set so
    the caller can proceed without filtering.

    Args:
        ineligible_endpoints_url (str, optional): URL or local path to the
            ineligible endpoints YAML file. When None, the file from the
            kubernetes/kubernetes master branch is used.

    Returns:
        set: Set of ineligible endpoint operation IDs to filter out
            (empty if the source could not be loaded).
    """
    if ineligible_endpoints_url is None:
        ineligible_endpoints_url = ("https://raw.githubusercontent.com/kubernetes/kubernetes/"
                                    "master/test/conformance/testdata/ineligible_endpoints.yaml")

    try:
        print(f"Loading ineligible endpoints from: {ineligible_endpoints_url}")

        # urlopen() rejects plain filesystem paths ("unknown url type"), so a
        # local file has to be detected and read explicitly.
        try:
            is_local_file = Path(ineligible_endpoints_url).is_file()
        except (OSError, ValueError):
            # e.g. an over-long URL segment or embedded NUL: not a local file
            is_local_file = False

        if is_local_file:
            content = Path(ineligible_endpoints_url).read_text(encoding='utf-8')
        else:
            with urllib.request.urlopen(ineligible_endpoints_url, timeout=30) as response:
                content = response.read().decode()

        # Parse the YAML manually since it has a simple, flat structure
        prefix = '- endpoint:'
        ineligible_endpoints = set()
        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not line.startswith(prefix):
                continue
            # Take only what follows the leading "- endpoint:" marker, then
            # drop a trailing YAML comment and optional surrounding quotes.
            endpoint = line[len(prefix):].split(' #', 1)[0].strip().strip('"\'')
            if endpoint:
                ineligible_endpoints.add(endpoint)

        print(f"Loaded {len(ineligible_endpoints)} ineligible endpoints")
        return ineligible_endpoints

    except Exception as e:  # pylint: disable=broad-except
        # Deliberate best-effort: filtering is optional, so never abort the run.
        print(f"Warning: Failed to load ineligible endpoints: {e}")
        print("Proceeding without filtering ineligible endpoints")
        return set()
3874

3975

4076
class SwaggerEndpointMapper:
@@ -184,10 +220,23 @@ def _normalize_audit_path(self, uri):
184220
# Remove query parameters
185221
uri = uri.split('?')[0]
186222

223+
# Handle API group discovery paths - these should not be normalized
224+
# Patterns like /apis/apps/, /apis/networking.k8s.io/, etc.
225+
if re.match(r'^/apis/[^/]+/?$', uri):
226+
return uri
227+
228+
# Handle core API discovery paths
229+
if uri in ['/api/', '/apis/']:
230+
return uri
231+
187232
# Replace actual values with parameter placeholders
188233
normalized = re.sub(r'/namespaces/[^/]+', '/namespaces/{namespace}', uri)
189234
normalized = re.sub(r'/nodes/[^/]+(?=/|$)', '/nodes/{node}', normalized)
190235

236+
# Handle proxy paths with additional path segments
237+
# Convert /proxy/anything/else to /proxy/{path}
238+
normalized = re.sub(r'/proxy/.*$', '/proxy/{path}', normalized)
239+
191240
# Replace resource names with {name} placeholder
192241
# Split the path and process each segment
193242
parts = normalized.split('/')
@@ -232,6 +281,43 @@ def _normalize_audit_path(self, uri):
232281

233282
return '/'.join(result_parts)
234283

284+
def _normalize_watch_path(self, uri):
    """Normalize watch operation URI to match Swagger watch path format.

    Inserts a ``watch`` segment directly after the API version prefix and
    then applies the regular audit-path normalization:

        /apis/group/version/resources
            -> /apis/group/version/watch/resources
        /apis/group/version/namespaces/{namespace}/resources
            -> /apis/group/version/watch/namespaces/{namespace}/resources
        /api/v1/resources
            -> /api/v1/watch/resources
        /api/v1/namespaces/{namespace}/resources
            -> /api/v1/watch/namespaces/{namespace}/resources

    The URI is left as-is (before normalization) when it is not under
    ``/apis/<group>/<version>/`` or ``/api/v1/``, when nothing follows the
    version prefix, or when it already uses the ``/watch/`` path form, so
    no doubled or dangling ``watch`` segment is produced.

    Args:
        uri (str): Request URI from the audit entry; may carry a query string.

    Returns:
        Whatever ``self._normalize_audit_path`` returns for the watch-form URI.
    """
    # Remove query parameters
    uri = uri.split('?')[0]

    # For an absolute path parts[0] is '' (the text before the leading '/').
    parts = uri.split('/')

    if uri.startswith('/apis/'):
        prefix_len = 4  # '', 'apis', <group>, <version>
    elif uri.startswith('/api/v1/'):
        prefix_len = 3  # '', 'api', 'v1'
    else:
        prefix_len = None

    watch_uri = uri
    if prefix_len is not None and len(parts) > prefix_len:
        resource_parts = parts[prefix_len:]
        # Only insert 'watch' when a resource segment follows the version
        # and the request is not already in /watch/ path form.
        if resource_parts[0] and resource_parts[0] != 'watch':
            watch_uri = '/'.join(parts[:prefix_len] + ['watch'] + resource_parts)

    # Now apply normal normalization to the watch path
    return self._normalize_audit_path(watch_uri)
320+
235321
def _k8s_verb_to_http_method(self, k8s_verb, uri): # pylint: disable=unused-argument,no-self-use
236322
"""Convert Kubernetes audit verb to HTTP method for Swagger lookup."""
237323
k8s_verb = k8s_verb.lower()
@@ -256,9 +342,14 @@ def get_operation_id(self, method, uri):
256342
if not self.swagger_spec:
257343
return None
258344

345+
# Handle watch operations - convert to watch path format
346+
if method.lower() == 'watch':
347+
normalized_uri = self._normalize_watch_path(uri)
348+
else:
349+
normalized_uri = self._normalize_audit_path(uri)
350+
259351
# Convert Kubernetes verb to HTTP method
260352
http_method = self._k8s_verb_to_http_method(method, uri).lower()
261-
normalized_uri = self._normalize_audit_path(uri)
262353
key = f"{http_method}:{normalized_uri}"
263354

264355
# Direct match
@@ -454,6 +545,12 @@ def parse_audit_logs(file_paths, swagger_mapper=None): # pylint: disable=too-ma
454545
total_entries += 1
455546
file_entries += 1
456547

548+
# Only process RequestReceived stage entries
549+
stage = entry.get('stage', '')
550+
if stage != 'RequestReceived':
551+
skipped_entries += 1
552+
continue
553+
457554
verb = entry.get('verb', '')
458555
request_uri = entry.get('requestURI', '')
459556

@@ -510,7 +607,7 @@ def parse_audit_logs(file_paths, swagger_mapper=None): # pylint: disable=too-ma
510607
return endpoint_counts, stats
511608

512609

513-
def write_results(endpoint_counts, stats, swagger_mapper=None, output_file=None, sort_by='count'): # pylint: disable=too-many-statements
610+
def write_results(endpoint_counts, stats, swagger_mapper=None, output_file=None, sort_by='count', ineligible_endpoints=None): # pylint: disable=too-many-statements
514611
"""
515612
Write results to file or stdout.
516613
@@ -520,25 +617,47 @@ def write_results(endpoint_counts, stats, swagger_mapper=None, output_file=None,
520617
swagger_mapper (SwaggerEndpointMapper): Mapper for finding missing endpoints
521618
output_file (str, optional): Output file path
522619
sort_by (str): Sort method - 'count' (descending) or 'name' (alphabetical)
620+
ineligible_endpoints (set, optional): Set of ineligible endpoints to filter out
523621
"""
622+
if ineligible_endpoints is None:
623+
ineligible_endpoints = set()
624+
625+
# Filter out ineligible endpoints from results
626+
filtered_endpoint_counts = Counter()
627+
ineligible_found_count = 0
628+
for endpoint, count in endpoint_counts.items():
629+
if endpoint not in ineligible_endpoints:
630+
filtered_endpoint_counts[endpoint] = count
631+
else:
632+
ineligible_found_count += count
633+
634+
# Update stats to reflect filtering
635+
filtered_stats = stats.copy()
636+
filtered_stats['unique_endpoints'] = len(filtered_endpoint_counts)
637+
filtered_stats['total_api_calls'] = sum(filtered_endpoint_counts.values())
638+
filtered_stats['ineligible_endpoints_filtered'] = len(endpoint_counts) - len(filtered_endpoint_counts)
639+
filtered_stats['ineligible_api_calls_filtered'] = ineligible_found_count
524640
if sort_by == 'count':
525-
sorted_endpoints = endpoint_counts.most_common()
641+
sorted_endpoints = filtered_endpoint_counts.most_common()
526642
sort_desc = "sorted by count (descending)"
527643
elif sort_by == 'name':
528-
sorted_endpoints = sorted(endpoint_counts.items(), key=lambda x: x[0].lower())
644+
sorted_endpoints = sorted(filtered_endpoint_counts.items(), key=lambda x: x[0].lower())
529645
sort_desc = "sorted alphabetically"
530646
else:
531-
sorted_endpoints = endpoint_counts.most_common()
647+
sorted_endpoints = filtered_endpoint_counts.most_common()
532648
sort_desc = "sorted by count (descending)"
533649

534650
output = []
535651
output.append("Kubernetes API Endpoints Found in Audit Log (Swagger-Enhanced)")
536652
output.append("=" * 70)
537-
output.append(f"Total unique endpoints: {stats['unique_endpoints']}")
538-
output.append(f"Total API calls: {stats['total_api_calls']}")
539-
output.append(f"Swagger-based matches: {stats['swagger_matches']}")
540-
output.append(f"Fallback matches: {stats['fallback_matches']}")
541-
output.append(f"Skipped entries: {stats['skipped_entries']}")
653+
output.append(f"Total unique endpoints: {filtered_stats['unique_endpoints']}")
654+
output.append(f"Total API calls: {filtered_stats['total_api_calls']}")
655+
output.append(f"Swagger-based matches: {filtered_stats['swagger_matches']}")
656+
output.append(f"Fallback matches: {filtered_stats['fallback_matches']}")
657+
output.append(f"Skipped entries: {filtered_stats['skipped_entries']}")
658+
if ineligible_endpoints:
659+
output.append(f"Ineligible endpoints filtered: {filtered_stats['ineligible_endpoints_filtered']}")
660+
output.append(f"Ineligible API calls filtered: {filtered_stats['ineligible_api_calls_filtered']}")
542661
output.append(f"Results {sort_desc}")
543662
output.append("")
544663
output.append("Endpoint Name (OpenAPI Operation ID) | Count")
@@ -562,6 +681,10 @@ def write_results(endpoint_counts, stats, swagger_mapper=None, output_file=None,
562681
if not any(version in op for version in ['V1alpha', 'V1beta', 'V2alpha', 'V2beta', 'V3alpha', 'V3beta', 'alpha', 'beta'])
563682
}
564683

684+
# Filter out ineligible endpoints from missing operations
685+
if ineligible_endpoints:
686+
stable_missing_operations = stable_missing_operations - ineligible_endpoints
687+
565688
if stable_missing_operations:
566689
filtered_count = len(missing_operations) - len(stable_missing_operations)
567690

@@ -615,9 +738,15 @@ def main():
615738
parser.add_argument('--swagger-url', help='Custom Swagger/OpenAPI specification URL')
616739
parser.add_argument('--sort', choices=['count', 'name'], default='name',
617740
help='Sort results by count (descending) or name (alphabetical). Default: name')
741+
parser.add_argument('--ineligible-endpoints-url',
742+
help='URL or local path to ineligible endpoints YAML file '
743+
'(default: https://raw.githubusercontent.com/kubernetes/kubernetes/master/test/conformance/testdata/ineligible_endpoints.yaml)')
618744

619745
args = parser.parse_args()
620746

747+
# Load ineligible endpoints for filtering
748+
ineligible_endpoints = load_ineligible_endpoints(args.ineligible_endpoints_url)
749+
621750
# Initialize Swagger mapper
622751
swagger_mapper = SwaggerEndpointMapper(args.swagger_url)
623752

@@ -634,7 +763,7 @@ def main():
634763
sys.exit(1)
635764

636765
# Write results
637-
write_results(endpoint_counts, stats, swagger_mapper, args.output, args.sort)
766+
write_results(endpoint_counts, stats, swagger_mapper, args.output, args.sort, ineligible_endpoints)
638767

639768

640769
if __name__ == '__main__':

0 commit comments

Comments
 (0)