Skip to content

Commit 786d775

Browse files
authored
Merge pull request #35371 from dims/analyze-the-audit-logs-frompull-audit-kind-conformance-with-data-from-ci-audit-kind-conformance
Analyse the audit logs from pull-audit-kind-conformance with data from ci-audit-kind-conformance
2 parents a46f694 + 67c6661 commit 786d775

File tree

2 files changed

+356
-0
lines changed

2 files changed

+356
-0
lines changed

config/jobs/kubernetes/sig-arch/conformance-audit.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ presubmits:
3333
&& curl -sO https://raw.githubusercontent.com/ii/kind/ci-audit-logging/hack/ci/e2e-k8s.sh
3434
&& bash e2e-k8s.sh
3535
&& python3 ./../test-infra/experiment/audit/audit_log_parser.py --audit-logs ${ARTIFACTS}/audit/audit*.log --output "${ARTIFACTS}/audit/audit-endpoints.txt" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
36+
&& set -x
37+
&& python3 ./../test-infra/experiment/audit/kubernetes_api_analysis.py --pull-audit-endpoints "${ARTIFACTS}/audit/audit-endpoints.txt" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
3638
env:
3739
- name: BUILD_TYPE
3840
value: docker
Lines changed: 354 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,354 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2025 The Kubernetes Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""
18+
Kubernetes API Operations Analysis Tool
19+
20+
This script analyzes Kubernetes API coverage by comparing audit logs from CI runs
21+
against Pull Request changes. It automatically discovers the latest CI audit data
22+
from Google Cloud Storage and compares it with local Pull Request audit data to
23+
identify added or removed API operations.
24+
25+
Usage:
26+
# Auto-discover latest CI data and use default remote swagger
27+
python3 kubernetes_api_analysis.py --pull-audit-endpoints pull-audit-endpoints.txt
28+
29+
# Use specific local swagger file
30+
python3 kubernetes_api_analysis.py --swagger-url /path/to/swagger.json --pull-audit-endpoints pull-audit-endpoints.txt
31+
32+
# Use specific CI audit file (skip auto-discovery)
33+
python3 kubernetes_api_analysis.py --ci-audit-endpoints my-ci-audit.txt --pull-audit-endpoints pull-audit-endpoints.txt
34+
"""
35+
36+
import argparse
37+
import json
38+
import os
39+
import subprocess
40+
import sys
41+
import urllib.request
42+
43+
44+
def extract_swagger_operations(swagger_url, output_file):
    """
    Extract all operationIds from Kubernetes swagger/OpenAPI specification.

    The specification may be given as an ``http://`` / ``https://`` URL
    (downloaded), a ``file://`` URL (converted to a local path), or a plain
    local file path.

    Args:
        swagger_url (str): URL or local path to swagger.json file
        output_file (str): Path to write extracted operations list

    Returns:
        list: Sorted list of operation IDs found in the swagger spec

    Raises:
        SystemExit: With status 1 when the specification cannot be found,
            read, or parsed.
    """
    # Function-scope import keeps this block self-contained; the module itself
    # only imports urllib.request at file level.
    import urllib.parse

    print("Step 1: Extracting operationIds from swagger.json...")
    print(f"Swagger URL: {swagger_url}")
    print(f"Output file: {output_file}")

    try:
        # Check if it's a remote URL or something on the local filesystem
        if swagger_url.startswith(('http://', 'https://')):
            print("Downloading swagger specification...")
            with urllib.request.urlopen(swagger_url) as response:
                swagger_data = json.loads(response.read().decode())
        else:
            swagger_path = swagger_url
            if swagger_url.startswith('file://'):
                # Callers pass values such as "file://$PWD/.../swagger.json";
                # translate the URL into a filesystem path (this also undoes
                # percent-encoding) instead of treating it as a literal path.
                swagger_path = urllib.request.url2pathname(
                    urllib.parse.urlparse(swagger_url).path)
            if not os.path.exists(swagger_path):
                print(f"Error: Swagger file not found at {swagger_path}")
                sys.exit(1)
            with open(swagger_path, 'r', encoding='utf-8') as f:
                swagger_data = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers IOError and urllib.error.URLError.
        print(f"Error reading swagger specification: {e}")
        sys.exit(1)

    operation_ids = set()

    # Extract operationIds from all paths in the OpenAPI specification
    # Each path can have multiple HTTP methods (GET, POST, etc.)
    # Each method should have an operationId that uniquely identifies it
    paths = swagger_data.get('paths') if isinstance(swagger_data, dict) else None
    for methods in (paths or {}).values():
        if not isinstance(methods, dict):
            # A malformed path item has no methods to inspect
            continue
        for method, details in methods.items():
            # Skip 'parameters' as it's not an HTTP method
            if method != 'parameters' and isinstance(details, dict):
                operation_id = details.get('operationId')
                if operation_id:
                    operation_ids.add(operation_id)

    # Sort alphabetically for consistent output
    sorted_operations = sorted(operation_ids)

    # Write to file for later use and debugging
    with open(output_file, 'w') as f:
        for op_id in sorted_operations:
            f.write(f"{op_id}\n")

    print(f"Extracted {len(sorted_operations)} operationIds to {output_file}")
    return sorted_operations
100+
101+
102+
def extract_operations_from_audit_file(audit_file, swagger_operations):
    """
    Collect the swagger-known operation IDs listed in an audit endpoints file.

    Rows are expected to look like "OperationId | Count"; the text before the
    first '|' is taken as the operation ID. Header rows (containing
    "Endpoint Name") and rows containing "NOT FOUND" are ignored.

    Args:
        audit_file (str): Path to the audit log file
        swagger_operations (set): Set of valid operation IDs from swagger spec

    Returns:
        list: Sorted list of operations found in audit file that exist in swagger

    Raises:
        SystemExit: With status 1 when the audit file does not exist.
    """
    if not os.path.exists(audit_file):
        print(f"Error: Audit file not found: {audit_file}")
        sys.exit(1)

    def is_data_row(row):
        # A usable row has the column delimiter and is neither the header
        # nor an unresolved ("NOT FOUND") entry.
        if " | " not in row:
            return False
        return not ("Endpoint Name" in row or "NOT FOUND" in row)

    with open(audit_file, 'r') as handle:
        stripped_rows = (raw.strip() for raw in handle)
        first_columns = (
            row.partition('|')[0].strip()
            for row in stripped_rows
            if is_data_row(row)
        )
        # Keep only IDs the swagger specification defines; the set removes
        # duplicates before sorting.
        matched = {
            name for name in first_columns
            if name and name in swagger_operations
        }

    return sorted(matched)
136+
137+
138+
def compare_operations(ci_operations, pull_operations):
    """
    Diff the API operations seen in a Pull Request run against the CI baseline.

    Args:
        ci_operations (list): Operations found in CI audit log
        pull_operations (list): Operations found in Pull Request audit log

    Returns:
        tuple: (added_operations, removed_operations) - both as sorted lists
    """
    baseline = frozenset(ci_operations)
    candidate = frozenset(pull_operations)

    # Seen in the Pull Request run only: newly exercised API operations
    only_in_pull = sorted(candidate.difference(baseline))
    # Seen in the CI baseline only: API operations no longer exercised
    only_in_ci = sorted(baseline.difference(candidate))

    return only_in_pull, only_in_ci
158+
159+
160+
def find_latest_ci_audit_file():
    """
    Locate the newest finished CI audit run in GCS and download its endpoints file.

    Steps performed:
    1. List the entries under the CI job's GCS log path
    2. Sort the entries in descending (reverse lexicographic) order
    3. Walk the entries until one contains finished.json
    4. Download artifacts/audit/audit-endpoints.txt from that entry; if the
       file is missing or the copy fails, move on to the next entry

    Returns:
        str: Local filename of downloaded audit file, or None if not found

    Requires:
        gsutil command-line tool (Google Cloud SDK)
    """
    gcs_root = "gs://kubernetes-ci-logs/logs/ci-audit-kind-conformance"
    download_name = "ci-audit-kind-conformance-audit-endpoints.txt"

    def gsutil(*arguments, **options):
        # Run gsutil quietly; a non-zero exit raises CalledProcessError.
        return subprocess.run(['gsutil', *arguments],
                              capture_output=True, check=True, **options)

    print("Searching for latest CI audit run...")
    print(f"Enumerating directories in {gcs_root}...")

    try:
        listing = gsutil('ls', f'{gcs_root}/', text=True)
        # Entry names are treated as timestamps, so descending order puts the
        # most recent run first.
        entries = listing.stdout.strip().split('\n')
        entries.sort(reverse=True)

        for entry in entries:
            run_dir = entry.strip()
            if not run_dir:
                continue

            # Only runs that wrote finished.json are considered complete
            try:
                gsutil('-q', 'stat', f"{run_dir}finished.json")
            except subprocess.CalledProcessError:
                continue
            print(f"Found directory with finished.json: {run_dir}")

            audit_path = f"{run_dir}artifacts/audit/audit-endpoints.txt"
            try:
                gsutil('-q', 'stat', audit_path)
                print(f"Found audit file at: {audit_path}")

                gsutil('cp', audit_path, download_name)
                print(f"Downloaded to: {download_name}")
                return download_name
            except subprocess.CalledProcessError:
                # Covers both a missing audit file and a failed copy
                print(f"Audit file not found at: {audit_path}")

        print("No directory with finished.json and audit file found")
        return None

    except subprocess.CalledProcessError as e:
        print(f"Error accessing GCS bucket: {e}")
        return None
    except FileNotFoundError:
        print("Error: gsutil not found. Please install Google Cloud SDK.")
        return None
232+
233+
234+
def create_argument_parser():
    """Build the command-line parser for the analysis script.

    Returns:
        argparse.ArgumentParser: Parser exposing --swagger-url,
        --ci-audit-endpoints, --pull-audit-endpoints (required) and
        --output-file.
    """
    usage_examples = """
Examples:
%(prog)s --pull-audit-endpoints my-pull-audit.txt # Use defaults with required pull file
%(prog)s --swagger-url /path/swagger.json --pull-audit-endpoints my-pr.txt # Use local swagger file
%(prog)s --ci-audit-endpoints my-ci-audit.txt --pull-audit-endpoints my-pr.txt # Skip auto-discovery, specify both files
"""
    upstream_swagger = ("https://raw.githubusercontent.com/kubernetes/kubernetes/"
                        "refs/heads/master/api/openapi-spec/swagger.json")

    # Each entry is (flag, keyword arguments for add_argument); registration
    # order determines the order shown in --help.
    option_table = (
        ('--swagger-url', {
            'default': upstream_swagger,
            'help': 'Swagger/OpenAPI specification URL or local file path '
                    '(default: %(default)s)',
        }),
        ('--ci-audit-endpoints', {
            'default': None,
            'help': 'CI audit endpoints file (default: auto-discover latest from GCS)',
        }),
        ('--pull-audit-endpoints', {
            'required': True,
            'help': 'Pull Request audit endpoints file (required)',
        }),
        ('--output-file', {
            'default': "swagger_operations.txt",
            'help': 'Output file for swagger operations (default: %(default)s)',
        }),
    )

    cli = argparse.ArgumentParser(
        description='Kubernetes API Operations Analysis',
        epilog=usage_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
263+
264+
265+
def display_results(swagger_operations, ci_operations, pull_operations,
                    added_operations, removed_operations, output_file):
    """Print the analysis report to standard output.

    The report has a summary of counts, a numbered list of operations added
    in the Pull Request run, a numbered list of operations removed relative
    to CI, and a closing note naming the generated swagger operations file.
    """

    def print_section(heading, rule, entries, empty_note):
        # One report section: title, underline, count, then either the
        # numbered entries or a placeholder when there are none.
        print(heading)
        print(rule)
        print(f"Count: {len(entries)}")
        print()
        if not entries:
            print(empty_note)
        else:
            for position, name in enumerate(entries, 1):
                print(f"{position:3d}. {name}")
        print()

    added_total = len(added_operations)
    removed_total = len(removed_operations)

    summary_rows = (
        "SUMMARY",
        "=======",
        f"Total Operations in Swagger: {len(swagger_operations)}",
        f"Operations in CI: {len(ci_operations)}",
        f"Operations in Pull: {len(pull_operations)}",
        f"Operations Added: {added_total}",
        f"Operations Removed: {removed_total}",
        # Always show the sign so growth and shrinkage are unambiguous
        f"Net Change: {added_total - removed_total:+d}",
    )
    for row in summary_rows:
        print(row)
    print()

    print_section("OPERATIONS ADDED IN PULL (NOT IN CI)",
                  "====================================",
                  added_operations,
                  "No operations added.")
    print_section("OPERATIONS REMOVED FROM PULL (IN CI BUT NOT PULL)",
                  "=================================================",
                  removed_operations,
                  "No operations removed.")

    print("Analysis complete!")
    print("Generated files:")
    print(f"- {output_file} (swagger operations list)")
310+
311+
312+
def main():
    """Run the full workflow: parse arguments, load swagger, diff audits, report."""
    options = create_argument_parser().parse_args()

    print("Kubernetes API Operations Analysis")
    print("==================================")
    print()

    # Step 1: the swagger specification defines which operation IDs are valid
    known_operations = extract_swagger_operations(options.swagger_url, options.output_file)
    known_operation_set = set(known_operations)
    print()

    # Use the CI baseline given on the command line, else fetch the latest from GCS
    baseline_path = options.ci_audit_endpoints
    if baseline_path is None:
        print("No CI audit endpoints file specified, auto-discovering latest from GCS...")
        baseline_path = find_latest_ci_audit_file()
        if baseline_path is None:
            print("Failed to find latest CI audit file. "
                  "Please specify --ci-audit-endpoints manually.")
            sys.exit(1)
        print()

    pull_path = options.pull_audit_endpoints

    # Step 2: read both audit endpoint files, keeping only swagger-known operations
    print("Step 2: Comparing audit endpoint files...")
    print(f"CI File: {baseline_path}")
    print(f"Pull File: {pull_path}")
    print()

    print("Extracting operations from audit files (filtering by swagger operations)...")
    # The CI file is processed before the Pull Request file
    baseline_ops, pull_ops = (
        extract_operations_from_audit_file(path, known_operation_set)
        for path in (baseline_path, pull_path)
    )

    # Diff the two runs and print the report
    added, removed = compare_operations(baseline_ops, pull_ops)
    display_results(known_operations, baseline_ops, pull_ops,
                    added, removed, options.output_file)
351+
352+
353+
if __name__ == "__main__":
354+
main()

0 commit comments

Comments
 (0)