Skip to content

Commit c4bf2b6

Browse files
authored
Merge pull request #35385 from dims/generate-json-with-operation-id-to-audit-log-entry
generate a json with operation id to audit log entry mappings
2 parents 32babfa + c69f219 commit c4bf2b6

File tree

2 files changed

+62
-7
lines changed

2 files changed

+62
-7
lines changed

config/jobs/kubernetes/sig-arch/conformance-audit.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ presubmits:
3232
curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/"
3333
&& curl -sO https://raw.githubusercontent.com/ii/kind/ci-audit-logging/hack/ci/e2e-k8s.sh
3434
&& bash e2e-k8s.sh
35-
&& python3 ./../test-infra/experiment/audit/audit_log_parser.py --audit-logs ${ARTIFACTS}/audit/audit*.log --output "${ARTIFACTS}/audit/audit-endpoints.txt" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
35+
&& python3 ./../test-infra/experiment/audit/audit_log_parser.py --audit-logs ${ARTIFACTS}/audit/audit*.log --output "${ARTIFACTS}/audit/audit-endpoints.txt" --audit-operations-json "${ARTIFACTS}/audit/audit-operations.json" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
3636
&& set -x
3737
&& python3 ./../test-infra/experiment/audit/kubernetes_api_analysis.py --pull-audit-endpoints "${ARTIFACTS}/audit/audit-endpoints.txt" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
3838
env:
@@ -91,7 +91,7 @@ periodics:
9191
curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/"
9292
&& curl -sO https://raw.githubusercontent.com/ii/kind/ci-audit-logging/hack/ci/e2e-k8s.sh
9393
&& bash e2e-k8s.sh
94-
&& python3 ./../test-infra/experiment/audit/audit_log_parser.py --audit-logs ${ARTIFACTS}/audit/audit*.log --output "${ARTIFACTS}/audit/audit-endpoints.txt" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
94+
&& python3 ./../test-infra/experiment/audit/audit_log_parser.py --audit-logs ${ARTIFACTS}/audit/audit*.log --output "${ARTIFACTS}/audit/audit-endpoints.txt" --audit-operations-json "${ARTIFACTS}/audit/audit-operations.json" --swagger-url "file://$PWD/api/openapi-spec/swagger.json"
9595
env:
9696
- name: BUILD_TYPE
9797
value: docker

experiment/audit/audit_log_parser.py

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,10 @@ def parse_audit_logs(file_paths, swagger_mapper=None): # pylint: disable=too-ma
557557
swagger_mapper (SwaggerEndpointMapper): Mapper for converting to operation IDs
558558
559559
Returns:
560-
tuple: (Counter of endpoint counts, stats dict)
560+
tuple: (Counter of endpoint counts, dict of operation samples, stats dict)
561561
"""
562562
endpoint_counts = Counter()
563+
operation_samples = {} # Store up to 5 audit entries per operation
563564
total_entries = 0
564565
skipped_entries = 0
565566
swagger_matches = 0
@@ -609,12 +610,24 @@ def parse_audit_logs(file_paths, swagger_mapper=None): # pylint: disable=too-ma
609610
if operation_id:
610611
endpoint_counts[operation_id] += 1
611612
swagger_matches += 1
613+
614+
# Store up to 5 audit samples for this operation
615+
if operation_id not in operation_samples:
616+
operation_samples[operation_id] = []
617+
if len(operation_samples[operation_id]) < 5:
618+
operation_samples[operation_id].append(entry)
612619
else:
613620
# Try fallback parsing for edge cases
614621
fallback_endpoint = convert_to_k8s_endpoint_fallback(effective_verb, request_uri)
615622
if fallback_endpoint:
616623
endpoint_counts[fallback_endpoint] += 1
617624
fallback_matches += 1
625+
626+
# Store up to 5 audit samples for this fallback operation
627+
if fallback_endpoint not in operation_samples:
628+
operation_samples[fallback_endpoint] = []
629+
if len(operation_samples[fallback_endpoint]) < 5:
630+
operation_samples[fallback_endpoint].append(entry)
618631
else:
619632
skipped_entries += 1
620633
else:
@@ -653,20 +666,22 @@ def parse_audit_logs(file_paths, swagger_mapper=None): # pylint: disable=too-ma
653666
print(f" Total API calls: {sum(endpoint_counts.values())}")
654667
print(f" Skipped entries: {skipped_entries}")
655668

656-
return endpoint_counts, stats
669+
return endpoint_counts, operation_samples, stats
657670

658671

659-
def write_results(endpoint_counts, stats, swagger_mapper=None, output_file=None, sort_by='count', ineligible_endpoints=None): # pylint: disable=too-many-statements
672+
def write_results(endpoint_counts, operation_samples, stats, swagger_mapper=None, output_file=None, sort_by='count', ineligible_endpoints=None, audit_operations_json='audit-operations.json'): # pylint: disable=too-many-statements
660673
"""
661674
Write results to file or stdout.
662675
663676
Args:
664677
endpoint_counts (Counter): Endpoint counts
678+
operation_samples (dict): Sample audit entries for each operation
665679
stats (dict): Parsing statistics
666680
swagger_mapper (SwaggerEndpointMapper): Mapper for finding missing endpoints
667681
output_file (str, optional): Output file path
668682
sort_by (str): Sort method - 'count' (descending) or 'name' (alphabetical)
669683
ineligible_endpoints (set, optional): Set of ineligible endpoints to filter out
684+
audit_operations_json (str): Output path for audit operations JSON file
670685
"""
671686
if ineligible_endpoints is None:
672687
ineligible_endpoints = set()
@@ -780,6 +795,43 @@ def write_results(endpoint_counts, stats, swagger_mapper=None, output_file=None,
780795
print("\nResults:")
781796
print(result_text)
782797

798+
# Generate audit-operations.json JSON file with sample audit entries
799+
_write_audit_operations_json(filtered_endpoint_counts, operation_samples, ineligible_endpoints, deprecated_operations, audit_operations_json)
800+
801+
802+
def _write_audit_operations_json(filtered_endpoint_counts, operation_samples, ineligible_endpoints, deprecated_operations, json_output_path='audit-operations.json'):
    """
    Write a JSON file mapping each operation to its sample audit entries.

    The output is a single JSON object whose keys are the operations in
    ``filtered_endpoint_counts`` (in iteration order), minus any that are
    ineligible or deprecated. Each value is the list of sample audit entries
    recorded for that operation, or an empty list when none were recorded.

    Write failures are reported on stdout and otherwise ignored, so a problem
    with this auxiliary file does not abort the caller.

    Args:
        filtered_endpoint_counts (Counter): Filtered endpoint counts; only its keys are used
        operation_samples (dict): Sample audit entries for each operation (None means no samples)
        ineligible_endpoints (set): Set of ineligible endpoints (None means no exclusions)
        deprecated_operations (set): Set of deprecated operations (None means no exclusions)
        json_output_path (str): Output path for JSON file
    """
    # Imported locally so this function does not depend on the module's import block.
    import os

    samples_by_operation = operation_samples or {}
    excluded = set(ineligible_endpoints or ()) | set(deprecated_operations or ())

    # Keep samples only for operations that passed filtering.
    audit_operations = {
        operation_id: samples_by_operation.get(operation_id, [])
        for operation_id in filtered_endpoint_counts
        if operation_id not in excluded
    }

    # Serialize before touching the filesystem: if an entry cannot be encoded,
    # the error surfaces here instead of leaving a truncated file behind.
    payload = json.dumps(audit_operations, indent=2, ensure_ascii=False)

    try:
        parent_dir = os.path.dirname(json_output_path)
        if parent_dir:
            # A bare filename has no parent component; only create real directories.
            os.makedirs(parent_dir, exist_ok=True)
        with open(json_output_path, 'w', encoding='utf-8') as f:
            f.write(payload)
    except OSError as e:
        print(f"Error writing {json_output_path}: {e}")
        return

    total_samples = sum(len(samples) for samples in audit_operations.values())
    print(f"Generated {json_output_path} with {len(audit_operations)} operations and {total_samples} sample audit entries")
834+
783835

784836
def main():
785837
"""Main function to parse command line arguments and run the parser."""
@@ -804,6 +856,9 @@ def main():
804856
parser.add_argument('--ineligible-endpoints-url',
805857
help='URL or local path to ineligible endpoints YAML file '
806858
'(default: https://raw.githubusercontent.com/kubernetes/kubernetes/master/test/conformance/testdata/ineligible_endpoints.yaml)')
859+
parser.add_argument('--audit-operations-json',
860+
default='audit-operations.json',
861+
help='Output path for audit operations JSON file (default: %(default)s)')
807862

808863
args = parser.parse_args()
809864

@@ -819,14 +874,14 @@ def main():
819874
sys.exit(1)
820875

821876
# Parse the audit log(s)
822-
endpoint_counts, stats = parse_audit_logs(args.audit_logs, swagger_mapper)
877+
endpoint_counts, operation_samples, stats = parse_audit_logs(args.audit_logs, swagger_mapper)
823878

824879
if not endpoint_counts:
825880
print("No endpoints found or error parsing file")
826881
sys.exit(1)
827882

828883
# Write results
829-
write_results(endpoint_counts, stats, swagger_mapper, args.output, args.sort, ineligible_endpoints)
884+
write_results(endpoint_counts, operation_samples, stats, swagger_mapper, args.output, args.sort, ineligible_endpoints, args.audit_operations_json)
830885

831886

832887
if __name__ == '__main__':

0 commit comments

Comments
 (0)