1+ #!/usr/bin/env python3
2+ """
3+ Aggregate all test, coverage, LLM Judge, and performance results into a single Markdown report for CI/CD.
4+ """
import json
import os
from datetime import datetime, timezone
8+
def read_json(path):
    """Load and return the JSON document at *path*.

    Returns the parsed object, or None when the file is missing, contains
    invalid JSON, or cannot be read for any other reason.  Failures are
    reported on stdout so the CI log shows why a report section is empty.
    """
    try:
        # JSON is defined as UTF-8; don't depend on the platform default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"⚠️ File not found: {path}")
        return None
    except json.JSONDecodeError as e:
        print(f"⚠️ Failed to decode JSON from {path}: {e}")
        return None
    except Exception as e:
        # Best-effort: a broken results file must not abort report generation.
        print(f"⚠️ Unexpected error reading {path}: {e}")
        return None
22+
def read_coverage():
    """Best-effort lookup of the total line-coverage percentage.

    Checks htmlcov/index.html first (regex over the TOTAL row), then
    coverage.xml (Cobertura 'line-rate' root attribute); the XML value takes
    precedence when both are present.  Returns {'total': <percent>} on
    success, {} when neither source is readable.
    """
    import re
    import xml.etree.ElementTree as ET

    summary = {}
    if os.path.exists('htmlcov/index.html'):
        try:
            with open('htmlcov/index.html') as f:
                html = f.read()
            m = re.search(r'TOTAL.*?(\d+)%', html)
            if m:
                summary['total'] = int(m.group(1))
        except Exception:
            # Best-effort: fall through to coverage.xml.
            pass
    if os.path.exists('coverage.xml'):
        try:
            root = ET.parse('coverage.xml').getroot()
            rate = root.attrib.get('line-rate')
            # Only override the HTML-derived value when the attribute is
            # actually present; defaulting to 0 here would falsely report
            # 0% coverage.
            if rate is not None:
                summary['total'] = float(rate) * 100
        except Exception:
            pass
    return summary
46+
def read_pytest_results():
    """Parse the pytest run summary out of ci_build_doc_test.log.

    Returns None when the log file does not exist; otherwise a dict with
    whichever of the keys 'collected', 'passed', 'skipped' could be
    recovered (an empty dict when nothing matched, which callers render as
    "log found but no summary").
    """
    import re

    log_path = 'ci_build_doc_test.log'
    if not os.path.exists(log_path):
        return None
    summary = {}
    try:
        with open(log_path) as f:
            for line in f:
                # "collected 12 items" — 'items?' also matches the singular
                # "collected 1 item", which the old split-based parse missed.
                m = re.search(r'collected (\d+) items?', line)
                if m:
                    summary['collected'] = int(m.group(1))
                # "passed" and "skipped" counts may appear together or alone
                # (e.g. "5 passed in 0.3s"), so match each independently —
                # the old code required both words on the same line.
                m = re.search(r'(\d+) passed', line)
                if m:
                    summary['passed'] = int(m.group(1))
                m = re.search(r'(\d+) skipped', line)
                if m:
                    summary['skipped'] = int(m.group(1))
        return summary
    except Exception as e:
        print(f"⚠️ Error parsing pytest log: {e}")
        return {}
71+
def main():
    """Assemble final_test_report.md from all available CI artifacts.

    Each section (tests, coverage, LLM judge, performance) degrades to a
    warning bullet when its input artifact is missing, so a report is always
    produced.  Writes final_test_report.md in the current directory.
    """
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC time.
    now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    report = [f"# 🧪 Final Test Report\n\n*Generated: {now}*\n"]

    # --- Test summary ----------------------------------------------------
    pytest_results = read_pytest_results()
    if pytest_results is not None:
        if pytest_results:
            report.append("## ✅ Test Results Summary\n")
            report.append(f"- Total tests collected: {pytest_results.get('collected', '?')}")
            report.append(f"- Passed: {pytest_results.get('passed', '?')}")
            report.append(f"- Skipped: {pytest_results.get('skipped', '?')}")
            report.append("")
        else:
            report.append("## ✅ Test Results Summary\n- ⚠️ Pytest log found but no summary could be parsed.\n")
    else:
        report.append("## ✅ Test Results Summary\n- ⚠️ No pytest summary found.\n")

    # --- Coverage summary ------------------------------------------------
    coverage = read_coverage()
    if coverage and 'total' in coverage:
        report.append(f"## 📊 Coverage Summary\n- Total coverage: **{coverage['total']}%**\n")
    else:
        report.append("## 📊 Coverage Summary\n- ⚠️ No coverage data found.\n")

    # --- LLM Judge results -----------------------------------------------
    llm_judge = read_json('llm_judge_results.json')
    if llm_judge:
        report.append("## 🤖 LLM Judge Results\n")
        score = llm_judge.get('overall_score', '?')
        report.append(f"- Overall Score: **{score}/10**")
        if 'scores' in llm_judge:
            report.append("- Score Breakdown:")
            for name, entry in llm_judge['scores'].items():
                if isinstance(entry, dict):
                    report.append(f"  - {name}: {entry.get('score', '?')}/10 — {entry.get('justification', '')}")
                else:
                    report.append(f"  - {name}: {entry}")
        if 'recommendations' in llm_judge:
            report.append("- Recommendations:")
            for rec in llm_judge['recommendations']:
                report.append(f"  - {rec}")
        if 'next_steps' in llm_judge:
            report.append("- Next Steps:")
            for step in llm_judge['next_steps']:
                report.append(f"  - {step}")
        report.append("")
    else:
        report.append("## 🤖 LLM Judge Results\n- ⚠️ No LLM Judge results found.\n")

    # --- Performance metrics ---------------------------------------------
    perf = read_json('performance_metrics.json')
    if perf:
        report.append("## 🚦 Performance Metrics\n")
        for key in ['elapsed_seconds', 'memory_mb', 'threshold_seconds', 'threshold_mb', 'status']:
            if key in perf:
                report.append(f"- {key.replace('_', ' ').title()}: {perf[key]}")
        report.append("")
    else:
        report.append("## 🚦 Performance Metrics\n- ⚠️ No performance metrics found.\n")

    # --- Recommendations (consolidated, no duplicates) --------------------
    report.append("## 📝 Recommendations\n")
    any_recommendation = False
    if llm_judge and llm_judge.get('recommendations'):
        for rec in llm_judge['recommendations']:
            report.append(f"- {rec}")
            any_recommendation = True
    if coverage and coverage.get('total', 0) < 50:
        report.append("- 🚨 Coverage is below 50%. Add more tests!")
        any_recommendation = True
    if perf and perf.get('status') == 'FAIL':
        report.append("- 🚨 Performance regression detected. Optimize code or dependencies.")
        any_recommendation = True
    if not any_recommendation:
        report.append("- No additional recommendations.\n")
    report.append("")

    # Comparison to previous run (stub)
    report.append("## 🔄 Comparison to Previous Run\n- (Comparison feature coming soon)\n")

    # Join with a plain newline: the previous '\n ' separator prefixed every
    # subsequent line with a space, which broke Markdown headings/lists.
    with open('final_test_report.md', 'w') as f:
        f.write('\n'.join(report))
    print("✅ Final test report generated: final_test_report.md")
157+
# Script entry point: generate the report only when run directly, so the
# module can be imported (e.g. by tests) without side effects.
if __name__ == "__main__":
    main()