Skip to content

Commit d7b1c57

Browse files
chore: generate accuracy test summary post test
1 parent 9db296e commit d7b1c57

File tree

4 files changed

+499
-1
lines changed

4 files changed

+499
-1
lines changed

resources/test-summary-template.html

Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
<!doctype html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8" />
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
6+
<title>MongoDB MCP Server - Accuracy Test Summary</title>
7+
<style>
8+
body {
9+
font-family:
10+
-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
11+
margin: 0;
12+
padding: 20px;
13+
background-color: #f5f5f5;
14+
color: #333;
15+
}
16+
.container {
17+
max-width: 1400px;
18+
margin: 0 auto;
19+
background: white;
20+
border-radius: 8px;
21+
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
22+
padding: 30px;
23+
}
24+
h1 {
25+
color: #00684a;
26+
border-bottom: 3px solid #00684a;
27+
padding-bottom: 10px;
28+
margin-bottom: 30px;
29+
}
30+
.header-info {
31+
background: #f8f9fa;
32+
padding: 20px;
33+
border-radius: 6px;
34+
margin-bottom: 30px;
35+
border-left: 4px solid #00684a;
36+
}
37+
.header-info h2 {
38+
margin-top: 0;
39+
color: #00684a;
40+
}
41+
.info-grid {
42+
display: grid;
43+
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
44+
gap: 15px;
45+
margin-top: 15px;
46+
}
47+
.info-item {
48+
background: white;
49+
padding: 15px;
50+
border-radius: 4px;
51+
border: 1px solid #dee2e6;
52+
}
53+
.info-label {
54+
font-weight: bold;
55+
color: #00684a;
56+
margin-bottom: 5px;
57+
}
58+
.info-value {
59+
color: #666;
60+
word-break: break-all;
61+
}
62+
.summary {
63+
background: #f8f9fa;
64+
padding: 20px;
65+
border-radius: 6px;
66+
margin-bottom: 30px;
67+
border-left: 4px solid #007bff;
68+
}
69+
.summary h2 {
70+
margin-top: 0;
71+
color: #007bff;
72+
}
73+
.stat-grid {
74+
display: grid;
75+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
76+
gap: 15px;
77+
margin-top: 15px;
78+
}
79+
.stat-item {
80+
background: white;
81+
padding: 15px;
82+
border-radius: 4px;
83+
border: 1px solid #dee2e6;
84+
}
85+
.stat-value {
86+
font-size: 1.5em;
87+
font-weight: bold;
88+
color: #007bff;
89+
}
90+
.stat-label {
91+
font-size: 0.9em;
92+
color: #666;
93+
margin-top: 5px;
94+
}
95+
table {
96+
width: 100%;
97+
border-collapse: collapse;
98+
margin-top: 20px;
99+
font-size: 14px;
100+
}
101+
th,
102+
td {
103+
padding: 12px 8px;
104+
text-align: left;
105+
border-bottom: 1px solid #dee2e6;
106+
vertical-align: top;
107+
}
108+
th {
109+
background-color: #00684a;
110+
color: white;
111+
font-weight: 600;
112+
position: sticky;
113+
top: 0;
114+
z-index: 10;
115+
}
116+
.test-row {
117+
cursor: pointer;
118+
transition: background-color 0.2s;
119+
}
120+
.test-row:hover {
121+
background-color: #f8f9fa;
122+
}
123+
.expanded-row {
124+
background-color: #f8f9fa;
125+
}
126+
.details-row {
127+
display: none;
128+
background-color: #ffffff;
129+
border-left: 4px solid #00684a;
130+
}
131+
.details-row.visible {
132+
display: table-row;
133+
}
134+
.details-content {
135+
padding: 20px;
136+
background: #f8f9fa;
137+
border-radius: 6px;
138+
margin: 10px 0;
139+
}
140+
.conversation-section {
141+
margin-bottom: 20px;
142+
}
143+
.conversation-section h4 {
144+
color: #00684a;
145+
margin-bottom: 10px;
146+
}
147+
.conversation-content {
148+
background: white;
149+
padding: 15px;
150+
border-radius: 4px;
151+
border: 1px solid #dee2e6;
152+
white-space: pre-wrap;
153+
font-family: "Monaco", "Menlo", monospace;
154+
font-size: 12px;
155+
max-height: 400px;
156+
overflow-y: auto;
157+
}
158+
.accuracy-perfect {
159+
background-color: #d4edda;
160+
color: #155724;
161+
padding: 4px 8px;
162+
border-radius: 4px;
163+
font-weight: bold;
164+
}
165+
.accuracy-good {
166+
background-color: #fff3cd;
167+
color: #856404;
168+
padding: 4px 8px;
169+
border-radius: 4px;
170+
font-weight: bold;
171+
}
172+
.accuracy-poor {
173+
background-color: #f8d7da;
174+
color: #721c24;
175+
padding: 4px 8px;
176+
border-radius: 4px;
177+
font-weight: bold;
178+
}
179+
.tool-call {
  background: #e9ecef;
  padding: 2px 6px;
  border-radius: 3px;
  margin: 0 2px 2px 0;
  cursor: help;
  display: inline-block;
  /*
   * `word-break: break-word` is a deprecated keyword; the CSS Text spec
   * defines it as `word-break: normal` + `overflow-wrap: anywhere`, so this
   * is the standards-track spelling of the same wrapping behaviour.
   */
  overflow-wrap: anywhere;
}
188+
.tokens-usage {
189+
background: #e3f2fd;
190+
padding: 2px 6px;
191+
border-radius: 3px;
192+
cursor: help;
193+
}
194+
.prompt-cell {
195+
width: 35%;
196+
min-width: 350px;
197+
word-wrap: break-word;
198+
font-family: "Monaco", "Menlo", monospace;
199+
font-size: 12px;
200+
background-color: #f8f9fa;
201+
}
202+
.model-cell {
203+
width: 15%;
204+
min-width: 180px;
205+
word-wrap: break-word;
206+
}
207+
.tool-calls-cell {
208+
width: 12%;
209+
min-width: 120px;
210+
word-wrap: break-word;
211+
white-space: normal;
212+
}
213+
.accuracy-cell {
214+
width: 8%;
215+
min-width: 80px;
216+
text-align: center;
217+
}
218+
.response-time-cell {
219+
width: 10%;
220+
min-width: 100px;
221+
text-align: center;
222+
}
223+
.tokens-cell {
224+
width: 10%;
225+
min-width: 100px;
226+
text-align: center;
227+
}
228+
.expand-indicator {
229+
margin-right: 8px;
230+
font-weight: bold;
231+
color: #00684a;
232+
}
233+
.status-done {
234+
color: #28a745;
235+
font-weight: bold;
236+
}
237+
.status-failed {
238+
color: #dc3545;
239+
font-weight: bold;
240+
}
241+
.status-in-progress {
242+
color: #ffc107;
243+
font-weight: bold;
244+
}
245+
@media (max-width: 768px) {
246+
.container {
247+
padding: 15px;
248+
}
249+
table {
250+
font-size: 12px;
251+
}
252+
th,
253+
td {
254+
padding: 8px 4px;
255+
}
256+
.info-grid,
257+
.stat-grid {
258+
grid-template-columns: 1fr;
259+
}
260+
}
261+
</style>
262+
</head>
263+
<body>
264+
<div class="container">
265+
<h1>📊 MongoDB MCP Server - Accuracy Test Summary</h1>
266+
<div class="header-info">
267+
<h2>Run Information &amp; Summary</h2>
268+
<div class="info-grid">
269+
<div class="info-item">
270+
<div class="info-label">Accuracy Run ID</div>
271+
<div class="info-value">{{accuracyRunId}}</div>
272+
</div>
273+
<div class="info-item">
274+
<div class="info-label">Accuracy Run Status</div>
275+
<div class="info-value status-{{runStatus}}">{{runStatusUpper}}</div>
276+
</div>
277+
<div class="info-item">
278+
<div class="info-label">Commit SHA</div>
279+
<div class="info-value">{{commitSHA}}</div>
280+
</div>
281+
<div class="info-item">
282+
<div class="info-label">Report Generated On</div>
283+
<div class="info-value">{{reportGeneratedOn}}</div>
284+
</div>
285+
<div class="info-item">
286+
<div class="info-label">Snapshots Captured On</div>
287+
<div class="info-value">{{createdOn}}</div>
288+
</div>
289+
<div class="info-item">
290+
<div class="info-label">Total Prompts Evaluated</div>
291+
<div class="info-value">{{totalTests}}</div>
292+
</div>
293+
<div class="info-item">
294+
<div class="info-label">Models Tested</div>
295+
<div class="info-value">{{modelsCount}}</div>
296+
</div>
297+
<div class="info-item">
298+
<div class="info-label">Evals with 0% Accuracy</div>
299+
<div class="info-value">{{testsWithZeroAccuracy}}</div>
300+
</div>
301+
</div>
302+
</div>
303+
<table>
304+
<thead>
305+
<!-- Column headers; scope="col" ties each header to the cells below it for assistive technology. -->
<tr>
  <th scope="col">Prompt</th>
  <th scope="col">Model</th>
  <th scope="col">Expected Tool Calls</th>
  <th scope="col">LLM Tool Calls</th>
  <th scope="col">Accuracy</th>
  <th scope="col">LLM Response Time (ms)</th>
  <th scope="col">Total Tokens Used</th>
</tr>
314+
</thead>
315+
<tbody>
316+
{{tableRows}}
317+
</tbody>
318+
</table>
319+
</div>
320+
<script>
321+
/**
 * Expand or collapse the details row associated with one test row.
 *
 * Looks up the element with id "details-<index>" and flips its "visible"
 * class. The element with id "indicator-<index>" (if present) is set to "▼"
 * when expanded and "▶" when collapsed, and the element immediately before
 * the details row (if present) gains or loses the "expanded-row" class.
 *
 * NOTE(review): the rows carrying these ids are not part of this template;
 * presumably they are injected through the tableRows placeholder — confirm
 * against the generator.
 *
 * @param {number|string} index Suffix used to build the element ids.
 * @returns {void}
 */
function toggleDetails(index) {
  const detailsRow = document.getElementById("details-" + index);
  if (!detailsRow) {
    // Nothing to toggle; avoid a TypeError on a stale or mistyped index.
    return;
  }

  const indicator = document.getElementById("indicator-" + index);
  const testRow = detailsRow.previousElementSibling;

  // toggle() returns the resulting state, so every dependent update below is
  // driven by a single source of truth and cannot end up half-applied.
  const isExpanded = detailsRow.classList.toggle("visible");

  if (indicator) {
    indicator.textContent = isExpanded ? "▼" : "▶";
  }
  if (testRow) {
    testRow.classList.toggle("expanded-row", isExpanded);
  }
}
335+
</script>
336+
</body>
337+
</html>

0 commit comments

Comments
 (0)