Skip to content

Commit b8be9b3

Browse files
Remove CI evals overview panel and tag filtering (#1623)
1 parent 5205b2f commit b8be9b3

15 files changed

+463
-499
lines changed

mcpjam-inspector/client/src/components/CiEvalsTab.tsx

Lines changed: 21 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,14 @@ import {
1111
} from "@/components/ui/resizable";
1212
import { useSharedAppState } from "@/state/app-state-context";
1313
import { useCiEvalsRoute, navigateToCiEvalsRoute } from "@/lib/ci-evals-router";
14-
import { aggregateSuite, groupSuitesByTag, groupRunsByCommit } from "./evals/helpers";
15-
import { OverviewPanel } from "./evals/overview-panel";
14+
import { aggregateSuite, groupRunsByCommit } from "./evals/helpers";
1615
import { useEvalMutations } from "./evals/use-eval-mutations";
1716
import { useEvalQueries } from "./evals/use-eval-queries";
1817
import { useEvalHandlers } from "./evals/use-eval-handlers";
19-
import { CiSuiteListSidebar, type SidebarMode } from "./evals/ci-suite-list-sidebar";
18+
import {
19+
CiSuiteListSidebar,
20+
type SidebarMode,
21+
} from "./evals/ci-suite-list-sidebar";
2022
import { CiSuiteDetail } from "./evals/ci-suite-detail";
2123
import { CommitDetailView } from "./evals/commit-detail-view";
2224
import { useWorkspaceMembers } from "@/hooks/useWorkspaces";
@@ -35,7 +37,6 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
3537

3638
const [deletingSuiteId, setDeletingSuiteId] = useState<string | null>(null);
3739
const [deletingRunId, setDeletingRunId] = useState<string | null>(null);
38-
const [filterTag, setFilterTag] = useState<string | null>(null);
3940
const [sidebarMode, setSidebarMode] = useState<SidebarMode>("runs");
4041
const [hasAutoSwitchedMode, setHasAutoSwitchedMode] = useState(false);
4142

@@ -89,13 +90,7 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
8990
[queries.sortedSuites],
9091
);
9192

92-
const tagGroups = useMemo(() => groupSuitesByTag(sdkSuites), [sdkSuites]);
93-
const hasTags = tagGroups.some((g) => g.tag !== "Untagged");
94-
95-
const commitGroups = useMemo(
96-
() => groupRunsByCommit(sdkSuites),
97-
[sdkSuites],
98-
);
93+
const commitGroups = useMemo(() => groupRunsByCommit(sdkSuites), [sdkSuites]);
9994

10095
// Auto-switch to "By Suite" when all runs are manual (no commit SHAs)
10196
useEffect(() => {
@@ -113,16 +108,8 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
113108

114109
const selectedCommitGroup = useMemo(() => {
115110
if (!selectedCommitSha) return null;
116-
return (
117-
commitGroups.find((g) => g.commitSha === selectedCommitSha) ?? null
118-
);
111+
return commitGroups.find((g) => g.commitSha === selectedCommitSha) ?? null;
119112
}, [commitGroups, selectedCommitSha]);
120-
const allTags = useMemo(
121-
() =>
122-
Array.from(new Set(sdkSuites.flatMap((e) => e.suite.tags ?? []))).sort(),
123-
[sdkSuites],
124-
);
125-
126113
const selectedSuiteEntry = useMemo(() => {
127114
if (!selectedSuiteId) return null;
128115
return (
@@ -166,10 +153,6 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
166153
navigateToCiEvalsRoute({ type: "suite-overview", suiteId });
167154
}, []);
168155

169-
const handleSelectOverview = useCallback(() => {
170-
navigateToCiEvalsRoute({ type: "list" });
171-
}, []);
172-
173156
const handleSelectCommit = useCallback((commitSha: string) => {
174157
navigateToCiEvalsRoute({ type: "commit-detail", commitSha });
175158
}, []);
@@ -293,11 +276,7 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
293276
suites={sdkSuites}
294277
selectedSuiteId={selectedSuiteId}
295278
onSelectSuite={handleSelectSuite}
296-
onSelectOverview={handleSelectOverview}
297-
isOverviewSelected={!selectedSuiteId && route.type !== "commit-detail"}
298279
isLoading={queries.isOverviewLoading}
299-
filterTag={filterTag}
300-
hasTags={true}
301280
sidebarMode={sidebarMode}
302281
onSidebarModeChange={setSidebarMode}
303282
commitGroups={commitGroups}
@@ -333,18 +312,20 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
333312
</div>
334313
</div>
335314
) : route.type === "list" || !selectedSuite ? (
336-
<OverviewPanel
337-
suites={sdkSuites}
338-
allTags={allTags}
339-
filterTag={filterTag}
340-
onFilterTagChange={setFilterTag}
341-
onSelectSuite={handleSelectSuite}
342-
onRerunSuite={(suiteId) => {
343-
const entry = sdkSuites.find((e) => e.suite._id === suiteId);
344-
if (entry) handlers.handleRerun(entry.suite);
345-
}}
346-
allCommitGroups={commitGroups}
347-
/>
315+
<div className="flex-1 flex items-center justify-center">
316+
<div className="text-center max-w-md mx-auto p-8">
317+
<div className="w-20 h-20 bg-muted rounded-full flex items-center justify-center mx-auto mb-6">
318+
<GitBranch className="h-10 w-10 text-muted-foreground" />
319+
</div>
320+
<h2 className="text-2xl font-semibold text-foreground mb-2">
321+
Select a suite
322+
</h2>
323+
<p className="text-sm text-muted-foreground">
324+
Choose a CI suite or commit from the sidebar to inspect runs
325+
and test iterations.
326+
</p>
327+
</div>
328+
</div>
348329
) : queries.isSuiteDetailsLoading ? (
349330
<div className="flex h-full items-center justify-center">
350331
<div className="text-center">

mcpjam-inspector/client/src/components/evals/__tests__/ai-insights.test.ts

Lines changed: 8 additions & 167 deletions
Original file line number | Diff line number | Diff line change
@@ -123,16 +123,16 @@ describe("isFlaky", () => {
123123
});
124124

125125
it("returns true for frequent alternation", () => {
126-
expect(
127-
isFlaky(["passed", "failed", "passed", "failed", "passed"]),
128-
).toBe(true);
126+
expect(isFlaky(["passed", "failed", "passed", "failed", "passed"])).toBe(
127+
true,
128+
);
129129
});
130130

131131
it("ignores 'other' results when counting switches", () => {
132132
// After filtering: passed, failed, passed = 2 switches
133-
expect(
134-
isFlaky(["passed", "other", "failed", "other", "passed"]),
135-
).toBe(true);
133+
expect(isFlaky(["passed", "other", "failed", "other", "passed"])).toBe(
134+
true,
135+
);
136136
});
137137

138138
it("only looks at first 10 entries", () => {
@@ -163,9 +163,7 @@ describe("classifyFailure", () => {
163163
it("tags as 'new' when there is no prior history", () => {
164164
const suiteId = "suite-new";
165165
const run = makeRun({ suiteId, result: "failed" });
166-
const groups: CommitGroup[] = [
167-
makeCommitGroup({ runs: [run] }),
168-
];
166+
const groups: CommitGroup[] = [makeCommitGroup({ runs: [run] })];
169167

170168
const result = classifyFailure(run, "New Suite", groups);
171169
expect(result.tags).toContain("new");
@@ -218,11 +216,7 @@ describe("classifyFailure", () => {
218216
}),
219217
];
220218

221-
const result = classifyFailure(
222-
failedRun,
223-
"Flaky Regression",
224-
groups,
225-
);
219+
const result = classifyFailure(failedRun, "Flaky Regression", groups);
226220
expect(result.tags).toContain("regression");
227221
expect(result.tags).toContain("flaky");
228222
});
@@ -287,156 +281,3 @@ describe("classifyAllFailures", () => {
287281
expect(results[0].suiteName).toBe("Unknown suite");
288282
});
289283
});
290-
291-
// ---------------------------------------------------------------------------
292-
// buildTriageContext
293-
// ---------------------------------------------------------------------------
294-
295-
describe("buildTriageContext", () => {
296-
it("builds context with correct aggregated data", () => {
297-
const failedRun = makeRun({
298-
suiteId: "s1",
299-
result: "failed",
300-
summary: { total: 10, passed: 7, failed: 3, passRate: 70 },
301-
configSnapshot: {
302-
tests: [
303-
{
304-
title: "test-a",
305-
query: "q",
306-
provider: "p",
307-
model: "m",
308-
runs: 1,
309-
expectedToolCalls: [],
310-
},
311-
{
312-
title: "test-b",
313-
query: "q",
314-
provider: "p",
315-
model: "m",
316-
runs: 1,
317-
expectedToolCalls: [],
318-
},
319-
],
320-
environment: { servers: [] },
321-
},
322-
});
323-
const passedRun = makeRun({
324-
suiteId: "s2",
325-
result: "passed",
326-
summary: { total: 5, passed: 5, failed: 0, passRate: 100 },
327-
});
328-
const notRunRun = makeRun({
329-
suiteId: "s3",
330-
result: "cancelled",
331-
});
332-
333-
const suiteMap = new Map([
334-
["s1", "Failed Suite"],
335-
["s2", "Passed Suite"],
336-
["s3", "Not Run Suite"],
337-
]);
338-
339-
const commitGroup = makeCommitGroup({
340-
commitSha: "abc123",
341-
shortSha: "abc1234",
342-
branch: "main",
343-
runs: [failedRun, passedRun, notRunRun],
344-
suiteMap,
345-
});
346-
347-
const classified = [
348-
{
349-
run: failedRun,
350-
suiteName: "Failed Suite",
351-
tags: ["regression" as const],
352-
},
353-
];
354-
355-
const ctx = buildTriageContext(
356-
commitGroup,
357-
classified,
358-
[passedRun],
359-
[notRunRun],
360-
);
361-
362-
expect(ctx.commitSha).toBe("abc1234");
363-
expect(ctx.branch).toBe("main");
364-
expect(ctx.totalSuites).toBe(3);
365-
expect(ctx.totalCases.total).toBe(15);
366-
expect(ctx.totalCases.passed).toBe(12);
367-
expect(ctx.totalCases.failed).toBe(3);
368-
expect(ctx.failures).toHaveLength(1);
369-
expect(ctx.failures[0].suiteName).toBe("Failed Suite");
370-
expect(ctx.failures[0].tags).toEqual(["regression"]);
371-
expect(ctx.failures[0].testNames).toEqual(["test-a", "test-b"]);
372-
expect(ctx.passedSuites).toEqual(["Passed Suite"]);
373-
expect(ctx.notRunSuites).toEqual(["Not Run Suite"]);
374-
});
375-
});
376-
377-
// ---------------------------------------------------------------------------
378-
// buildOverviewTriageContext
379-
// ---------------------------------------------------------------------------
380-
381-
describe("buildOverviewTriageContext", () => {
382-
it("categorizes suites correctly", () => {
383-
const suites = [
384-
{
385-
suite: { _id: "s1", name: "Failing Suite" } as any,
386-
latestRun: makeRun({ suiteId: "s1", result: "failed" }),
387-
recentRuns: [],
388-
passRateTrend: [],
389-
totals: { passed: 3, failed: 2, runs: 5 },
390-
},
391-
{
392-
suite: { _id: "s2", name: "Passing Suite" } as any,
393-
latestRun: makeRun({ suiteId: "s2", result: "passed" }),
394-
recentRuns: [],
395-
passRateTrend: [],
396-
totals: { passed: 10, failed: 0, runs: 10 },
397-
},
398-
{
399-
suite: { _id: "s3", name: "New Suite" } as any,
400-
latestRun: null,
401-
recentRuns: [],
402-
passRateTrend: [],
403-
totals: { passed: 0, failed: 0, runs: 0 },
404-
},
405-
];
406-
407-
const ctx = buildOverviewTriageContext(suites, []);
408-
expect(ctx.totalSuites).toBe(3);
409-
expect(ctx.passingSuites).toBe(1);
410-
expect(ctx.neverRunSuites).toBe(1);
411-
expect(ctx.failingSuites).toHaveLength(1);
412-
expect(ctx.failingSuites[0].name).toBe("Failing Suite");
413-
expect(ctx.failingSuites[0].passRate).toBe("60%");
414-
});
415-
416-
it("includes suites that passed overall but have failed cases", () => {
417-
const suites = [
418-
{
419-
suite: { _id: "s1", name: "Mostly Passing Suite" } as any,
420-
latestRun: makeRun({ suiteId: "s1", result: "passed" }),
421-
recentRuns: [],
422-
passRateTrend: [],
423-
totals: { passed: 14, failed: 2, runs: 16 },
424-
},
425-
{
426-
suite: { _id: "s2", name: "Fully Passing Suite" } as any,
427-
latestRun: makeRun({ suiteId: "s2", result: "passed" }),
428-
recentRuns: [],
429-
passRateTrend: [],
430-
totals: { passed: 10, failed: 0, runs: 10 },
431-
},
432-
];
433-
434-
const ctx = buildOverviewTriageContext(suites, []);
435-
expect(ctx.totalSuites).toBe(2);
436-
expect(ctx.passingSuites).toBe(1);
437-
expect(ctx.failingSuites).toHaveLength(1);
438-
expect(ctx.failingSuites[0].name).toBe("Mostly Passing Suite");
439-
expect(ctx.failingSuites[0].passRate).toBe("88%");
440-
expect(ctx.failingSuites[0].failedCases).toBe(2);
441-
});
442-
});

mcpjam-inspector/client/src/components/evals/ai-insights.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,4 +109,3 @@ export function classifyAllFailures(
109109
return classifyFailure(run, suiteName, allCommitGroups);
110110
});
111111
}
112-

0 commit comments

Comments
 (0)