Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mcpjam-inspector/.env.local
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
VITE_CONVEX_URL=https://proper-clownfish-150.convex.cloud
CONVEX_URL=https://proper-clownfish-150.convex.cloud
VITE_CONVEX_URL=https://quiet-woodpecker-801.convex.cloud
CONVEX_URL=https://quiet-woodpecker-801.convex.cloud
VITE_WORKOS_CLIENT_ID=client_01K4C1TVA6CMQ3G32F1P301A9G
VITE_WORKOS_REDIRECT_URI=mcpjam://oauth/callback
CONVEX_HTTP_URL=https://proper-clownfish-150.convex.site
CONVEX_HTTP_URL=https://quiet-woodpecker-801.convex.site
ENVIRONMENT=local
VITE_DISABLE_POSTHOG_LOCAL=true
12 changes: 12 additions & 0 deletions mcpjam-inspector/client/src/components/CiEvalsTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
const [deletingRunId, setDeletingRunId] = useState<string | null>(null);
const [filterTag, setFilterTag] = useState<string | null>(null);
const [sidebarMode, setSidebarMode] = useState<SidebarMode>("runs");
const [hasAutoSwitchedMode, setHasAutoSwitchedMode] = useState(false);

const selectedSuiteId =
route.type === "suite-overview" ||
Expand Down Expand Up @@ -96,6 +97,17 @@ export function CiEvalsTab({ convexWorkspaceId }: CiEvalsTabProps) {
[sdkSuites],
);

// Auto-switch to "By Suite" when all runs are manual (no commit SHAs)
useEffect(() => {
if (hasAutoSwitchedMode) return;
if (commitGroups.length === 0) return;
const allManual = commitGroups.every((g) => g.commitSha === "manual");
if (allManual) {
setSidebarMode("suites");
setHasAutoSwitchedMode(true);
}
}, [commitGroups, hasAutoSwitchedMode]);

const selectedCommitSha =
route.type === "commit-detail" ? route.commitSha : null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,18 @@ export function CiSuiteListSidebar({
? suites.filter((e) => e.suite.tags?.includes(filterTag))
: suites;

// Group suites by name, keeping the most recent one as the "primary" entry
// Group suites by base name (strip trailing timestamps/parenthetical suffixes
// that some SDK users append, e.g. "Suite Name (2026-03-12 15:20:43)")
const groupedSuites = useMemo(() => {
const groups = new Map<string, EvalSuiteOverviewEntry[]>();
for (const entry of filteredSuites) {
const name = entry.suite.name || "Untitled suite";
if (!groups.has(name)) {
groups.set(name, []);
const rawName = entry.suite.name || "Untitled suite";
// Strip trailing " (YYYY-MM-DD ...)" or " (timestamp)" patterns
const baseName = rawName.replace(/\s*\(\d{4}-\d{2}-\d{2}[^)]*\)\s*$/, "").trim() || rawName;
if (!groups.has(baseName)) {
groups.set(baseName, []);
}
groups.get(name)!.push(entry);
groups.get(baseName)!.push(entry);
}
// Sort each group by latest run time (most recent first)
for (const entries of groups.values()) {
Expand Down
54 changes: 43 additions & 11 deletions mcpjam-inspector/client/src/components/evals/overview-panel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ import { useCommitTriage } from "./use-ai-triage";
// Helpers
// ---------------------------------------------------------------------------

/**
 * Strip a trailing parenthesised timestamp suffix from a suite name for display,
 * e.g. "Suite (2026-03-12 15:20:43)" → "Suite".
 *
 * Only a final "(YYYY-MM-DD…)" group (plus surrounding whitespace) is removed;
 * other parenthesised text such as "(v2)" is left alone.
 *
 * @param name - Raw suite name. May be empty or missing.
 * @returns The name without its timestamp suffix. If stripping would leave an
 *   empty string (the name is nothing but a timestamp), the original name is
 *   returned unchanged. A null/undefined/empty name yields "" so callers can
 *   keep using `stripTimestampSuffix(name) || "Untitled suite"`.
 */
function stripTimestampSuffix(name: string | null | undefined): string {
  // Guard against suites without a name instead of throwing on `.replace`.
  if (!name) return "";
  const stripped = name.replace(/\s*\(\d{4}-\d{2}-\d{2}[^)]*\)\s*$/, "").trim();
  // Never return an empty label when the input itself was non-empty.
  return stripped || name;
}

function toPercent(value: number): number {
const n = value <= 1 ? value * 100 : value;
return Math.max(0, Math.min(100, Math.round(n)));
Expand Down Expand Up @@ -209,6 +214,7 @@ export function OverviewPanel({
const [selectedBucketId, setSelectedBucketId] = useState<string | null>(null);
const [failuresOnly, setFailuresOnly] = useState(false);
const [suiteSearch, setSuiteSearch] = useState("");
const [failurePageSize, setFailurePageSize] = useState(10);

// Apply tag filter
const filteredSuites = useMemo(
Expand Down Expand Up @@ -276,12 +282,18 @@ export function OverviewPanel({

const aiOverviewTriage = useCommitTriage(failedOverviewRunIds);

// Auto-request triage when failures exist (skip if already unavailable)
// Auto-request triage when failures exist (skip if already unavailable or errored)
useEffect(() => {
if (failedOverviewRunIds.length > 0 && !aiOverviewTriage.summary && !aiOverviewTriage.loading && !aiOverviewTriage.unavailable) {
if (
failedOverviewRunIds.length > 0 &&
!aiOverviewTriage.summary &&
!aiOverviewTriage.loading &&
!aiOverviewTriage.unavailable &&
!aiOverviewTriage.error
) {
aiOverviewTriage.requestTriage();
}
}, [failedOverviewRunIds.length, aiOverviewTriage.summary, aiOverviewTriage.loading, aiOverviewTriage.unavailable, aiOverviewTriage.requestTriage]);
}, [failedOverviewRunIds.length, aiOverviewTriage.summary, aiOverviewTriage.loading, aiOverviewTriage.unavailable, aiOverviewTriage.error, aiOverviewTriage.requestTriage]);

// Pre-compute inline failure tags for the failure feed
// Tags suites with failed cases OR failed result
Expand Down Expand Up @@ -647,9 +659,12 @@ export function OverviewPanel({

<CollapsibleContent>
<div className="border-t divide-y">
{failureEntries.map((entry) => {
{failureEntries.slice(0, failurePageSize).map((entry) => {
const isFailed = entry.latestRun?.result === "failed";
const isNeverRun = !entry.latestRun;
const passRate = isFailed && entry.latestRun?.summary
? toPercent(entry.latestRun.summary.passRate ?? 0)
: null;

return (
<button
Expand All @@ -666,19 +681,28 @@ export function OverviewPanel({
<div className="min-w-0 flex-1">
<div className="flex items-center gap-1.5">
<span className="text-sm font-medium truncate">
{entry.suite.name}
{stripTimestampSuffix(entry.suite.name)}
</span>
{isFailed &&
failureTagMap.get(entry.suite._id)?.map((tag) => (
<InlineFailureTag key={tag} tag={tag} />
))}
</div>
{isFailed && entry.latestRun?.summary && (
<div className="text-xs text-muted-foreground mt-0.5">
{entry.latestRun.summary.passed}/
{entry.latestRun.summary.total} tests passed
{entry.latestRun.summary.passRate !== undefined &&
` (${Math.round(entry.latestRun.summary.passRate)}%)`}
<div className="flex items-center gap-2 mt-1">
<div className="flex-1 h-1.5 bg-muted rounded-full overflow-hidden">
<div
className={cn(
"h-full rounded-full transition-all",
passRate! >= 75 ? "bg-amber-500" : "bg-destructive",
)}
style={{ width: `${passRate}%` }}
/>
</div>
<span className="text-xs text-muted-foreground shrink-0 tabular-nums">
{entry.latestRun.summary.passed}/
{entry.latestRun.summary.total} ({passRate}%)
</span>
</div>
)}
{isFailed && entry.latestRun?.ciMetadata && (
Expand Down Expand Up @@ -719,6 +743,14 @@ export function OverviewPanel({
);
})}
</div>
{failureEntries.length > failurePageSize && (
<button
onClick={() => setFailurePageSize((s) => s + 20)}
className="w-full py-2 text-xs text-primary hover:bg-muted/50 transition-colors border-t font-medium"
>
Show more ({failureEntries.length - failurePageSize} remaining)
</button>
)}
</CollapsibleContent>
</div>
</Collapsible>
Expand Down Expand Up @@ -840,7 +872,7 @@ export function OverviewPanel({
{/* Suite name */}
<div className="min-w-0">
<div className="text-sm font-medium truncate">
{entry.suite.name || "Untitled suite"}
{stripTimestampSuffix(entry.suite.name) || "Untitled suite"}
</div>
</div>

Expand Down
63 changes: 25 additions & 38 deletions mcpjam-inspector/client/src/components/evals/use-ai-triage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,49 +40,31 @@ export function useCommitTriage(
const [error, setError] = useState<string | null>(null);
const [summary, setSummary] = useState<string | null>(null);
const [unavailable, setUnavailable] = useState(false);
const hasAttemptedRef = useRef(false);

// Track whether the mutation exists at the module level (survives re-renders)
let requestTriageMutation: ReturnType<typeof useMutation> | null = null;
let mutationExists = true;
try {
// eslint-disable-next-line react-hooks/rules-of-hooks
requestTriageMutation = useMutation("testSuites:requestTriage" as any);
} catch {
// Mutation not registered yet — backend not deployed
mutationExists = false;
}
// Always call useMutation (React hooks rules) — if the function doesn't
// exist on the backend, the call itself will fail, which we handle below.
const requestTriageMutation = useMutation("testSuites:requestTriage" as any);

// Once we know the mutation doesn't exist, mark unavailable permanently
// (no state update needed on subsequent renders since unavailable is already true)
const mutationExistsRef = useRef(mutationExists);
mutationExistsRef.current = mutationExists;

useEffect(() => {
if (!mutationExistsRef.current) {
setUnavailable(true);
}
}, []);

// Reset state when the run IDs change (navigating to a different commit),
// but preserve unavailable if the mutation doesn't exist
// Reset state when the run IDs actually change (navigating to a different commit)
const runKey = failedRunIds.join(",");
const prevRunKeyRef = useRef(runKey);
useEffect(() => {
setSummary(null);
setError(null);
setLoading(false);
// Only reset unavailable if the mutation actually exists
if (mutationExistsRef.current) {
setUnavailable(false);
if (prevRunKeyRef.current !== runKey) {
prevRunKeyRef.current = runKey;
setSummary(null);
setError(null);
setLoading(false);
hasAttemptedRef.current = false;
// Keep unavailable sticky — if the mutation doesn't exist, it won't
// magically appear when switching commits
}
}, [runKey]);

const requestTriage = useCallback(() => {
if (failedRunIds.length === 0 || unavailable) return;
if (!requestTriageMutation) {
setUnavailable(true);
return;
}
if (failedRunIds.length === 0 || unavailable || hasAttemptedRef.current) return;

hasAttemptedRef.current = true;
setLoading(true);
setError(null);

Expand All @@ -96,16 +78,21 @@ export function useCommitTriage(
setLoading(false);
} else {
// Backend will generate async — for now show as pending
// In future, a reactive query subscription will update this
setLoading(false);
setError("Triage requested — results will appear when backend processing completes.");
setSummary("Triage requested — results will appear when backend processing completes.");
}
})
.catch((err: unknown) => {
const message = err instanceof Error ? err.message : String(err);
// Detect "function not found" errors from Convex
if (message.includes("Could not find") || message.includes("not found")) {
// Detect backend errors that mean triage isn't available — mark permanently unavailable
if (
message.includes("Could not find") ||
message.includes("not found") ||
message.includes("is not a function") ||
message.includes("Server Error")
) {
setUnavailable(true);
setError(null);
} else {
setError(message);
}
Expand Down
Loading