Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions src/lib/confidence.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { describe, it, expect } from "vitest";
import { confidenceFraction, confidencePercent } from "./confidence.js";

// Table-driven checks for confidenceFraction: each case pairs a raw input with
// the fraction it must normalize to.
describe("confidenceFraction", () => {
  it("passes 0–1 fractions through", () => {
    // Interior fractions are compared approximately, the exact bounds strictly.
    for (const fraction of [0.9, 0.42]) {
      expect(confidenceFraction(fraction)).toBeCloseTo(fraction);
    }
    for (const bound of [1, 0]) {
      expect(confidenceFraction(bound)).toBe(bound);
    }
  });

  it("rescales 0–100 values to a fraction", () => {
    const percentCases: Array<[number, number]> = [
      [90, 0.9],
      [95, 0.95],
      [95.0, 0.95], // same value, written the way some completions emit it
    ];
    for (const [percent, expected] of percentCases) {
      expect(confidenceFraction(percent)).toBeCloseTo(expected);
    }
  });

  it("clamps to [0, 1] and handles bad input", () => {
    const edgeCases: Array<[number | null | undefined, number]> = [
      [9000, 1], // pathological — clamp, don't emit 90x
      [-5, 0],
      [null, 0],
      [undefined, 0],
      [NaN, 0],
    ];
    for (const [input, expected] of edgeCases) {
      expect(confidenceFraction(input)).toBe(expected);
    }
  });
});

// Both input scales must land on the same whole-number percentage.
describe("confidencePercent", () => {
  it("renders both scales as the same percentage (no more 9000%)", () => {
    const displayCases: Array<[number | null, number]> = [
      [0.9, 90],
      [90, 90], // the bug: was 9000
      [0.95, 95],
      [95, 95],
      [null, 0],
    ];
    for (const [input, expected] of displayCases) {
      expect(confidencePercent(input)).toBe(expected);
    }
  });
});
27 changes: 27 additions & 0 deletions src/lib/confidence.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* Confidence-score scale normalization.
*
* `confidenceScore` is meant to be a 0–1 fraction, but the synthesis LLM is
* inconsistent: some completions emit `0.9`, others `90` (or `95.00`) on a
* 0–100 scale. Stored unnormalized, the 0–100 values render as nonsense once a
* consumer multiplies by 100 (e.g. `90 * 100 = 9000%`).
*
* These helpers coerce either scale to a single canonical form. Normalize at
* the source so new reports store a 0–1 fraction, and use these defensively at
* display sites so reports already persisted on the wrong scale still render
* sensibly.
*/

/**
 * Coerce a confidence score (0–1 fraction OR 0–100 percentage) to a 0–1
 * fraction, clamped to [0, 1].
 *
 * A value `> 1` is assumed to be on the 0–100 scale and divided by 100. The
 * value `1` itself is ambiguous between the two scales and is treated as the
 * fraction 1 (i.e. 100%).
 *
 * The declared parameter type is `number`, but callers feed this from parsed
 * LLM JSON and loosely typed report objects, so the runtime value is not
 * trusted: numeric strings are converted with `Number()`, and anything that
 * does not yield a number (non-numeric strings, objects, booleans, `NaN`)
 * returns 0 instead of leaking `NaN` into display code.
 *
 * @param raw - Score on either scale; `null`/`undefined` mean "no score".
 * @returns A fraction in [0, 1]. Bad input → 0; `Infinity` clamps to 1 and
 *   `-Infinity` to 0.
 */
export function confidenceFraction(raw: number | null | undefined): number {
  if (raw == null) return 0;

  // Widen to `unknown` so the runtime type is checked rather than assumed.
  const input: unknown = raw;
  let value: number;
  if (typeof input === "number") {
    value = input;
  } else if (typeof input === "string") {
    value = Number(input);
  } else {
    return 0;
  }
  if (Number.isNaN(value)) return 0;

  const fraction = value > 1 ? value / 100 : value;
  return Math.max(0, Math.min(1, fraction));
}

/**
 * Convert a confidence score on either input scale to an integer percentage
 * suitable for display.
 *
 * @param raw - Score as accepted by `confidenceFraction`.
 * @returns The normalized fraction multiplied by 100 and rounded with
 *   `Math.round`.
 */
export function confidencePercent(raw: number | null | undefined): number {
  const normalized = confidenceFraction(raw);
  const percent = normalized * 100;
  return Math.round(percent);
}
3 changes: 2 additions & 1 deletion src/server/investigation-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import { ulid } from "ulid";
import { createLogger } from "../logger.js";
import type { Database } from "./db.js";
import { confidencePercent } from "../lib/confidence.js";
import type { IInvestigationAgent } from "../types/agent-interfaces.js";
import type { RcaReport } from "../types/rca-types.js";
import type { ServiceConfig, InvestigationTemplate } from "../config/schema.js";
Expand Down Expand Up @@ -321,7 +322,7 @@ export class InvestigationRunner {
total_output_tokens: totalTokens.outputTokens,
total_duration_ms: totalDurationMs,
});
const confidencePct = report.confidenceScore != null ? Math.round(report.confidenceScore * 100) : null;
const confidencePct = report.confidenceScore != null ? confidencePercent(report.confidenceScore) : null;
eventLog.append({
kind: "investigation_completed",
severity: "success",
Expand Down
3 changes: 2 additions & 1 deletion src/server/slack-notifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import { createLogger } from "../logger.js";
import type { RcaReport } from "../types/rca-types.js";
import { confidencePercent } from "../lib/confidence.js";

const logger = createLogger();

Expand Down Expand Up @@ -79,7 +80,7 @@ export async function notifySlack(

const severity = report.severity ?? "unknown";
const confidence = report.confidenceScore != null
? `${Math.round(report.confidenceScore * 100)}%`
? `${confidencePercent(report.confidenceScore)}%`
: "N/A";
const rootCause = report.rootCause ?? "Unable to determine";
const summary = report.summary ?? "";
Expand Down
3 changes: 2 additions & 1 deletion src/web/components/ChatPane.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { Button } from "@/components/ui/button";
import { Search, SearchCode, MessageSquare, Plus, FileText, ChevronRight, ChevronDown, Send, Trash2, X, ArrowRight, Zap } from "lucide-react";
import { renderInline } from "../lib/renderInline";
import { renderMarkdown } from "../lib/renderMarkdown";
import { confidenceFraction } from "../../lib/confidence.js";
import { formatTokens } from "../lib/formatTokens.js";
import { formatTimestamp } from "../lib/formatTimestamp";
import { MetricChart, type TimeSeriesData } from "./MetricChart";
Expand Down Expand Up @@ -729,7 +730,7 @@ export function ChatPane({ ws, onInvestigationStarted, onViewInvestigation, acti
<Badge variant={msg.report.severity === "critical" ? "destructive" : "secondary"} className="text-[8px] uppercase tracking-[0.1em]">
{msg.report.severity}
</Badge>
<span className="text-[8px] font-mono text-muted-foreground/70">{msg.report.confidence}{msg.report.confidenceScore != null ? ` (${msg.report.confidenceScore.toFixed(2)})` : ""}</span>
<span className="text-[8px] font-mono text-muted-foreground/70">{msg.report.confidence}{msg.report.confidenceScore != null ? ` (${confidenceFraction(msg.report.confidenceScore).toFixed(2)})` : ""}</span>
</div>
</div>
{msg.report.summary && (
Expand Down
3 changes: 2 additions & 1 deletion src/web/components/InvestigationPane.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import type { RcaReport as RcaReportType } from "../../types/rca-types.js";
import { formatTokens } from "../lib/formatTokens.js";
import { buildPhaseActions } from "../lib/grafana-links.js";
import { downloadMarkdown, downloadPng, copyMarkdown } from "../lib/exportInvestigation.js";
import { confidencePercent } from "../../lib/confidence.js";

const DEFAULT_PHASES: PhaseState[] = [
{ name: "planning", label: "Planning", status: "pending" },
Expand Down Expand Up @@ -706,7 +707,7 @@ export function InvestigationPane({
{(report as any)?.confidence && (
<MetaRow
label="confidence"
value={`${String((report as any).confidence).toUpperCase()}${(report as any).confidenceScore ? ` · ${Math.round((report as any).confidenceScore * 100)}%` : ""}`}
value={`${String((report as any).confidence).toUpperCase()}${(report as any).confidenceScore ? ` · ${confidencePercent((report as any).confidenceScore)}%` : ""}`}
/>
)}
{(report as any)?.severity && (
Expand Down
11 changes: 8 additions & 3 deletions src/web/components/RcaReport.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Badge } from "@/components/ui/badge";
import { type ReactNode } from "react";
import { FileText } from "lucide-react";
import { renderInline } from "../lib/renderInline";
import { confidenceFraction } from "../../lib/confidence.js";

interface RcaReportData {
rootCause: string;
Expand Down Expand Up @@ -98,6 +99,10 @@ function Section({ label, count, children }: { label: string; count?: number; ch

export function RcaReport({ report, hideOldDashboardLinks }: { report: RcaReportData; hideOldDashboardLinks?: boolean }) {

// Normalize to a 0–1 fraction for display/styling — stored scores may be 0–1
// or 0–100 depending on the synthesis completion (see lib/confidence).
const confFrac = report.confidenceScore != null ? confidenceFraction(report.confidenceScore) : null;

const severityGlow =
report.severity === "critical" ? "glow-red border-destructive/30" :
report.severity === "high" ? "glow-coral border-accent/25" :
Expand All @@ -118,7 +123,7 @@ export function RcaReport({ report, hideOldDashboardLinks }: { report: RcaReport
{report.severity}
</Badge>
<span className="text-[9px] font-mono text-muted-foreground">
{report.confidence}{report.confidenceScore != null ? ` (${report.confidenceScore.toFixed(2)})` : ""} confidence
{report.confidence}{confFrac != null ? ` (${confFrac.toFixed(2)})` : ""} confidence
</span>
</div>
</div>
Expand All @@ -142,7 +147,7 @@ export function RcaReport({ report, hideOldDashboardLinks }: { report: RcaReport
</div>
)}
{/* Low confidence banner */}
{report.confidenceScore != null && report.confidenceScore > 0 && report.confidenceScore < 0.5 && (
{confFrac != null && confFrac > 0 && confFrac < 0.5 && (
<div className="px-5 py-2.5 bg-warning/8 border-b border-warning/15 flex items-center gap-2">
<span className="text-warning text-sm">⚠</span>
<span className="text-[11px] font-body text-warning/80">Low confidence — insufficient data to determine root cause</span>
Expand All @@ -155,7 +160,7 @@ export function RcaReport({ report, hideOldDashboardLinks }: { report: RcaReport
<div className="space-y-4">
<div>
<SectionLabel color="text-primary">Root Cause</SectionLabel>
<p className={`text-[13px] font-body leading-relaxed ${report.confidenceScore != null && report.confidenceScore < 0.5 ? "text-foreground/50 italic" : "text-foreground/90"}`}>{renderInline(report.rootCause)}</p>
<p className={`text-[13px] font-body leading-relaxed ${confFrac != null && confFrac < 0.5 ? "text-foreground/50 italic" : "text-foreground/90"}`}>{renderInline(report.rootCause)}</p>
</div>

<div>
Expand Down
3 changes: 2 additions & 1 deletion src/web/lib/formatRcaMarkdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
*/

import type { RcaReport } from "../../types/rca-types.js";
import { confidencePercent } from "../../lib/confidence.js";

export function formatRcaMarkdown(report: RcaReport): string {
const lines: string[] = [];

lines.push(`# RCA Report: ${report.service}`);
lines.push("");
lines.push(`**Severity:** ${report.severity} | **Confidence:** ${report.confidence} (${Math.round(report.confidenceScore * 100)}%)`);
lines.push(`**Severity:** ${report.severity} | **Confidence:** ${report.confidence} (${confidencePercent(report.confidenceScore)}%)`);
lines.push(`**Investigated:** ${report.investigatedAt}`);
if (report.timeRange) {
lines.push(`**Investigation window:** ${report.timeRange.from} → ${report.timeRange.to}`);
Expand Down
6 changes: 5 additions & 1 deletion src/workflows/steps/synthesis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { safeJsonParse } from "../../agents/shared/processors.js";
import { createSynthesisAgent } from "../../agents/synthesis.js";
import { wrapUntrusted } from "../../agents/shared/prompt-helpers.js";
import { formatPatterns } from "../../agents/shared/patterns.js";
import { confidenceFraction } from "../../lib/confidence.js";
import { withLlmRetry, safeAgentRetryConfig } from "../../agents/shared/llm-retry.js";
import { LlmUnavailableError } from "../../agents/shared/llm-errors.js";
import { RankedHypothesisSchema } from "../schemas.js";
Expand Down Expand Up @@ -241,7 +242,10 @@ export function buildSynthesisStep(config: WorkflowConfig) {
dashboardLinks = synthesisParsed.dashboardLinks ?? dashboardLinks;
recommendedActions = synthesisParsed.recommendedActions ?? recommendedActions;
confidence = synthesisParsed.confidence ?? confidence;
confidenceScore = synthesisParsed.confidenceScore ?? confidenceScore;
// Normalize to a 0–1 fraction at the source — the LLM is inconsistent
// (some completions emit 0.9, others 90), and every downstream consumer
// (display ×100, the low-confidence gate) assumes 0–1.
confidenceScore = confidenceFraction(synthesisParsed.confidenceScore ?? confidenceScore);
}

// Deterministic severity validation
Expand Down
Loading