fix(ai): normalize boolean scores in onlineEval scoresSummary (#263)

lukasmalkmus · web-flow · commit ff758426d58c · 2026-02-25T15:32:12.000+02:00
## Overview

- `onlineEval()` was writing raw boolean scores (`true`/`false`) into the parent eval span's `eval.case.scores` attribute, while child scorer spans correctly normalized them to `1`/`0` with `eval.score.is_boolean` metadata via `normalizeBooleanScore()`.
- Apply the same `normalizeBooleanScore()` call when building `scoresSummary` so both parent and child spans produce consistent numeric scores.

---

> [!NOTE]
> **Low Risk**
> Small telemetry-only change that affects how scores are serialized into span attributes; low risk aside from potential downstream expectations of boolean values.
>
> **Overview**
> Ensures `onlineEval()` writes consistent numeric scores into the parent eval span’s `eval.case.scores` summary by normalizing boolean `score` values (`true/false` → `1/0`) and propagating the corresponding `eval.score.is_boolean` metadata.
>
> This updates `onlineEval.ts` to call `normalizeBooleanScore()` while building `scoresSummary`, and only emits normalized metadata when non-empty.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit bfa6ce7. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
diff --git a/packages/ai/src/online-evals/onlineEval.ts b/packages/ai/src/online-evals/onlineEval.ts
@@ -9,6 +9,7 @@ import type {
   ScorerSampling,
 } from './types';
 import { executeScorer } from './executor';
+import { normalizeBooleanScore } from '../evals/normalize-score';
 import { Attr } from '../otel/semconv/attributes';
 import type { ValidateName } from '../util/name-validation';
 import { isValidName } from '../util/name-validation-runtime';
@@ -359,11 +360,16 @@ async function executeOnlineEvalInternal<
 
     const scoresSummary: Record<string, ScorerResult> = {};
     for (const [name, result] of Object.entries(results)) {
+      const { score: normalizedScore, metadata: normalizedMetadata } = normalizeBooleanScore(
+        result.score,
+        result.metadata,
+      );
+
       scoresSummary[name] = {
         name: result.name,
-        score: result.score,
-        ...(result.metadata &&
-          Object.keys(result.metadata).length > 0 && { metadata: result.metadata }),
+        score: normalizedScore,
+        ...(normalizedMetadata &&
+          Object.keys(normalizedMetadata).length > 0 && { metadata: normalizedMetadata }),
         ...(result.error && { error: result.error }),
       };
     }