diff --git a/packages/ai/package.json b/packages/ai/package.json index dcb10aa8..d1dc2320 100644 --- a/packages/ai/package.json +++ b/packages/ai/package.json @@ -90,8 +90,14 @@ }, "peerDependencies": { "@opentelemetry/api": "^1.9.0", + "typescript": ">=5.4", "zod": "^3.25.0 || ^4.0.0" }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + }, "devDependencies": { "@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12", "@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@^2.0.57", diff --git a/packages/ai/src/evals/builder.ts b/packages/ai/src/evals/builder.ts index d596740b..7c865465 100644 --- a/packages/ai/src/evals/builder.ts +++ b/packages/ai/src/evals/builder.ts @@ -95,7 +95,7 @@ class EvalBuilderImpl< } // Call existing Eval function - this handles all Vitest registration - Eval(finalName, finalParams); + Eval(finalName, finalParams); } } diff --git a/packages/ai/src/evals/eval.ts b/packages/ai/src/evals/eval.ts index f53cbe18..bf253472 100644 --- a/packages/ai/src/evals/eval.ts +++ b/packages/ai/src/evals/eval.ts @@ -119,9 +119,10 @@ export function Eval< Step extends string = string, >( name: ValidateName, - params: Omit, 'capability' | 'step'> & { + params: Omit, 'capability' | 'step' | 'scorers'> & { capability: ValidateName; step?: ValidateName | undefined; + scorers: ReadonlyArray, NoInfer, TOutput>>; }, ): void { // Record eval name for validation diff --git a/packages/ai/test/evals/eval.types.test.ts b/packages/ai/test/evals/eval.types.test.ts new file mode 100644 index 00000000..fac465a2 --- /dev/null +++ b/packages/ai/test/evals/eval.types.test.ts @@ -0,0 +1,138 @@ +import { describe, it, expectTypeOf } from 'vitest'; +import { Eval } from '../../src/evals'; +import { Scorer } from '../../src/scorers/scorers'; + +describe('Eval type inference', () => { + it('infers task input and expected from data when scorer omits input', () => { + const answerSimilarityScorer = Scorer( + 'answer-similarity', + ({ output, expected }: { output: string; expected: string }) => { + output; + expected; + return 1; + }, + ); + + const compileOnly = () => + Eval('name-apl-query', { + capability: 'name_query', + data: () => [ + { + input: "['nginx-access-logs'] | where status >= 500", + expected: 'Nginx 5xx Errors', + }, + ], + task: async ({ input }: { input: string }) => input, + scorers: [answerSimilarityScorer], + }); + + compileOnly; + }); + + it('preserves contextual task input typing from data when scorer omits input', () => { + const exactMatch = Scorer( + 'exact-match', + ({ expected, output }: { expected: string; output: string }) => expected === output, + ); + + const compileOnly = () => + Eval('categorize-messages', { + capability: 'support-agent', + data: [ + { + input: 'Hello world', + expected: 'support', + }, + ], + task: ({ input }) => { + expectTypeOf(input).toEqualTypeOf(); + return input; + }, + scorers: [exactMatch], + }); + + compileOnly; + }); + + it('keeps structured task input inference anchored to data when scorers only use output', () => { + const queueMatchScorer = Scorer( + 'queue-match', + ({ expected, output }: { expected: { queue: string }; output: { queue: string } }) => + expected.queue === output.queue, + ); + + const compileOnly = () => + Eval('route-support-ticket', { + capability: 'support-routing', + data: [ + { + input: { + ticketId: 'ticket-123', + message: 'Need help with a refund', + customer: { + tier: 'enterprise' as const, + }, + }, + expected: { + queue: 'billing' as const, + }, + }, + ], + task: ({ input, expected }) => { + expectTypeOf(input.ticketId).toEqualTypeOf(); + expectTypeOf(input.customer.tier).toEqualTypeOf<'enterprise'>(); + expectTypeOf(expected.queue).toEqualTypeOf<'billing'>(); + + return { + queue: input.customer.tier === 'enterprise' ? 'billing' : 'general', + }; + }, + scorers: [queueMatchScorer], + }); + + compileOnly; + }); + + it('rejects task input that conflicts with the data source', () => { + const OutputOnlyScorer = Scorer( + 'output-only', + ({ output }: { output: string }) => output.length > 0, + ); + + const invalid = () => + Eval('mismatched-task-input', { + capability: 'name_query', + // @ts-expect-error task input must match the data input type + data: () => [ + { + input: 'foo', + expected: 'bar', + }, + ], + task: async ({ input }: { input: number }) => String(input), + scorers: [OutputOnlyScorer], + }); + + invalid; + }); + + it('rejects a scorer whose input type conflicts with data', () => { + const inputAwareScorer = Scorer( + 'input-aware', + ({ input, output }: { input: { id: number }; output: string }) => { + return input.id > 0 && output.length > 0; + }, + ); + + const invalid = () => + Eval('scorer-input-mismatch', { + capability: 'test', + data: [{ input: 'hello', expected: 'world' }], + task: ({ input }) => input, + // @ts-expect-error scorer input type conflicts with data input type + scorers: [inputAwareScorer], + }); + + invalid; + }); +});