diff --git a/packages/sdk/server-ai/README.md b/packages/sdk/server-ai/README.md
index 62a0b92425..c91170e41f 100644
--- a/packages/sdk/server-ai/README.md
+++ b/packages/sdk/server-ai/README.md
@@ -97,7 +97,7 @@ if (aiConfig.enabled) {
 ```typescript
 // Use the same defaultConfig from the retrieval section above
-const chat = await aiClient.initChat(
+const chat = await aiClient.createChat(
   'customer-support-chat',
   context,
   defaultConfig,
diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts
new file mode 100644
index 0000000000..1144f119f8
--- /dev/null
+++ b/packages/sdk/server-ai/__tests__/Judge.test.ts
@@ -0,0 +1,497 @@
+import { LDLogger } from '@launchdarkly/js-server-sdk-common';
+
+import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker';
+import { LDAIJudgeConfig, LDMessage } from '../src/api/config/types';
+import { Judge } from '../src/api/judge/Judge';
+import { StructuredResponse } from '../src/api/judge/types';
+import { AIProvider } from '../src/api/providers/AIProvider';
+
+describe('Judge', () => {
+  let mockProvider: jest.Mocked<AIProvider>;
+  let mockTracker: jest.Mocked<LDAIConfigTracker>;
+  let mockLogger: jest.Mocked<LDLogger>;
+  let judgeConfig: LDAIJudgeConfig;
+
+  const mockTrackData = {
+    variationKey: 'test-variation',
+    configKey: 'test-config',
+    version: 1,
+  };
+
+  beforeEach(() => {
+    // Mock the AIProvider - only mock what's actually used
+    mockProvider = {
+      invokeStructuredModel: jest.fn(),
+    } as any;
+
+    // Mock the LDAIConfigTracker - only mock what's actually used
+    mockTracker = {
+      trackMetricsOf: jest.fn(),
+      getTrackData: jest.fn().mockReturnValue(mockTrackData),
+    } as any;
+
+    // Mock the logger - only mock what's actually used
+    mockLogger = {
+      debug: jest.fn(),
+      warn: jest.fn(),
+      error: jest.fn(),
+    } as any;
+
+    // Create a basic judge config
+    judgeConfig = {
+      enabled: true,
+      messages: [
+        { role: 'system', content: 'You are a helpful judge that evaluates AI responses.'
}, + { + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: {{message_history}}, Output: {{response_to_evaluate}}', + }, + ], + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + tracker: mockTracker, + evaluationMetricKeys: ['relevance', 'accuracy', 'helpfulness'], + toVercelAISDK: jest.fn(), + }; + }); + + describe('constructor', () => { + it('initializes with proper configuration', () => { + const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + + expect(judge).toBeDefined(); + }); + }); + + describe('evaluate', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('evaluates AI response successfully', async () => { + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }), + metrics: { + success: true, + usage: { + total: 100, + input: 50, + output: 50, + }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate( + 'What is the capital of France?', + 'Paris is the capital of France.', + ); + + expect(result).toEqual({ + evals: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + success: true, + }); + + expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + role: 'system', + content: 'You are a helpful judge that evaluates AI responses.', + }), + expect.objectContaining({ + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: What is the capital of France?, Output: Paris is the capital of France.', + }), + ]), + expect.any(Object), // evaluation response structure + ); + }); + + it('handles sampling rate correctly', async () => { + // Mock Math.random to return 0.3 (should be sampled with rate 0.5 since 0.3 <= 0.5) + const originalRandom = Math.random; + Math.random = jest.fn().mockReturnValue(0.3); + + // Mock the structured response + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }), + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + 
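+      // Note (inferred from this suite): trackMetricsOf is mocked as a pass-through,
+      // so the provider call runs unwrapped; the sampling check is what gates whether
+      // invokeStructuredModel is reached at all (the judge runs only when
+      // Math.random() <= samplingRate, otherwise evaluate() resolves to undefined).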
mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate('test input', 'test output', 0.5); + + expect(result).toBeDefined(); + expect(mockProvider.invokeStructuredModel).toHaveBeenCalled(); + + Math.random = originalRandom; + }); + + it('returns undefined when not sampled', async () => { + // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5) + const originalRandom = Math.random; + Math.random = jest.fn().mockReturnValue(0.8); + + const result = await judge.evaluate('test input', 'test output', 0.5); + + expect(result).toBeUndefined(); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockLogger.debug).toHaveBeenCalledWith( + 'Judge evaluation skipped due to sampling rate: 0.5', + ); + + Math.random = originalRandom; + }); + + it('returns undefined when evaluationMetricKeys is empty', async () => { + const configWithoutMetrics: LDAIJudgeConfig = { + ...judgeConfig, + evaluationMetricKeys: [], + }; + const judgeWithoutMetrics = new Judge( + configWithoutMetrics, + mockTracker, + mockProvider, + mockLogger, + ); + + const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); + + expect(result).toBeUndefined(); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Judge configuration is missing required evaluationMetricKeys', + mockTrackData, + ); + }); + + it('returns undefined when messages are missing', async () => { + const configWithoutMessages: LDAIJudgeConfig = { + ...judgeConfig, + messages: undefined, + }; + const judgeWithoutMessages = new Judge( + configWithoutMessages, + mockTracker, + mockProvider, + mockLogger, + ); + + const result = await judgeWithoutMessages.evaluate('test input', 'test output'); + + expect(result).toBeUndefined(); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Judge configuration must include messages', + mockTrackData, + ); + }); + + it('returns partial evaluations when some metrics are missing', async () => { + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + // accuracy is missing + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }), + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate('test input', 'test output'); + + // When one metric is missing, it returns the partial evals it has with success: false + expect(result).toEqual({ + evals: { + relevance: { score: 0.8, reasoning: 'Good' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + success: false, + }); + }); + + it('returns empty evaluations when response structure is malformed', async () => { + const mockStructuredResponse: StructuredResponse = { + data: { + // Missing 'evaluations' wrapper - malformed structure + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + rawResponse: JSON.stringify({ + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }), + metrics: 
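+        /* Shape note (inferred from this suite): `metrics` mirrors the provider call
+           result that the trackMetricsOf extractor reads — a success flag plus token
+           usage (total/input/output). */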
{ + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate('test input', 'test output'); + + // When the structure is completely wrong, returns empty evals with success: false + expect(result).toEqual({ + evals: {}, + success: false, + }); + }); + + it('handles provider errors gracefully', async () => { + const error = new Error('Provider error'); + mockTracker.trackMetricsOf.mockRejectedValue(error); + + const result = await judge.evaluate('test input', 'test output'); + + expect(result).toEqual({ + evals: {}, + success: false, + error: 'Provider error', + }); + expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error); + }); + + it('handles non-Error exceptions', async () => { + mockTracker.trackMetricsOf.mockRejectedValue('String error'); + + const result = await judge.evaluate('test input', 'test output'); + + expect(result).toEqual({ + evals: {}, + success: false, + error: 'Unknown error', + }); + }); + }); + + describe('evaluateMessages', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('evaluates messages and response successfully', async () => { + const messages: LDMessage[] = [ + { role: 'user', content: 'What is the capital of France?' }, + { role: 'assistant', content: 'Paris is the capital of France.' }, + ]; + const response = { + message: { role: 'assistant' as const, content: 'Paris is the capital of France.' }, + metrics: { success: true }, + }; + + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }), + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluateMessages(messages, response); + + expect(result).toEqual({ + evals: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + success: true, + }); + + expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + role: 'system', + content: 'You are a helpful judge that evaluates AI responses.', + }), + expect.objectContaining({ + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: What is the capital of France?\r\nParis is the capital of France., Output: Paris is the capital of France.', + }), + ]), + expect.any(Object), // evaluation response structure + ); + }); + + it('handles sampling rate 
correctly', async () => { + const messages: LDMessage[] = [{ role: 'user', content: 'test' }]; + const response = { + message: { role: 'assistant' as const, content: 'test response' }, + metrics: { success: true }, + }; + + // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5) + const originalRandom = Math.random; + Math.random = jest.fn().mockReturnValue(0.8); + + const result = await judge.evaluateMessages(messages, response, 0.5); + + expect(result).toBeUndefined(); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + + Math.random = originalRandom; + }); + }); + + describe('_constructEvaluationMessages', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('constructs evaluation messages correctly', () => { + // Access private method for testing + // eslint-disable-next-line no-underscore-dangle + const constructMessages = (judge as any)._constructEvaluationMessages.bind(judge); + const messages = constructMessages('test input', 'test output'); + + expect(messages).toHaveLength(2); + expect(messages[0]).toEqual({ + role: 'system', + content: 'You are a helpful judge that evaluates AI responses.', + }); + expect(messages[1]).toEqual({ + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: test input, Output: test output', + }); + }); + }); + + describe('_parseEvaluationResponse', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('parses valid evaluation response correctly', () => { + // eslint-disable-next-line no-underscore-dangle + const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); + const responseData = { + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }; + + const result = parseResponse(responseData); + + expect(result).toEqual({ + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }); + }); + + it('returns empty object for invalid response data', () => { + // eslint-disable-next-line no-underscore-dangle + const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); + const responseData = { + relevance: { score: 0.8, reasoning: 'Good' }, + // Missing evaluations wrapper - invalid structure + }; + + const result = parseResponse(responseData); + + // Returns empty object when evaluations structure is missing + expect(result).toEqual({}); + }); + + it('handles missing score or reasoning fields', () => { + // eslint-disable-next-line no-underscore-dangle + const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); + const responseData = { + evaluations: { + relevance: { score: 0.8 }, // Missing reasoning + accuracy: { reasoning: 'Accurate' }, // Missing score + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }; + + const result = parseResponse(responseData); + + // Only helpfulness passes validation, relevance and accuracy are skipped + expect(result).toEqual({ + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }); + }); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 6badffd94a..c1d40727f9 100644 --- 
a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -1,21 +1,35 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { LDAIAgentDefaults } from '../src/api/agents'; -import { LDAIDefaults } from '../src/api/config'; +import { + LDAIAgentConfigDefault, + LDAIConversationConfigDefault, + LDAIJudgeConfigDefault, +} from '../src/api/config/types'; +import { Judge } from '../src/api/judge/Judge'; +import { AIProviderFactory } from '../src/api/providers/AIProviderFactory'; import { LDAIClientImpl } from '../src/LDAIClientImpl'; import { LDClientMin } from '../src/LDClientMin'; +// Mock Judge and AIProviderFactory +jest.mock('../src/api/judge/Judge'); +jest.mock('../src/api/providers/AIProviderFactory'); + const mockLdClient: jest.Mocked = { variation: jest.fn(), track: jest.fn(), }; +// Reset mocks before each test +beforeEach(() => { + jest.clearAllMocks(); +}); + const testContext: LDContext = { kind: 'user', key: 'test-user' }; it('returns config with interpolated messages', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, messages: [], enabled: true, @@ -36,6 +50,7 @@ it('returns config with interpolated messages', async () => { _ldMeta: { variationKey: 'v1', enabled: true, + mode: 'completion', }, }; @@ -73,14 +88,14 @@ it('returns config with interpolated messages', async () => { it('includes context in variables for messages interpolation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, messages: [], }; const mockVariation = { messages: [{ role: 'system', content: 'User key: {{ldctx.key}}' }], - _ldMeta: { variationKey: 'v1', enabled: true }, + _ldMeta: { variationKey: 'v1', enabled: true, mode: 'completion' }, }; mockLdClient.variation.mockResolvedValue(mockVariation); @@ -94,7 +109,7 @@ it('includes context in variables for messages interpolation', async () => { it('handles missing metadata in variation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, messages: [], }; @@ -108,11 +123,10 @@ it('handles missing metadata in variation', async () => { const result = await client.config(key, testContext, defaultValue); + // When metadata/mode is missing, a disabled config is returned expect(result).toEqual({ - model: { name: 'example-provider', parameters: { name: 'imagination' } }, - messages: [{ role: 'system', content: 'Hello' }], - tracker: expect.any(Object), enabled: false, + tracker: undefined, toVercelAISDK: expect.any(Function), }); }); @@ -120,7 +134,7 @@ it('handles missing metadata in variation', async () => { it('passes the default value to the underlying client', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'non-existent-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'default-model', parameters: { name: 'default' } }, provider: { name: 'default-provider' }, messages: [{ role: 'system', content: 'Default 
messages' }], @@ -128,7 +142,7 @@ it('passes the default value to the underlying client', async () => { }; const expectedLDFlagValue = { - _ldMeta: { enabled: true }, + _ldMeta: { enabled: true, mode: 'completion', variationKey: '' }, model: defaultValue.model, messages: defaultValue.messages, provider: defaultValue.provider, @@ -154,7 +168,7 @@ it('passes the default value to the underlying client', async () => { it('returns single agent config with interpolated instructions', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, instructions: 'You are a helpful assistant.', enabled: true, @@ -206,7 +220,7 @@ it('returns single agent config with interpolated instructions', async () => { it('includes context in variables for agent instructions interpolation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, instructions: 'You are a helpful assistant.', enabled: true, @@ -227,7 +241,7 @@ it('includes context in variables for agent instructions interpolation', async ( it('handles missing metadata in agent variation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, instructions: 'You are a helpful assistant.', enabled: true, @@ -242,18 +256,17 @@ it('handles missing metadata in agent variation', async () => { const result = await client.agent(key, testContext, defaultValue); + // When metadata/mode is missing, a disabled config is returned expect(result).toEqual({ - model: { name: 'example-provider', parameters: { name: 'imagination' } }, - instructions: 'Hello.', - tracker: expect.any(Object), enabled: false, + tracker: undefined, }); }); it('passes the default value to the underlying client for single agent', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'non-existent-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'default-model', parameters: { name: 'default' } }, provider: { name: 'default-provider' }, instructions: 'Default instructions', @@ -261,7 +274,7 @@ it('passes the default value to the underlying client for single agent', async ( }; const expectedLDFlagValue = { - _ldMeta: { enabled: defaultValue.enabled }, + _ldMeta: { enabled: defaultValue.enabled, mode: 'agent', variationKey: '' }, model: defaultValue.model, provider: defaultValue.provider, instructions: defaultValue.instructions, @@ -380,3 +393,227 @@ it('handles empty agent configs array', async () => { 0, ); }); + +// New judgeConfig-related tests +describe('judgeConfig method', () => { + it('retrieves judge configuration successfully', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system', content: 'You are a judge.' 
}], + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + // Mock the _evaluate method + const evaluateSpy = jest.spyOn(client as any, '_evaluate'); + evaluateSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.judgeConfig(key, testContext, defaultValue); + + expect(mockLdClient.track).toHaveBeenCalledWith( + '$ld:ai:judge:function:single', + testContext, + key, + 1, + ); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(result).toBe(mockJudgeConfig); + }); + + it('handles variables parameter', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + const variables = { metric: 'relevance' }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + const evaluateSpy = jest.spyOn(client as any, '_evaluate'); + evaluateSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.judgeConfig(key, testContext, defaultValue, variables); + + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', variables); + expect(result).toBe(mockJudgeConfig); + }); +}); + +describe('createJudge method', () => { + let mockProvider: jest.Mocked; + let mockJudge: jest.Mocked; + + beforeEach(() => { + mockProvider = { + invokeStructuredModel: jest.fn(), + }; + + mockJudge = { + evaluate: jest.fn(), + evaluateMessages: jest.fn(), + } as any; + + // Mock AIProviderFactory.create + (AIProviderFactory.create as jest.Mock).mockResolvedValue(mockProvider); + + // Mock Judge constructor + (Judge as jest.MockedClass).mockImplementation(() => mockJudge); + }); + + it('initializes judge successfully', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system' as const, content: 'You are a judge.' 
}], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + // Mock the judgeConfig method + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(mockLdClient.track).toHaveBeenCalledWith( + '$ld:ai:judge:function:createJudge', + testContext, + key, + 1, + ); + expect(judgeConfigSpy).toHaveBeenCalledWith(key, testContext, defaultValue, { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); + expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); + expect(Judge).toHaveBeenCalledWith( + mockJudgeConfig, + mockJudgeConfig.tracker, + mockProvider, + undefined, + ); + expect(result).toBe(mockJudge); + }); + + it('returns undefined when judge configuration is disabled', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: false, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + + const mockJudgeConfig = { + enabled: false, // This should be false to test disabled case + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: undefined, // No tracker for disabled config + toVercelAISDK: jest.fn(), + }; + + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(result).toBeUndefined(); + expect(AIProviderFactory.create).not.toHaveBeenCalled(); + expect(Judge).not.toHaveBeenCalled(); + }); + + it('returns undefined when AIProviderFactory.create fails', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockResolvedValue(mockJudgeConfig); + + (AIProviderFactory.create as jest.Mock).mockResolvedValue(undefined); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(result).toBeUndefined(); + expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); + expect(Judge).not.toHaveBeenCalled(); + }); + + it('handles errors gracefully', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' 
}], + }; + + const error = new Error('Judge configuration error'); + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockRejectedValue(error); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(result).toBeUndefined(); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts index c91b2d3d57..4ac561307f 100644 --- a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts +++ b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts @@ -1,13 +1,13 @@ import { TrackedChat } from '../src/api/chat/TrackedChat'; import { ChatResponse } from '../src/api/chat/types'; -import { LDAIConfig, LDMessage } from '../src/api/config/LDAIConfig'; import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; +import { LDAIConversationConfig, LDMessage } from '../src/api/config/types'; import { AIProvider } from '../src/api/providers/AIProvider'; describe('TrackedChat', () => { let mockProvider: jest.Mocked; let mockTracker: jest.Mocked; - let aiConfig: LDAIConfig; + let aiConfig: LDAIConversationConfig; beforeEach(() => { // Mock the AIProvider @@ -136,7 +136,7 @@ describe('TrackedChat', () => { }); it('returns empty array when no messages exist and includeConfigMessages is false', () => { - const configWithoutMessages: LDAIConfig = { + const configWithoutMessages: LDAIConversationConfig = { ...aiConfig, messages: [], }; @@ -167,7 +167,7 @@ describe('TrackedChat', () => { }); it('handles undefined config messages gracefully', () => { - const configWithoutMessages: LDAIConfig = { + const configWithoutMessages: LDAIConversationConfig = { ...aiConfig, messages: undefined, }; diff --git a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index fc74d63ff2..ac331d23e8 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -61,9 +61,13 @@ async function main() { myVariable: 'My User Defined Variable', }, ); - const { tracker } = aiConfig; - const completion = tracker.trackBedrockConverseMetrics( + if (!aiConfig.enabled || !aiConfig.tracker) { + console.log('*** AI configuration is not enabled'); + process.exit(0); + } + + const completion = aiConfig.tracker.trackBedrockConverseMetrics( await awsClient.send( new ConverseCommand({ modelId: aiConfig.model?.name ?? 
'no-model', diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index d199edd274..d943e67a0d 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -53,12 +53,17 @@ async function main(): Promise { model: { name: 'gpt-4', }, + enabled: false, }, { myVariable: 'My User Defined Variable' }, ); - const { tracker } = aiConfig; - const completion = await tracker.trackOpenAIMetrics(async () => + if (!aiConfig.enabled || !aiConfig.tracker) { + console.log('*** AI configuration is not enabled'); + process.exit(0); + } + + const completion = await aiConfig.tracker.trackOpenAIMetrics(async () => client.chat.completions.create({ messages: aiConfig.messages || [], model: aiConfig.model?.name || 'gpt-4', diff --git a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts index 4d79bd3800..bd087e2296 100644 --- a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts +++ b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts @@ -40,7 +40,10 @@ async function main() { const aiClient = initAi(client); // Get AI configuration from LaunchDarkly - const aiConfig = await aiClient.config(aiConfigKey, context, { model: { name: 'gpt-4' } }); + const aiConfig = await aiClient.config(aiConfigKey, context, { + model: { name: 'gpt-4' }, + enabled: false, + }); if (!aiConfig.enabled || !aiConfig.tracker) { console.log('*** AI configuration is not enabled'); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index edac548100..ef1d2b31ae 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -2,61 +2,41 @@ import * as Mustache from 'mustache'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIAgent, LDAIAgentConfig, LDAIAgentDefaults } from './api/agents'; import { TrackedChat } from './api/chat'; import { - LDAIConfig, - LDAIConfigTracker, - LDAIDefaults, + LDAIAgentConfig, + LDAIAgentConfigDefault, + LDAIAgentRequestConfig, + LDAIConfigDefaultKind, + LDAIConfigKind, + LDAIConfigMode, + LDAIConversationConfig, + LDAIConversationConfigDefault, + LDAIJudgeConfig, + LDAIJudgeConfigDefault, + LDJudge, LDMessage, - LDModelConfig, - LDProviderConfig, VercelAISDKConfig, VercelAISDKMapOptions, VercelAISDKProvider, } from './api/config'; +import { LDAIConfigFlagValue, LDAIConfigUtils } from './api/config/LDAIConfigUtils'; +import { Judge } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; import { AIProviderFactory, SupportedAIProvider } from './api/providers'; import { LDAIConfigMapper } from './LDAIConfigMapper'; import { LDAIConfigTrackerImpl } from './LDAIConfigTrackerImpl'; import { LDClientMin } from './LDClientMin'; -type Mode = 'completion' | 'agent'; - /** - * Metadata associated with a model configuration variation. + * Tracking event keys for AI SDK usage metrics. */ -interface LDMeta { - variationKey: string; - enabled: boolean; - version?: number; - mode?: Mode; -} - -/** - * Interface for the model configuration variation returned by LaunchDarkly. This is the internal - * typing and not meant for exposure to the application developer. - */ -interface VariationContent { - model?: LDModelConfig; - messages?: LDMessage[]; - instructions?: string; - provider?: LDProviderConfig; - _ldMeta?: LDMeta; -} - -/** - * The result of evaluating a configuration. 
- */ -interface EvaluationResult { - tracker: LDAIConfigTracker; - enabled: boolean; - model?: LDModelConfig; - provider?: LDProviderConfig; - messages?: LDMessage[]; - instructions?: string; - mode?: string; -} +const TRACK_CONFIG_SINGLE = '$ld:ai:config:function:single'; +const TRACK_CONFIG_CREATE_CHAT = '$ld:ai:config:function:createChat'; +const TRACK_JUDGE_SINGLE = '$ld:ai:judge:function:single'; +const TRACK_JUDGE_CREATE = '$ld:ai:judge:function:createJudge'; +const TRACK_AGENT_SINGLE = '$ld:ai:agent:function:single'; +const TRACK_AGENT_MULTIPLE = '$ld:ai:agent:function:multiple'; export class LDAIClientImpl implements LDAIClient { private _logger?: LDLogger; @@ -69,32 +49,26 @@ export class LDAIClientImpl implements LDAIClient { return Mustache.render(template, variables, undefined, { escape: (item: any) => item }); } - private static _toLDFlagValue(defaultValue: LDAIDefaults | LDAIAgentDefaults): { - _ldMeta: { enabled: boolean }; - model?: LDModelConfig; - messages?: LDMessage[]; - provider?: LDProviderConfig; - instructions?: string; - } { - return { - _ldMeta: { enabled: defaultValue.enabled ?? false }, - model: defaultValue.model, - messages: 'messages' in defaultValue ? defaultValue.messages : undefined, - provider: defaultValue.provider, - instructions: 'instructions' in defaultValue ? defaultValue.instructions : undefined, - }; - } - private async _evaluate( key: string, context: LDContext, - defaultValue: LDAIDefaults, - ): Promise { - // Convert default value to LDFlagValue format - // eslint-disable-next-line no-underscore-dangle - const ldFlagValue = LDAIClientImpl._toLDFlagValue(defaultValue); + defaultValue: LDAIConfigDefaultKind, + mode: LDAIConfigMode, + variables?: Record, + ): Promise { + const ldFlagValue = LDAIConfigUtils.toFlagValue(defaultValue, mode); - const value: VariationContent = await this._ldClient.variation(key, context, ldFlagValue); + const value: LDAIConfigFlagValue = await this._ldClient.variation(key, context, ldFlagValue); + + // Validate mode match + // eslint-disable-next-line no-underscore-dangle + const flagMode = value._ldMeta?.mode; + if (flagMode !== mode) { + this._logger?.warn( + `AI Config mode mismatch for ${key}: expected ${mode}, got ${flagMode}. Returning disabled config.`, + ); + return LDAIConfigUtils.createDisabledConfig(mode); + } const tracker = new LDAIConfigTrackerImpl( this._ldClient, @@ -108,174 +82,263 @@ export class LDAIClientImpl implements LDAIClient { context, ); - // eslint-disable-next-line no-underscore-dangle - const enabled = !!value._ldMeta?.enabled; + const config = LDAIConfigUtils.fromFlagValue(value, tracker); - return { - tracker, - enabled, - model: value.model, - provider: value.provider, - messages: value.messages, - instructions: value.instructions, - // eslint-disable-next-line no-underscore-dangle - mode: value._ldMeta?.mode ?? 
'completion', - }; + // Apply variable interpolation (always needed for ldctx) + return this._applyInterpolation(config, context, variables); } - private async _evaluateAgent( - key: string, + private _applyInterpolation( + config: LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig, context: LDContext, - defaultValue: LDAIAgentDefaults, variables?: Record, - ): Promise { - const { - tracker, - enabled, - model, - provider: configProvider, - instructions, - } = await this._evaluate(key, context, defaultValue); - - const agent: LDAIAgent = { - tracker, - enabled, - }; + ): LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig { + const allVariables = { ...variables, ldctx: context }; - // We are going to modify the contents before returning them, so we make a copy. - // This isn't a deep copy and the application developer should not modify the returned content. - if (model) { - agent.model = { ...model }; + if ('messages' in config && config.messages) { + return { + ...config, + messages: config.messages.map((entry: LDMessage) => ({ + ...entry, + content: this._interpolateTemplate(entry.content, allVariables), + })), + }; } - if (configProvider) { - agent.provider = { ...configProvider }; + if ('instructions' in config && config.instructions) { + return { + ...config, + instructions: this._interpolateTemplate(config.instructions, allVariables), + }; } - const allVariables = { ...variables, ldctx: context }; + return config; + } - if (instructions) { - agent.instructions = this._interpolateTemplate(instructions, allVariables); - } + private _addVercelAISDKSupport(config: LDAIConversationConfig): LDAIConversationConfig { + const { messages } = config; + const mapper = new LDAIConfigMapper(config.model, config.provider, messages); + + return { + ...config, + toVercelAISDK: ( + sdkProvider: VercelAISDKProvider | Record>, + options?: VercelAISDKMapOptions | undefined, + ): VercelAISDKConfig => mapper.toVercelAISDK(sdkProvider, options), + }; + } + + private async _initializeJudges( + judgeConfigs: LDJudge[], + context: LDContext, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise> { + const judges: Record = {}; + + const judgePromises = judgeConfigs.map(async (judgeConfig) => { + const judge = await this.createJudge( + judgeConfig.key, + context, + { enabled: false }, + variables, + defaultAiProvider, + ); + return judge ? { key: judgeConfig.key, judge } : null; + }); + + const results = await Promise.all(judgePromises); + results.forEach((result) => { + if (result) { + judges[result.key] = result.judge; + } + }); + + return judges; + } - return agent; + async completionConfig( + key: string, + context: LDContext, + defaultValue: LDAIConversationConfigDefault, + variables?: Record, + ): Promise { + this._ldClient.track(TRACK_CONFIG_SINGLE, context, key, 1); + + const config = await this._evaluate(key, context, defaultValue, 'completion', variables); + return this._addVercelAISDKSupport(config as LDAIConversationConfig); } + /** + * @deprecated Use `completionConfig` instead. This method will be removed in a future version. 
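+   *
+   * Migration is a rename; the signature is unchanged:
+   *   // before: const cfg = await aiClient.config(key, context, defaultValue, variables);
+   *   // after:  const cfg = await aiClient.completionConfig(key, context, defaultValue, variables);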
+ */ async config( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, - ): Promise { - this._ldClient.track('$ld:ai:config:function:single', context, key, 1); - - const { - tracker, - enabled, - model, - provider: configProvider, - messages, - } = await this._evaluate(key, context, defaultValue); - - const config: Omit = { - tracker, - enabled, - }; + ): Promise { + return this.completionConfig(key, context, defaultValue, variables); + } - // We are going to modify the contents before returning them, so we make a copy. - // This isn't a deep copy and the application developer should not modify the returned content. - if (model) { - config.model = { ...model }; - } - if (configProvider) { - config.provider = { ...configProvider }; - } - const allVariables = { ...variables, ldctx: context }; + async judgeConfig( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + ): Promise { + this._ldClient.track(TRACK_JUDGE_SINGLE, context, key, 1); - if (messages) { - config.messages = messages.map((entry: any) => ({ - ...entry, - content: this._interpolateTemplate(entry.content, allVariables), - })); - } + const config = await this._evaluate(key, context, defaultValue, 'judge', variables); + return config as LDAIJudgeConfig; + } - const mapper = new LDAIConfigMapper(config.model, config.provider, config.messages); + async agentConfig( + key: string, + context: LDContext, + defaultValue: LDAIAgentConfigDefault, + variables?: Record, + ): Promise { + this._ldClient.track(TRACK_AGENT_SINGLE, context, key, 1); - return { - ...config, - toVercelAISDK: ( - sdkProvider: VercelAISDKProvider | Record>, - options?: VercelAISDKMapOptions | undefined, - ): VercelAISDKConfig => mapper.toVercelAISDK(sdkProvider, options), - }; + const config = await this._evaluate(key, context, defaultValue, 'agent', variables); + return config as LDAIAgentConfig; } + /** + * @deprecated Use `agentConfig` instead. This method will be removed in a future version. + */ async agent( key: string, context: LDContext, - defaultValue: LDAIAgentDefaults, + defaultValue: LDAIAgentConfigDefault, variables?: Record, - ): Promise { - // Track agent usage - this._ldClient.track('$ld:ai:agent:function:single', context, key, 1); - - return this._evaluateAgent(key, context, defaultValue, variables); + ): Promise { + return this.agentConfig(key, context, defaultValue, variables); } - async agents( + async agentConfigs( agentConfigs: T, context: LDContext, - ): Promise> { - // Track multiple agents usage - this._ldClient.track( - '$ld:ai:agent:function:multiple', - context, - agentConfigs.length, - agentConfigs.length, - ); + ): Promise> { + this._ldClient.track(TRACK_AGENT_MULTIPLE, context, agentConfigs.length, agentConfigs.length); - const agents = {} as Record; + const agents = {} as Record; await Promise.all( agentConfigs.map(async (config) => { - const agent = await this._evaluateAgent( + const agent = await this._evaluate( config.key, context, config.defaultValue, + 'agent', config.variables, ); - agents[config.key as T[number]['key']] = agent; + agents[config.key as T[number]['key']] = agent as LDAIAgentConfig; }), ); return agents; } - async initChat( + /** + * @deprecated Use `agentConfigs` instead. This method will be removed in a future version. 
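+   *
+   * Migration is a rename; the array-of-agent-configs argument shape is unchanged:
+   *   // before: const result = await aiClient.agents(agentConfigsList, context);
+   *   // after:  const result = await aiClient.agentConfigs(agentConfigsList, context);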
+ */ + async agents( + agentConfigs: T, + context: LDContext, + ): Promise> { + return this.agentConfigs(agentConfigs, context); + } + + async createChat( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise { - // Track chat initialization - this._ldClient.track('$ld:ai:config:function:initChat', context, key, 1); + this._ldClient.track(TRACK_CONFIG_CREATE_CHAT, context, key, 1); - const aiConfig = await this.config(key, context, defaultValue, variables); + const config = await this.completionConfig(key, context, defaultValue, variables); - // Return undefined if the configuration is disabled - if (!aiConfig.enabled) { + if (!config.enabled || !config.tracker) { this._logger?.info(`Chat configuration is disabled: ${key}`); return undefined; } - // Create the AIProvider instance - const provider = await AIProviderFactory.create(aiConfig, this._logger, defaultAiProvider); + const provider = await AIProviderFactory.create(config, this._logger, defaultAiProvider); if (!provider) { return undefined; } - // Create the TrackedChat instance with the provider - return new TrackedChat(aiConfig, aiConfig.tracker, provider); + const judges = await this._initializeJudges( + config.judgeConfiguration?.judges ?? [], + context, + variables, + defaultAiProvider, + ); + + return new TrackedChat(config, config.tracker, provider, judges, this._logger); + } + + async createJudge( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise { + this._ldClient.track(TRACK_JUDGE_CREATE, context, key, 1); + + try { + if (variables?.message_history !== undefined) { + this._logger?.warn( + "The variable 'message_history' is reserved by the judge and will be ignored.", + ); + } + if (variables?.response_to_evaluate !== undefined) { + this._logger?.warn( + "The variable 'response_to_evaluate' is reserved by the judge and will be ignored.", + ); + } + + // Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + const extendedVariables = { + ...variables, + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }; + + const judgeConfig = await this.judgeConfig(key, context, defaultValue, extendedVariables); + + if (!judgeConfig.enabled || !judgeConfig.tracker) { + this._logger?.info(`Judge configuration is disabled: ${key}`); + return undefined; + } + + const provider = await AIProviderFactory.create(judgeConfig, this._logger, defaultAiProvider); + if (!provider) { + return undefined; + } + + return new Judge(judgeConfig, judgeConfig.tracker, provider, this._logger); + } catch (error) { + this._logger?.error(`Failed to initialize judge ${key}:`, error); + return undefined; + } + } + + /** + * @deprecated Use `createChat` instead. This method will be removed in a future version. 
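+   *
+   * Migration is a rename; initChat delegates directly to createChat:
+   *   // before: const chat = await aiClient.initChat(key, context, defaultValue, variables);
+   *   // after:  const chat = await aiClient.createChat(key, context, defaultValue, variables);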
+   */
+  async initChat(
+    key: string,
+    context: LDContext,
+    defaultValue: LDAIConversationConfigDefault,
+    variables?: Record<string, unknown>,
+    defaultAiProvider?: SupportedAIProvider,
+  ): Promise<TrackedChat | undefined> {
+    return this.createChat(key, context, defaultValue, variables, defaultAiProvider);
+  }
 }
diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
index 578ef1f8da..c55051f4cf 100644
--- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
+++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
@@ -2,6 +2,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common';
 
 import { LDAIConfigTracker } from './api/config';
 import { LDAIMetricSummary } from './api/config/LDAIConfigTracker';
+import { EvalScore } from './api/judge/types';
 import {
   createBedrockTokenUsage,
   createOpenAiUsage,
@@ -25,7 +26,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
     private _context: LDContext,
   ) {}
 
-  private _getTrackData(): {
+  getTrackData(): {
     variationKey: string;
     configKey: string;
     version: number;
@@ -43,7 +44,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
 
   trackDuration(duration: number): void {
     this._trackedMetrics.durationMs = duration;
-    this._ldClient.track('$ld:ai:duration:total', this._context, this._getTrackData(), duration);
+    this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration);
   }
 
   async trackDurationOf(func: () => Promise<any>): Promise<any> {
@@ -64,28 +65,35 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
     this._ldClient.track(
       '$ld:ai:tokens:ttf',
       this._context,
-      this._getTrackData(),
+      this.getTrackData(),
       timeToFirstTokenMs,
     );
   }
 
+  trackEvalScores(scores: Record<string, EvalScore>) {
+    // Track each evaluation score individually
+    Object.entries(scores).forEach(([metricKey, evalScore]) => {
+      this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score);
+    });
+  }
+
   trackFeedback(feedback: { kind: LDFeedbackKind }): void {
     this._trackedMetrics.feedback = feedback;
     if (feedback.kind === LDFeedbackKind.Positive) {
-      this._ldClient.track('$ld:ai:feedback:user:positive', this._context, this._getTrackData(), 1);
+      this._ldClient.track('$ld:ai:feedback:user:positive', this._context, this.getTrackData(), 1);
     } else if (feedback.kind === LDFeedbackKind.Negative) {
-      this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this._getTrackData(), 1);
+      this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1);
     }
   }
 
   trackSuccess(): void {
     this._trackedMetrics.success = true;
-    this._ldClient.track('$ld:ai:generation:success', this._context, this._getTrackData(), 1);
+    this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1);
   }
 
   trackError(): void {
     this._trackedMetrics.success = false;
-    this._ldClient.track('$ld:ai:generation:error', this._context, this._getTrackData(), 1);
+    this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1);
   }
 
   async trackMetricsOf(
@@ -290,7 +298,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
 
   trackTokens(tokens: LDTokenUsage): void {
     this._trackedMetrics.tokens = tokens;
-    const trackData = this._getTrackData();
+    const trackData = this.getTrackData();
     if (tokens.total > 0) {
       this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total);
     }
diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts
index 3f98474a62..c726df1f23 100644
--- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -1,8 +1,16 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { LDAIAgent, LDAIAgentConfig, LDAIAgentDefaults } from './agents'; import { TrackedChat } from './chat'; -import { LDAIConfig, LDAIDefaults } from './config/LDAIConfig'; +import { + LDAIAgentConfig, + LDAIAgentConfigDefault, + LDAIAgentRequestConfig, + LDAIConversationConfig, + LDAIConversationConfigDefault, + LDAIJudgeConfig, + LDAIJudgeConfigDefault, +} from './config'; +import { Judge } from './judge/Judge'; import { SupportedAIProvider } from './providers'; /** @@ -10,7 +18,7 @@ import { SupportedAIProvider } from './providers'; */ export interface LDAIClient { /** - * Retrieves and processes an AI Config based on the provided key, LaunchDarkly context, + * Retrieves and processes a completion AI Config based on the provided key, LaunchDarkly context, * and variables. This includes the model configuration and the customized messages. * * @param key The key of the AI Config. @@ -34,9 +42,11 @@ export interface LDAIClient { * const variables = {username: 'john'}; * const defaultValue = { * enabled: false, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, * }; * - * const result = config(key, context, defaultValue, variables); + * const result = completionConfig(key, context, defaultValue, variables); * // Output: * { * enabled: true, @@ -60,12 +70,22 @@ export interface LDAIClient { * } * ``` */ + completionConfig( + key: string, + context: LDContext, + defaultValue: LDAIConversationConfigDefault, + variables?: Record, + ): Promise; + + /** + * @deprecated Use `completionConfig` instead. This method will be removed in a future version. + */ config( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, - ): Promise; + ): Promise; /** * Retrieves and processes a single AI Config agent based on the provided key, LaunchDarkly context, @@ -89,21 +109,67 @@ export interface LDAIClient { * const key = "research_agent"; * const context = {...}; * const variables = { topic: 'climate change' }; - * const agent = await client.agent(key, context, { + * const agentConfig = await client.agentConfig(key, context, { * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, * instructions: 'You are a research assistant.', * }, variables); * - * const researchResult = agent.instructions; // Interpolated instructions - * agent.tracker.trackSuccess(); + * const researchResult = agentConfig.instructions; // Interpolated instructions + * agentConfig.tracker.trackSuccess(); * ``` */ + agentConfig( + key: string, + context: LDContext, + defaultValue: LDAIAgentConfigDefault, + variables?: Record, + ): Promise; + + /** + * @deprecated Use `agentConfig` instead. This method will be removed in a future version. + */ agent( key: string, context: LDContext, - defaultValue: LDAIAgentDefaults, + defaultValue: LDAIAgentConfigDefault, variables?: Record, - ): Promise; + ): Promise; + + /** + * Retrieves and processes a Judge AI Config based on the provided key, LaunchDarkly context, + * and variables. This includes the model configuration and the customized messages for evaluation. + * + * @param key The key of the Judge AI Config. + * @param context The LaunchDarkly context object that contains relevant information about the + * current environment, user, or session. 
This context may influence how the configuration is + * processed or personalized. + * @param defaultValue A fallback value containing model configuration and messages. This will + * be used if the configuration is not available from LaunchDarkly. + * @param variables Optional variables for template interpolation in messages and instructions. + * @returns A promise that resolves to a tracked judge configuration. + * + * @example + * ```typescript + * const judgeConf = await client.judgeConfig(key, context, { + * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, + * evaluationMetricKeys: ['$ld:ai:judge:relevance'], + * messages: [{ role: 'system', content: 'You are a relevance judge.' }] + * }, variables); + * + * const config = judgeConf.config; // Interpolated configuration + * judgeConf.tracker.trackSuccess(); + * ``` + */ + judgeConfig( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + ): Promise; /** * Retrieves and processes multiple AI Config agents based on the provided agent configurations @@ -122,32 +188,50 @@ export interface LDAIClient { * * @example * ``` - * const agentConfigs = [ + * const agentConfigsList = [ * { * key: 'research_agent', - * defaultValue: { enabled: true, instructions: 'You are a research assistant.' }, + * defaultValue: { + * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, + * instructions: 'You are a research assistant.' + * }, * variables: { topic: 'climate change' } * }, * { * key: 'writing_agent', - * defaultValue: { enabled: true, instructions: 'You are a writing assistant.' }, + * defaultValue: { + * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, + * instructions: 'You are a writing assistant.' + * }, * variables: { style: 'academic' } * } * ] as const; * const context = {...}; * - * const agents = await client.agents(agentConfigs, context); - * const researchResult = agents["research_agent"].instructions; // Interpolated instructions - * agents["research_agent"].tracker.trackSuccess(); + * const configs = await client.agentConfigs(agentConfigsList, context); + * const researchResult = configs["research_agent"].instructions; // Interpolated instructions + * configs["research_agent"].tracker.trackSuccess(); * ``` */ - agents( + agentConfigs( + agentConfigs: T, + context: LDContext, + ): Promise>; + + /** + * @deprecated Use `agentConfigs` instead. This method will be removed in a future version. + */ + agents( agentConfigs: T, context: LDContext, - ): Promise>; + ): Promise>; /** - * Initializes and returns a new TrackedChat instance for chat interactions. + * Returns a TrackedChat instance for chat interactions. * This method serves as the primary entry point for creating TrackedChat instances from configuration. * * @param key The key identifying the AI chat configuration to use. @@ -161,17 +245,16 @@ export interface LDAIClient { * const key = "customer_support_chat"; * const context = {...}; * const defaultValue = { - * config: { - * enabled: false, - * model: { name: "gpt-4" }, - * messages: [ - * { role: "system", content: "You are a helpful customer support agent." } - * ] - * } + * enabled: false, + * model: { name: "gpt-4" }, + * provider: { name: "openai" }, + * messages: [ + * { role: "system", content: "You are a helpful customer support agent." 
} + * ] * }; * const variables = { customerName: 'John' }; * - * const chat = await client.initChat(key, context, defaultValue, variables); + * const chat = await client.createChat(key, context, defaultValue, variables); * if (chat) { * const response = await chat.invoke("I need help with my order"); * console.log(response.message.content); @@ -182,11 +265,60 @@ export interface LDAIClient { * } * ``` */ + createChat( + key: string, + context: LDContext, + defaultValue: LDAIConversationConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise; + + /** + * @deprecated Use `createChat` instead. This method will be removed in a future version. + */ initChat( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise; + + /** + * Creates and returns a new Judge instance for AI evaluation. + * + * @param key The key identifying the AI judge configuration to use + * @param context Standard LDContext used when evaluating flags + * @param defaultValue A default value representing a standard AI config result + * @param variables Dictionary of values for instruction interpolation + * @returns Promise that resolves to a Judge instance or undefined if disabled/unsupported + * + * @example + * ``` + * const judge = await client.createJudge( + * "relevance-judge", + * context, + * { + * enabled: true, + * model: { name: "gpt-4" }, + * provider: { name: "openai" }, + * evaluationMetricKeys: ['$ld:ai:judge:relevance'], + * messages: [{ role: 'system', content: 'You are a relevance judge.' }] + * }, + * { metric: "relevance" } + * ); + * + * if (judge) { + * const result = await judge.evaluate("User question", "AI response"); + * console.log('Relevance score:', result.evals.relevance?.score); + * } + * ``` + */ + createJudge( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise; } diff --git a/packages/sdk/server-ai/src/api/agents/LDAIAgent.ts b/packages/sdk/server-ai/src/api/agents/LDAIAgent.ts deleted file mode 100644 index 722c2bcdb0..0000000000 --- a/packages/sdk/server-ai/src/api/agents/LDAIAgent.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { LDAIConfig } from '../config'; - -/** - * AI Config agent and tracker. - */ -export interface LDAIAgent extends Omit { - /** - * Instructions for the agent. - */ - instructions?: string; -} - -/** - * Configuration for a single agent request. - */ -export interface LDAIAgentConfig { - /** - * The agent key to retrieve. - */ - key: string; - - /** - * Default configuration for the agent. - */ - defaultValue: LDAIAgentDefaults; - - /** - * Variables for instructions interpolation. - */ - variables?: Record; -} - -/** - * Default values for an agent. 
- */ -export type LDAIAgentDefaults = Omit; diff --git a/packages/sdk/server-ai/src/api/agents/index.ts b/packages/sdk/server-ai/src/api/agents/index.ts deleted file mode 100644 index f68fcd9a24..0000000000 --- a/packages/sdk/server-ai/src/api/agents/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from './LDAIAgent'; diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 68a9af2f03..1c0fad2d1e 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -1,5 +1,9 @@ -import { LDAIConfig, LDMessage } from '../config/LDAIConfig'; +import { LDLogger } from '@launchdarkly/js-server-sdk-common'; + import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; +import { LDAIConversationConfig, LDMessage } from '../config/types'; +import { Judge } from '../judge/Judge'; +import { JudgeResponse } from '../judge/types'; import { AIProvider } from '../providers/AIProvider'; import { ChatResponse } from './types'; @@ -11,13 +15,19 @@ import { ChatResponse } from './types'; */ export class TrackedChat { protected messages: LDMessage[]; + protected judges: Record; + private readonly _logger?: LDLogger; constructor( - protected readonly aiConfig: LDAIConfig, + protected readonly aiConfig: LDAIConversationConfig, protected readonly tracker: LDAIConfigTracker, protected readonly provider: AIProvider, + judges?: Record, + logger?: LDLogger, ) { this.messages = []; + this.judges = judges || {}; + this._logger = logger; } /** @@ -45,13 +55,63 @@ export class TrackedChat { // Add the assistant response to the conversation history this.messages.push(response.message); + // Start judge evaluations if configured + if ( + this.aiConfig.judgeConfiguration?.judges && + this.aiConfig.judgeConfiguration.judges.length > 0 + ) { + response.evaluations = this._evaluateWithJudges(this.messages, response); + } + return response; } + /** + * Evaluates the response with all configured judges. + * Returns a promise that resolves to an array of evaluation results. + * + * @param messages Array of messages representing the conversation history + * @param response The AI response to be evaluated + * @returns Promise resolving to array of judge evaluation results + */ + private async _evaluateWithJudges( + messages: LDMessage[], + response: ChatResponse, + ): Promise> { + const judgeConfigs = this.aiConfig.judgeConfiguration!.judges; + + // Start all judge evaluations in parallel + const evaluationPromises = judgeConfigs.map(async (judgeConfig) => { + const judge = this.judges[judgeConfig.key]; + if (!judge) { + this._logger?.warn( + `Judge configuration is not enabled: ${judgeConfig.key}`, + this.tracker.getTrackData(), + ); + return undefined; + } + + const evalResult = await judge.evaluateMessages(messages, response, judgeConfig.samplingRate); + + // Track scores if evaluation was successful + if (evalResult && evalResult.success) { + this.tracker.trackEvalScores(evalResult.evals); + } + + return evalResult; + }); + + // Use Promise.allSettled to ensure all evaluations complete + // even if some fail + const results = await Promise.allSettled(evaluationPromises); + + return results.map((result) => (result.status === 'fulfilled' ? result.value : undefined)); + } + /** * Get the underlying AI configuration used to initialize this TrackedChat. 
*/ - getConfig(): LDAIConfig { + getConfig(): LDAIConversationConfig { return this.aiConfig; } @@ -70,6 +130,14 @@ export class TrackedChat { return this.provider; } + /** + * Get the judges associated with this TrackedChat. + * Returns a record of judge instances keyed by their configuration keys. + */ + getJudges(): Record { + return this.judges; + } + /** * Append messages to the conversation history. * Adds messages to the conversation history without invoking the model, diff --git a/packages/sdk/server-ai/src/api/chat/types.ts b/packages/sdk/server-ai/src/api/chat/types.ts index 804bb21453..5b32109fcf 100644 --- a/packages/sdk/server-ai/src/api/chat/types.ts +++ b/packages/sdk/server-ai/src/api/chat/types.ts @@ -1,4 +1,5 @@ -import { LDMessage } from '../config/LDAIConfig'; +import { LDMessage } from '../config/types'; +import { JudgeResponse } from '../judge/types'; import { LDAIMetrics } from '../metrics/LDAIMetrics'; /** @@ -14,4 +15,10 @@ export interface ChatResponse { * Metrics information including success status and token usage. */ metrics: LDAIMetrics; + + /** + * Promise that resolves to judge evaluation results. + * Only present when judges are configured for evaluation. + */ + evaluations?: Promise>; } diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfig.ts b/packages/sdk/server-ai/src/api/config/LDAIConfig.ts deleted file mode 100644 index 4f958f3d3a..0000000000 --- a/packages/sdk/server-ai/src/api/config/LDAIConfig.ts +++ /dev/null @@ -1,104 +0,0 @@ -import { LDAIConfigTracker } from './LDAIConfigTracker'; -import { VercelAISDKConfig, VercelAISDKMapOptions, VercelAISDKProvider } from './VercelAISDK'; - -/** - * Configuration related to the model. - */ -export interface LDModelConfig { - /** - * The ID of the model. - */ - name: string; - - /** - * Model specific parameters. - */ - parameters?: { [index: string]: unknown }; - - /** - * Additional user-specified parameters. - */ - custom?: { [index: string]: unknown }; -} - -export interface LDProviderConfig { - /** - * The name of the provider. - */ - name: string; -} - -/** - * Information about prompts. - */ -export interface LDMessage { - /** - * The role of the prompt. - */ - role: 'user' | 'assistant' | 'system'; - /** - * Content for the prompt. - */ - content: string; -} - -/** - * AI Config and tracker. - */ -export interface LDAIConfig { - /** - * Optional model configuration. - */ - model?: LDModelConfig; - /** - * Optional prompt data. - */ - messages?: LDMessage[]; - - /** - * Optional configuration for the provider. - */ - provider?: LDProviderConfig; - - /** - * A tracker which can be used to generate analytics. - */ - tracker: LDAIConfigTracker; - - /** - * Whether the configuration is enabled. - */ - enabled: boolean; - - /** - * Maps this AI config to a format usable direcly in Vercel AI SDK generateText() - * and streamText() methods. - * - * WARNING: this method can throw an exception if a Vercel AI SDK model cannot be determined. - * - * @deprecated Use `VercelProvider.toVercelAISDK()` from the `@launchdarkly/server-sdk-ai-vercel` package instead. - * This method will be removed in a future version. - * - * @param provider A Vercel AI SDK Provider or a map of provider names to Vercel AI SDK Providers. - * @param options Optional mapping options. - * @returns A configuration directly usable in Vercel AI SDK generateText() and streamText() - * @throws {Error} if a Vercel AI SDK model cannot be determined from the given provider parameter. 
- */ - toVercelAISDK: ( - provider: VercelAISDKProvider | Record>, - options?: VercelAISDKMapOptions | undefined, - ) => VercelAISDKConfig; -} - -/** - * Default value for a `modelConfig`. This is the same as the LDAIConfig, but it does not include - * a tracker or mapper, and `enabled` is optional. - */ -export type LDAIDefaults = Omit & { - /** - * Whether the configuration is enabled. - * - * defaults to false - */ - enabled?: boolean; -}; diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 491315e02b..ed1eed4d3c 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -1,3 +1,4 @@ +import { EvalScore } from '../judge/types'; import { LDAIMetrics, LDFeedbackKind, LDTokenUsage } from '../metrics'; /** @@ -34,6 +35,14 @@ export interface LDAIMetricSummary { * The LDAIConfigTracker is used to track various details about AI operations. */ export interface LDAIConfigTracker { + /** + * Get the data for tracking. + */ + getTrackData(): { + variationKey: string; + configKey: string; + version: number; + }; /** * Track the duration of generation. * @@ -74,6 +83,13 @@ export interface LDAIConfigTracker { */ trackTimeToFirstToken(timeToFirstTokenMs: number): void; + /** + * Track evaluation scores for multiple metrics. + * + * @param scores Record mapping metric keys to their evaluation scores + */ + trackEvalScores(scores: Record): void; + /** * Track the duration of execution of the provided function. * diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts new file mode 100644 index 0000000000..ad0ef344b3 --- /dev/null +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts @@ -0,0 +1,206 @@ +import { LDAIConfigTracker } from './LDAIConfigTracker'; +import { + LDAIAgentConfig, + LDAIConfigDefaultKind, + LDAIConfigMode, + LDAIConversationConfig, + LDAIJudgeConfig, + LDJudgeConfiguration, + LDMessage, + LDModelConfig, + LDProviderConfig, +} from './types'; + +/** + * Internal flag value structure returned by LaunchDarkly. + * This represents the raw data structure that LaunchDarkly returns for AI configuration flags. + * + * @internal - Not meant for external use + */ +export interface LDAIConfigFlagValue { + _ldMeta?: { + variationKey?: string; + enabled: boolean; + version?: number; + mode?: LDAIConfigMode; + }; + model?: LDModelConfig; + messages?: LDMessage[]; + provider?: LDProviderConfig; + instructions?: string; + evaluationMetricKeys?: string[]; + judgeConfiguration?: LDJudgeConfiguration; +} + +/** + * Utility class for converting between AI configuration types and LaunchDarkly flag values. + * + * @internal - This class and its types are internal implementation details and should not be used by SDK consumers. + */ +export class LDAIConfigUtils { + /** + * Converts a default AI configuration to a LaunchDarkly flag value format. + * + * @param config The default AI configuration to convert + * @param mode The mode for the configuration + * @returns The flag value structure for LaunchDarkly + */ + static toFlagValue(config: LDAIConfigDefaultKind, mode: LDAIConfigMode): LDAIConfigFlagValue { + const flagValue: LDAIConfigFlagValue = { + _ldMeta: { + variationKey: '', // Not available when converting from config + enabled: config.enabled ?? 
false, + mode, + }, + model: config.model, + }; + + if ('messages' in config && config.messages !== undefined) { + flagValue.messages = config.messages; + } + if (config.provider !== undefined) { + flagValue.provider = config.provider; + } + if ('instructions' in config && config.instructions !== undefined) { + flagValue.instructions = config.instructions; + } + if ('evaluationMetricKeys' in config && config.evaluationMetricKeys !== undefined) { + flagValue.evaluationMetricKeys = config.evaluationMetricKeys; + } + if ('judgeConfiguration' in config && config.judgeConfiguration !== undefined) { + flagValue.judgeConfiguration = config.judgeConfiguration; + } + + return flagValue; + } + + /** + * Converts a LaunchDarkly flag value to the appropriate AI configuration type. + * + * @param flagValue The flag value from LaunchDarkly + * @param tracker The tracker to add to the config + * @returns The appropriate AI configuration type + */ + static fromFlagValue( + flagValue: LDAIConfigFlagValue, + tracker: LDAIConfigTracker, + ): LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig { + // Determine the actual mode from flag value + // eslint-disable-next-line no-underscore-dangle + const flagValueMode = flagValue._ldMeta?.mode; + + // Convert to appropriate config type based on actual mode + switch (flagValueMode) { + case 'agent': + return this.toAgentConfig(flagValue, tracker); + case 'judge': + return this.toJudgeConfig(flagValue, tracker); + case 'completion': + default: + return this.toCompletionConfig(flagValue, tracker); + } + } + + /** + * Creates a disabled configuration of the specified mode. + * + * @param mode The mode for the disabled config + * @returns A disabled config of the appropriate type + */ + static createDisabledConfig( + mode: LDAIConfigMode, + ): LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig { + switch (mode) { + case 'agent': + return { + enabled: false, + tracker: undefined, + } as LDAIAgentConfig; + case 'judge': + return { + enabled: false, + tracker: undefined, + evaluationMetricKeys: [], + } as LDAIJudgeConfig; + case 'completion': + default: + // Default to completion config for completion mode or any unexpected mode + return { + enabled: false, + tracker: undefined, + } as LDAIConversationConfig; + } + } + + /** + * Creates the base configuration that all config types share. + * + * @param flagValue The flag value from LaunchDarkly + * @returns Base configuration object + */ + private static _toBaseConfig(flagValue: LDAIConfigFlagValue) { + return { + // eslint-disable-next-line no-underscore-dangle + enabled: flagValue._ldMeta?.enabled ?? false, + model: flagValue.model, + provider: flagValue.provider, + }; + } + + /** + * Creates a completion config from flag value data. + * + * @param flagValue The flag value from LaunchDarkly + * @param tracker The tracker to add to the config + * @returns A completion configuration + */ + static toCompletionConfig( + flagValue: LDAIConfigFlagValue, + tracker: LDAIConfigTracker, + ): LDAIConversationConfig { + return { + ...this._toBaseConfig(flagValue), + tracker, + messages: flagValue.messages, + judgeConfiguration: flagValue.judgeConfiguration, + }; + } + + /** + * Creates an agent config from flag value data. 
+   *
+   * @param flagValue The flag value from LaunchDarkly
+   * @param tracker The tracker to add to the config
+   * @returns An agent configuration
+   */
+  static toAgentConfig(
+    flagValue: LDAIConfigFlagValue,
+    tracker: LDAIConfigTracker,
+  ): LDAIAgentConfig {
+    return {
+      ...this._toBaseConfig(flagValue),
+      tracker,
+      instructions: flagValue.instructions,
+      judgeConfiguration: flagValue.judgeConfiguration,
+    };
+  }
+
+  /**
+   * Creates a judge config from flag value data.
+   *
+   * @param flagValue The flag value from LaunchDarkly
+   * @param tracker The tracker to add to the config
+   * @returns A judge configuration
+   */
+  static toJudgeConfig(
+    flagValue: LDAIConfigFlagValue,
+    tracker: LDAIConfigTracker,
+  ): LDAIJudgeConfig {
+    return {
+      ...this._toBaseConfig(flagValue),
+      tracker,
+      messages: flagValue.messages,
+      evaluationMetricKeys: flagValue.evaluationMetricKeys || [],
+    };
+  }
+}
diff --git a/packages/sdk/server-ai/src/api/config/VercelAISDK.ts b/packages/sdk/server-ai/src/api/config/VercelAISDK.ts
index d8491a6810..290ae45f7b 100644
--- a/packages/sdk/server-ai/src/api/config/VercelAISDK.ts
+++ b/packages/sdk/server-ai/src/api/config/VercelAISDK.ts
@@ -1,4 +1,4 @@
-import { type LDMessage } from './LDAIConfig';
+import { type LDMessage } from './types';
 
 /**
  * @deprecated Use `VercelAISDKProvider` from the `@launchdarkly/server-sdk-ai-vercel` package instead.
diff --git a/packages/sdk/server-ai/src/api/config/index.ts b/packages/sdk/server-ai/src/api/config/index.ts
index a3f3752908..54f799a135 100644
--- a/packages/sdk/server-ai/src/api/config/index.ts
+++ b/packages/sdk/server-ai/src/api/config/index.ts
@@ -1,3 +1,4 @@
-export * from './LDAIConfig';
+export * from './types';
+// LDAIConfigUtils is intentionally not exported - it's an internal utility class
 export * from './VercelAISDK';
 export { LDAIConfigTracker } from './LDAIConfigTracker';
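For context on how the internal converter behaves (`LDAIConfigUtils` is shown above but deliberately not exported), here is a sketch of the flag-value shape it consumes; the concrete values are hypothetical:

```typescript
// Hypothetical flag value as LaunchDarkly would serve it; _ldMeta.mode drives
// which config type LDAIConfigUtils.fromFlagValue() produces (internal API).
const rawFlagValue = {
  _ldMeta: { variationKey: 'v1', enabled: true, version: 1, mode: 'judge' as const },
  model: { name: 'gpt-4' },
  provider: { name: 'openai' },
  messages: [{ role: 'system' as const, content: 'You are a relevance judge.' }],
  evaluationMetricKeys: ['$ld:ai:judge:relevance'],
};
// With mode 'judge', fromFlagValue(rawFlagValue, tracker) returns an
// LDAIJudgeConfig; 'agent' and 'completion' map to their respective types.
```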
diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts
new file mode 100644
index 0000000000..880345b192
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/config/types.ts
@@ -0,0 +1,262 @@
+import { LDAIConfigTracker } from './LDAIConfigTracker';
+import { VercelAISDKConfig, VercelAISDKMapOptions, VercelAISDKProvider } from './VercelAISDK';
+
+/**
+ * Configuration related to the model.
+ */
+export interface LDModelConfig {
+  /**
+   * The ID of the model.
+   */
+  name: string;
+
+  /**
+   * Model specific parameters.
+   */
+  parameters?: { [index: string]: unknown };
+
+  /**
+   * Additional user-specified parameters.
+   */
+  custom?: { [index: string]: unknown };
+}
+
+export interface LDProviderConfig {
+  /**
+   * The name of the provider.
+   */
+  name: string;
+}
+
+/**
+ * Configuration for a single judge attachment.
+ */
+export interface LDJudge {
+  /** The key of the judge AI Config */
+  key: string;
+  /** Sampling rate for evaluation (0.0 to 1.0) */
+  samplingRate: number;
+}
+
+/**
+ * Configuration for judge attachment to AI Configs.
+ */
+export interface LDJudgeConfiguration {
+  /** Array of judge configurations */
+  judges: LDJudge[];
+}
+
+/**
+ * Base AI Config interface without mode-specific fields.
+ */
+export interface LDAIConfig extends Omit<LDAIConfigDefault, 'enabled'> {
+  /**
+   * Whether the configuration is enabled.
+   */
+  enabled: boolean;
+
+  /**
+   * A tracker which can be used to generate analytics.
+   * Undefined for disabled configs.
+   */
+  tracker?: LDAIConfigTracker;
+
+  /**
+   * Maps this AI config to a format usable directly in Vercel AI SDK generateText()
+   * and streamText() methods.
+   *
+   * WARNING: this method can throw an exception if a Vercel AI SDK model cannot be determined.
+   *
+   * @deprecated Use `VercelProvider.toVercelAISDK()` from the `@launchdarkly/server-sdk-ai-vercel` package instead.
+   * This method will be removed in a future version.
+   *
+   * @param provider A Vercel AI SDK Provider or a map of provider names to Vercel AI SDK Providers.
+   * @param options Optional mapping options.
+   * @returns A configuration directly usable in Vercel AI SDK generateText() and streamText()
+   * @throws {Error} if a Vercel AI SDK model cannot be determined from the given provider parameter.
+   */
+  toVercelAISDK?: <TMod>(
+    provider: VercelAISDKProvider<TMod> | Record<string, VercelAISDKProvider<TMod>>,
+    options?: VercelAISDKMapOptions | undefined,
+  ) => VercelAISDKConfig<TMod>;
+}
+
+/**
+ * Base AI Config interface for default implementations with optional enabled property.
+ */
+export interface LDAIConfigDefault {
+  /**
+   * Optional model configuration.
+   */
+  model?: LDModelConfig;
+
+  /**
+   * Optional configuration for the provider.
+   */
+  provider?: LDProviderConfig;
+
+  /**
+   * Whether the configuration is enabled. Defaults to false when not provided.
+   */
+  enabled?: boolean;
+}
+
+/**
+ * Default implementation types for AI Configs with optional enabled property.
+ */
+
+/**
+ * Default judge-specific AI Config with optional evaluation metric keys.
+ */
+export interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
+  /**
+   * Optional prompt data for judge configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Evaluation metric keys for judge configurations.
+   * The keys of the metrics that this judge can evaluate.
+   */
+  evaluationMetricKeys?: string[];
+}
+
+/**
+ * Default Agent-specific AI Config with instructions.
+ */
+export interface LDAIAgentConfigDefault extends LDAIConfigDefault {
+  /**
+   * Instructions for the agent.
+   */
+  instructions?: string;
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Default Completion AI Config (default mode).
+ */
+export interface LDAIConversationConfigDefault extends LDAIConfigDefault {
+  /**
+   * Optional prompt data for completion configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Non-default implementation types for AI Configs with a required enabled property and an
+ * optional tracker.
+ */
+
+/**
+ * Judge-specific AI Config with required evaluation metric keys.
+ */
+export interface LDAIJudgeConfig extends LDAIConfig {
+  /**
+   * Optional prompt data for judge configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Evaluation metric keys for judge configurations.
+   * The keys of the metrics that this judge can evaluate.
+   */
+  evaluationMetricKeys: string[];
+}
+
+/**
+ * Agent-specific AI Config with instructions.
+ */
+export interface LDAIAgentConfig extends LDAIConfig {
+  /**
+   * Instructions for the agent.
+   */
+  instructions?: string;
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Completion AI Config (default mode).
+ */
+export interface LDAIConversationConfig extends LDAIConfig {
+  /**
+   * Optional prompt data for completion configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Information about prompts.
+ */
+export interface LDMessage {
+  /**
+   * The role of the prompt.
+   */
+  role: 'user' | 'assistant' | 'system';
+  /**
+   * Content for the prompt.
+   */
+  content: string;
+}
+
+/**
+ * Union type for all AI Config variants.
+ */
+export type LDAIConfigKind = LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig;
+
+/**
+ * Union type for all default AI Config variants.
+ */
+export type LDAIConfigDefaultKind =
+  | LDAIConversationConfigDefault
+  | LDAIAgentConfigDefault
+  | LDAIJudgeConfigDefault;
+
+/**
+ * Configuration for a single agent request.
+ */
+export interface LDAIAgentRequestConfig {
+  /**
+   * The agent key to retrieve.
+   */
+  key: string;
+
+  /**
+   * Default configuration for the agent.
+   */
+  defaultValue: LDAIAgentConfigDefault;
+
+  /**
+   * Variables for instructions interpolation.
+   */
+  variables?: Record<string, unknown>;
+}
+
+/**
+ * AI Config agent interface (extends agent config without tracker and toVercelAISDK).
+ */
+export interface LDAIAgent extends Omit<LDAIAgentConfig, 'tracker' | 'toVercelAISDK'> {
+  /**
+   * Instructions for the agent.
+   */
+  instructions?: string;
+}
+
+/**
+ * Mode type for AI configurations.
+ */
+export type LDAIConfigMode = 'completion' | 'agent' | 'judge';
diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts
index cd27112f7a..2f289b8356 100644
--- a/packages/sdk/server-ai/src/api/index.ts
+++ b/packages/sdk/server-ai/src/api/index.ts
@@ -1,6 +1,6 @@
 export * from './config';
-export * from './agents';
 export * from './chat';
+export * from './judge';
 export * from './metrics';
 export * from './LDAIClient';
 export * from './providers';
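Taken together, the types above let a completion config opt into judge evaluation. A minimal sketch, assuming the package's published name and hypothetical config keys:

```typescript
import type { LDAIConversationConfigDefault } from '@launchdarkly/server-sdk-ai';

// A completion default that attaches a judge AI Config ('relevance-judge' is a
// hypothetical key) which evaluates roughly half of all responses.
const defaultValue: LDAIConversationConfigDefault = {
  enabled: false,
  model: { name: 'gpt-4' },
  provider: { name: 'openai' },
  messages: [{ role: 'system', content: 'You are a helpful assistant.' }],
  judgeConfiguration: {
    judges: [{ key: 'relevance-judge', samplingRate: 0.5 }],
  },
};
```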
diff --git a/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts b/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts
new file mode 100644
index 0000000000..16d9ce651d
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts
@@ -0,0 +1,54 @@
+/**
+ * Internal class for building dynamic evaluation response schemas.
+ * Not exported - only used internally by Judge.
+ */
+class EvaluationSchemaBuilder {
+  static build(evaluationMetricKeys: string[]): Record<string, unknown> {
+    return {
+      type: 'object',
+      properties: {
+        evaluations: {
+          type: 'object',
+          description: `Object containing evaluation results for ${evaluationMetricKeys.join(', ')} metrics`,
+          properties: this._buildKeyProperties(evaluationMetricKeys),
+          required: evaluationMetricKeys,
+          additionalProperties: false,
+        },
+      },
+      required: ['evaluations'],
+      additionalProperties: false,
+    } as const;
+  }
+
+  private static _buildKeyProperties(evaluationMetricKeys: string[]) {
+    return evaluationMetricKeys.reduce(
+      (acc, key) => {
+        acc[key] = this._buildKeySchema(key);
+        return acc;
+      },
+      {} as Record<string, unknown>,
+    );
+  }
+
+  private static _buildKeySchema(key: string) {
+    return {
+      type: 'object',
+      properties: {
+        score: {
+          type: 'number',
+          minimum: 0,
+          maximum: 1,
+          description: `Score between 0.0 and 1.0 for ${key}`,
+        },
+        reasoning: {
+          type: 'string',
+          description: `Reasoning behind the score for ${key}`,
+        },
+      },
+      required: ['score', 'reasoning'],
+      additionalProperties: false,
+    };
+  }
+}
+
+export { EvaluationSchemaBuilder };
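As a concrete illustration (the builder is internal and not exported; the two metric keys are hypothetical), the schema produced for `['relevance', 'accuracy']` mirrors the code above:

```typescript
const schema = EvaluationSchemaBuilder.build(['relevance', 'accuracy']);
// Roughly equivalent to:
// {
//   type: 'object',
//   properties: {
//     evaluations: {
//       type: 'object',
//       description: 'Object containing evaluation results for relevance, accuracy metrics',
//       properties: {
//         relevance: {
//           type: 'object',
//           properties: {
//             score: { type: 'number', minimum: 0, maximum: 1 },
//             reasoning: { type: 'string' },
//           },
//           required: ['score', 'reasoning'],
//           additionalProperties: false,
//         },
//         accuracy: { /* same shape as relevance */ },
//       },
//       required: ['relevance', 'accuracy'],
//       additionalProperties: false,
//     },
//   },
//   required: ['evaluations'],
//   additionalProperties: false,
// }
```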
diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts
new file mode 100644
index 0000000000..b4ea4c00a9
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/Judge.ts
@@ -0,0 +1,230 @@
+import * as Mustache from 'mustache';
+
+import { LDLogger } from '@launchdarkly/js-server-sdk-common';
+
+import { ChatResponse } from '../chat/types';
+import { LDAIConfigTracker } from '../config/LDAIConfigTracker';
+import { LDAIJudgeConfig, LDMessage } from '../config/types';
+import { AIProvider } from '../providers/AIProvider';
+import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder';
+import { EvalScore, JudgeResponse, StructuredResponse } from './types';
+
+/**
+ * Judge implementation that handles evaluation functionality and conversation management.
+ *
+ * According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate
+ * other AI Configs using structured output.
+ */
+export class Judge {
+  private readonly _logger?: LDLogger;
+  private readonly _evaluationResponseStructure: Record<string, unknown>;
+
+  constructor(
+    private readonly _aiConfig: LDAIJudgeConfig,
+    private readonly _aiConfigTracker: LDAIConfigTracker,
+    private readonly _aiProvider: AIProvider,
+    logger?: LDLogger,
+  ) {
+    this._logger = logger;
+    this._evaluationResponseStructure = EvaluationSchemaBuilder.build(
+      this._aiConfig.evaluationMetricKeys,
+    );
+  }
+
+  /**
+   * Evaluates an AI response using the judge's configuration.
+   *
+   * @param input The input prompt or question that was provided to the AI
+   * @param output The AI-generated response to be evaluated
+   * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
+   * @returns Promise that resolves to evaluation results or undefined if not sampled
+   */
+  async evaluate(
+    input: string,
+    output: string,
+    samplingRate: number = 1,
+  ): Promise<JudgeResponse | undefined> {
+    try {
+      // Check if judge configuration has evaluation metric keys
+      if (
+        !this._aiConfig.evaluationMetricKeys ||
+        this._aiConfig.evaluationMetricKeys.length === 0
+      ) {
+        this._logger?.warn(
+          'Judge configuration is missing required evaluationMetricKeys',
+          this._aiConfigTracker.getTrackData(),
+        );
+        return undefined;
+      }
+
+      // Check if judge configuration has messages before proceeding
+      if (!this._aiConfig.messages) {
+        this._logger?.warn(
+          'Judge configuration must include messages',
+          this._aiConfigTracker.getTrackData(),
+        );
+        return undefined;
+      }
+
+      // Apply sampling
+      if (Math.random() > samplingRate) {
+        this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`);
+        return undefined;
+      }
+
+      // Construct evaluation messages by combining judge's config messages with input/output
+      const messages = this._constructEvaluationMessages(input, output);
+
+      // Delegate to provider-specific implementation with tracking
+      const response = await this._aiConfigTracker.trackMetricsOf(
+        (result: StructuredResponse) => result.metrics,
+        () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure),
+      );
+
+      let { success } = response.metrics;
+
+      // Parse the structured response
+      const evals = this._parseEvaluationResponse(response.data);
+
+      // Mark the result as unsuccessful if any expected evaluation is missing
+      if (Object.keys(evals).length !== this._aiConfig.evaluationMetricKeys.length) {
+        this._logger?.warn(
+          'Judge evaluation did not return all evaluations',
+          this._aiConfigTracker.getTrackData(),
+        );
+        success = false;
+      }
+
+      return {
+        evals,
+        success,
+      };
+    } catch (error) {
+      this._logger?.error('Judge evaluation failed:', error);
+      return {
+        evals: {},
+        success: false,
+        error: error instanceof Error ? error.message : 'Unknown error',
+      };
+    }
+  }
+
+  /**
+   * Evaluates an AI response from chat messages and response.
+   *
+   * @param messages Array of messages representing the conversation history
+   * @param response The AI response to be evaluated
+   * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
+   * @returns Promise that resolves to evaluation results or undefined if not sampled
+   */
+  async evaluateMessages(
+    messages: LDMessage[],
+    response: ChatResponse,
+    samplingRate: number = 1,
+  ): Promise<JudgeResponse | undefined> {
+    // Convert messages to text and extract output from response
+    const input = messages.length === 0 ? '' : messages.map((msg) => msg.content).join('\r\n');
+    const output = response.message.content;
+
+    // Delegate to standard evaluate method
+    return this.evaluate(input, output, samplingRate);
+  }
+
+  /**
+   * Returns the AI Config used by this judge.
+   */
+  getAIConfig(): LDAIJudgeConfig {
+    return this._aiConfig;
+  }
+
+  /**
+   * Returns the tracker associated with this judge.
+   */
+  getTracker(): LDAIConfigTracker {
+    return this._aiConfigTracker;
+  }
+
+  /**
+   * Returns the AI provider used by this judge.
+   */
+  getProvider(): AIProvider {
+    return this._aiProvider;
+  }
+
+  /**
+   * Constructs evaluation messages by combining judge's config messages with input/output.
+   */
+  private _constructEvaluationMessages(input: string, output: string): LDMessage[] {
+    // Create a copy of the judge's messages and interpolate input/output variables
+    const messages: LDMessage[] = this._aiConfig.messages!.map((msg) => ({
+      ...msg,
+      content: this._interpolateMessage(msg.content, {
+        message_history: input,
+        response_to_evaluate: output,
+      }),
+    }));
+
+    return messages;
+  }
+
+  /**
+   * Interpolates message content with variables using Mustache templating.
+   */
+  private _interpolateMessage(content: string, variables: Record<string, unknown>): string {
+    return Mustache.render(content, variables, undefined, { escape: (item: any) => item });
+  }
+
+  /**
+   * Parses the structured evaluation response from the AI provider.
+   */
+  private _parseEvaluationResponse(data: Record<string, unknown>): Record<string, EvalScore> {
+    const results: Record<string, EvalScore> = {};
+
+    // Validate that the data has the required evaluations structure
+    if (!data.evaluations || typeof data.evaluations !== 'object') {
+      this._logger?.warn('Invalid response: missing or invalid evaluations object');
+      return results;
+    }
+
+    const evaluations = data.evaluations as Record<string, unknown>;
+
+    // Process each expected evaluation metric key
+    this._aiConfig.evaluationMetricKeys.forEach((metricKey) => {
+      const evaluation = evaluations[metricKey];
+
+      if (!evaluation || typeof evaluation !== 'object') {
+        this._logger?.warn(
+          `Missing evaluation for metric key: ${metricKey}`,
+          this._aiConfigTracker.getTrackData(),
+        );
+        return;
+      }
+
+      const evalData = evaluation as Record<string, unknown>;
+
+      // Validate score
+      if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) {
+        this._logger?.warn(
+          `Invalid score evaluated for ${metricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`,
+          this._aiConfigTracker.getTrackData(),
+        );
+        return;
+      }
+
+      // Validate reasoning
+      if (typeof evalData.reasoning !== 'string') {
+        this._logger?.warn(
+          `Invalid reasoning evaluated for ${metricKey}: ${evalData.reasoning}. Reasoning must be a string`,
+          this._aiConfigTracker.getTrackData(),
+        );
+        return;
+      }
+
+      // Create the EvalScore object
+      results[metricKey] = {
+        score: evalData.score,
+        reasoning: evalData.reasoning,
+      };
+    });
+
+    return results;
+  }
+}
diff --git a/packages/sdk/server-ai/src/api/judge/index.ts b/packages/sdk/server-ai/src/api/judge/index.ts
new file mode 100644
index 0000000000..912ec47fb0
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/index.ts
@@ -0,0 +1,2 @@
+export { Judge } from './Judge';
+export type { EvalScore, JudgeResponse, StructuredResponse } from './types';
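A brief consumption sketch for these exports; the `judge` instance is assumed to come from `client.createJudge(...)` as documented earlier, and the import path assumes the package's published name:

```typescript
import type { Judge, JudgeResponse } from '@launchdarkly/server-sdk-ai';

// Evaluate a single input/output pair and log per-metric scores.
async function logEvaluation(judge: Judge): Promise<void> {
  const result: JudgeResponse | undefined = await judge.evaluate(
    'What is the capital of France?',
    'Paris is the capital of France.',
    0.5, // sample roughly half of all calls
  );

  if (result?.success) {
    Object.entries(result.evals).forEach(([metric, { score, reasoning }]) => {
      console.log(`${metric}: ${score} (${reasoning})`);
    });
  }
}
```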
diff --git a/packages/sdk/server-ai/src/api/judge/types.ts b/packages/sdk/server-ai/src/api/judge/types.ts
new file mode 100644
index 0000000000..a265506b17
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/types.ts
@@ -0,0 +1,39 @@
+import { LDAIMetrics } from '../metrics/LDAIMetrics';
+
+/**
+ * Structured response from AI models.
+ */
+export interface StructuredResponse {
+  /** The structured data returned by the model */
+  data: Record<string, unknown>;
+
+  /** The raw response from the model */
+  rawResponse: string;
+
+  /**
+   * Metrics information including success status and token usage.
+   */
+  metrics: LDAIMetrics;
+}
+
+/**
+ * Score and reasoning for a single evaluation metric.
+ */
+export interface EvalScore {
+  /** Score between 0.0 and 1.0 indicating the evaluation result for this metric */
+  score: number;
+  /** Reasoning behind the provided score for this metric */
+  reasoning: string;
+}
+
+/**
+ * Response from a judge evaluation containing scores and reasoning for multiple metrics.
+ */
+export interface JudgeResponse {
+  /** Dictionary where keys are metric names and values contain score and reasoning */
+  evals: Record<string, EvalScore>;
+  /** Whether the evaluation completed successfully */
+  success: boolean;
+  /** Error message if evaluation failed */
+  error?: string;
+}
diff --git a/packages/sdk/server-ai/src/api/providers/AIProvider.ts b/packages/sdk/server-ai/src/api/providers/AIProvider.ts
index 8f6475ef5e..ac039ea194 100644
--- a/packages/sdk/server-ai/src/api/providers/AIProvider.ts
+++ b/packages/sdk/server-ai/src/api/providers/AIProvider.ts
@@ -1,7 +1,8 @@
 import { LDLogger } from '@launchdarkly/js-server-sdk-common';
 
 import { ChatResponse } from '../chat/types';
-import { LDAIConfig, LDMessage } from '../config/LDAIConfig';
+import { LDAIConfig, LDMessage } from '../config/types';
+import { StructuredResponse } from '../judge/types';
 
 /**
  * Abstract base class for AI providers that implement chat model functionality.
@@ -22,10 +23,60 @@
    * This method should convert messages to provider format, invoke the model,
    * and return a ChatResponse with the result and metrics.
    *
+   * Default implementation takes no action and returns a placeholder response.
+   * Provider implementations should override this method.
+   *
    * @param messages Array of LDMessage objects representing the conversation
    * @returns Promise that resolves to a ChatResponse containing the model's response
    */
-  abstract invokeModel(messages: LDMessage[]): Promise<ChatResponse>;
+  async invokeModel(_messages: LDMessage[]): Promise<ChatResponse> {
+    this.logger?.warn('invokeModel not implemented by this provider');
+    return {
+      message: {
+        role: 'assistant',
+        content: '',
+      },
+      metrics: {
+        success: false,
+        usage: {
+          total: 0,
+          input: 0,
+          output: 0,
+        },
+      },
+    };
+  }
+
+  /**
+   * Invoke the chat model with structured output support.
+   * This method should convert messages to provider format, invoke the model with
+   * structured output configuration, and return a structured response.
+   *
+   * Default implementation takes no action and returns a placeholder response.
+   * Provider implementations should override this method.
+   *
+   * @param messages Array of LDMessage objects representing the conversation
+   * @param responseStructure A schema-like object describing the expected structured output
+   * @returns Promise that resolves to a structured response
+   */
+  async invokeStructuredModel(
+    _messages: LDMessage[],
+    _responseStructure: Record<string, unknown>,
+  ): Promise<StructuredResponse> {
+    this.logger?.warn('invokeStructuredModel not implemented by this provider');
+    return {
+      data: {},
+      rawResponse: '',
+      metrics: {
+        success: false,
+        usage: {
+          total: 0,
+          input: 0,
+          output: 0,
+        },
+      },
+    };
+  }
 
   /**
    * Static method that constructs an instance of the provider.
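A minimal sketch of a custom provider overriding the new hook; the class name and fixed payload are hypothetical, and constructor details are omitted because they depend on the concrete provider package:

```typescript
import { AIProvider, LDMessage, StructuredResponse } from '@launchdarkly/server-sdk-ai';

// Hypothetical provider: a real implementation would call its model's
// structured-output API and parse the result against responseStructure.
export class StaticJudgeProvider extends AIProvider {
  async invokeStructuredModel(
    _messages: LDMessage[],
    _responseStructure: Record<string, unknown>,
  ): Promise<StructuredResponse> {
    const data = { evaluations: {} }; // a real provider returns model output matching the schema
    return {
      data,
      rawResponse: JSON.stringify(data),
      metrics: { success: true, usage: { total: 0, input: 0, output: 0 } },
    };
  }
}
```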
diff --git a/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts b/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts index ab3546a35e..0d33eb69a4 100644 --- a/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts +++ b/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts @@ -1,6 +1,6 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIConfig } from '../config/LDAIConfig'; +import { LDAIConfigKind } from '../config/types'; import { AIProvider } from './AIProvider'; /** @@ -32,7 +32,7 @@ export class AIProviderFactory { * @param defaultAiProvider Optional default AI provider to use */ static async create( - aiConfig: LDAIConfig, + aiConfig: LDAIConfigKind, logger?: LDLogger, defaultAiProvider?: SupportedAIProvider, ): Promise { @@ -91,7 +91,7 @@ export class AIProviderFactory { */ private static async _tryCreateProvider( providerType: SupportedAIProvider, - aiConfig: LDAIConfig, + aiConfig: LDAIConfigKind, logger?: LDLogger, ): Promise { switch (providerType) { @@ -127,7 +127,7 @@ export class AIProviderFactory { private static async _createProvider( packageName: string, providerClassName: string, - aiConfig: LDAIConfig, + aiConfig: LDAIConfigKind, logger?: LDLogger, ): Promise { try {
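To tie the pieces together, an illustrative end-to-end sketch based on the interface docs above; it assumes an initialized `client` and `context` inside an async context, and the config key, judge key, and variables are hypothetical:

```typescript
const chat = await client.createChat(
  'customer_support_chat',
  context,
  {
    enabled: false,
    model: { name: 'gpt-4' },
    provider: { name: 'openai' },
    messages: [{ role: 'system', content: 'You are a helpful customer support agent.' }],
    judgeConfiguration: { judges: [{ key: 'relevance-judge', samplingRate: 1 }] },
  },
  { customerName: 'John' },
);

if (chat) {
  const response = await chat.invoke('I need help with my order');
  console.log(response.message.content);

  // Judge evaluations run in the background; await the promise when the
  // scores are needed. Entries are undefined for judges that were skipped.
  const evaluations = await response.evaluations;
  evaluations?.forEach((evaluation) => {
    if (evaluation?.success) {
      console.log(evaluation.evals);
    }
  });
}
```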