diff --git a/packages/sdk/server-ai/README.md b/packages/sdk/server-ai/README.md
index 62a0b92425..c91170e41f 100644
--- a/packages/sdk/server-ai/README.md
+++ b/packages/sdk/server-ai/README.md
@@ -97,7 +97,7 @@ if (aiConfig.enabled) {
 ```typescript
 // Use the same defaultConfig from the retrieval section above
-const chat = await aiClient.initChat(
+const chat = await aiClient.createChat(
   'customer-support-chat',
   context,
   defaultConfig,
diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts
new file mode 100644
index 0000000000..1144f119f8
--- /dev/null
+++ b/packages/sdk/server-ai/__tests__/Judge.test.ts
@@ -0,0 +1,497 @@
+import { LDLogger } from '@launchdarkly/js-server-sdk-common';
+
+import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker';
+import { LDAIJudgeConfig, LDMessage } from '../src/api/config/types';
+import { Judge } from '../src/api/judge/Judge';
+import { StructuredResponse } from '../src/api/judge/types';
+import { AIProvider } from '../src/api/providers/AIProvider';
+
+describe('Judge', () => {
+  let mockProvider: jest.Mocked<AIProvider>;
+  let mockTracker: jest.Mocked<LDAIConfigTracker>;
+  let mockLogger: jest.Mocked<LDLogger>;
+  let judgeConfig: LDAIJudgeConfig;
+
+  const mockTrackData = {
+    variationKey: 'test-variation',
+    configKey: 'test-config',
+    version: 1,
+  };
+
+  beforeEach(() => {
+    // Mock the AIProvider - only mock what's actually used
+    mockProvider = {
+      invokeStructuredModel: jest.fn(),
+    } as any;
+
+    // Mock the LDAIConfigTracker - only mock what's actually used
+    mockTracker = {
+      trackMetricsOf: jest.fn(),
+      getTrackData: jest.fn().mockReturnValue(mockTrackData),
+    } as any;
+
+    // Mock the logger - only mock what's actually used
+    mockLogger = {
+      debug: jest.fn(),
+      warn: jest.fn(),
+      error: jest.fn(),
+    } as any;
+
+    // Create a basic judge config
+    judgeConfig = {
+      enabled: true,
+      messages: [
+        { role: 'system', content: 'You are a helpful judge that evaluates AI responses.'
}, + { + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: {{message_history}}, Output: {{response_to_evaluate}}', + }, + ], + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + tracker: mockTracker, + evaluationMetricKeys: ['relevance', 'accuracy', 'helpfulness'], + toVercelAISDK: jest.fn(), + }; + }); + + describe('constructor', () => { + it('initializes with proper configuration', () => { + const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + + expect(judge).toBeDefined(); + }); + }); + + describe('evaluate', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('evaluates AI response successfully', async () => { + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }), + metrics: { + success: true, + usage: { + total: 100, + input: 50, + output: 50, + }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate( + 'What is the capital of France?', + 'Paris is the capital of France.', + ); + + expect(result).toEqual({ + evals: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + success: true, + }); + + expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + role: 'system', + content: 'You are a helpful judge that evaluates AI responses.', + }), + expect.objectContaining({ + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: What is the capital of France?, Output: Paris is the capital of France.', + }), + ]), + expect.any(Object), // evaluation response structure + ); + }); + + it('handles sampling rate correctly', async () => { + // Mock Math.random to return 0.3 (should be sampled with rate 0.5 since 0.3 <= 0.5) + const originalRandom = Math.random; + Math.random = jest.fn().mockReturnValue(0.3); + + // Mock the structured response + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }), + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + 
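+      // Note (inferred from this suite): trackMetricsOf is mocked as a pass-through,
+      // so the provider call runs unwrapped; the sampling check is what gates whether
+      // invokeStructuredModel is reached at all (the judge runs only when
+      // Math.random() <= samplingRate, otherwise evaluate() resolves to undefined).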
mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate('test input', 'test output', 0.5); + + expect(result).toBeDefined(); + expect(mockProvider.invokeStructuredModel).toHaveBeenCalled(); + + Math.random = originalRandom; + }); + + it('returns undefined when not sampled', async () => { + // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5) + const originalRandom = Math.random; + Math.random = jest.fn().mockReturnValue(0.8); + + const result = await judge.evaluate('test input', 'test output', 0.5); + + expect(result).toBeUndefined(); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + expect(mockLogger.debug).toHaveBeenCalledWith( + 'Judge evaluation skipped due to sampling rate: 0.5', + ); + + Math.random = originalRandom; + }); + + it('returns undefined when evaluationMetricKeys is empty', async () => { + const configWithoutMetrics: LDAIJudgeConfig = { + ...judgeConfig, + evaluationMetricKeys: [], + }; + const judgeWithoutMetrics = new Judge( + configWithoutMetrics, + mockTracker, + mockProvider, + mockLogger, + ); + + const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); + + expect(result).toBeUndefined(); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Judge configuration is missing required evaluationMetricKeys', + mockTrackData, + ); + }); + + it('returns undefined when messages are missing', async () => { + const configWithoutMessages: LDAIJudgeConfig = { + ...judgeConfig, + messages: undefined, + }; + const judgeWithoutMessages = new Judge( + configWithoutMessages, + mockTracker, + mockProvider, + mockLogger, + ); + + const result = await judgeWithoutMessages.evaluate('test input', 'test output'); + + expect(result).toBeUndefined(); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Judge configuration must include messages', + mockTrackData, + ); + }); + + it('returns partial evaluations when some metrics are missing', async () => { + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + // accuracy is missing + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }), + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate('test input', 'test output'); + + // When one metric is missing, it returns the partial evals it has with success: false + expect(result).toEqual({ + evals: { + relevance: { score: 0.8, reasoning: 'Good' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + success: false, + }); + }); + + it('returns empty evaluations when response structure is malformed', async () => { + const mockStructuredResponse: StructuredResponse = { + data: { + // Missing 'evaluations' wrapper - malformed structure + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + rawResponse: JSON.stringify({ + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }), + metrics: 
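+        /* Shape note (inferred from this suite): `metrics` mirrors the provider call
+           result that the trackMetricsOf extractor reads — a success flag plus token
+           usage (total/input/output). */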
{ + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluate('test input', 'test output'); + + // When the structure is completely wrong, returns empty evals with success: false + expect(result).toEqual({ + evals: {}, + success: false, + }); + }); + + it('handles provider errors gracefully', async () => { + const error = new Error('Provider error'); + mockTracker.trackMetricsOf.mockRejectedValue(error); + + const result = await judge.evaluate('test input', 'test output'); + + expect(result).toEqual({ + evals: {}, + success: false, + error: 'Provider error', + }); + expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error); + }); + + it('handles non-Error exceptions', async () => { + mockTracker.trackMetricsOf.mockRejectedValue('String error'); + + const result = await judge.evaluate('test input', 'test output'); + + expect(result).toEqual({ + evals: {}, + success: false, + error: 'Unknown error', + }); + }); + }); + + describe('evaluateMessages', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('evaluates messages and response successfully', async () => { + const messages: LDMessage[] = [ + { role: 'user', content: 'What is the capital of France?' }, + { role: 'assistant', content: 'Paris is the capital of France.' }, + ]; + const response = { + message: { role: 'assistant' as const, content: 'Paris is the capital of France.' }, + metrics: { success: true }, + }; + + const mockStructuredResponse: StructuredResponse = { + data: { + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }, + rawResponse: JSON.stringify({ + evaluations: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + }), + metrics: { + success: true, + usage: { total: 100, input: 50, output: 50 }, + }, + }; + + mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func()); + mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse); + + const result = await judge.evaluateMessages(messages, response); + + expect(result).toEqual({ + evals: { + relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, + accuracy: { score: 0.9, reasoning: 'The response is factually accurate' }, + helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' }, + }, + success: true, + }); + + expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + role: 'system', + content: 'You are a helpful judge that evaluates AI responses.', + }), + expect.objectContaining({ + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: What is the capital of France?\r\nParis is the capital of France., Output: Paris is the capital of France.', + }), + ]), + expect.any(Object), // evaluation response structure + ); + }); + + it('handles sampling rate 
correctly', async () => { + const messages: LDMessage[] = [{ role: 'user', content: 'test' }]; + const response = { + message: { role: 'assistant' as const, content: 'test response' }, + metrics: { success: true }, + }; + + // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5) + const originalRandom = Math.random; + Math.random = jest.fn().mockReturnValue(0.8); + + const result = await judge.evaluateMessages(messages, response, 0.5); + + expect(result).toBeUndefined(); + expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); + + Math.random = originalRandom; + }); + }); + + describe('_constructEvaluationMessages', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('constructs evaluation messages correctly', () => { + // Access private method for testing + // eslint-disable-next-line no-underscore-dangle + const constructMessages = (judge as any)._constructEvaluationMessages.bind(judge); + const messages = constructMessages('test input', 'test output'); + + expect(messages).toHaveLength(2); + expect(messages[0]).toEqual({ + role: 'system', + content: 'You are a helpful judge that evaluates AI responses.', + }); + expect(messages[1]).toEqual({ + role: 'user', + content: + 'Evaluate and report scores for important metrics: Input: test input, Output: test output', + }); + }); + }); + + describe('_parseEvaluationResponse', () => { + let judge: Judge; + + beforeEach(() => { + judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + }); + + it('parses valid evaluation response correctly', () => { + // eslint-disable-next-line no-underscore-dangle + const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); + const responseData = { + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }; + + const result = parseResponse(responseData); + + expect(result).toEqual({ + relevance: { score: 0.8, reasoning: 'Good' }, + accuracy: { score: 0.9, reasoning: 'Accurate' }, + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }); + }); + + it('returns empty object for invalid response data', () => { + // eslint-disable-next-line no-underscore-dangle + const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); + const responseData = { + relevance: { score: 0.8, reasoning: 'Good' }, + // Missing evaluations wrapper - invalid structure + }; + + const result = parseResponse(responseData); + + // Returns empty object when evaluations structure is missing + expect(result).toEqual({}); + }); + + it('handles missing score or reasoning fields', () => { + // eslint-disable-next-line no-underscore-dangle + const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); + const responseData = { + evaluations: { + relevance: { score: 0.8 }, // Missing reasoning + accuracy: { reasoning: 'Accurate' }, // Missing score + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }, + }; + + const result = parseResponse(responseData); + + // Only helpfulness passes validation, relevance and accuracy are skipped + expect(result).toEqual({ + helpfulness: { score: 0.7, reasoning: 'Helpful' }, + }); + }); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 6badffd94a..c1d40727f9 100644 --- 
a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -1,21 +1,35 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { LDAIAgentDefaults } from '../src/api/agents'; -import { LDAIDefaults } from '../src/api/config'; +import { + LDAIAgentConfigDefault, + LDAIConversationConfigDefault, + LDAIJudgeConfigDefault, +} from '../src/api/config/types'; +import { Judge } from '../src/api/judge/Judge'; +import { AIProviderFactory } from '../src/api/providers/AIProviderFactory'; import { LDAIClientImpl } from '../src/LDAIClientImpl'; import { LDClientMin } from '../src/LDClientMin'; +// Mock Judge and AIProviderFactory +jest.mock('../src/api/judge/Judge'); +jest.mock('../src/api/providers/AIProviderFactory'); + const mockLdClient: jest.Mocked = { variation: jest.fn(), track: jest.fn(), }; +// Reset mocks before each test +beforeEach(() => { + jest.clearAllMocks(); +}); + const testContext: LDContext = { kind: 'user', key: 'test-user' }; it('returns config with interpolated messages', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, messages: [], enabled: true, @@ -36,6 +50,7 @@ it('returns config with interpolated messages', async () => { _ldMeta: { variationKey: 'v1', enabled: true, + mode: 'completion', }, }; @@ -73,14 +88,14 @@ it('returns config with interpolated messages', async () => { it('includes context in variables for messages interpolation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, messages: [], }; const mockVariation = { messages: [{ role: 'system', content: 'User key: {{ldctx.key}}' }], - _ldMeta: { variationKey: 'v1', enabled: true }, + _ldMeta: { variationKey: 'v1', enabled: true, mode: 'completion' }, }; mockLdClient.variation.mockResolvedValue(mockVariation); @@ -94,7 +109,7 @@ it('includes context in variables for messages interpolation', async () => { it('handles missing metadata in variation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, messages: [], }; @@ -108,11 +123,10 @@ it('handles missing metadata in variation', async () => { const result = await client.config(key, testContext, defaultValue); + // When metadata/mode is missing, a disabled config is returned expect(result).toEqual({ - model: { name: 'example-provider', parameters: { name: 'imagination' } }, - messages: [{ role: 'system', content: 'Hello' }], - tracker: expect.any(Object), enabled: false, + tracker: undefined, toVercelAISDK: expect.any(Function), }); }); @@ -120,7 +134,7 @@ it('handles missing metadata in variation', async () => { it('passes the default value to the underlying client', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'non-existent-flag'; - const defaultValue: LDAIDefaults = { + const defaultValue: LDAIConversationConfigDefault = { model: { name: 'default-model', parameters: { name: 'default' } }, provider: { name: 'default-provider' }, messages: [{ role: 'system', content: 'Default 
messages' }], @@ -128,7 +142,7 @@ it('passes the default value to the underlying client', async () => { }; const expectedLDFlagValue = { - _ldMeta: { enabled: true }, + _ldMeta: { enabled: true, mode: 'completion', variationKey: '' }, model: defaultValue.model, messages: defaultValue.messages, provider: defaultValue.provider, @@ -154,7 +168,7 @@ it('passes the default value to the underlying client', async () => { it('returns single agent config with interpolated instructions', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, instructions: 'You are a helpful assistant.', enabled: true, @@ -206,7 +220,7 @@ it('returns single agent config with interpolated instructions', async () => { it('includes context in variables for agent instructions interpolation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, instructions: 'You are a helpful assistant.', enabled: true, @@ -227,7 +241,7 @@ it('includes context in variables for agent instructions interpolation', async ( it('handles missing metadata in agent variation', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'test-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'test', parameters: { name: 'test-model' } }, instructions: 'You are a helpful assistant.', enabled: true, @@ -242,18 +256,17 @@ it('handles missing metadata in agent variation', async () => { const result = await client.agent(key, testContext, defaultValue); + // When metadata/mode is missing, a disabled config is returned expect(result).toEqual({ - model: { name: 'example-provider', parameters: { name: 'imagination' } }, - instructions: 'Hello.', - tracker: expect.any(Object), enabled: false, + tracker: undefined, }); }); it('passes the default value to the underlying client for single agent', async () => { const client = new LDAIClientImpl(mockLdClient); const key = 'non-existent-agent'; - const defaultValue: LDAIAgentDefaults = { + const defaultValue: LDAIAgentConfigDefault = { model: { name: 'default-model', parameters: { name: 'default' } }, provider: { name: 'default-provider' }, instructions: 'Default instructions', @@ -261,7 +274,7 @@ it('passes the default value to the underlying client for single agent', async ( }; const expectedLDFlagValue = { - _ldMeta: { enabled: defaultValue.enabled }, + _ldMeta: { enabled: defaultValue.enabled, mode: 'agent', variationKey: '' }, model: defaultValue.model, provider: defaultValue.provider, instructions: defaultValue.instructions, @@ -380,3 +393,227 @@ it('handles empty agent configs array', async () => { 0, ); }); + +// New judgeConfig-related tests +describe('judgeConfig method', () => { + it('retrieves judge configuration successfully', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system', content: 'You are a judge.' 
}], + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + // Mock the _evaluate method + const evaluateSpy = jest.spyOn(client as any, '_evaluate'); + evaluateSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.judgeConfig(key, testContext, defaultValue); + + expect(mockLdClient.track).toHaveBeenCalledWith( + '$ld:ai:judge:function:single', + testContext, + key, + 1, + ); + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); + expect(result).toBe(mockJudgeConfig); + }); + + it('handles variables parameter', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + const variables = { metric: 'relevance' }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + const evaluateSpy = jest.spyOn(client as any, '_evaluate'); + evaluateSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.judgeConfig(key, testContext, defaultValue, variables); + + expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', variables); + expect(result).toBe(mockJudgeConfig); + }); +}); + +describe('createJudge method', () => { + let mockProvider: jest.Mocked; + let mockJudge: jest.Mocked; + + beforeEach(() => { + mockProvider = { + invokeStructuredModel: jest.fn(), + }; + + mockJudge = { + evaluate: jest.fn(), + evaluateMessages: jest.fn(), + } as any; + + // Mock AIProviderFactory.create + (AIProviderFactory.create as jest.Mock).mockResolvedValue(mockProvider); + + // Mock Judge constructor + (Judge as jest.MockedClass).mockImplementation(() => mockJudge); + }); + + it('initializes judge successfully', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance', 'accuracy'], + messages: [{ role: 'system' as const, content: 'You are a judge.' 
}], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + // Mock the judgeConfig method + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(mockLdClient.track).toHaveBeenCalledWith( + '$ld:ai:judge:function:createJudge', + testContext, + key, + 1, + ); + expect(judgeConfigSpy).toHaveBeenCalledWith(key, testContext, defaultValue, { + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }); + expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); + expect(Judge).toHaveBeenCalledWith( + mockJudgeConfig, + mockJudgeConfig.tracker, + mockProvider, + undefined, + ); + expect(result).toBe(mockJudge); + }); + + it('returns undefined when judge configuration is disabled', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: false, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + + const mockJudgeConfig = { + enabled: false, // This should be false to test disabled case + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: undefined, // No tracker for disabled config + toVercelAISDK: jest.fn(), + }; + + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockResolvedValue(mockJudgeConfig); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(result).toBeUndefined(); + expect(AIProviderFactory.create).not.toHaveBeenCalled(); + expect(Judge).not.toHaveBeenCalled(); + }); + + it('returns undefined when AIProviderFactory.create fails', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' }], + }; + + const mockJudgeConfig = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system' as const, content: 'You are a judge.' }], + tracker: {} as any, + toVercelAISDK: jest.fn(), + }; + + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockResolvedValue(mockJudgeConfig); + + (AIProviderFactory.create as jest.Mock).mockResolvedValue(undefined); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(result).toBeUndefined(); + expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); + expect(Judge).not.toHaveBeenCalled(); + }); + + it('handles errors gracefully', async () => { + const client = new LDAIClientImpl(mockLdClient); + const key = 'test-judge'; + const defaultValue: LDAIJudgeConfigDefault = { + enabled: true, + model: { name: 'gpt-4' }, + provider: { name: 'openai' }, + evaluationMetricKeys: ['relevance'], + messages: [{ role: 'system', content: 'You are a judge.' 
}], + }; + + const error = new Error('Judge configuration error'); + const judgeConfigSpy = jest.spyOn(client, 'judgeConfig'); + judgeConfigSpy.mockRejectedValue(error); + + const result = await client.createJudge(key, testContext, defaultValue); + + expect(result).toBeUndefined(); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts index c91b2d3d57..4ac561307f 100644 --- a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts +++ b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts @@ -1,13 +1,13 @@ import { TrackedChat } from '../src/api/chat/TrackedChat'; import { ChatResponse } from '../src/api/chat/types'; -import { LDAIConfig, LDMessage } from '../src/api/config/LDAIConfig'; import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker'; +import { LDAIConversationConfig, LDMessage } from '../src/api/config/types'; import { AIProvider } from '../src/api/providers/AIProvider'; describe('TrackedChat', () => { let mockProvider: jest.Mocked; let mockTracker: jest.Mocked; - let aiConfig: LDAIConfig; + let aiConfig: LDAIConversationConfig; beforeEach(() => { // Mock the AIProvider @@ -136,7 +136,7 @@ describe('TrackedChat', () => { }); it('returns empty array when no messages exist and includeConfigMessages is false', () => { - const configWithoutMessages: LDAIConfig = { + const configWithoutMessages: LDAIConversationConfig = { ...aiConfig, messages: [], }; @@ -167,7 +167,7 @@ describe('TrackedChat', () => { }); it('handles undefined config messages gracefully', () => { - const configWithoutMessages: LDAIConfig = { + const configWithoutMessages: LDAIConversationConfig = { ...aiConfig, messages: undefined, }; diff --git a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index fc74d63ff2..ac331d23e8 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -61,9 +61,13 @@ async function main() { myVariable: 'My User Defined Variable', }, ); - const { tracker } = aiConfig; - const completion = tracker.trackBedrockConverseMetrics( + if (!aiConfig.enabled || !aiConfig.tracker) { + console.log('*** AI configuration is not enabled'); + process.exit(0); + } + + const completion = aiConfig.tracker.trackBedrockConverseMetrics( await awsClient.send( new ConverseCommand({ modelId: aiConfig.model?.name ?? 
'no-model', diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index d199edd274..d943e67a0d 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -53,12 +53,17 @@ async function main(): Promise { model: { name: 'gpt-4', }, + enabled: false, }, { myVariable: 'My User Defined Variable' }, ); - const { tracker } = aiConfig; - const completion = await tracker.trackOpenAIMetrics(async () => + if (!aiConfig.enabled || !aiConfig.tracker) { + console.log('*** AI configuration is not enabled'); + process.exit(0); + } + + const completion = await aiConfig.tracker.trackOpenAIMetrics(async () => client.chat.completions.create({ messages: aiConfig.messages || [], model: aiConfig.model?.name || 'gpt-4', diff --git a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts index 4d79bd3800..bd087e2296 100644 --- a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts +++ b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts @@ -40,7 +40,10 @@ async function main() { const aiClient = initAi(client); // Get AI configuration from LaunchDarkly - const aiConfig = await aiClient.config(aiConfigKey, context, { model: { name: 'gpt-4' } }); + const aiConfig = await aiClient.config(aiConfigKey, context, { + model: { name: 'gpt-4' }, + enabled: false, + }); if (!aiConfig.enabled || !aiConfig.tracker) { console.log('*** AI configuration is not enabled'); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index edac548100..ef1d2b31ae 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -2,61 +2,41 @@ import * as Mustache from 'mustache'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIAgent, LDAIAgentConfig, LDAIAgentDefaults } from './api/agents'; import { TrackedChat } from './api/chat'; import { - LDAIConfig, - LDAIConfigTracker, - LDAIDefaults, + LDAIAgentConfig, + LDAIAgentConfigDefault, + LDAIAgentRequestConfig, + LDAIConfigDefaultKind, + LDAIConfigKind, + LDAIConfigMode, + LDAIConversationConfig, + LDAIConversationConfigDefault, + LDAIJudgeConfig, + LDAIJudgeConfigDefault, + LDJudge, LDMessage, - LDModelConfig, - LDProviderConfig, VercelAISDKConfig, VercelAISDKMapOptions, VercelAISDKProvider, } from './api/config'; +import { LDAIConfigFlagValue, LDAIConfigUtils } from './api/config/LDAIConfigUtils'; +import { Judge } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; import { AIProviderFactory, SupportedAIProvider } from './api/providers'; import { LDAIConfigMapper } from './LDAIConfigMapper'; import { LDAIConfigTrackerImpl } from './LDAIConfigTrackerImpl'; import { LDClientMin } from './LDClientMin'; -type Mode = 'completion' | 'agent'; - /** - * Metadata associated with a model configuration variation. + * Tracking event keys for AI SDK usage metrics. */ -interface LDMeta { - variationKey: string; - enabled: boolean; - version?: number; - mode?: Mode; -} - -/** - * Interface for the model configuration variation returned by LaunchDarkly. This is the internal - * typing and not meant for exposure to the application developer. - */ -interface VariationContent { - model?: LDModelConfig; - messages?: LDMessage[]; - instructions?: string; - provider?: LDProviderConfig; - _ldMeta?: LDMeta; -} - -/** - * The result of evaluating a configuration. 
- */ -interface EvaluationResult { - tracker: LDAIConfigTracker; - enabled: boolean; - model?: LDModelConfig; - provider?: LDProviderConfig; - messages?: LDMessage[]; - instructions?: string; - mode?: string; -} +const TRACK_CONFIG_SINGLE = '$ld:ai:config:function:single'; +const TRACK_CONFIG_CREATE_CHAT = '$ld:ai:config:function:createChat'; +const TRACK_JUDGE_SINGLE = '$ld:ai:judge:function:single'; +const TRACK_JUDGE_CREATE = '$ld:ai:judge:function:createJudge'; +const TRACK_AGENT_SINGLE = '$ld:ai:agent:function:single'; +const TRACK_AGENT_MULTIPLE = '$ld:ai:agent:function:multiple'; export class LDAIClientImpl implements LDAIClient { private _logger?: LDLogger; @@ -69,32 +49,26 @@ export class LDAIClientImpl implements LDAIClient { return Mustache.render(template, variables, undefined, { escape: (item: any) => item }); } - private static _toLDFlagValue(defaultValue: LDAIDefaults | LDAIAgentDefaults): { - _ldMeta: { enabled: boolean }; - model?: LDModelConfig; - messages?: LDMessage[]; - provider?: LDProviderConfig; - instructions?: string; - } { - return { - _ldMeta: { enabled: defaultValue.enabled ?? false }, - model: defaultValue.model, - messages: 'messages' in defaultValue ? defaultValue.messages : undefined, - provider: defaultValue.provider, - instructions: 'instructions' in defaultValue ? defaultValue.instructions : undefined, - }; - } - private async _evaluate( key: string, context: LDContext, - defaultValue: LDAIDefaults, - ): Promise { - // Convert default value to LDFlagValue format - // eslint-disable-next-line no-underscore-dangle - const ldFlagValue = LDAIClientImpl._toLDFlagValue(defaultValue); + defaultValue: LDAIConfigDefaultKind, + mode: LDAIConfigMode, + variables?: Record, + ): Promise { + const ldFlagValue = LDAIConfigUtils.toFlagValue(defaultValue, mode); - const value: VariationContent = await this._ldClient.variation(key, context, ldFlagValue); + const value: LDAIConfigFlagValue = await this._ldClient.variation(key, context, ldFlagValue); + + // Validate mode match + // eslint-disable-next-line no-underscore-dangle + const flagMode = value._ldMeta?.mode; + if (flagMode !== mode) { + this._logger?.warn( + `AI Config mode mismatch for ${key}: expected ${mode}, got ${flagMode}. Returning disabled config.`, + ); + return LDAIConfigUtils.createDisabledConfig(mode); + } const tracker = new LDAIConfigTrackerImpl( this._ldClient, @@ -108,174 +82,263 @@ export class LDAIClientImpl implements LDAIClient { context, ); - // eslint-disable-next-line no-underscore-dangle - const enabled = !!value._ldMeta?.enabled; + const config = LDAIConfigUtils.fromFlagValue(value, tracker); - return { - tracker, - enabled, - model: value.model, - provider: value.provider, - messages: value.messages, - instructions: value.instructions, - // eslint-disable-next-line no-underscore-dangle - mode: value._ldMeta?.mode ?? 
'completion', - }; + // Apply variable interpolation (always needed for ldctx) + return this._applyInterpolation(config, context, variables); } - private async _evaluateAgent( - key: string, + private _applyInterpolation( + config: LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig, context: LDContext, - defaultValue: LDAIAgentDefaults, variables?: Record, - ): Promise { - const { - tracker, - enabled, - model, - provider: configProvider, - instructions, - } = await this._evaluate(key, context, defaultValue); - - const agent: LDAIAgent = { - tracker, - enabled, - }; + ): LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig { + const allVariables = { ...variables, ldctx: context }; - // We are going to modify the contents before returning them, so we make a copy. - // This isn't a deep copy and the application developer should not modify the returned content. - if (model) { - agent.model = { ...model }; + if ('messages' in config && config.messages) { + return { + ...config, + messages: config.messages.map((entry: LDMessage) => ({ + ...entry, + content: this._interpolateTemplate(entry.content, allVariables), + })), + }; } - if (configProvider) { - agent.provider = { ...configProvider }; + if ('instructions' in config && config.instructions) { + return { + ...config, + instructions: this._interpolateTemplate(config.instructions, allVariables), + }; } - const allVariables = { ...variables, ldctx: context }; + return config; + } - if (instructions) { - agent.instructions = this._interpolateTemplate(instructions, allVariables); - } + private _addVercelAISDKSupport(config: LDAIConversationConfig): LDAIConversationConfig { + const { messages } = config; + const mapper = new LDAIConfigMapper(config.model, config.provider, messages); + + return { + ...config, + toVercelAISDK: ( + sdkProvider: VercelAISDKProvider | Record>, + options?: VercelAISDKMapOptions | undefined, + ): VercelAISDKConfig => mapper.toVercelAISDK(sdkProvider, options), + }; + } + + private async _initializeJudges( + judgeConfigs: LDJudge[], + context: LDContext, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise> { + const judges: Record = {}; + + const judgePromises = judgeConfigs.map(async (judgeConfig) => { + const judge = await this.createJudge( + judgeConfig.key, + context, + { enabled: false }, + variables, + defaultAiProvider, + ); + return judge ? { key: judgeConfig.key, judge } : null; + }); + + const results = await Promise.all(judgePromises); + results.forEach((result) => { + if (result) { + judges[result.key] = result.judge; + } + }); + + return judges; + } - return agent; + async completionConfig( + key: string, + context: LDContext, + defaultValue: LDAIConversationConfigDefault, + variables?: Record, + ): Promise { + this._ldClient.track(TRACK_CONFIG_SINGLE, context, key, 1); + + const config = await this._evaluate(key, context, defaultValue, 'completion', variables); + return this._addVercelAISDKSupport(config as LDAIConversationConfig); } + /** + * @deprecated Use `completionConfig` instead. This method will be removed in a future version. 
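+   *
+   * Migration is a rename; the signature is unchanged:
+   *   // before: const cfg = await aiClient.config(key, context, defaultValue, variables);
+   *   // after:  const cfg = await aiClient.completionConfig(key, context, defaultValue, variables);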
+ */ async config( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, - ): Promise { - this._ldClient.track('$ld:ai:config:function:single', context, key, 1); - - const { - tracker, - enabled, - model, - provider: configProvider, - messages, - } = await this._evaluate(key, context, defaultValue); - - const config: Omit = { - tracker, - enabled, - }; + ): Promise { + return this.completionConfig(key, context, defaultValue, variables); + } - // We are going to modify the contents before returning them, so we make a copy. - // This isn't a deep copy and the application developer should not modify the returned content. - if (model) { - config.model = { ...model }; - } - if (configProvider) { - config.provider = { ...configProvider }; - } - const allVariables = { ...variables, ldctx: context }; + async judgeConfig( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + ): Promise { + this._ldClient.track(TRACK_JUDGE_SINGLE, context, key, 1); - if (messages) { - config.messages = messages.map((entry: any) => ({ - ...entry, - content: this._interpolateTemplate(entry.content, allVariables), - })); - } + const config = await this._evaluate(key, context, defaultValue, 'judge', variables); + return config as LDAIJudgeConfig; + } - const mapper = new LDAIConfigMapper(config.model, config.provider, config.messages); + async agentConfig( + key: string, + context: LDContext, + defaultValue: LDAIAgentConfigDefault, + variables?: Record, + ): Promise { + this._ldClient.track(TRACK_AGENT_SINGLE, context, key, 1); - return { - ...config, - toVercelAISDK: ( - sdkProvider: VercelAISDKProvider | Record>, - options?: VercelAISDKMapOptions | undefined, - ): VercelAISDKConfig => mapper.toVercelAISDK(sdkProvider, options), - }; + const config = await this._evaluate(key, context, defaultValue, 'agent', variables); + return config as LDAIAgentConfig; } + /** + * @deprecated Use `agentConfig` instead. This method will be removed in a future version. + */ async agent( key: string, context: LDContext, - defaultValue: LDAIAgentDefaults, + defaultValue: LDAIAgentConfigDefault, variables?: Record, - ): Promise { - // Track agent usage - this._ldClient.track('$ld:ai:agent:function:single', context, key, 1); - - return this._evaluateAgent(key, context, defaultValue, variables); + ): Promise { + return this.agentConfig(key, context, defaultValue, variables); } - async agents( + async agentConfigs( agentConfigs: T, context: LDContext, - ): Promise> { - // Track multiple agents usage - this._ldClient.track( - '$ld:ai:agent:function:multiple', - context, - agentConfigs.length, - agentConfigs.length, - ); + ): Promise> { + this._ldClient.track(TRACK_AGENT_MULTIPLE, context, agentConfigs.length, agentConfigs.length); - const agents = {} as Record; + const agents = {} as Record; await Promise.all( agentConfigs.map(async (config) => { - const agent = await this._evaluateAgent( + const agent = await this._evaluate( config.key, context, config.defaultValue, + 'agent', config.variables, ); - agents[config.key as T[number]['key']] = agent; + agents[config.key as T[number]['key']] = agent as LDAIAgentConfig; }), ); return agents; } - async initChat( + /** + * @deprecated Use `agentConfigs` instead. This method will be removed in a future version. 
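+   *
+   * Migration is a rename; the array-of-agent-configs argument shape is unchanged:
+   *   // before: const result = await aiClient.agents(agentConfigsList, context);
+   *   // after:  const result = await aiClient.agentConfigs(agentConfigsList, context);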
+ */ + async agents( + agentConfigs: T, + context: LDContext, + ): Promise> { + return this.agentConfigs(agentConfigs, context); + } + + async createChat( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise { - // Track chat initialization - this._ldClient.track('$ld:ai:config:function:initChat', context, key, 1); + this._ldClient.track(TRACK_CONFIG_CREATE_CHAT, context, key, 1); - const aiConfig = await this.config(key, context, defaultValue, variables); + const config = await this.completionConfig(key, context, defaultValue, variables); - // Return undefined if the configuration is disabled - if (!aiConfig.enabled) { + if (!config.enabled || !config.tracker) { this._logger?.info(`Chat configuration is disabled: ${key}`); return undefined; } - // Create the AIProvider instance - const provider = await AIProviderFactory.create(aiConfig, this._logger, defaultAiProvider); + const provider = await AIProviderFactory.create(config, this._logger, defaultAiProvider); if (!provider) { return undefined; } - // Create the TrackedChat instance with the provider - return new TrackedChat(aiConfig, aiConfig.tracker, provider); + const judges = await this._initializeJudges( + config.judgeConfiguration?.judges ?? [], + context, + variables, + defaultAiProvider, + ); + + return new TrackedChat(config, config.tracker, provider, judges, this._logger); + } + + async createJudge( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise { + this._ldClient.track(TRACK_JUDGE_CREATE, context, key, 1); + + try { + if (variables?.message_history !== undefined) { + this._logger?.warn( + "The variable 'message_history' is reserved by the judge and will be ignored.", + ); + } + if (variables?.response_to_evaluate !== undefined) { + this._logger?.warn( + "The variable 'response_to_evaluate' is reserved by the judge and will be ignored.", + ); + } + + // Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + const extendedVariables = { + ...variables, + message_history: '{{message_history}}', + response_to_evaluate: '{{response_to_evaluate}}', + }; + + const judgeConfig = await this.judgeConfig(key, context, defaultValue, extendedVariables); + + if (!judgeConfig.enabled || !judgeConfig.tracker) { + this._logger?.info(`Judge configuration is disabled: ${key}`); + return undefined; + } + + const provider = await AIProviderFactory.create(judgeConfig, this._logger, defaultAiProvider); + if (!provider) { + return undefined; + } + + return new Judge(judgeConfig, judgeConfig.tracker, provider, this._logger); + } catch (error) { + this._logger?.error(`Failed to initialize judge ${key}:`, error); + return undefined; + } + } + + /** + * @deprecated Use `createChat` instead. This method will be removed in a future version. 
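+   *
+   * Migration is a rename; initChat delegates directly to createChat:
+   *   // before: const chat = await aiClient.initChat(key, context, defaultValue, variables);
+   *   // after:  const chat = await aiClient.createChat(key, context, defaultValue, variables);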
+   */
+  async initChat(
+    key: string,
+    context: LDContext,
+    defaultValue: LDAIConversationConfigDefault,
+    variables?: Record<string, unknown>,
+    defaultAiProvider?: SupportedAIProvider,
+  ): Promise<TrackedChat | undefined> {
+    return this.createChat(key, context, defaultValue, variables, defaultAiProvider);
+  }
 }
diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
index 578ef1f8da..c55051f4cf 100644
--- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
+++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
@@ -2,6 +2,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common';
 
 import { LDAIConfigTracker } from './api/config';
 import { LDAIMetricSummary } from './api/config/LDAIConfigTracker';
+import { EvalScore } from './api/judge/types';
 import {
   createBedrockTokenUsage,
   createOpenAiUsage,
@@ -25,7 +26,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
     private _context: LDContext,
   ) {}
 
-  private _getTrackData(): {
+  getTrackData(): {
     variationKey: string;
     configKey: string;
     version: number;
@@ -43,7 +44,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
 
   trackDuration(duration: number): void {
     this._trackedMetrics.durationMs = duration;
-    this._ldClient.track('$ld:ai:duration:total', this._context, this._getTrackData(), duration);
+    this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration);
   }
 
   async trackDurationOf(func: () => Promise<any>): Promise<any> {
@@ -64,28 +65,35 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
     this._ldClient.track(
       '$ld:ai:tokens:ttf',
       this._context,
-      this._getTrackData(),
+      this.getTrackData(),
       timeToFirstTokenMs,
     );
   }
 
+  trackEvalScores(scores: Record<string, EvalScore>) {
+    // Track each evaluation score individually
+    Object.entries(scores).forEach(([metricKey, evalScore]) => {
+      this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score);
+    });
+  }
+
   trackFeedback(feedback: { kind: LDFeedbackKind }): void {
     this._trackedMetrics.feedback = feedback;
     if (feedback.kind === LDFeedbackKind.Positive) {
-      this._ldClient.track('$ld:ai:feedback:user:positive', this._context, this._getTrackData(), 1);
+      this._ldClient.track('$ld:ai:feedback:user:positive', this._context, this.getTrackData(), 1);
     } else if (feedback.kind === LDFeedbackKind.Negative) {
-      this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this._getTrackData(), 1);
+      this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1);
     }
   }
 
   trackSuccess(): void {
     this._trackedMetrics.success = true;
-    this._ldClient.track('$ld:ai:generation:success', this._context, this._getTrackData(), 1);
+    this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1);
   }
 
   trackError(): void {
     this._trackedMetrics.success = false;
-    this._ldClient.track('$ld:ai:generation:error', this._context, this._getTrackData(), 1);
+    this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1);
   }
 
   async trackMetricsOf(
@@ -290,7 +298,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
 
   trackTokens(tokens: LDTokenUsage): void {
     this._trackedMetrics.tokens = tokens;
-    const trackData = this._getTrackData();
+    const trackData = this.getTrackData();
     if (tokens.total > 0) {
       this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total);
     }
diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts
index 3f98474a62..c726df1f23 100644
--- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -1,8 +1,16 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { LDAIAgent, LDAIAgentConfig, LDAIAgentDefaults } from './agents'; import { TrackedChat } from './chat'; -import { LDAIConfig, LDAIDefaults } from './config/LDAIConfig'; +import { + LDAIAgentConfig, + LDAIAgentConfigDefault, + LDAIAgentRequestConfig, + LDAIConversationConfig, + LDAIConversationConfigDefault, + LDAIJudgeConfig, + LDAIJudgeConfigDefault, +} from './config'; +import { Judge } from './judge/Judge'; import { SupportedAIProvider } from './providers'; /** @@ -10,7 +18,7 @@ import { SupportedAIProvider } from './providers'; */ export interface LDAIClient { /** - * Retrieves and processes an AI Config based on the provided key, LaunchDarkly context, + * Retrieves and processes a completion AI Config based on the provided key, LaunchDarkly context, * and variables. This includes the model configuration and the customized messages. * * @param key The key of the AI Config. @@ -34,9 +42,11 @@ export interface LDAIClient { * const variables = {username: 'john'}; * const defaultValue = { * enabled: false, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, * }; * - * const result = config(key, context, defaultValue, variables); + * const result = completionConfig(key, context, defaultValue, variables); * // Output: * { * enabled: true, @@ -60,12 +70,22 @@ export interface LDAIClient { * } * ``` */ + completionConfig( + key: string, + context: LDContext, + defaultValue: LDAIConversationConfigDefault, + variables?: Record, + ): Promise; + + /** + * @deprecated Use `completionConfig` instead. This method will be removed in a future version. + */ config( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, - ): Promise; + ): Promise; /** * Retrieves and processes a single AI Config agent based on the provided key, LaunchDarkly context, @@ -89,21 +109,67 @@ export interface LDAIClient { * const key = "research_agent"; * const context = {...}; * const variables = { topic: 'climate change' }; - * const agent = await client.agent(key, context, { + * const agentConfig = await client.agentConfig(key, context, { * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, * instructions: 'You are a research assistant.', * }, variables); * - * const researchResult = agent.instructions; // Interpolated instructions - * agent.tracker.trackSuccess(); + * const researchResult = agentConfig.instructions; // Interpolated instructions + * agentConfig.tracker.trackSuccess(); * ``` */ + agentConfig( + key: string, + context: LDContext, + defaultValue: LDAIAgentConfigDefault, + variables?: Record, + ): Promise; + + /** + * @deprecated Use `agentConfig` instead. This method will be removed in a future version. + */ agent( key: string, context: LDContext, - defaultValue: LDAIAgentDefaults, + defaultValue: LDAIAgentConfigDefault, variables?: Record, - ): Promise; + ): Promise; + + /** + * Retrieves and processes a Judge AI Config based on the provided key, LaunchDarkly context, + * and variables. This includes the model configuration and the customized messages for evaluation. + * + * @param key The key of the Judge AI Config. + * @param context The LaunchDarkly context object that contains relevant information about the + * current environment, user, or session. 
This context may influence how the configuration is + * processed or personalized. + * @param defaultValue A fallback value containing model configuration and messages. This will + * be used if the configuration is not available from LaunchDarkly. + * @param variables Optional variables for template interpolation in messages and instructions. + * @returns A promise that resolves to a tracked judge configuration. + * + * @example + * ```typescript + * const judgeConf = await client.judgeConfig(key, context, { + * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, + * evaluationMetricKeys: ['$ld:ai:judge:relevance'], + * messages: [{ role: 'system', content: 'You are a relevance judge.' }] + * }, variables); + * + * const config = judgeConf.config; // Interpolated configuration + * judgeConf.tracker.trackSuccess(); + * ``` + */ + judgeConfig( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + ): Promise; /** * Retrieves and processes multiple AI Config agents based on the provided agent configurations @@ -122,32 +188,50 @@ export interface LDAIClient { * * @example * ``` - * const agentConfigs = [ + * const agentConfigsList = [ * { * key: 'research_agent', - * defaultValue: { enabled: true, instructions: 'You are a research assistant.' }, + * defaultValue: { + * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, + * instructions: 'You are a research assistant.' + * }, * variables: { topic: 'climate change' } * }, * { * key: 'writing_agent', - * defaultValue: { enabled: true, instructions: 'You are a writing assistant.' }, + * defaultValue: { + * enabled: true, + * model: { name: 'gpt-4' }, + * provider: { name: 'openai' }, + * instructions: 'You are a writing assistant.' + * }, * variables: { style: 'academic' } * } * ] as const; * const context = {...}; * - * const agents = await client.agents(agentConfigs, context); - * const researchResult = agents["research_agent"].instructions; // Interpolated instructions - * agents["research_agent"].tracker.trackSuccess(); + * const configs = await client.agentConfigs(agentConfigsList, context); + * const researchResult = configs["research_agent"].instructions; // Interpolated instructions + * configs["research_agent"].tracker.trackSuccess(); * ``` */ - agents( + agentConfigs( + agentConfigs: T, + context: LDContext, + ): Promise>; + + /** + * @deprecated Use `agentConfigs` instead. This method will be removed in a future version. + */ + agents( agentConfigs: T, context: LDContext, - ): Promise>; + ): Promise>; /** - * Initializes and returns a new TrackedChat instance for chat interactions. + * Returns a TrackedChat instance for chat interactions. * This method serves as the primary entry point for creating TrackedChat instances from configuration. * * @param key The key identifying the AI chat configuration to use. @@ -161,17 +245,16 @@ export interface LDAIClient { * const key = "customer_support_chat"; * const context = {...}; * const defaultValue = { - * config: { - * enabled: false, - * model: { name: "gpt-4" }, - * messages: [ - * { role: "system", content: "You are a helpful customer support agent." } - * ] - * } + * enabled: false, + * model: { name: "gpt-4" }, + * provider: { name: "openai" }, + * messages: [ + * { role: "system", content: "You are a helpful customer support agent." 
} + * ] * }; * const variables = { customerName: 'John' }; * - * const chat = await client.initChat(key, context, defaultValue, variables); + * const chat = await client.createChat(key, context, defaultValue, variables); * if (chat) { * const response = await chat.invoke("I need help with my order"); * console.log(response.message.content); @@ -182,11 +265,60 @@ export interface LDAIClient { * } * ``` */ + createChat( + key: string, + context: LDContext, + defaultValue: LDAIConversationConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise; + + /** + * @deprecated Use `createChat` instead. This method will be removed in a future version. + */ initChat( key: string, context: LDContext, - defaultValue: LDAIDefaults, + defaultValue: LDAIConversationConfigDefault, variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise; + + /** + * Creates and returns a new Judge instance for AI evaluation. + * + * @param key The key identifying the AI judge configuration to use + * @param context Standard LDContext used when evaluating flags + * @param defaultValue A default value representing a standard AI config result + * @param variables Dictionary of values for instruction interpolation + * @returns Promise that resolves to a Judge instance or undefined if disabled/unsupported + * + * @example + * ``` + * const judge = await client.createJudge( + * "relevance-judge", + * context, + * { + * enabled: true, + * model: { name: "gpt-4" }, + * provider: { name: "openai" }, + * evaluationMetricKeys: ['$ld:ai:judge:relevance'], + * messages: [{ role: 'system', content: 'You are a relevance judge.' }] + * }, + * { metric: "relevance" } + * ); + * + * if (judge) { + * const result = await judge.evaluate("User question", "AI response"); + * console.log('Relevance score:', result.evals.relevance?.score); + * } + * ``` + */ + createJudge( + key: string, + context: LDContext, + defaultValue: LDAIJudgeConfigDefault, + variables?: Record, + defaultAiProvider?: SupportedAIProvider, + ): Promise; } diff --git a/packages/sdk/server-ai/src/api/agents/LDAIAgent.ts b/packages/sdk/server-ai/src/api/agents/LDAIAgent.ts deleted file mode 100644 index 722c2bcdb0..0000000000 --- a/packages/sdk/server-ai/src/api/agents/LDAIAgent.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { LDAIConfig } from '../config'; - -/** - * AI Config agent and tracker. - */ -export interface LDAIAgent extends Omit { - /** - * Instructions for the agent. - */ - instructions?: string; -} - -/** - * Configuration for a single agent request. - */ -export interface LDAIAgentConfig { - /** - * The agent key to retrieve. - */ - key: string; - - /** - * Default configuration for the agent. - */ - defaultValue: LDAIAgentDefaults; - - /** - * Variables for instructions interpolation. - */ - variables?: Record; -} - -/** - * Default values for an agent. 
- */ -export type LDAIAgentDefaults = Omit; diff --git a/packages/sdk/server-ai/src/api/agents/index.ts b/packages/sdk/server-ai/src/api/agents/index.ts deleted file mode 100644 index f68fcd9a24..0000000000 --- a/packages/sdk/server-ai/src/api/agents/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from './LDAIAgent'; diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 68a9af2f03..1c0fad2d1e 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -1,5 +1,9 @@ -import { LDAIConfig, LDMessage } from '../config/LDAIConfig'; +import { LDLogger } from '@launchdarkly/js-server-sdk-common'; + import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; +import { LDAIConversationConfig, LDMessage } from '../config/types'; +import { Judge } from '../judge/Judge'; +import { JudgeResponse } from '../judge/types'; import { AIProvider } from '../providers/AIProvider'; import { ChatResponse } from './types'; @@ -11,13 +15,19 @@ import { ChatResponse } from './types'; */ export class TrackedChat { protected messages: LDMessage[]; + protected judges: Record; + private readonly _logger?: LDLogger; constructor( - protected readonly aiConfig: LDAIConfig, + protected readonly aiConfig: LDAIConversationConfig, protected readonly tracker: LDAIConfigTracker, protected readonly provider: AIProvider, + judges?: Record, + logger?: LDLogger, ) { this.messages = []; + this.judges = judges || {}; + this._logger = logger; } /** @@ -45,13 +55,63 @@ export class TrackedChat { // Add the assistant response to the conversation history this.messages.push(response.message); + // Start judge evaluations if configured + if ( + this.aiConfig.judgeConfiguration?.judges && + this.aiConfig.judgeConfiguration.judges.length > 0 + ) { + response.evaluations = this._evaluateWithJudges(this.messages, response); + } + return response; } + /** + * Evaluates the response with all configured judges. + * Returns a promise that resolves to an array of evaluation results. + * + * @param messages Array of messages representing the conversation history + * @param response The AI response to be evaluated + * @returns Promise resolving to array of judge evaluation results + */ + private async _evaluateWithJudges( + messages: LDMessage[], + response: ChatResponse, + ): Promise> { + const judgeConfigs = this.aiConfig.judgeConfiguration!.judges; + + // Start all judge evaluations in parallel + const evaluationPromises = judgeConfigs.map(async (judgeConfig) => { + const judge = this.judges[judgeConfig.key]; + if (!judge) { + this._logger?.warn( + `Judge configuration is not enabled: ${judgeConfig.key}`, + this.tracker.getTrackData(), + ); + return undefined; + } + + const evalResult = await judge.evaluateMessages(messages, response, judgeConfig.samplingRate); + + // Track scores if evaluation was successful + if (evalResult && evalResult.success) { + this.tracker.trackEvalScores(evalResult.evals); + } + + return evalResult; + }); + + // Use Promise.allSettled to ensure all evaluations complete + // even if some fail + const results = await Promise.allSettled(evaluationPromises); + + return results.map((result) => (result.status === 'fulfilled' ? result.value : undefined)); + } + /** * Get the underlying AI configuration used to initialize this TrackedChat. 
*/ - getConfig(): LDAIConfig { + getConfig(): LDAIConversationConfig { return this.aiConfig; } @@ -70,6 +130,14 @@ export class TrackedChat { return this.provider; } + /** + * Get the judges associated with this TrackedChat. + * Returns a record of judge instances keyed by their configuration keys. + */ + getJudges(): Record { + return this.judges; + } + /** * Append messages to the conversation history. * Adds messages to the conversation history without invoking the model, diff --git a/packages/sdk/server-ai/src/api/chat/types.ts b/packages/sdk/server-ai/src/api/chat/types.ts index 804bb21453..5b32109fcf 100644 --- a/packages/sdk/server-ai/src/api/chat/types.ts +++ b/packages/sdk/server-ai/src/api/chat/types.ts @@ -1,4 +1,5 @@ -import { LDMessage } from '../config/LDAIConfig'; +import { LDMessage } from '../config/types'; +import { JudgeResponse } from '../judge/types'; import { LDAIMetrics } from '../metrics/LDAIMetrics'; /** @@ -14,4 +15,10 @@ export interface ChatResponse { * Metrics information including success status and token usage. */ metrics: LDAIMetrics; + + /** + * Promise that resolves to judge evaluation results. + * Only present when judges are configured for evaluation. + */ + evaluations?: Promise>; } diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfig.ts b/packages/sdk/server-ai/src/api/config/LDAIConfig.ts deleted file mode 100644 index 4f958f3d3a..0000000000 --- a/packages/sdk/server-ai/src/api/config/LDAIConfig.ts +++ /dev/null @@ -1,104 +0,0 @@ -import { LDAIConfigTracker } from './LDAIConfigTracker'; -import { VercelAISDKConfig, VercelAISDKMapOptions, VercelAISDKProvider } from './VercelAISDK'; - -/** - * Configuration related to the model. - */ -export interface LDModelConfig { - /** - * The ID of the model. - */ - name: string; - - /** - * Model specific parameters. - */ - parameters?: { [index: string]: unknown }; - - /** - * Additional user-specified parameters. - */ - custom?: { [index: string]: unknown }; -} - -export interface LDProviderConfig { - /** - * The name of the provider. - */ - name: string; -} - -/** - * Information about prompts. - */ -export interface LDMessage { - /** - * The role of the prompt. - */ - role: 'user' | 'assistant' | 'system'; - /** - * Content for the prompt. - */ - content: string; -} - -/** - * AI Config and tracker. - */ -export interface LDAIConfig { - /** - * Optional model configuration. - */ - model?: LDModelConfig; - /** - * Optional prompt data. - */ - messages?: LDMessage[]; - - /** - * Optional configuration for the provider. - */ - provider?: LDProviderConfig; - - /** - * A tracker which can be used to generate analytics. - */ - tracker: LDAIConfigTracker; - - /** - * Whether the configuration is enabled. - */ - enabled: boolean; - - /** - * Maps this AI config to a format usable direcly in Vercel AI SDK generateText() - * and streamText() methods. - * - * WARNING: this method can throw an exception if a Vercel AI SDK model cannot be determined. - * - * @deprecated Use `VercelProvider.toVercelAISDK()` from the `@launchdarkly/server-sdk-ai-vercel` package instead. - * This method will be removed in a future version. - * - * @param provider A Vercel AI SDK Provider or a map of provider names to Vercel AI SDK Providers. - * @param options Optional mapping options. - * @returns A configuration directly usable in Vercel AI SDK generateText() and streamText() - * @throws {Error} if a Vercel AI SDK model cannot be determined from the given provider parameter. 
- */ - toVercelAISDK: ( - provider: VercelAISDKProvider | Record>, - options?: VercelAISDKMapOptions | undefined, - ) => VercelAISDKConfig; -} - -/** - * Default value for a `modelConfig`. This is the same as the LDAIConfig, but it does not include - * a tracker or mapper, and `enabled` is optional. - */ -export type LDAIDefaults = Omit & { - /** - * Whether the configuration is enabled. - * - * defaults to false - */ - enabled?: boolean; -}; diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 491315e02b..ed1eed4d3c 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -1,3 +1,4 @@ +import { EvalScore } from '../judge/types'; import { LDAIMetrics, LDFeedbackKind, LDTokenUsage } from '../metrics'; /** @@ -34,6 +35,14 @@ export interface LDAIMetricSummary { * The LDAIConfigTracker is used to track various details about AI operations. */ export interface LDAIConfigTracker { + /** + * Get the data for tracking. + */ + getTrackData(): { + variationKey: string; + configKey: string; + version: number; + }; /** * Track the duration of generation. * @@ -74,6 +83,13 @@ export interface LDAIConfigTracker { */ trackTimeToFirstToken(timeToFirstTokenMs: number): void; + /** + * Track evaluation scores for multiple metrics. + * + * @param scores Record mapping metric keys to their evaluation scores + */ + trackEvalScores(scores: Record): void; + /** * Track the duration of execution of the provided function. * diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts new file mode 100644 index 0000000000..ad0ef344b3 --- /dev/null +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts @@ -0,0 +1,206 @@ +import { LDAIConfigTracker } from './LDAIConfigTracker'; +import { + LDAIAgentConfig, + LDAIConfigDefaultKind, + LDAIConfigMode, + LDAIConversationConfig, + LDAIJudgeConfig, + LDJudgeConfiguration, + LDMessage, + LDModelConfig, + LDProviderConfig, +} from './types'; + +/** + * Internal flag value structure returned by LaunchDarkly. + * This represents the raw data structure that LaunchDarkly returns for AI configuration flags. + * + * @internal - Not meant for external use + */ +export interface LDAIConfigFlagValue { + _ldMeta?: { + variationKey?: string; + enabled: boolean; + version?: number; + mode?: LDAIConfigMode; + }; + model?: LDModelConfig; + messages?: LDMessage[]; + provider?: LDProviderConfig; + instructions?: string; + evaluationMetricKeys?: string[]; + judgeConfiguration?: LDJudgeConfiguration; +} + +/** + * Utility class for converting between AI configuration types and LaunchDarkly flag values. + * + * @internal - This class and its types are internal implementation details and should not be used by SDK consumers. + */ +export class LDAIConfigUtils { + /** + * Converts a default AI configuration to a LaunchDarkly flag value format. + * + * @param config The default AI configuration to convert + * @param mode The mode for the configuration + * @returns The flag value structure for LaunchDarkly + */ + static toFlagValue(config: LDAIConfigDefaultKind, mode: LDAIConfigMode): LDAIConfigFlagValue { + const flagValue: LDAIConfigFlagValue = { + _ldMeta: { + variationKey: '', // Not available when converting from config + enabled: config.enabled ?? 
false, + mode, + }, + model: config.model, + }; + + if ('messages' in config && config.messages !== undefined) { + flagValue.messages = config.messages; + } + if (config.provider !== undefined) { + flagValue.provider = config.provider; + } + if ('instructions' in config && config.instructions !== undefined) { + flagValue.instructions = config.instructions; + } + if ('evaluationMetricKeys' in config && config.evaluationMetricKeys !== undefined) { + flagValue.evaluationMetricKeys = config.evaluationMetricKeys; + } + if ('judgeConfiguration' in config && config.judgeConfiguration !== undefined) { + flagValue.judgeConfiguration = config.judgeConfiguration; + } + + return flagValue; + } + + /** + * Converts a LaunchDarkly flag value to the appropriate AI configuration type. + * + * @param flagValue The flag value from LaunchDarkly + * @param tracker The tracker to add to the config + * @returns The appropriate AI configuration type + */ + static fromFlagValue( + flagValue: LDAIConfigFlagValue, + tracker: LDAIConfigTracker, + ): LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig { + // Determine the actual mode from flag value + // eslint-disable-next-line no-underscore-dangle + const flagValueMode = flagValue._ldMeta?.mode; + + // Convert to appropriate config type based on actual mode + switch (flagValueMode) { + case 'agent': + return this.toAgentConfig(flagValue, tracker); + case 'judge': + return this.toJudgeConfig(flagValue, tracker); + case 'completion': + default: + return this.toCompletionConfig(flagValue, tracker); + } + } + + /** + * Creates a disabled configuration of the specified mode. + * + * @param mode The mode for the disabled config + * @returns A disabled config of the appropriate type + */ + static createDisabledConfig( + mode: LDAIConfigMode, + ): LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig { + switch (mode) { + case 'agent': + return { + enabled: false, + tracker: undefined, + } as LDAIAgentConfig; + case 'judge': + return { + enabled: false, + tracker: undefined, + evaluationMetricKeys: [], + } as LDAIJudgeConfig; + case 'completion': + default: + // Default to completion config for completion mode or any unexpected mode + return { + enabled: false, + tracker: undefined, + } as LDAIConversationConfig; + } + } + + /** + * Creates the base configuration that all config types share. + * + * @param flagValue The flag value from LaunchDarkly + * @returns Base configuration object + */ + private static _toBaseConfig(flagValue: LDAIConfigFlagValue) { + return { + // eslint-disable-next-line no-underscore-dangle + enabled: flagValue._ldMeta?.enabled ?? false, + model: flagValue.model, + provider: flagValue.provider, + }; + } + + /** + * Creates a completion config from flag value data. + * + * @param flagValue The flag value from LaunchDarkly + * @param tracker The tracker to add to the config + * @returns A completion configuration + */ + static toCompletionConfig( + flagValue: LDAIConfigFlagValue, + tracker: LDAIConfigTracker, + ): LDAIConversationConfig { + return { + ...this._toBaseConfig(flagValue), + tracker, + messages: flagValue.messages, + judgeConfiguration: flagValue.judgeConfiguration, + }; + } + + /** + * Creates an agent config from flag value data. 
+   *
+   * @param flagValue The flag value from LaunchDarkly
+   * @param tracker The tracker to add to the config
+   * @returns An agent configuration
+   */
+  static toAgentConfig(
+    flagValue: LDAIConfigFlagValue,
+    tracker: LDAIConfigTracker,
+  ): LDAIAgentConfig {
+    return {
+      ...this._toBaseConfig(flagValue),
+      tracker,
+      instructions: flagValue.instructions,
+      judgeConfiguration: flagValue.judgeConfiguration,
+    };
+  }
+
+  /**
+   * Creates a judge config from flag value data.
+   *
+   * @param flagValue The flag value from LaunchDarkly
+   * @param tracker The tracker to add to the config
+   * @returns A judge configuration
+   */
+  static toJudgeConfig(
+    flagValue: LDAIConfigFlagValue,
+    tracker: LDAIConfigTracker,
+  ): LDAIJudgeConfig {
+    return {
+      ...this._toBaseConfig(flagValue),
+      tracker,
+      messages: flagValue.messages,
+      evaluationMetricKeys: flagValue.evaluationMetricKeys || [],
+    };
+  }
+}
diff --git a/packages/sdk/server-ai/src/api/config/VercelAISDK.ts b/packages/sdk/server-ai/src/api/config/VercelAISDK.ts
index d8491a6810..290ae45f7b 100644
--- a/packages/sdk/server-ai/src/api/config/VercelAISDK.ts
+++ b/packages/sdk/server-ai/src/api/config/VercelAISDK.ts
@@ -1,4 +1,4 @@
-import { type LDMessage } from './LDAIConfig';
+import { type LDMessage } from './types';
 
 /**
  * @deprecated Use `VercelAISDKProvider` from the `@launchdarkly/server-sdk-ai-vercel` package instead.
diff --git a/packages/sdk/server-ai/src/api/config/index.ts b/packages/sdk/server-ai/src/api/config/index.ts
index a3f3752908..54f799a135 100644
--- a/packages/sdk/server-ai/src/api/config/index.ts
+++ b/packages/sdk/server-ai/src/api/config/index.ts
@@ -1,3 +1,4 @@
-export * from './LDAIConfig';
+export * from './types';
+// LDAIConfigUtils is intentionally not exported - it's an internal utility class
 export * from './VercelAISDK';
 export { LDAIConfigTracker } from './LDAIConfigTracker';
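For context on how the internal converter behaves (`LDAIConfigUtils` is shown above but deliberately not exported), here is a sketch of the flag-value shape it consumes; the concrete values are hypothetical:

```typescript
// Hypothetical flag value as LaunchDarkly would serve it; _ldMeta.mode drives
// which config type LDAIConfigUtils.fromFlagValue() produces (internal API).
const rawFlagValue = {
  _ldMeta: { variationKey: 'v1', enabled: true, version: 1, mode: 'judge' as const },
  model: { name: 'gpt-4' },
  provider: { name: 'openai' },
  messages: [{ role: 'system' as const, content: 'You are a relevance judge.' }],
  evaluationMetricKeys: ['$ld:ai:judge:relevance'],
};
// With mode 'judge', fromFlagValue(rawFlagValue, tracker) returns an
// LDAIJudgeConfig; 'agent' and 'completion' map to their respective types.
```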
diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts
new file mode 100644
index 0000000000..880345b192
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/config/types.ts
@@ -0,0 +1,262 @@
+import { LDAIConfigTracker } from './LDAIConfigTracker';
+import { VercelAISDKConfig, VercelAISDKMapOptions, VercelAISDKProvider } from './VercelAISDK';
+
+/**
+ * Configuration related to the model.
+ */
+export interface LDModelConfig {
+  /**
+   * The ID of the model.
+   */
+  name: string;
+
+  /**
+   * Model specific parameters.
+   */
+  parameters?: { [index: string]: unknown };
+
+  /**
+   * Additional user-specified parameters.
+   */
+  custom?: { [index: string]: unknown };
+}
+
+export interface LDProviderConfig {
+  /**
+   * The name of the provider.
+   */
+  name: string;
+}
+
+/**
+ * Configuration for a single judge attachment.
+ */
+export interface LDJudge {
+  /** The key of the judge AI Config */
+  key: string;
+  /** Sampling rate for evaluation (0.0 to 1.0) */
+  samplingRate: number;
+}
+
+/**
+ * Configuration for judge attachment to AI Configs.
+ */
+export interface LDJudgeConfiguration {
+  /** Array of judge configurations */
+  judges: LDJudge[];
+}
+
+/**
+ * Base AI Config interface without mode-specific fields.
+ */
+export interface LDAIConfig extends Omit<LDAIConfigDefault, 'enabled'> {
+  /**
+   * Whether the configuration is enabled.
+   */
+  enabled: boolean;
+
+  /**
+   * A tracker which can be used to generate analytics.
+   * Undefined for disabled configs.
+   */
+  tracker?: LDAIConfigTracker;
+
+  /**
+   * Maps this AI config to a format usable directly in Vercel AI SDK generateText()
+   * and streamText() methods.
+   *
+   * WARNING: this method can throw an exception if a Vercel AI SDK model cannot be determined.
+   *
+   * @deprecated Use `VercelProvider.toVercelAISDK()` from the `@launchdarkly/server-sdk-ai-vercel` package instead.
+   * This method will be removed in a future version.
+   *
+   * @param provider A Vercel AI SDK Provider or a map of provider names to Vercel AI SDK Providers.
+   * @param options Optional mapping options.
+   * @returns A configuration directly usable in Vercel AI SDK generateText() and streamText()
+   * @throws {Error} if a Vercel AI SDK model cannot be determined from the given provider parameter.
+   */
+  toVercelAISDK?: <TMod>(
+    provider: VercelAISDKProvider<TMod> | Record<string, VercelAISDKProvider<TMod>>,
+    options?: VercelAISDKMapOptions | undefined,
+  ) => VercelAISDKConfig<TMod>;
+}
+
+/**
+ * Base AI Config interface for default implementations with optional enabled property.
+ */
+export interface LDAIConfigDefault {
+  /**
+   * Optional model configuration.
+   */
+  model?: LDModelConfig;
+
+  /**
+   * Optional configuration for the provider.
+   */
+  provider?: LDProviderConfig;
+
+  /**
+   * Whether the configuration is enabled. Defaults to false when not provided.
+   */
+  enabled?: boolean;
+}
+
+/**
+ * Default implementation types for AI Configs with optional enabled property.
+ */
+
+/**
+ * Default judge-specific AI Config with optional evaluation metric keys.
+ */
+export interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
+  /**
+   * Optional prompt data for judge configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Evaluation metric keys for judge configurations.
+   * The keys of the metrics that this judge can evaluate.
+   */
+  evaluationMetricKeys?: string[];
+}
+
+/**
+ * Default Agent-specific AI Config with instructions.
+ */
+export interface LDAIAgentConfigDefault extends LDAIConfigDefault {
+  /**
+   * Instructions for the agent.
+   */
+  instructions?: string;
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Default Completion AI Config (default mode).
+ */
+export interface LDAIConversationConfigDefault extends LDAIConfigDefault {
+  /**
+   * Optional prompt data for completion configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Non-default implementation types for AI Configs with a required enabled property and an
+ * optional tracker.
+ */
+
+/**
+ * Judge-specific AI Config with required evaluation metric keys.
+ */
+export interface LDAIJudgeConfig extends LDAIConfig {
+  /**
+   * Optional prompt data for judge configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Evaluation metric keys for judge configurations.
+   * The keys of the metrics that this judge can evaluate.
+   */
+  evaluationMetricKeys: string[];
+}
+
+/**
+ * Agent-specific AI Config with instructions.
+ */
+export interface LDAIAgentConfig extends LDAIConfig {
+  /**
+   * Instructions for the agent.
+   */
+  instructions?: string;
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Completion AI Config (default mode).
+ */
+export interface LDAIConversationConfig extends LDAIConfig {
+  /**
+   * Optional prompt data for completion configurations.
+   */
+  messages?: LDMessage[];
+  /**
+   * Judge configuration for AI Configs being evaluated.
+   * References judge AI Configs that should evaluate this AI Config.
+   */
+  judgeConfiguration?: LDJudgeConfiguration;
+}
+
+/**
+ * Information about prompts.
+ */
+export interface LDMessage {
+  /**
+   * The role of the prompt.
+   */
+  role: 'user' | 'assistant' | 'system';
+  /**
+   * Content for the prompt.
+   */
+  content: string;
+}
+
+/**
+ * Union type for all AI Config variants.
+ */
+export type LDAIConfigKind = LDAIConversationConfig | LDAIAgentConfig | LDAIJudgeConfig;
+
+/**
+ * Union type for all default AI Config variants.
+ */
+export type LDAIConfigDefaultKind =
+  | LDAIConversationConfigDefault
+  | LDAIAgentConfigDefault
+  | LDAIJudgeConfigDefault;
+
+/**
+ * Configuration for a single agent request.
+ */
+export interface LDAIAgentRequestConfig {
+  /**
+   * The agent key to retrieve.
+   */
+  key: string;
+
+  /**
+   * Default configuration for the agent.
+   */
+  defaultValue: LDAIAgentConfigDefault;
+
+  /**
+   * Variables for instructions interpolation.
+   */
+  variables?: Record<string, unknown>;
+}
+
+/**
+ * AI Config agent interface (extends agent config without tracker and toVercelAISDK).
+ */
+export interface LDAIAgent extends Omit<LDAIAgentConfig, 'tracker' | 'toVercelAISDK'> {
+  /**
+   * Instructions for the agent.
+   */
+  instructions?: string;
+}
+
+/**
+ * Mode type for AI configurations.
+ */
+export type LDAIConfigMode = 'completion' | 'agent' | 'judge';
diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts
index cd27112f7a..2f289b8356 100644
--- a/packages/sdk/server-ai/src/api/index.ts
+++ b/packages/sdk/server-ai/src/api/index.ts
@@ -1,6 +1,6 @@
 export * from './config';
-export * from './agents';
 export * from './chat';
+export * from './judge';
 export * from './metrics';
 export * from './LDAIClient';
 export * from './providers';
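Taken together, the types above let a completion config opt into judge evaluation. A minimal sketch, assuming the package's published name and hypothetical config keys:

```typescript
import type { LDAIConversationConfigDefault } from '@launchdarkly/server-sdk-ai';

// A completion default that attaches a judge AI Config ('relevance-judge' is a
// hypothetical key) which evaluates roughly half of all responses.
const defaultValue: LDAIConversationConfigDefault = {
  enabled: false,
  model: { name: 'gpt-4' },
  provider: { name: 'openai' },
  messages: [{ role: 'system', content: 'You are a helpful assistant.' }],
  judgeConfiguration: {
    judges: [{ key: 'relevance-judge', samplingRate: 0.5 }],
  },
};
```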
diff --git a/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts b/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts
new file mode 100644
index 0000000000..16d9ce651d
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts
@@ -0,0 +1,54 @@
+/**
+ * Internal class for building dynamic evaluation response schemas.
+ * Not exported - only used internally by Judge.
+ */
+class EvaluationSchemaBuilder {
+  static build(evaluationMetricKeys: string[]): Record<string, unknown> {
+    return {
+      type: 'object',
+      properties: {
+        evaluations: {
+          type: 'object',
+          description: `Object containing evaluation results for ${evaluationMetricKeys.join(', ')} metrics`,
+          properties: this._buildKeyProperties(evaluationMetricKeys),
+          required: evaluationMetricKeys,
+          additionalProperties: false,
+        },
+      },
+      required: ['evaluations'],
+      additionalProperties: false,
+    } as const;
+  }
+
+  private static _buildKeyProperties(evaluationMetricKeys: string[]) {
+    return evaluationMetricKeys.reduce(
+      (acc, key) => {
+        acc[key] = this._buildKeySchema(key);
+        return acc;
+      },
+      {} as Record<string, unknown>,
+    );
+  }
+
+  private static _buildKeySchema(key: string) {
+    return {
+      type: 'object',
+      properties: {
+        score: {
+          type: 'number',
+          minimum: 0,
+          maximum: 1,
+          description: `Score between 0.0 and 1.0 for ${key}`,
+        },
+        reasoning: {
+          type: 'string',
+          description: `Reasoning behind the score for ${key}`,
+        },
+      },
+      required: ['score', 'reasoning'],
+      additionalProperties: false,
+    };
+  }
+}
+
+export { EvaluationSchemaBuilder };
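As a concrete illustration (the builder is internal and not exported; the two metric keys are hypothetical), the schema produced for `['relevance', 'accuracy']` mirrors the code above:

```typescript
const schema = EvaluationSchemaBuilder.build(['relevance', 'accuracy']);
// Roughly equivalent to:
// {
//   type: 'object',
//   properties: {
//     evaluations: {
//       type: 'object',
//       description: 'Object containing evaluation results for relevance, accuracy metrics',
//       properties: {
//         relevance: {
//           type: 'object',
//           properties: {
//             score: { type: 'number', minimum: 0, maximum: 1 },
//             reasoning: { type: 'string' },
//           },
//           required: ['score', 'reasoning'],
//           additionalProperties: false,
//         },
//         accuracy: { /* same shape as relevance */ },
//       },
//       required: ['relevance', 'accuracy'],
//       additionalProperties: false,
//     },
//   },
//   required: ['evaluations'],
//   additionalProperties: false,
// }
```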
diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts
new file mode 100644
index 0000000000..b4ea4c00a9
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/Judge.ts
@@ -0,0 +1,230 @@
+import * as Mustache from 'mustache';
+
+import { LDLogger } from '@launchdarkly/js-server-sdk-common';
+
+import { ChatResponse } from '../chat/types';
+import { LDAIConfigTracker } from '../config/LDAIConfigTracker';
+import { LDAIJudgeConfig, LDMessage } from '../config/types';
+import { AIProvider } from '../providers/AIProvider';
+import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder';
+import { EvalScore, JudgeResponse, StructuredResponse } from './types';
+
+/**
+ * Judge implementation that handles evaluation functionality and conversation management.
+ *
+ * According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate
+ * other AI Configs using structured output.
+ */
+export class Judge {
+  private readonly _logger?: LDLogger;
+  private readonly _evaluationResponseStructure: Record<string, unknown>;
+
+  constructor(
+    private readonly _aiConfig: LDAIJudgeConfig,
+    private readonly _aiConfigTracker: LDAIConfigTracker,
+    private readonly _aiProvider: AIProvider,
+    logger?: LDLogger,
+  ) {
+    this._logger = logger;
+    this._evaluationResponseStructure = EvaluationSchemaBuilder.build(
+      this._aiConfig.evaluationMetricKeys,
+    );
+  }
+
+  /**
+   * Evaluates an AI response using the judge's configuration.
+   *
+   * @param input The input prompt or question that was provided to the AI
+   * @param output The AI-generated response to be evaluated
+   * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
+   * @returns Promise that resolves to evaluation results or undefined if not sampled
+   */
+  async evaluate(
+    input: string,
+    output: string,
+    samplingRate: number = 1,
+  ): Promise<JudgeResponse | undefined> {
+    try {
+      // Check if judge configuration has evaluation metric keys
+      if (
+        !this._aiConfig.evaluationMetricKeys ||
+        this._aiConfig.evaluationMetricKeys.length === 0
+      ) {
+        this._logger?.warn(
+          'Judge configuration is missing required evaluationMetricKeys',
+          this._aiConfigTracker.getTrackData(),
+        );
+        return undefined;
+      }
+
+      // Check if judge configuration has messages before proceeding
+      if (!this._aiConfig.messages) {
+        this._logger?.warn(
+          'Judge configuration must include messages',
+          this._aiConfigTracker.getTrackData(),
+        );
+        return undefined;
+      }
+
+      // Apply sampling
+      if (Math.random() > samplingRate) {
+        this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`);
+        return undefined;
+      }
+
+      // Construct evaluation messages by combining judge's config messages with input/output
+      const messages = this._constructEvaluationMessages(input, output);
+
+      // Delegate to provider-specific implementation with tracking
+      const response = await this._aiConfigTracker.trackMetricsOf(
+        (result: StructuredResponse) => result.metrics,
+        () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure),
+      );
+
+      let { success } = response.metrics;
+
+      // Parse the structured response
+      const evals = this._parseEvaluationResponse(response.data);
+
+      // Mark the result as unsuccessful if any expected evaluation is missing
+      if (Object.keys(evals).length !== this._aiConfig.evaluationMetricKeys.length) {
+        this._logger?.warn(
+          'Judge evaluation did not return all evaluations',
+          this._aiConfigTracker.getTrackData(),
+        );
+        success = false;
+      }
+
+      return {
+        evals,
+        success,
+      };
+    } catch (error) {
+      this._logger?.error('Judge evaluation failed:', error);
+      return {
+        evals: {},
+        success: false,
+        error: error instanceof Error ? error.message : 'Unknown error',
+      };
+    }
+  }
+
+  /**
+   * Evaluates an AI response from chat messages and response.
+   *
+   * @param messages Array of messages representing the conversation history
+   * @param response The AI response to be evaluated
+   * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
+   * @returns Promise that resolves to evaluation results or undefined if not sampled
+   */
+  async evaluateMessages(
+    messages: LDMessage[],
+    response: ChatResponse,
+    samplingRate: number = 1,
+  ): Promise<JudgeResponse | undefined> {
+    // Convert messages to text and extract output from response
+    const input = messages.length === 0 ? '' : messages.map((msg) => msg.content).join('\r\n');
+    const output = response.message.content;
+
+    // Delegate to standard evaluate method
+    return this.evaluate(input, output, samplingRate);
+  }
+
+  /**
+   * Returns the AI Config used by this judge.
+   */
+  getAIConfig(): LDAIJudgeConfig {
+    return this._aiConfig;
+  }
+
+  /**
+   * Returns the tracker associated with this judge.
+   */
+  getTracker(): LDAIConfigTracker {
+    return this._aiConfigTracker;
+  }
+
+  /**
+   * Returns the AI provider used by this judge.
+   */
+  getProvider(): AIProvider {
+    return this._aiProvider;
+  }
+
+  /**
+   * Constructs evaluation messages by combining judge's config messages with input/output.
+   */
+  private _constructEvaluationMessages(input: string, output: string): LDMessage[] {
+    // Create a copy of the judge's messages and interpolate input/output variables
+    const messages: LDMessage[] = this._aiConfig.messages!.map((msg) => ({
+      ...msg,
+      content: this._interpolateMessage(msg.content, {
+        message_history: input,
+        response_to_evaluate: output,
+      }),
+    }));
+
+    return messages;
+  }
+
+  /**
+   * Interpolates message content with variables using Mustache templating.
+   */
+  private _interpolateMessage(content: string, variables: Record<string, unknown>): string {
+    return Mustache.render(content, variables, undefined, { escape: (item: any) => item });
+  }
+
+  /**
+   * Parses the structured evaluation response from the AI provider.
+   */
+  private _parseEvaluationResponse(data: Record<string, unknown>): Record<string, EvalScore> {
+    const results: Record<string, EvalScore> = {};
+
+    // Validate that the data has the required evaluations structure
+    if (!data.evaluations || typeof data.evaluations !== 'object') {
+      this._logger?.warn('Invalid response: missing or invalid evaluations object');
+      return results;
+    }
+
+    const evaluations = data.evaluations as Record<string, unknown>;
+
+    // Process each expected evaluation metric key
+    this._aiConfig.evaluationMetricKeys.forEach((metricKey) => {
+      const evaluation = evaluations[metricKey];
+
+      if (!evaluation || typeof evaluation !== 'object') {
+        this._logger?.warn(
+          `Missing evaluation for metric key: ${metricKey}`,
+          this._aiConfigTracker.getTrackData(),
+        );
+        return;
+      }
+
+      const evalData = evaluation as Record<string, unknown>;
+
+      // Validate score
+      if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) {
+        this._logger?.warn(
+          `Invalid score evaluated for ${metricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`,
+          this._aiConfigTracker.getTrackData(),
+        );
+        return;
+      }
+
+      // Validate reasoning
+      if (typeof evalData.reasoning !== 'string') {
+        this._logger?.warn(
+          `Invalid reasoning evaluated for ${metricKey}: ${evalData.reasoning}. Reasoning must be a string`,
+          this._aiConfigTracker.getTrackData(),
+        );
+        return;
+      }
+
+      // Create the EvalScore object
+      results[metricKey] = {
+        score: evalData.score,
+        reasoning: evalData.reasoning,
+      };
+    });
+
+    return results;
+  }
+}
diff --git a/packages/sdk/server-ai/src/api/judge/index.ts b/packages/sdk/server-ai/src/api/judge/index.ts
new file mode 100644
index 0000000000..912ec47fb0
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/index.ts
@@ -0,0 +1,2 @@
+export { Judge } from './Judge';
+export type { EvalScore, JudgeResponse, StructuredResponse } from './types';
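A brief consumption sketch for these exports; the `judge` instance is assumed to come from `client.createJudge(...)` as documented earlier, and the import path assumes the package's published name:

```typescript
import type { Judge, JudgeResponse } from '@launchdarkly/server-sdk-ai';

// Evaluate a single input/output pair and log per-metric scores.
async function logEvaluation(judge: Judge): Promise<void> {
  const result: JudgeResponse | undefined = await judge.evaluate(
    'What is the capital of France?',
    'Paris is the capital of France.',
    0.5, // sample roughly half of all calls
  );

  if (result?.success) {
    Object.entries(result.evals).forEach(([metric, { score, reasoning }]) => {
      console.log(`${metric}: ${score} (${reasoning})`);
    });
  }
}
```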
diff --git a/packages/sdk/server-ai/src/api/judge/types.ts b/packages/sdk/server-ai/src/api/judge/types.ts
new file mode 100644
index 0000000000..a265506b17
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/judge/types.ts
@@ -0,0 +1,39 @@
+import { LDAIMetrics } from '../metrics/LDAIMetrics';
+
+/**
+ * Structured response from AI models.
+ */
+export interface StructuredResponse {
+  /** The structured data returned by the model */
+  data: Record<string, unknown>;
+
+  /** The raw response from the model */
+  rawResponse: string;
+
+  /**
+   * Metrics information including success status and token usage.
+   */
+  metrics: LDAIMetrics;
+}
+
+/**
+ * Score and reasoning for a single evaluation metric.
+ */
+export interface EvalScore {
+  /** Score between 0.0 and 1.0 indicating the evaluation result for this metric */
+  score: number;
+  /** Reasoning behind the provided score for this metric */
+  reasoning: string;
+}
+
+/**
+ * Response from a judge evaluation containing scores and reasoning for multiple metrics.
+ */
+export interface JudgeResponse {
+  /** Dictionary where keys are metric names and values contain score and reasoning */
+  evals: Record<string, EvalScore>;
+  /** Whether the evaluation completed successfully */
+  success: boolean;
+  /** Error message if evaluation failed */
+  error?: string;
+}
diff --git a/packages/sdk/server-ai/src/api/providers/AIProvider.ts b/packages/sdk/server-ai/src/api/providers/AIProvider.ts
index 8f6475ef5e..ac039ea194 100644
--- a/packages/sdk/server-ai/src/api/providers/AIProvider.ts
+++ b/packages/sdk/server-ai/src/api/providers/AIProvider.ts
@@ -1,7 +1,8 @@
 import { LDLogger } from '@launchdarkly/js-server-sdk-common';
 
 import { ChatResponse } from '../chat/types';
-import { LDAIConfig, LDMessage } from '../config/LDAIConfig';
+import { LDAIConfig, LDMessage } from '../config/types';
+import { StructuredResponse } from '../judge/types';
 
 /**
  * Abstract base class for AI providers that implement chat model functionality.
@@ -22,10 +23,60 @@
    * This method should convert messages to provider format, invoke the model,
    * and return a ChatResponse with the result and metrics.
    *
+   * Default implementation takes no action and returns a placeholder response.
+   * Provider implementations should override this method.
+   *
    * @param messages Array of LDMessage objects representing the conversation
    * @returns Promise that resolves to a ChatResponse containing the model's response
    */
-  abstract invokeModel(messages: LDMessage[]): Promise<ChatResponse>;
+  async invokeModel(_messages: LDMessage[]): Promise<ChatResponse> {
+    this.logger?.warn('invokeModel not implemented by this provider');
+    return {
+      message: {
+        role: 'assistant',
+        content: '',
+      },
+      metrics: {
+        success: false,
+        usage: {
+          total: 0,
+          input: 0,
+          output: 0,
+        },
+      },
+    };
+  }
+
+  /**
+   * Invoke the chat model with structured output support.
+   * This method should convert messages to provider format, invoke the model with
+   * structured output configuration, and return a structured response.
+   *
+   * Default implementation takes no action and returns a placeholder response.
+   * Provider implementations should override this method.
+   *
+   * @param messages Array of LDMessage objects representing the conversation
+   * @param responseStructure A schema-like object describing the expected structured output
+   * @returns Promise that resolves to a structured response
+   */
+  async invokeStructuredModel(
+    _messages: LDMessage[],
+    _responseStructure: Record<string, unknown>,
+  ): Promise<StructuredResponse> {
+    this.logger?.warn('invokeStructuredModel not implemented by this provider');
+    return {
+      data: {},
+      rawResponse: '',
+      metrics: {
+        success: false,
+        usage: {
+          total: 0,
+          input: 0,
+          output: 0,
+        },
+      },
+    };
+  }
 
   /**
    * Static method that constructs an instance of the provider.
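A minimal sketch of a custom provider overriding the new hook; the class name and fixed payload are hypothetical, and constructor details are omitted because they depend on the concrete provider package:

```typescript
import { AIProvider, LDMessage, StructuredResponse } from '@launchdarkly/server-sdk-ai';

// Hypothetical provider: a real implementation would call its model's
// structured-output API and parse the result against responseStructure.
export class StaticJudgeProvider extends AIProvider {
  async invokeStructuredModel(
    _messages: LDMessage[],
    _responseStructure: Record<string, unknown>,
  ): Promise<StructuredResponse> {
    const data = { evaluations: {} }; // a real provider returns model output matching the schema
    return {
      data,
      rawResponse: JSON.stringify(data),
      metrics: { success: true, usage: { total: 0, input: 0, output: 0 } },
    };
  }
}
```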
diff --git a/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts b/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts index ab3546a35e..0d33eb69a4 100644 --- a/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts +++ b/packages/sdk/server-ai/src/api/providers/AIProviderFactory.ts @@ -1,6 +1,6 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIConfig } from '../config/LDAIConfig'; +import { LDAIConfigKind } from '../config/types'; import { AIProvider } from './AIProvider'; /** @@ -32,7 +32,7 @@ export class AIProviderFactory { * @param defaultAiProvider Optional default AI provider to use */ static async create( - aiConfig: LDAIConfig, + aiConfig: LDAIConfigKind, logger?: LDLogger, defaultAiProvider?: SupportedAIProvider, ): Promise { @@ -91,7 +91,7 @@ export class AIProviderFactory { */ private static async _tryCreateProvider( providerType: SupportedAIProvider, - aiConfig: LDAIConfig, + aiConfig: LDAIConfigKind, logger?: LDLogger, ): Promise { switch (providerType) { @@ -127,7 +127,7 @@ export class AIProviderFactory { private static async _createProvider( packageName: string, providerClassName: string, - aiConfig: LDAIConfig, + aiConfig: LDAIConfigKind, logger?: LDLogger, ): Promise { try {
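To tie the pieces together, an illustrative end-to-end sketch based on the interface docs above; it assumes an initialized `client` and `context` inside an async context, and the config key, judge key, and variables are hypothetical:

```typescript
const chat = await client.createChat(
  'customer_support_chat',
  context,
  {
    enabled: false,
    model: { name: 'gpt-4' },
    provider: { name: 'openai' },
    messages: [{ role: 'system', content: 'You are a helpful customer support agent.' }],
    judgeConfiguration: { judges: [{ key: 'relevance-judge', samplingRate: 1 }] },
  },
  { customerName: 'John' },
);

if (chat) {
  const response = await chat.invoke('I need help with my order');
  console.log(response.message.content);

  // Judge evaluations run in the background; await the promise when the
  // scores are needed. Entries are undefined for judges that were skipped.
  const evaluations = await response.evaluations;
  evaluations?.forEach((evaluation) => {
    if (evaluation?.success) {
      console.log(evaluation.evals);
    }
  });
}
```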