Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/__tests__/unit/checks/moderation-secret-keys.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,85 @@ describe('moderation guardrail', () => {
expect(result.tripwireTriggered).toBe(false);
expect(result.info?.error).toBe('Moderation API call failed');
});

it('uses context client when available', async () => {
// Track whether context client was used
let contextClientUsed = false;
const contextCreateMock = vi.fn().mockImplementation(async () => {
contextClientUsed = true;
Comment on lines +74 to +75
Copy link

Copilot AI Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The contextClientUsed flag is unnecessary. You can check if the mock was called using expect(contextCreateMock).toHaveBeenCalled() instead of tracking this manually.

Copilot uses AI. Check for mistakes.
return {
results: [
{
categories: {
[Category.HATE]: false,
[Category.VIOLENCE]: false,
},
},
],
};
});

// Create a context with a guardrailLlm client
// We need to import OpenAI to create a proper instance
const OpenAI = (await import('openai')).default;
const contextClient = new OpenAI({ apiKey: 'test-context-key' });
contextClient.moderations = {
create: contextCreateMock,
} as unknown as typeof contextClient.moderations;

const ctx = { guardrailLlm: contextClient };
const cfg = ModerationConfig.parse({ categories: [Category.HATE] });
const result = await moderationCheck(ctx, 'test text', cfg);

// Verify the context client was used
expect(contextClientUsed).toBe(true);
expect(contextCreateMock).toHaveBeenCalledWith({
model: 'omni-moderation-latest',
input: 'test text',
});
expect(result.tripwireTriggered).toBe(false);
});

it('falls back to default client for third-party providers', async () => {
// Track whether fallback client was used
let fallbackUsed = false;

// The default mock from vi.mock will be used for the fallback
createMock.mockImplementation(async () => {
fallbackUsed = true;
Comment on lines +115 to +116
Copy link

Copilot AI Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The fallbackUsed flag is unnecessary. You can verify fallback behavior by checking that createMock was called, which already indicates the fallback client was used.

Copilot uses AI. Check for mistakes.
return {
results: [
{
categories: {
[Category.HATE]: false,
},
},
],
};
});

// Create a context client that simulates a third-party provider
// When moderation is called, it should raise a 404 error
const contextCreateMock = vi.fn().mockRejectedValue({
status: 404,
message: '404 page not found',
});

const OpenAI = (await import('openai')).default;
const thirdPartyClient = new OpenAI({ apiKey: 'third-party-key', baseURL: 'https://localhost:8080/v1' });
thirdPartyClient.moderations = {
create: contextCreateMock,
} as unknown as typeof thirdPartyClient.moderations;

const ctx = { guardrailLlm: thirdPartyClient };
const cfg = ModerationConfig.parse({ categories: [Category.HATE] });
const result = await moderationCheck(ctx, 'test text', cfg);

// Verify the fallback client was used (not the third-party one)
expect(contextCreateMock).toHaveBeenCalled();
expect(fallbackUsed).toBe(true);
expect(result.tripwireTriggered).toBe(false);
});
});

describe('secret key guardrail', () => {
Expand Down
80 changes: 50 additions & 30 deletions src/checks/moderation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,30 @@ export const ModerationContext = z.object({

export type ModerationContext = z.infer<typeof ModerationContext>;

/**
 * Decide whether a caught value looks like an HTTP 404 (Not Found) error.
 *
 * The check is structural: the value must be a non-null object exposing a
 * `status` property (own or inherited) that is strictly equal to the number
 * 404. No specific error class is required.
 *
 * @param error The caught value to inspect; may be anything
 * @returns True only when `error` is an object whose `status` is exactly 404
 */
function isNotFoundError(error: unknown): boolean {
  // Primitives, null, undefined and functions cannot qualify.
  if (typeof error !== 'object' || error === null) {
    return false;
  }

  // Strict comparison: a string status such as '404' does not match.
  return 'status' in error && error.status === 404;
}

/**
 * Issue a moderation request through the given client.
 *
 * The request always targets the `omni-moderation-latest` model and passes
 * `data` as the input. The result of `client.moderations.create` is returned
 * as-is (it is not awaited here), so any rejection surfaces to the caller.
 *
 * @param client The OpenAI client whose `moderations.create` method is invoked
 * @param data The text to submit for moderation
 * @returns Whatever `client.moderations.create` returns for this request
 */
function callModerationAPI(client: OpenAI, data: string) {
  const request = {
    model: 'omni-moderation-latest',
    input: data,
  };

  // Invoke as a method on `client.moderations` so `this` stays bound.
  return client.moderations.create(request);
}

/**
* Guardrail check_fn to flag disallowed content categories using OpenAI moderation API.
*
Expand All @@ -102,39 +126,35 @@ export const moderationCheck: CheckFn<ModerationContext, string, ModerationConfi
const configObj = actualConfig as Record<string, unknown>;
const categories = (configObj.categories as string[]) || Object.values(Category);

// Reuse provided client only if it targets the official OpenAI API.
const reuseClientIfOpenAI = (context: unknown): OpenAI | null => {
try {
const contextObj = context as Record<string, unknown>;
const candidate = contextObj?.guardrailLlm;
if (!candidate || typeof candidate !== 'object') return null;
if (!(candidate instanceof OpenAI)) return null;

const candidateObj = candidate as unknown as Record<string, unknown>;
const baseURL: string | undefined =
(candidateObj.baseURL as string) ??
((candidateObj._client as Record<string, unknown>)?.baseURL as string) ??
(candidateObj._baseURL as string);

if (
baseURL === undefined ||
(typeof baseURL === 'string' && baseURL.includes('api.openai.com'))
) {
return candidate as OpenAI;
}
return null;
} catch {
return null;
// Get client from context if available
let client: OpenAI | null = null;
if (ctx) {
const contextObj = ctx as Record<string, unknown>;
const candidate = contextObj.guardrailLlm;
if (candidate && candidate instanceof OpenAI) {
client = candidate;
}
};

const client = reuseClientIfOpenAI(ctx) ?? new OpenAI();
}
Comment on lines 142 to 150
Copy link

Copilot AI Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The removal of base URL validation means the code will now attempt to use any OpenAI client from context, regardless of whether it points to the official API. While the 404 fallback handles providers without moderation support, this could lead to unnecessary API calls and latency when using third-party providers. Consider documenting this behavioral change or adding a configuration option to skip the initial attempt for known incompatible providers.

Copilot uses AI. Check for mistakes.

try {
const resp = await client.moderations.create({
model: 'omni-moderation-latest',
input: data,
});
// Try the context client first, fall back if moderation endpoint doesn't exist
let resp: Awaited<ReturnType<typeof callModerationAPI>>;
if (client !== null) {
try {
resp = await callModerationAPI(client, data);
} catch (error) {
// Moderation endpoint doesn't exist on this provider (e.g., third-party)
// Fall back to the OpenAI client
if (isNotFoundError(error)) {
resp = await callModerationAPI(new OpenAI(), data);
} else {
throw error;
}
}
} else {
// No context client, use fallback
resp = await callModerationAPI(new OpenAI(), data);
}

const results = resp.results || [];
if (!results.length) {
Expand Down