Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 52 additions & 3 deletions src/__tests__/unit/checks/keywords-urls.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,65 @@ describe('keywords guardrail', () => {
expect(result.info?.totalKeywords).toBe(1);
});

it('ignores text without the configured keywords', () => {
const result = keywordsCheck(
it('ignores text without the configured keywords', async () => {
const result = await keywordsCheck(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`keywordsCheck` is not async. Can you remove `async` and `await` here and in all the tests added below?

{},
'All clear content',
KeywordsConfig.parse({ keywords: ['secret'] })
) as GuardrailResult;
);

expect(result.tripwireTriggered).toBe(false);
expect(result.info?.matchedKeywords).toEqual([]);
});

it('should not match partial words', () => {
  // 'orld' only occurs inside the longer word 'world', so it must not be reported.
  // keywordsCheck is synchronous; no async/await is needed.
  const result = keywordsCheck(
    {},
    'Hello, world!',
    KeywordsConfig.parse({ keywords: ['orld'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(false);
});

it('should match numbers', () => {
  // A keyword ending in digits must match when it appears as a whole token.
  const result = keywordsCheck(
    {},
    'Hello, world123',
    KeywordsConfig.parse({ keywords: ['world123'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(true);
  // `info` is accessed with optional chaining, as in the other tests in this suite.
  expect(result.info?.matchedKeywords).toEqual(['world123']);
});

it('should not match partial numbers', () => {
  // 'world123' is followed by more digits in the text, so the boundary check must reject it.
  const result = keywordsCheck(
    {},
    'Hello, world12345',
    KeywordsConfig.parse({ keywords: ['world123'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(false);
});

it('should match underscore', () => {
  // Underscores inside the keyword itself must not prevent a whole-token match.
  const result = keywordsCheck(
    {},
    'Hello, w_o_r_l_d',
    KeywordsConfig.parse({ keywords: ['w_o_r_l_d'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(true);
  // `info` is accessed with optional chaining, as in the other tests in this suite.
  expect(result.info?.matchedKeywords).toEqual(['w_o_r_l_d']);
});

it('should not match in between underscore', () => {
  // Underscores count as word characters, so 'world' inside 'test_world_test' is a partial match.
  const result = keywordsCheck(
    {},
    'Hello, test_world_test',
    KeywordsConfig.parse({ keywords: ['world'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(false);
});

it('should work with chinese characters', () => {
  // Han characters are not ASCII word characters; matching them relies on
  // Unicode-aware boundaries rather than `\b`.
  const result = keywordsCheck(
    {},
    '你好',
    KeywordsConfig.parse({ keywords: ['你好'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(true);
});

it('should work with chinese characters with numbers', () => {
  // A keyword mixing Han characters and digits must match as a whole token.
  const result = keywordsCheck(
    {},
    '你好123',
    KeywordsConfig.parse({ keywords: ['你好123'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(true);
  // `info` is accessed with optional chaining, as in the other tests in this suite.
  expect(result.info?.matchedKeywords).toEqual(['你好123']);
});

it('should not match partial chinese characters with numbers', () => {
  // '你好123' is followed by more digits in the text, so it is only a partial match.
  const result = keywordsCheck(
    {},
    '你好12345',
    KeywordsConfig.parse({ keywords: ['你好123'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(false);
});

it('should apply word boundaries to all keywords in a multi-keyword pattern', () => {
  // 'test' is only a prefix of 'testing' and must be rejected, while 'hello' and
  // 'world' are whole words. This guards against the boundary applying only to
  // the first or last alternative in the combined pattern.
  const result = keywordsCheck(
    {},
    'testing hello world',
    KeywordsConfig.parse({ keywords: ['test', 'hello', 'world'] })
  ) as GuardrailResult;

  expect(result.tripwireTriggered).toBe(true);
  // `info` is accessed with optional chaining, as in the other tests in this suite.
  expect(result.info?.matchedKeywords).toEqual(['hello', 'world']);
});
});

describe('urls guardrail', () => {
Expand Down
8 changes: 6 additions & 2 deletions src/checks/keywords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,12 @@ export const keywordsCheck: CheckFn<KeywordsContext, string, KeywordsConfig> = (
const escapedKeywords = sanitizedKeywords.map((k: string) =>
k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
);
const patternText = `\\b(?:${escapedKeywords.join('|')})\\b`;
const pattern = new RegExp(patternText, 'gi'); // case-insensitive, global

// \p{L}|\p{N}|_ - any Unicode letter, number, or underscore. Used as a Unicode-aware alternative to \b
// (?<!\\p{L}|\\p{N}|_) - not preceded by a letter, number, or underscore
// (?!\\p{L}|\\p{N}|_) - not followed by a letter, number, or underscore
const patternText = `(?<!\\p{L}|\\p{N}|_)(?:${escapedKeywords.join('|')})(?!\\p{L}|\\p{N}|_)`;
const pattern = new RegExp(patternText, 'giu'); // case-insensitive, global, unicode

const matches: string[] = [];
let match;
Expand Down