From 97927536f81085b80edace1aab62606eb510d80c Mon Sep 17 00:00:00 2001 From: yehorkardash Date: Fri, 14 Nov 2025 16:04:16 +0100 Subject: [PATCH 1/4] support Unicode characters --- .../unit/checks/keywords-urls.test.ts | 54 +++++++++++++++++-- src/checks/keywords.ts | 7 ++- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 8f5cfa5..7946167 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -22,16 +22,64 @@ describe('keywords guardrail', () => { expect(result.info?.totalKeywords).toBe(1); }); - it('ignores text without the configured keywords', () => { - const result = keywordsCheck( + it('ignores text without the configured keywords', async () => { + const result = await keywordsCheck( {}, 'All clear content', KeywordsConfig.parse({ keywords: ['secret'] }) - ) as GuardrailResult; + ); expect(result.tripwireTriggered).toBe(false); expect(result.info?.matchedKeywords).toEqual([]); }); + + it('should return the correct result', async () => { + const result = await keywordsCheck({}, 'Hello, world!', KeywordsConfig.parse({ keywords: ['hello', 'world'] })); + expect(result.tripwireTriggered).toEqual(true); + }); + + it('should not match partial words', async () => { + const result = await keywordsCheck({}, 'Hello, world!', KeywordsConfig.parse({ keywords: ['orld'] })); + expect(result.tripwireTriggered).toEqual(false); + }); + + it('should match numbers', async () => { + const result = await keywordsCheck({}, 'Hello, world123', KeywordsConfig.parse({ keywords: ['world123'] })); + expect(result.tripwireTriggered).toEqual(true); + expect(result.info.matchedKeywords).toEqual(['world123']); + }); + + it('should not match partial numbers', async () => { + const result = await keywordsCheck({}, 'Hello, world12345', KeywordsConfig.parse({ keywords: ['world123'] })); + expect(result.tripwireTriggered).toEqual(false); + }); + + it('should match underscore', async () => { + const result = await keywordsCheck({}, 'Hello, w_o_r_l_d', KeywordsConfig.parse({ keywords: ['w_o_r_l_d'] })); + expect(result.tripwireTriggered).toEqual(true); + expect(result.info.matchedKeywords).toEqual(['w_o_r_l_d']); + }); + + it('should not match in between underscore', async () => { + const result = await keywordsCheck({}, 'Hello, test_world_test', KeywordsConfig.parse({ keywords: ['world'] })); + expect(result.tripwireTriggered).toEqual(false); + }); + + it('should work with chinese characters', async () => { + const result = await keywordsCheck({}, '你好', KeywordsConfig.parse({ keywords: ['你好'] })); + expect(result.tripwireTriggered).toEqual(true); + }); + + it('should work with chinese characters with numbers', async () => { + const result = await keywordsCheck({}, '你好123', KeywordsConfig.parse({ keywords: ['你好123'] })); + expect(result.tripwireTriggered).toEqual(true); + expect(result.info.matchedKeywords).toEqual(['你好123']); + }); + + it('should not match partial chinese characters with numbers', async () => { + const result = await keywordsCheck({}, '你好12345', KeywordsConfig.parse({ keywords: ['你好123'] })); + expect(result.tripwireTriggered).toEqual(false); + }); }); describe('urls guardrail', () => { diff --git a/src/checks/keywords.ts b/src/checks/keywords.ts index fe23ea6..3f843f7 100644 --- a/src/checks/keywords.ts +++ b/src/checks/keywords.ts @@ -57,8 +57,11 @@ export const keywordsCheck: CheckFn = ( const escapedKeywords = sanitizedKeywords.map((k: string) => k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') ); - const patternText = `\\b(?:${escapedKeywords.join('|')})\\b`; - const pattern = new RegExp(patternText, 'gi'); // case-insensitive, global + // \p{L}|\p{N}|_ - any unicode letter, number, or underscore. Alternative to \b + // (? Date: Fri, 14 Nov 2025 16:05:47 +0100 Subject: [PATCH 2/4] add line break --- src/checks/keywords.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/checks/keywords.ts b/src/checks/keywords.ts index 3f843f7..fd6a282 100644 --- a/src/checks/keywords.ts +++ b/src/checks/keywords.ts @@ -57,6 +57,7 @@ export const keywordsCheck: CheckFn = ( const escapedKeywords = sanitizedKeywords.map((k: string) => k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') ); + // \p{L}|\p{N}|_ - any unicode letter, number, or underscore. Alternative to \b // (? Date: Fri, 14 Nov 2025 16:20:26 +0100 Subject: [PATCH 3/4] add non-capturing group --- src/__tests__/unit/checks/keywords-urls.test.ts | 6 ++++++ src/checks/keywords.ts | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 7946167..60abc52 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -80,6 +80,12 @@ describe('keywords guardrail', () => { const result = await keywordsCheck({}, '你好12345', KeywordsConfig.parse({ keywords: ['你好123'] })); expect(result.tripwireTriggered).toEqual(false); }); + + it('should apply word boundaries to all keywords in a multi-keyword pattern', async () => { + const result = await keywordsCheck({}, 'testing hello world', KeywordsConfig.parse({ keywords: ['test', 'hello', 'world'] })); + expect(result.tripwireTriggered).toEqual(true); + expect(result.info.matchedKeywords).toEqual(['hello', 'world']); + }); }); describe('urls guardrail', () => { diff --git a/src/checks/keywords.ts b/src/checks/keywords.ts index fd6a282..35f6c32 100644 --- a/src/checks/keywords.ts +++ b/src/checks/keywords.ts @@ -61,7 +61,7 @@ export const keywordsCheck: CheckFn = ( // \p{L}|\p{N}|_ - any unicode letter, number, or underscore. Alternative to \b // (? Date: Mon, 17 Nov 2025 09:22:18 +0100 Subject: [PATCH 4/4] update comments, remove redundant test --- src/__tests__/unit/checks/keywords-urls.test.ts | 5 ----- src/checks/keywords.ts | 8 ++++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 60abc52..03b18cb 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -33,11 +33,6 @@ describe('keywords guardrail', () => { expect(result.info?.matchedKeywords).toEqual([]); }); - it('should return the correct result', async () => { - const result = await keywordsCheck({}, 'Hello, world!', KeywordsConfig.parse({ keywords: ['hello', 'world'] })); - expect(result.tripwireTriggered).toEqual(true); - }); - it('should not match partial words', async () => { const result = await keywordsCheck({}, 'Hello, world!', KeywordsConfig.parse({ keywords: ['orld'] })); expect(result.tripwireTriggered).toEqual(false); diff --git a/src/checks/keywords.ts b/src/checks/keywords.ts index 35f6c32..01cd525 100644 --- a/src/checks/keywords.ts +++ b/src/checks/keywords.ts @@ -59,10 +59,10 @@ export const keywordsCheck: CheckFn = ( ); // \p{L}|\p{N}|_ - any unicode letter, number, or underscore. Alternative to \b - // (?