Skip to content

Commit b26aad8

Browse files
authored
Adding Korean RRN PII detection (#25)
* Adding Korean RRN PII detection
1 parent c8cae92 commit b26aad8

File tree

3 files changed

+72
-1
lines changed

3 files changed

+72
-1
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,5 @@ __pycache__/
102102
*.pyc
103103
.pytest_cache/
104104

105-
# internal
105+
# internal examples
106106
internal_examples/

src/__tests__/unit/checks/pii.test.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,68 @@ describe('pii guardrail', () => {
4242

4343
await expect(pii({}, '', config)).rejects.toThrow('Text cannot be empty or null');
4444
});
45+
46+
it('detects valid Korean Resident Registration Number (KR_RRN)', async () => {
47+
const config = PIIConfig.parse({
48+
entities: [PIIEntity.KR_RRN],
49+
block: false,
50+
});
51+
// Valid format: YYMMDD-GNNNNNN (900101 = Jan 1, 1990, gender digit 1)
52+
const text = 'Korean RRN: 900101-1234567';
53+
54+
const result = await pii({}, text, config);
55+
56+
expect(result.tripwireTriggered).toBe(false);
57+
expect((result.info?.detected_entities as Record<string, string[]>)?.KR_RRN).toEqual(['900101-1234567']);
58+
expect(result.info?.checked_text).toBe('Korean RRN: <KR_RRN>');
59+
});
60+
61+
it('detects multiple valid KR_RRN formats', async () => {
62+
const config = PIIConfig.parse({
63+
entities: [PIIEntity.KR_RRN],
64+
block: false,
65+
});
66+
// Testing different valid dates with gender digits 2, 3, and 4
67+
const text = 'RRNs: 850315-2345678, 001231-3456789, 750628-4123456';
68+
69+
const result = await pii({}, text, config);
70+
71+
expect(result.tripwireTriggered).toBe(false);
72+
expect((result.info?.detected_entities as Record<string, string[]>)?.KR_RRN).toHaveLength(3);
73+
expect((result.info?.detected_entities as Record<string, string[]>)?.KR_RRN).toContain('850315-2345678');
74+
expect((result.info?.detected_entities as Record<string, string[]>)?.KR_RRN).toContain('001231-3456789');
75+
expect((result.info?.detected_entities as Record<string, string[]>)?.KR_RRN).toContain('750628-4123456');
76+
});
77+
78+
it('does not detect invalid KR_RRN patterns (false positives)', async () => {
79+
const config = PIIConfig.parse({
80+
entities: [PIIEntity.KR_RRN],
81+
block: false,
82+
});
83+
// Invalid patterns that should NOT be detected:
84+
// - Invalid month (13)
85+
// - Invalid day (00, 32)
86+
// - Invalid gender digit (0, 5)
87+
// - Random tracking numbers
88+
const text = 'Invalid: 901301-1234567, 900100-1234567, 900132-1234567, 900101-0234567, 900101-5234567, 123456-7890123';
89+
90+
const result = await pii({}, text, config);
91+
92+
expect(result.tripwireTriggered).toBe(false);
93+
expect(result.info?.detected_entities).toEqual({});
94+
expect(result.info?.checked_text).toBe(text); // No masking should occur
95+
});
96+
97+
it('triggers tripwire for KR_RRN when block=true', async () => {
98+
const config = PIIConfig.parse({
99+
entities: [PIIEntity.KR_RRN],
100+
block: true,
101+
});
102+
const text = 'Korean RRN: 900101-1234567';
103+
104+
const result = await pii({}, text, config);
105+
106+
expect(result.tripwireTriggered).toBe(true);
107+
expect((result.info?.detected_entities as Record<string, string[]>)?.KR_RRN).toEqual(['900101-1234567']);
108+
});
45109
});

src/checks/pii.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ export enum PIIEntity {
125125

126126
// Finland
127127
FI_PERSONAL_IDENTITY_CODE = 'FI_PERSONAL_IDENTITY_CODE',
128+
129+
// Korea
130+
KR_RRN = 'KR_RRN',
128131
}
129132

130133
/**
@@ -236,6 +239,10 @@ const DEFAULT_PII_PATTERNS: Record<PIIEntity, RegExp> = {
236239

237240
// Finland
238241
[PIIEntity.FI_PERSONAL_IDENTITY_CODE]: /\b\d{6}[+-A]\d{3}[A-Z0-9]\b/g,
242+
243+
// Korea
244+
// Format: YYMMDD-GNNNNNN where YY=year, MM=month(01-12), DD=day(01-31), G=gender/century(1-4)
245+
[PIIEntity.KR_RRN]: /\b\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])-[1-4]\d{6}\b/g,
239246
};
240247

241248
/**

0 commit comments

Comments
 (0)