@@ -19,6 +19,8 @@ import {
1919 NormalizedConversationEntry ,
2020} from './utils/conversation' ;
2121
/**
 * Matches the invisible code points stripped before PII matching:
 * U+200B (zero width space), U+200C (zero width non-joiner),
 * U+200D (zero width joiner), U+2060 (word joiner) and U+FEFF (BOM / zero
 * width no-break space).
 *
 * The pattern must not contain whitespace: a space inside the literal would be
 * matched literally and the bare code points would slip through.
 * The regex is global, so use it with `String.prototype.replace`; `test`/`exec`
 * would carry `lastIndex` state between calls.
 */
const ZERO_WIDTH_CHARACTERS = /[\u200B\u200C\u200D\u2060\uFEFF]/g;
23+
/** A callable that accepts any number of `unknown` arguments and returns `unknown`. */
type UnknownFunction = (...args: unknown[]) => unknown;
2325
2426function toRecord ( value : unknown ) : Record < string , unknown > | null {
@@ -191,6 +193,7 @@ export abstract class GuardrailsBaseClient {
191193 }
192194
193195 const piiMappings : Record < string , string > = { } ;
196+ let maskedTextOverride : string | undefined ;
194197 for ( const result of preflightResults ) {
195198 if ( result . info && 'detected_entities' in result . info ) {
196199 const detected = result . info . detected_entities as Record < string , string [ ] > ;
@@ -199,28 +202,51 @@ export abstract class GuardrailsBaseClient {
199202 piiMappings [ entity ] = `<${ entityType } >` ;
200203 }
201204 }
205+ if ( typeof result . info . checked_text === 'string' && ! maskedTextOverride ) {
206+ maskedTextOverride = result . info . checked_text ;
207+ }
202208 }
203209 }
204210
205- if ( Object . keys ( piiMappings ) . length === 0 ) {
211+ if ( ! maskedTextOverride && Object . keys ( piiMappings ) . length === 0 ) {
206212 return data ;
207213 }
208214
/**
 * Canonicalizes text before PII matching: applies Unicode NFKC normalization
 * and removes every character matched by ZERO_WIDTH_CHARACTERS.
 */
const normalizeForMasking = (text: string): string =>
  text.normalize('NFKC').replace(ZERO_WIDTH_CHARACTERS, '');

// Kept only when the payload is a plain string; maskText compares against it
// to decide whether the override text may replace the input wholesale.
const originalStringData = typeof data === 'string' ? data : undefined;

// Normalize every detected entity once instead of on each maskText call, drop
// keys that normalize to the empty string (splitting on '' would insert the
// token between every character), and order by the length of the *normalized*
// key: that is the string actually searched for, and NFKC can change a key's
// length, so ordering by raw length could let a shorter key clobber part of a
// longer overlapping one.
// NOTE(review): assumes piiMappings is not mutated after this point - confirm.
const normalizedPiiEntries = Object.entries(piiMappings)
  .map(([originalPii, maskedToken]): [string, string] => [
    normalizeForMasking(originalPii),
    maskedToken,
  ])
  .filter(([normalizedKey]) => normalizedKey.length > 0)
  .sort((a, b) => b[0].length - a[0].length);

/**
 * Replaces detected PII in `text` with its mask token.
 *
 * @param text - Text to mask. Non-string values are returned untouched.
 * @returns
 *  - the normalized text with every mapped entity replaced, when at least one
 *    replacement changed the text;
 *  - otherwise `maskedTextOverride`, when it is set and `text` is exactly the
 *    original string payload;
 *  - otherwise `text` unchanged (not normalized).
 */
const maskText = (text: string): string => {
  // Runtime guard: the static type says string, but non-string values are
  // passed back as-is rather than rejected.
  if (typeof text !== 'string') {
    return text as unknown as string;
  }

  const normalizedOriginal = normalizeForMasking(text);
  let maskedText = normalizedOriginal;

  for (const [normalizedKey, maskedToken] of normalizedPiiEntries) {
    if (maskedText.includes(normalizedKey)) {
      maskedText = maskedText.split(normalizedKey).join(maskedToken);
    }
  }

  // Only hand back the normalized form when masking actually changed it, so
  // text without PII keeps its original code points.
  if (maskedText !== normalizedOriginal) {
    return maskedText;
  }

  // No mapping matched: fall back to the override text, but only for the
  // top-level string payload it corresponds to.
  if (maskedTextOverride && originalStringData !== undefined && text === originalStringData) {
    return maskedTextOverride;
  }

  return text;
};
225251
226252 if ( typeof data === 'string' ) {
0 commit comments