@@ -5,13 +5,10 @@ import { profaneWords } from "./data";
55
66export class Profanity {
77 options : ProfanityOptions ;
8-
98 whitelist : List ;
109
1110 private blacklist : List ;
12-
1311 private removed : List ;
14-
1512 private regexes : Map < string , RegExp > ;
1613
1714 constructor ( options ?: ProfanityOptions | Partial < ProfanityOptions > ) {
@@ -23,6 +20,13 @@ export class Profanity {
2320 this . regexes = new Map < string , RegExp > ( ) ;
2421 }
2522
23+ /**
24+ * Checks if the given text contains any profanity.
25+ * @param text - The text to check for profanity.
26+ * @param languages - Optional array of language codes to use for profanity detection.
27+ * If not provided, uses the languages specified in the options.
28+ * @returns True if profanity is found, false otherwise.
29+ */
2630 exists ( text : string , languages ?: string [ ] ) : boolean {
2731 if ( typeof text !== "string" ) {
2832 return false ;
@@ -34,49 +38,26 @@ export class Profanity {
3438 const lowercaseText = text . toLowerCase ( ) ;
3539
3640 let match : RegExpExecArray | null ;
37- do {
38- match = regex . exec ( lowercaseText ) ;
39- if ( match !== null ) {
40- const matchStart = match . index ;
41- const matchEnd = matchStart + match [ 0 ] . length ;
42-
43- // Check if the matched word is part of a whitelisted word
44- let isWhitelisted = false ;
45- this . whitelist . words . forEach ( ( whitelistedWord ) => {
46- const whitelistedIndex = lowercaseText . indexOf ( whitelistedWord , Math . max ( 0 , matchStart - whitelistedWord . length + 1 ) ) ;
47- if ( whitelistedIndex !== - 1 ) {
48- const whitelistedEnd = whitelistedIndex + whitelistedWord . length ;
49-
50- if ( this . options . wholeWord ) {
51- // For whole word matching, ensure the whitelisted word exactly matches the profane word
52- // and is not part of a hyphenated or underscore-separated word
53- if (
54- matchStart === whitelistedIndex &&
55- matchEnd === whitelistedEnd &&
56- ( matchStart === 0 || ! / [ \w - _ ] / . test ( lowercaseText [ matchStart - 1 ] ) ) &&
57- // eslint-disable-next-line security/detect-object-injection
58- ( matchEnd === lowercaseText . length || ! / [ \w - _ ] / . test ( lowercaseText [ matchEnd ] ) )
59- ) {
60- isWhitelisted = true ;
61- }
62- } else {
63- // For partial matching, check if the profane word is contained within the whitelisted word
64- if ( ( matchStart >= whitelistedIndex && matchStart < whitelistedEnd ) || ( matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd ) ) {
65- isWhitelisted = true ;
66- }
67- }
68- }
69- } ) ;
41+ while ( ( match = regex . exec ( lowercaseText ) ) !== null ) {
42+ const matchStart = match . index ;
43+ const matchEnd = matchStart + match [ 0 ] . length ;
7044
71- if ( ! isWhitelisted ) {
72- return true ;
73- }
45+ if ( ! this . isWhitelisted ( matchStart , matchEnd , lowercaseText ) ) {
46+ return true ;
7447 }
75- } while ( match !== null ) ;
48+ }
7649
7750 return false ;
7851 }
7952
53+ /**
54+ * Censors profanity in the given text.
55+ * @param text - The text to censor.
56+ * @param censorType - The type of censoring to apply. Defaults to CensorType.Word.
57+ * @param languages - Optional array of language codes to use for profanity detection.
58+ * If not provided, uses the languages specified in the options.
59+ * @returns The censored text.
60+ */
8061 censor ( text : string , censorType : CensorType = CensorType . Word , languages ?: string [ ] ) : string {
8162 if ( typeof text !== "string" ) {
8263 return text ;
@@ -87,45 +68,37 @@ export class Profanity {
8768
8869 const lowercaseText = text . toLowerCase ( ) ;
8970
90- switch ( censorType ) {
91- case CensorType . Word :
92- return text . replace ( regex , ( match ) => {
93- const underscore = match . includes ( "_" ) ? "_" : "" ;
94- return this . options . grawlix + underscore ;
95- } ) ;
96- case CensorType . FirstChar : {
97- return this . replaceProfanity ( text , lowercaseText , ( word ) => this . options . grawlixChar + word . slice ( 1 ) , regex ) ;
98- }
99- case CensorType . FirstVowel :
100- case CensorType . AllVowels : {
101- const vowelRegex = new RegExp ( "[aeiou]" , censorType === CensorType . FirstVowel ? "i" : "ig" ) ;
102- return this . replaceProfanity ( text , lowercaseText , ( word ) => word . replace ( vowelRegex , this . options . grawlixChar ) , regex ) ;
103- }
104- default :
105- throw new Error ( `Invalid replacement type: "${ censorType } "` ) ;
106- }
107- }
108-
109- private replaceProfanity ( text : string , lowercaseText : string , replacer : ( word : string ) => string , regex : RegExp ) : string {
110- let result = text ;
111- let offset = 0 ;
112-
113- let match : RegExpExecArray | null ;
114- do {
115- match = regex . exec ( lowercaseText ) ;
116- if ( match !== null ) {
117- const matchStart = match . index ;
118- const matchEnd = matchStart + match [ 0 ] . length ;
119- const originalWord = text . slice ( matchStart + offset , matchEnd + offset ) ;
120- const censoredWord = replacer ( originalWord ) ;
121- result = result . slice ( 0 , matchStart + offset ) + censoredWord + result . slice ( matchEnd + offset ) ;
122- offset += censoredWord . length - originalWord . length ;
123- }
124- } while ( match !== null ) ;
125-
126- return result ;
71+ return this . replaceProfanity (
72+ text ,
73+ lowercaseText ,
74+ ( word , start , end ) => {
75+ if ( this . isWhitelisted ( start , end , lowercaseText ) ) {
76+ return word ;
77+ }
78+ switch ( censorType ) {
79+ case CensorType . Word : {
80+ const underscore = word . includes ( "_" ) ? "_" : "" ;
81+ return this . options . grawlix + underscore ;
82+ }
83+ case CensorType . FirstChar :
84+ return this . options . grawlixChar + word . slice ( 1 ) ;
85+ case CensorType . FirstVowel :
86+ case CensorType . AllVowels : {
87+ const vowelRegex = new RegExp ( "[aeiou]" , censorType === CensorType . FirstVowel ? "i" : "ig" ) ;
88+ return word . replace ( vowelRegex , this . options . grawlixChar ) ;
89+ }
90+ default :
91+ throw new Error ( `Invalid replacement type: "${ censorType } "` ) ;
92+ }
93+ } ,
94+ regex ,
95+ ) ;
12796 }
12897
98+ /**
99+ * Adds words to the profanity blacklist.
100+ * @param words - An array of words to add to the blacklist.
101+ */
129102 addWords ( words : string [ ] ) : void {
130103 const removedWords : string [ ] = [ ] ;
131104 const blacklistWords : string [ ] = [ ] ;
@@ -147,6 +120,10 @@ export class Profanity {
147120 }
148121 }
149122
123+ /**
124+ * Removes words from the profanity blacklist.
125+ * @param words - An array of words to remove from the blacklist.
126+ */
150127 removeWords ( words : string [ ] ) : void {
151128 const blacklistedWords : string [ ] = [ ] ;
152129 const removeWords : string [ ] = [ ] ;
@@ -168,6 +145,72 @@ export class Profanity {
168145 }
169146 }
170147
/**
 * Checks whether a profanity match is covered by a whitelisted word.
 *
 * - With `options.wholeWord` enabled, the match is whitelisted only when the matched text is exactly
 *   a whitelisted word and is not joined to a neighbouring word character, hyphen or underscore.
 * - Otherwise (partial matching), the match is whitelisted when an occurrence of a whitelisted word
 *   overlaps it: the match starts or ends inside the whitelisted word, or the whitelisted word lies
 *   entirely inside the match.
 *
 * Empty whitelist entries are ignored; they can never whitelist a match.
 *
 * @param matchStart - Index in `text` of the first character of the match.
 * @param matchEnd - Index in `text` one past the last character of the match.
 * @param text - The lowercase text being checked.
 * @returns True if the match is whitelisted, false otherwise.
 */
private isWhitelisted(matchStart: number, matchEnd: number, text: string): boolean {
  // Characters that glue a match to a neighbouring word. `\w` already contains "_", so this is the
  // same character set as /[\w-_]/; it is created once instead of once per whitelist entry.
  const joiner = /[\w-]/;

  for (const whitelistedWord of this.whitelist.words) {
    // `indexOf("")` always succeeds with a zero-length hit, which would satisfy the
    // "whitelisted word inside the match" test below and whitelist every match.
    if (whitelistedWord.length === 0) {
      continue;
    }

    if (this.options.wholeWord) {
      // Compare directly at the match position. Searching from a look-behind window could land on
      // an earlier, overlapping occurrence and miss the one that starts exactly at `matchStart`.
      const isExactWord = matchEnd - matchStart === whitelistedWord.length && text.startsWith(whitelistedWord, matchStart);
      // `charAt` returns "" when out of range, so the explicit edge checks are only for clarity.
      const freeBefore = matchStart === 0 || !joiner.test(text.charAt(matchStart - 1));
      const freeAfter = matchEnd === text.length || !joiner.test(text.charAt(matchEnd));

      if (isExactWord && freeBefore && freeAfter) {
        return true;
      }
      continue;
    }

    // Partial matching: start far enough back that an occurrence whose last character sits on
    // `matchStart` is still found.
    const searchFrom = Math.max(0, matchStart - whitelistedWord.length + 1);
    const whitelistedIndex = text.indexOf(whitelistedWord, searchFrom);
    if (whitelistedIndex === -1) {
      continue;
    }
    const whitelistedEnd = whitelistedIndex + whitelistedWord.length;

    const matchStartsInside = matchStart >= whitelistedIndex && matchStart < whitelistedEnd;
    const matchEndsInside = matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd;
    const whitelistedInsideMatch = whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd;

    if (matchStartsInside || matchEndsInside || whitelistedInsideMatch) {
      return true;
    }
  }

  return false;
}
183+
/**
 * Replaces every regex match in the text using the provided replacer function.
 *
 * Matching is performed on `lowercaseText`, while the words handed to `replacer` and the untouched
 * segments of the result are taken from `text` at the same indices.
 * NOTE(review): this assumes `lowercaseText` has the same length as `text`; `toLowerCase()` can
 * change the length for a few Unicode characters — confirm whether such input must be supported.
 *
 * @param text - The original text.
 * @param lowercaseText - The lowercase version of the text; the regex runs against this.
 * @param replacer - Receives the original-cased matched word plus its start (inclusive) and end
 *   (exclusive) indices in the text, and returns the replacement. Return the word unchanged to keep it.
 * @param regex - The regular expression used to find profane words.
 * @returns The text with each match substituted by the replacer's result.
 */
private replaceProfanity(
  text: string,
  lowercaseText: string,
  replacer: (word: string, start: number, end: number) => string,
  regex: RegExp,
): string {
  const pieces: string[] = [];
  // Position in `text` up to which output has already been produced.
  let cursor = 0;

  // A shared global regex keeps `lastIndex` between calls; a previous loop that exited early would
  // otherwise make this scan start in the middle of the text.
  regex.lastIndex = 0;

  let match: RegExpExecArray | null;
  while ((match = regex.exec(lowercaseText)) !== null) {
    const matchStart = match.index;
    const matchEnd = matchStart + match[0].length;

    if (matchEnd === matchStart) {
      // A zero-width match does not advance `lastIndex`; step over it to avoid looping forever.
      regex.lastIndex += 1;
      continue;
    }

    // Indices always refer to the original text, so the word is sliced from `text` directly.
    // Applying the accumulated length difference of earlier replacements here would hand the
    // replacer the wrong characters as soon as one replacement changes the length.
    const originalWord = text.slice(matchStart, matchEnd);
    pieces.push(text.slice(cursor, matchStart), replacer(originalWord, matchStart, matchEnd));
    cursor = matchEnd;

    if (!regex.global && !regex.sticky) {
      // Without the g/y flag `exec` never advances and would return this same match again.
      break;
    }
  }

  pieces.push(text.slice(cursor));
  return pieces.join("");
}
213+
171214 /**
172215 * Determines the list of languages to use, either from the provided list or falling back to default languages.
173216 * @param languages - An optional list of languages to use.
0 commit comments