Skip to content

Commit 3039747

Browse files
authored
Merge pull request #122 from 2Toad/jp-issue-120
Fixes #120: Words added to whitelist are still being censored
2 parents 097a908 + c4571f2 commit 3039747

File tree

3 files changed

+152
-79
lines changed

3 files changed

+152
-79
lines changed

src/benchmark/benchmark.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ suite
110110
.on("cycle", (event: Event) => {
111111
console.log(String(event.target));
112112
})
113-
.on("complete", () => {
114-
console.log(`Fastest: ${suite.filter("fastest").map("name")}`);
113+
.on("complete", function () {
114+
console.log(`Fastest: ${this.filter("fastest").map("name")[0]}`);
115115
})
116116
.run({ async: true });

src/profanity.ts

Lines changed: 120 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,10 @@ import { profaneWords } from "./data";
55

66
export class Profanity {
77
options: ProfanityOptions;
8-
98
whitelist: List;
109

1110
private blacklist: List;
12-
1311
private removed: List;
14-
1512
private regexes: Map<string, RegExp>;
1613

1714
constructor(options?: ProfanityOptions | Partial<ProfanityOptions>) {
@@ -23,6 +20,13 @@ export class Profanity {
2320
this.regexes = new Map<string, RegExp>();
2421
}
2522

23+
/**
24+
* Checks if the given text contains any profanity.
25+
* @param text - The text to check for profanity.
26+
* @param languages - Optional array of language codes to use for profanity detection.
27+
* If not provided, uses the languages specified in the options.
28+
* @returns True if profanity is found, false otherwise.
29+
*/
2630
exists(text: string, languages?: string[]): boolean {
2731
if (typeof text !== "string") {
2832
return false;
@@ -34,49 +38,26 @@ export class Profanity {
3438
const lowercaseText = text.toLowerCase();
3539

3640
let match: RegExpExecArray | null;
37-
do {
38-
match = regex.exec(lowercaseText);
39-
if (match !== null) {
40-
const matchStart = match.index;
41-
const matchEnd = matchStart + match[0].length;
42-
43-
// Check if the matched word is part of a whitelisted word
44-
let isWhitelisted = false;
45-
this.whitelist.words.forEach((whitelistedWord) => {
46-
const whitelistedIndex = lowercaseText.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
47-
if (whitelistedIndex !== -1) {
48-
const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
49-
50-
if (this.options.wholeWord) {
51-
// For whole word matching, ensure the whitelisted word exactly matches the profane word
52-
// and is not part of a hyphenated or underscore-separated word
53-
if (
54-
matchStart === whitelistedIndex &&
55-
matchEnd === whitelistedEnd &&
56-
(matchStart === 0 || !/[\w-_]/.test(lowercaseText[matchStart - 1])) &&
57-
// eslint-disable-next-line security/detect-object-injection
58-
(matchEnd === lowercaseText.length || !/[\w-_]/.test(lowercaseText[matchEnd]))
59-
) {
60-
isWhitelisted = true;
61-
}
62-
} else {
63-
// For partial matching, check if the profane word is contained within the whitelisted word
64-
if ((matchStart >= whitelistedIndex && matchStart < whitelistedEnd) || (matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd)) {
65-
isWhitelisted = true;
66-
}
67-
}
68-
}
69-
});
41+
while ((match = regex.exec(lowercaseText)) !== null) {
42+
const matchStart = match.index;
43+
const matchEnd = matchStart + match[0].length;
7044

71-
if (!isWhitelisted) {
72-
return true;
73-
}
45+
if (!this.isWhitelisted(matchStart, matchEnd, lowercaseText)) {
46+
return true;
7447
}
75-
} while (match !== null);
48+
}
7649

7750
return false;
7851
}
7952

53+
/**
54+
* Censors profanity in the given text.
55+
* @param text - The text to censor.
56+
* @param censorType - The type of censoring to apply. Defaults to CensorType.Word.
57+
* @param languages - Optional array of language codes to use for profanity detection.
58+
* If not provided, uses the languages specified in the options.
59+
* @returns The censored text.
60+
*/
8061
censor(text: string, censorType: CensorType = CensorType.Word, languages?: string[]): string {
8162
if (typeof text !== "string") {
8263
return text;
@@ -87,45 +68,37 @@ export class Profanity {
8768

8869
const lowercaseText = text.toLowerCase();
8970

90-
switch (censorType) {
91-
case CensorType.Word:
92-
return text.replace(regex, (match) => {
93-
const underscore = match.includes("_") ? "_" : "";
94-
return this.options.grawlix + underscore;
95-
});
96-
case CensorType.FirstChar: {
97-
return this.replaceProfanity(text, lowercaseText, (word) => this.options.grawlixChar + word.slice(1), regex);
98-
}
99-
case CensorType.FirstVowel:
100-
case CensorType.AllVowels: {
101-
const vowelRegex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
102-
return this.replaceProfanity(text, lowercaseText, (word) => word.replace(vowelRegex, this.options.grawlixChar), regex);
103-
}
104-
default:
105-
throw new Error(`Invalid replacement type: "${censorType}"`);
106-
}
107-
}
108-
109-
private replaceProfanity(text: string, lowercaseText: string, replacer: (word: string) => string, regex: RegExp): string {
110-
let result = text;
111-
let offset = 0;
112-
113-
let match: RegExpExecArray | null;
114-
do {
115-
match = regex.exec(lowercaseText);
116-
if (match !== null) {
117-
const matchStart = match.index;
118-
const matchEnd = matchStart + match[0].length;
119-
const originalWord = text.slice(matchStart + offset, matchEnd + offset);
120-
const censoredWord = replacer(originalWord);
121-
result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
122-
offset += censoredWord.length - originalWord.length;
123-
}
124-
} while (match !== null);
125-
126-
return result;
71+
return this.replaceProfanity(
72+
text,
73+
lowercaseText,
74+
(word, start, end) => {
75+
if (this.isWhitelisted(start, end, lowercaseText)) {
76+
return word;
77+
}
78+
switch (censorType) {
79+
case CensorType.Word: {
80+
const underscore = word.includes("_") ? "_" : "";
81+
return this.options.grawlix + underscore;
82+
}
83+
case CensorType.FirstChar:
84+
return this.options.grawlixChar + word.slice(1);
85+
case CensorType.FirstVowel:
86+
case CensorType.AllVowels: {
87+
const vowelRegex = new RegExp("[aeiou]", censorType === CensorType.FirstVowel ? "i" : "ig");
88+
return word.replace(vowelRegex, this.options.grawlixChar);
89+
}
90+
default:
91+
throw new Error(`Invalid replacement type: "${censorType}"`);
92+
}
93+
},
94+
regex,
95+
);
12796
}
12897

98+
/**
99+
* Adds words to the profanity blacklist.
100+
* @param words - An array of words to add to the blacklist.
101+
*/
129102
addWords(words: string[]): void {
130103
const removedWords: string[] = [];
131104
const blacklistWords: string[] = [];
@@ -147,6 +120,10 @@ export class Profanity {
147120
}
148121
}
149122

123+
/**
124+
* Removes words from the profanity blacklist.
125+
* @param words - An array of words to remove from the blacklist.
126+
*/
150127
removeWords(words: string[]): void {
151128
const blacklistedWords: string[] = [];
152129
const removeWords: string[] = [];
@@ -168,6 +145,72 @@ export class Profanity {
168145
}
169146
}
170147

148+
/**
149+
* Checks if a given match is whitelisted.
150+
* @param matchStart - The starting index of the match in the text.
151+
* @param matchEnd - The ending index of the match in the text.
152+
* @param text - The lowercase text being checked.
153+
* @returns True if the match is whitelisted, false otherwise.
154+
*/
155+
private isWhitelisted(matchStart: number, matchEnd: number, text: string): boolean {
156+
for (const whitelistedWord of this.whitelist.words) {
157+
const whitelistedIndex = text.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
158+
if (whitelistedIndex !== -1) {
159+
const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
160+
161+
if (this.options.wholeWord) {
162+
if (
163+
matchStart === whitelistedIndex &&
164+
matchEnd === whitelistedEnd &&
165+
(matchStart === 0 || !/[\w-_]/.test(text[matchStart - 1])) &&
166+
(matchEnd === text.length || !/[\w-_]/.test(text[matchEnd]))
167+
) {
168+
return true;
169+
}
170+
} else {
171+
if (
172+
(matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
173+
(matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd) ||
174+
(whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd)
175+
) {
176+
return true;
177+
}
178+
}
179+
}
180+
}
181+
return false;
182+
}
183+
184+
/**
185+
* Replaces profanity in the text using the provided replacer function.
186+
* @param text - The original text.
187+
* @param lowercaseText - The lowercase version of the text.
188+
* @param replacer - A function that determines how to replace profane words.
189+
* @param regex - The regular expression used to find profane words.
190+
* @returns The text with profanity replaced.
191+
*/
192+
private replaceProfanity(
193+
text: string,
194+
lowercaseText: string,
195+
replacer: (word: string, start: number, end: number) => string,
196+
regex: RegExp,
197+
): string {
198+
let result = text;
199+
let offset = 0;
200+
201+
let match: RegExpExecArray | null;
202+
while ((match = regex.exec(lowercaseText)) !== null) {
203+
const matchStart = match.index;
204+
const matchEnd = matchStart + match[0].length;
205+
const originalWord = text.slice(matchStart + offset, matchEnd + offset);
206+
const censoredWord = replacer(originalWord, matchStart, matchEnd);
207+
result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
208+
offset += censoredWord.length - originalWord.length;
209+
}
210+
211+
return result;
212+
}
213+
171214
/**
172215
* Determines the list of languages to use, either from the provided list or falling back to default languages.
173216
* @param languages - An optional list of languages to use.

tests/profanity.spec.ts

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,29 +81,39 @@ describe("Profanity", () => {
8181
it("should detect custom added words (wholeWord = true)", () => {
8282
customProfanity.addWords(["cucumber", "banana"]);
8383
expect(customProfanity.exists("I love cucumbers")).to.be.false;
84+
expect(customProfanity.censor("I love cucumbers")).to.equal("I love cucumbers");
8485
expect(customProfanity.exists("I love cucumber")).to.be.true;
86+
expect(customProfanity.censor("I love cucumber")).to.equal(`I love ${customProfanity.options.grawlix}`);
8587
expect(customProfanity.exists("Bananas are yellow")).to.be.false;
88+
expect(customProfanity.censor("Bananas are yellow")).to.equal("Bananas are yellow");
8689
expect(customProfanity.exists("This banana is yellow")).to.be.true;
90+
expect(customProfanity.censor("This banana is yellow")).to.equal(`This ${customProfanity.options.grawlix} is yellow`);
8791
});
8892

8993
it("should detect custom added words (wholeWord = false)", () => {
9094
const customProfanityPartial = new Profanity({ wholeWord: false });
9195
customProfanityPartial.addWords(["cucumber", "banana"]);
9296
expect(customProfanityPartial.exists("I love cucumbers")).to.be.true;
97+
expect(customProfanityPartial.censor("I love cucumbers")).to.equal(`I love ${customProfanityPartial.options.grawlix}s`);
9398
expect(customProfanityPartial.exists("Bananas are yellow")).to.be.true;
99+
expect(customProfanityPartial.censor("Bananas are yellow")).to.equal(`${customProfanityPartial.options.grawlix}s are yellow`);
94100
});
95101

96102
it("should not detect removed words", () => {
97103
customProfanity.removeWords(["butt", "arse"]);
98104
expect(customProfanity.exists("Don't be a butt")).to.be.false;
105+
expect(customProfanity.censor("Don't be a butt")).to.equal("Don't be a butt");
99106
expect(customProfanity.exists("You're an arse")).to.be.false;
107+
expect(customProfanity.censor("You're an arse")).to.equal("You're an arse");
100108
});
101109

102110
it("should handle adding and removing words in sequence", () => {
103111
customProfanity.addWords(["test"]);
104112
expect(customProfanity.exists("test")).to.be.true;
113+
expect(customProfanity.censor("test")).to.equal(customProfanity.options.grawlix);
105114
customProfanity.removeWords(["test"]);
106115
expect(customProfanity.exists("test")).to.be.false;
116+
expect(customProfanity.censor("test")).to.equal("test");
107117
});
108118
});
109119
});
@@ -116,15 +126,24 @@ describe("Profanity", () => {
116126
});
117127

118128
describe("wholeWord = true", () => {
129+
it("should whitelist a word", () => {
130+
customProfanity.whitelist.addWords(["butt"]);
131+
expect(customProfanity.exists("Don't be a butt")).to.be.false;
132+
expect(customProfanity.censor("Don't be a butt")).to.equal("Don't be a butt");
133+
});
134+
119135
it("should whitelist multiple words", () => {
120136
customProfanity.whitelist.addWords(["butt", "arse"]);
121137
expect(customProfanity.exists("Should we censor the word butt or arse?")).to.be.false;
138+
expect(customProfanity.censor("Should we censor the word butt or arse?")).to.equal("Should we censor the word butt or arse?");
122139
});
123140

124141
it("should only whitelist exact whole words", () => {
125142
customProfanity.whitelist.addWords(["but"]);
126143
expect(customProfanity.exists("Don't be a but")).to.be.false;
144+
expect(customProfanity.censor("Don't be a but")).to.equal("Don't be a but");
127145
expect(customProfanity.exists("Don't be a butt")).to.be.true;
146+
expect(customProfanity.censor("Don't be a butt")).to.equal("Don't be a @#$%&!");
128147
});
129148

130149
describe("Hyphenated and underscore-separated words", () => {
@@ -134,10 +153,12 @@ describe("Profanity", () => {
134153

135154
it("should detect profanity in hyphenated words when part is whitelisted", () => {
136155
expect(customProfanity.exists("Don't be a butt-head")).to.be.true;
156+
expect(customProfanity.censor("Don't be a butt-head")).to.equal(`Don't be a ${customProfanity.options.grawlix}-head`);
137157
});
138158

139159
it("should detect profanity in underscore-separated words when part is whitelisted", () => {
140160
expect(customProfanity.exists("Don't be a butt_head")).to.be.true;
161+
expect(customProfanity.censor("Don't be a butt_head")).to.equal(`Don't be a ${customProfanity.options.grawlix}_head`);
141162
});
142163
});
143164
});
@@ -161,22 +182,27 @@ describe("Profanity", () => {
161182

162183
it("should detect 'arse' as profanity", () => {
163184
expect(customProfanityPartial.exists("what an arse")).to.be.true;
185+
expect(customProfanityPartial.censor("what an arse")).to.equal(`what an ${customProfanityPartial.options.grawlix}`);
164186
});
165187

166188
it("should not detect 'arsenic' as profanity due to whitelist", () => {
167189
expect(customProfanityPartial.exists("dedicated arsenic")).to.be.false;
190+
expect(customProfanityPartial.censor("dedicated arsenic")).to.equal("dedicated arsenic");
168191
});
169192

170193
it("should not detect 'class' as profanity due to whitelist", () => {
171194
expect(customProfanityPartial.exists("dedicated class person")).to.be.false;
195+
expect(customProfanityPartial.censor("dedicated class person")).to.equal("dedicated class person");
172196
});
173197

174198
it("should not detect 'classic' as profanity due to whitelist", () => {
175199
expect(customProfanityPartial.exists("dedicated classic")).to.be.false;
200+
expect(customProfanityPartial.censor("dedicated classic")).to.equal("dedicated classic");
176201
});
177202

178203
it("should not detect 'password' as profanity due to whitelist", () => {
179204
expect(customProfanityPartial.exists("dedicated password")).to.be.false;
205+
expect(customProfanityPartial.censor("dedicated password")).to.equal("dedicated password");
180206
});
181207
});
182208
});
@@ -199,19 +225,23 @@ describe("Profanity", () => {
199225
it("should not detect whitelisted words", () => {
200226
customProfanity.whitelist.addWords(["classic", "assembly"]);
201227
expect(customProfanity.exists("That's a classic movie")).to.be.false;
228+
expect(customProfanity.censor("That's a classic movie")).to.equal("That's a classic movie");
202229
expect(customProfanity.exists("The assembly line is efficient")).to.be.false;
230+
expect(customProfanity.censor("The assembly line is efficient")).to.equal("The assembly line is efficient");
203231
});
204232

205233
it("should detect profanity after removing from whitelist", () => {
206234
customProfanity.whitelist.addWords(["classic"]);
207235
customProfanity.whitelist.removeWords(["classic"]);
208236
expect(customProfanity.exists("That's a classic butt movie")).to.be.true;
237+
expect(customProfanity.censor("That's a classic butt movie")).to.equal(`That's a classic ${customProfanity.options.grawlix} movie`);
209238
});
210239

211240
it("should handle adding and removing words from whitelist in sequence", () => {
212241
customProfanity.whitelist.addWords(["test"]);
213242
customProfanity.addWords(["test"]);
214243
expect(customProfanity.exists("test")).to.be.false;
244+
expect(customProfanity.censor("test")).to.equal("test");
215245
});
216246
});
217247

0 commit comments

Comments
 (0)