Skip to content

Commit 6b441f9

Browse files
committed
Tweak the profanity filter
1 parent a32b7a8 commit 6b441f9

File tree

1 file changed

+44
-13
lines changed

1 file changed

+44
-13
lines changed

src/util.js

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,43 @@ import yellow from "@material-ui/core/colors/yellow";
2727

2828
import animals from "./utils/animals.json";
2929

30-
/**
 * Reports whether a character code falls in one of the punctuation ranges
 * recognised by the profanity filter.
 *
 * @param {number} charCode - Character code to classify.
 * @returns {boolean} `true` when the code lies inside any of the inclusive
 *   ranges listed below, otherwise `false`.
 */
function isPunctuation(charCode) {
  // Inclusive [low, high] bounds.
  const punctuationRanges = [
    [33, 47], // ASCII "!" through "/"
    [58, 64], // ASCII ":" through "@"
    [91, 96], // ASCII "[" through "`"
    [0x2000, 0x206f], // U+2000 through U+206F
  ];
  return punctuationRanges.some(
    ([low, high]) => charCode >= low && charCode <= high
  );
}
30+
/**
 * Unicode code points that render as blank, zero-width, or pure formatting
 * controls. Membership is tested with numeric code points via
 * `invisibleChars.has(code)`.
 *
 * Individually listed code points come first; contiguous ranges are added by
 * the loop that follows the literal.
 */
const invisibleChars = new Set([
  0x00a0, // No-break space
  0x00ad, // Soft hyphen
  0x034f, // Combining grapheme joiner
  0x061c, // Arabic letter mark
  0x06dd, // Arabic end of ayah
  0x070f, // Syriac abbreviation mark
  0x08e2, // Arabic disputed end of ayah
  0x115f, // Hangul choseong filler
  0x1160, // Hangul jungseong filler
  0x1680, // Ogham space mark
  0x180e, // Mongolian vowel separator
  0x202f, // Narrow no-break space
  0x205f, // Medium mathematical space
  0x2800, // Braille pattern blank
  0x3000, // Ideographic space
  0x3164, // Hangul filler
  0xfeff, // Zero-width no-break space (also BOM)
  0xffa0, // Halfwidth Hangul filler
  0x110bd, // Kaithi number sign (format character)
  0x110cd, // Kaithi number sign above (format character)
]);

// Expand each inclusive [start, end] range into the set. The table is inlined
// in the loop header so no extra module-level binding is introduced.
for (const [start, end] of [
  [0x00, 0x1f], // ASCII control characters
  [0x7f, 0x9f], // Delete and C1 control characters
  [0x0600, 0x0605], // Arabic number signs and related format characters
  [0x0890, 0x0891], // Arabic pound / piastre mark above (format characters)
  [0x2000, 0x200f], // En quad to right-to-left mark (spaces and directional marks)
  [0x2028, 0x2029], // Line separator, paragraph separator
  [0x202a, 0x202e], // Left-to-right embedding to right-to-left override
  [0x2060, 0x206f], // Word joiner to nominal digit shapes (invisible formatting)
  [0xfff9, 0xfffb], // Interlinear annotation formatting characters
  [0x1bca0, 0x1bca3], // Shorthand format controls
  [0x1d173, 0x1d17a], // Musical symbol formatting characters
  [0xe0020, 0xe007f], // TAG characters (invisible language tags)
]) {
  for (let code = start; code <= end; code++) {
    invisibleChars.add(code);
  }
}
3867

3968
const fixedDataset = englishDataset
4069
.addPhrase((phrase) =>
@@ -43,7 +72,10 @@ const fixedDataset = englishDataset
4372
.addPhrase((phrase) =>
4473
phrase
4574
.setMetadata({ originalWord: "brainrot" })
46-
.addPattern(pattern`skibidi`)
75+
.addPattern(pattern`skibid`)
76+
.addPattern(pattern`skidib`)
77+
.addPattern(pattern`sybau`)
78+
.addPattern(pattern`sygau`)
4779
.addPattern(pattern`|riz`)
4880
.addPattern(pattern`gyat`)
4981
.addPattern(pattern`sigma`)
@@ -53,9 +85,8 @@ const fixedDataset = englishDataset
5385
.addPattern(pattern`xooink`)
5486
.addPattern(pattern`xioix`)
5587
.addPattern(pattern`xiooix`)
56-
.addPattern(pattern`admits`)
57-
.addPattern(pattern`lebron`)
58-
.addPattern(pattern`lebroon`)
88+
.addPattern(pattern`l[l]e[e]b[b]ro[o]n`)
89+
.addPattern(pattern`prickl[l]e`)
5990
);
6091
// Work-around for:
6192
// https://github.com/jo3-l/obscenity/issues/100
@@ -68,7 +99,7 @@ export const badWords = new RegExpMatcher({
6899
...englishRecommendedTransformers,
69100
blacklistMatcherTransformers: [
70101
...englishRecommendedTransformers.blacklistMatcherTransformers,
71-
createSimpleTransformer((c) => (!isPunctuation(c) ? c : undefined)),
102+
createSimpleTransformer((c) => (!invisibleChars.has(c) ? c : undefined)),
72103
],
73104
});
74105
// Text censor configured with a fixed-phrase strategy built from "🤬".
// NOTE(review): presumably every matched span is replaced by that emoji —
// confirm against the obscenity TextCensor documentation.
const censor = new TextCensor().setStrategy(fixedPhraseCensorStrategy("🤬"));

0 commit comments

Comments
 (0)