@@ -27,14 +27,43 @@ import yellow from "@material-ui/core/colors/yellow";
2727
2828import animals from "./utils/animals.json" ;
2929
30- function isPunctuation ( charCode ) {
31- return (
32- ( charCode >= 33 && charCode <= 47 ) ||
33- ( charCode >= 58 && charCode <= 64 ) ||
34- ( charCode >= 91 && charCode <= 96 ) ||
35- ( charCode >= 0x2000 && charCode <= 0x206f )
36- ) ;
37- }
/**
 * Code points that are dropped from the input before blacklist matching, so
 * that blank or formatting-only characters inserted between letters cannot be
 * used to split a blocked word.
 *
 * Membership is tested against the value the matcher hands to the simple
 * transformer (presumably a Unicode code point rather than a UTF-16 code
 * unit, since several entries lie above U+FFFF — verify against the matcher).
 *
 * The ordinary space U+0020 is deliberately NOT included.
 *
 * @type {Set<number>}
 */
const invisibleChars = (() => {
  // Isolated code points, in ascending order.
  const singles = [
    0x00a0, // No-break space
    0x00ad, // Soft hyphen
    0x034f, // Combining grapheme joiner
    0x061c, // Arabic letter mark
    0x06dd, // Arabic end of ayah
    0x070f, // Syriac abbreviation mark
    0x08e2, // Arabic disputed end of ayah
    0x115f, // Hangul choseong filler
    0x1160, // Hangul jungseong filler
    0x1680, // Ogham space mark
    0x180e, // Mongolian vowel separator
    0x2800, // Braille pattern blank
    0x3000, // Ideographic space
    0x3164, // Hangul filler
    0xfeff, // Zero-width no-break space (also BOM)
    0xffa0, // Halfwidth Hangul filler
    0x110bd, // Kaithi number sign (format character)
    0x110cd, // Kaithi number sign above (format character)
  ];

  // Inclusive [first, last] ranges, in ascending order.
  const ranges = [
    [0x00, 0x1f], // ASCII (C0) control characters
    [0x7f, 0x9f], // Delete and C1 control characters
    [0x0600, 0x0605], // Arabic number signs and related format characters
    [0x0890, 0x0891], // Arabic pound / piastre mark above (format characters)
    [0x2000, 0x200f], // En quad to right-to-left mark (spaces, zero-width and directional marks)
    [0x2028, 0x2029], // Line separator, paragraph separator
    [0x202a, 0x202f], // Bidi embeddings/overrides, plus narrow no-break space
    [0x205f, 0x206f], // Medium mathematical space, word joiner to nominal digit shapes
    [0xfff9, 0xfffb], // Interlinear annotation formatting characters
    [0x1bca0, 0x1bca3], // Shorthand format controls
    [0x1d173, 0x1d17a], // Musical symbol formatting characters
    [0xe0020, 0xe007f], // TAG characters
  ];

  const codePoints = new Set(singles);
  for (const [first, last] of ranges) {
    for (let codePoint = first; codePoint <= last; codePoint++) {
      codePoints.add(codePoint);
    }
  }
  return codePoints;
})();
3867
3968const fixedDataset = englishDataset
4069 . addPhrase ( ( phrase ) =>
@@ -43,7 +72,10 @@ const fixedDataset = englishDataset
4372 . addPhrase ( ( phrase ) =>
4473 phrase
4574 . setMetadata ( { originalWord : "brainrot" } )
46- . addPattern ( pattern `skibidi` )
75+ . addPattern ( pattern `skibid` )
76+ . addPattern ( pattern `skidib` )
77+ . addPattern ( pattern `sybau` )
78+ . addPattern ( pattern `sygau` )
4779 . addPattern ( pattern `|riz` )
4880 . addPattern ( pattern `gyat` )
4981 . addPattern ( pattern `sigma` )
@@ -53,9 +85,8 @@ const fixedDataset = englishDataset
5385 . addPattern ( pattern `xooink` )
5486 . addPattern ( pattern `xioix` )
5587 . addPattern ( pattern `xiooix` )
56- . addPattern ( pattern `admits` )
57- . addPattern ( pattern `lebron` )
58- . addPattern ( pattern `lebroon` )
88+ . addPattern ( pattern `l[l]e[e]b[b]ro[o]n` )
89+ . addPattern ( pattern `prickl[l]e` )
5990 ) ;
6091// Work-around for:
6192// https://github.com/jo3-l/obscenity/issues/100
@@ -68,7 +99,7 @@ export const badWords = new RegExpMatcher({
6899 ...englishRecommendedTransformers ,
69100 blacklistMatcherTransformers : [
70101 ...englishRecommendedTransformers . blacklistMatcherTransformers ,
71- createSimpleTransformer ( ( c ) => ( ! isPunctuation ( c ) ? c : undefined ) ) ,
102+ createSimpleTransformer ( ( c ) => ( ! invisibleChars . has ( c ) ? c : undefined ) ) ,
72103 ] ,
73104} ) ;
74105const censor = new TextCensor ( ) . setStrategy ( fixedPhraseCensorStrategy ( "🤬" ) ) ;
0 commit comments