@@ -26,6 +26,13 @@ static const struct {
2626 const char * bad ;
2727 const char * suggested ;
2828} FOSSIL_SOAP_SUGGESTIONS [] = {
29+ {"u" , "you" },
30+ {"gonna" , "going to" },
31+ {"ppl" , "people" },
32+ {"funny" , "laugh out loud" },
33+ {"lol" , "laugh out loud" },
34+ {"idk" , "I don't know" },
35+ {"wanna" , "want to" },
2936 {"rizz" , "charisma" },
3037 {"skibidi" , "dance" },
3138 {"yeet" , "throw" },
@@ -56,22 +63,46 @@ static const struct {
5663 {"zaddy" , "attractive man" },
5764 {"drip" , "fashion" },
5865 {"fire" , "amazing" },
59- {"lol" , "funny" },
6066 {"omg" , "surprising" },
6167 {"brb" , "be right back" },
62- {"idk" , "I don't know" },
6368 {"imo" , "in my opinion" },
6469 {"lmao" , "laughing" },
6570 {"nvm" , "never mind" },
6671 {"tbh" , "to be honest" },
67- {"tldr" , "too long" },
72+ {"tldr" , "too long; didn't read " },
6873 {"ttyl" , "talk to you later" },
6974 {"wyd" , "what are you doing" },
7075 {"wtf" , "what the heck" },
7176 {"yolo" , "you only live once" },
7277 {"rot-brain" , "stupid" },
7378 {"rot brain" , "stupid" },
7479 {"rotbrain" , "stupid" },
80+ {"smh" , "shaking my head" },
81+ {"fomo" , "fear of missing out" },
82+ {"bff" , "best friend forever" },
83+ {"irl" , "in real life" },
84+ {"afaik" , "as far as I know" },
85+ {"btw" , "by the way" },
86+ {"omw" , "on my way" },
87+ {"ikr" , "I know right" },
88+ {"ikr" , "I know, right" },
89+ {"tgif" , "thank goodness it's Friday" },
90+ {"np" , "no problem" },
91+ {"rofl" , "rolling on the floor laughing" },
92+ {"lmk" , "let me know" },
93+ {"dm" , "direct message" },
94+ {"ikr" , "I know, right" },
95+ {"rn" , "right now" },
96+ {"ikr" , "I know, right" },
97+ {"smh" , "shaking my head" },
98+ {"yw" , "you're welcome" },
99+ {"af" , "very" },
100+ {"ftw" , "for the win" },
101+ {"gg" , "good game" },
102+ {"pov" , "point of view" },
103+ {"omfg" , "oh my goodness" },
104+ {"tl;dr" , "too long; didn't read" },
105+ {"fwiw" , "for what it's worth" },
75106 {NULL , NULL } // Sentinel to mark the end
76107};
77108
@@ -84,9 +115,30 @@ static const struct {
84115 {"ain't" , "isn't" },
85116 {"should of" , "should have" },
86117 {"could of" , "could have" },
118+ {"would of" , "would have" },
87119 {"not never" , "never" },
88120 {"free gift" , "gift" },
89121 {"very unique" , "unique" },
122+ {"actually true" , "true" },
123+ {"past history" , "history" },
124+ {"future plans" , "plans" },
125+ {"true fact" , "fact" },
126+ {"basic fundamentals" , "fundamentals" },
127+ {"completely destroyed" , "destroyed" },
128+ {"revert back" , "revert" },
129+ {"each and every" , "each" },
130+ {"end result" , "result" },
131+ {"final outcome" , "outcome" },
132+ {"unexpected surprise" , "surprise" },
133+ {"advance planning" , "planning" },
134+ {"close proximity" , "proximity" },
135+ {"ATM machine" , "ATM" },
136+ {"PIN number" , "PIN" },
137+ {"ISBN number" , "ISBN" },
138+ {"LCD display" , "LCD" },
139+ {"HIV virus" , "HIV" },
140+ {"true facts" , "facts" },
141+ {"past experiences" , "experiences" },
90142 {NULL , NULL } // Sentinel to mark the end
91143};
92144
@@ -102,6 +154,23 @@ static const char *SARCASTIC_PHRASES[] = {
102154 "Brilliant" ,
103155 "Wonderful" ,
104156 "Perfect" ,
157+ "Oh, just what I needed" ,
158+ "Wow, amazing" ,
159+ "How original" ,
160+ "Incredible" ,
161+ "As if that will work" ,
162+ "Sure, that's smart" ,
163+ "Totally believable" ,
164+ "Oh, really?" ,
165+ "You're a genius" ,
166+ "Thanks a lot" ,
167+ "Couldn't be better" ,
168+ "That's exactly what I wanted" ,
169+ "Well, isn't that special" ,
170+ "Lovely" ,
171+ "Just perfect" ,
172+ "What could go wrong?" ,
173+ "Right, because that makes sense" ,
105174 NULL // Sentinel to mark the end
106175};
107176
@@ -117,16 +186,150 @@ static const char *FORMAL_PHRASES[] = {
117186 "I am writing to" ,
118187 "Please find attached" ,
119188 "Thank you for your consideration" ,
189+ "I look forward to your response" ,
190+ "Kindly note" ,
191+ "Please be advised" ,
192+ "It is my pleasure to" ,
193+ "I would appreciate your assistance" ,
194+ "Should you require any further information" ,
195+ "I remain at your disposal" ,
196+ "With kind regards" ,
197+ "Thank you for your attention" ,
198+ "I am writing on behalf of" ,
199+ "Please accept my apologies" ,
200+ "I wish to inform you" ,
201+ "We would be grateful if" ,
202+ "I hope this message finds you well" ,
120203 NULL // Sentinel to mark the end
121204};
122205
/**
 * Phrases whose presence marks a text as clickbait.
 *
 * The clickbait detector in this file hands every entry to
 * custom_strcasestr(), so entries are searched for as plain substrings;
 * regex syntax or placeholders inside an entry would be taken literally.
 * NOTE(review): custom_strcasestr() is defined outside this table and is
 * presumably a case-insensitive strstr() -- confirm.
 *
 * The list ends with a NULL sentinel.
 */
static const char *CLICKBAIT_PATTERNS[] = {
    "you won't believe",
    "shocking",
    "what happened next",

    /*
     * "Top N" listicles. One literal entry per leading digit, because a
     * bracket expression such as [0-9] is not expanded by substring search.
     */
    "top 0",
    "top 1",
    "top 2",
    "top 3",
    "top 4",
    "top 5",
    "top 6",
    "top 7",
    "top 8",
    "top 9",

    "things you didn't know",
    "one weird trick",
    "will blow your mind",
    "can't handle this",
    "before you die",
    "this is why",
    "the reason is shocking",
    "you need to see",
    "never guess",
    "what they found",
    "will surprise you",
    "what no one tells you",
    "you'll never believe",
    "this changes everything",

    /* No leading count placeholder: matches "7 things you should know" too. */
    "things you should know",

    "you won't expect",
    "hidden secret",
    "finally revealed",
    "the truth about",
    "this is insane",
    "what happens next will amaze you",
    NULL /* Sentinel to mark the end */
};
234+
/**
 * Phrases whose presence marks a text as ragebait.
 *
 * fossil_io_soap_detect_ragebait() walks this list in order and stops at the
 * first entry found in the text. The list ends with a NULL sentinel.
 */
static const char *RAGEBAIT_PATTERNS[] = {
    "you won't believe",      "infuriating",          "makes me angry",
    "outrageous",             "how dare they",        "unbelievable",
    "ridiculous",             "trigger warning",      "enraging",
    "shocking injustice",     "this will anger you",  "prepare to be outraged",
    "makes no sense",         "disgusting",           "furious",
    "utterly unacceptable",   "outrage",              "you won't forgive",
    "shocking betrayal",      "how could they",       "beyond belief",
    "makes my blood boil",    "appalling",            "you'll be mad",
    "infuriated by",          "outrageously unfair",  "absurd",
    "scandalous",             "unacceptable",         "utter nonsense",
    "provoking anger",        "makes me furious",
    NULL /* Sentinel */
};
270+
/**
 * Words treated as signs of exaggeration.
 *
 * The exaggeration detector in this file passes each entry to
 * custom_strcasestr() and reports a hit on the first one found.
 * The list ends with a NULL sentinel.
 */
static const char *EXAGGERATED_WORDS[] = {
    "literally",   "always",       "never",        "every",       "everyone",
    "nobody",      "forever",      "insane",       "unbelievable", "outrageous",
    "epic",        "mind-blowing", "extremely",    "completely",  "totally",
    "absolutely",  "massive",      "huge",         "gigantic",    "tremendous",
    "incredible",  "unreal",       "astonishing",  "stunning",    "jaw-dropping",
    "ridiculous",  "crazy",        "fantastic",    "amazing",     "phenomenal",
    NULL /* Sentinel to mark the end */
};
304+
123305static char custom_storage [MAX_CUSTOM_FILTERS ][64 ];
124306static const char * custom_filters [MAX_CUSTOM_FILTERS ] = {0 };
125307
/**
 * Words that must be skipped because they are otherwise misdetected.
 * The list ends with a NULL sentinel.
 */
static const char *SKIP_WORDS[] = {
    "limit",   "size",     "width",   "height",
    "length",  "depth",    "volume",  "capacity",
    "weight",  "age",      "year",    "month",
    "day",     "hour",     "minute",  "second",
    "ID",      "serial",   "version", "code",
    "label",   "status",   "level",
    NULL /* Sentinel to mark the end */
};
132335
@@ -466,21 +669,20 @@ char *fossil_io_soap_normalize_slang(const char *text) {
466669 return result ;
467670}
468671
469- int fossil_io_soap_detect_clickbait (const char * text ) {
672+ int fossil_io_soap_detect_ragebait (const char * text ) {
470673 if (!text ) return 0 ;
471674
472- static const char * CLICKBAIT_PATTERNS [] = {
473- "you won't believe" ,
474- "shocking" ,
475- "what happened next" ,
476- "top [0-9]" ,
477- "things you didn't know" ,
478- "one weird trick" ,
479- "will blow your mind" ,
480- "can't handle this" ,
481- "before you die" ,
482- NULL
483- };
675+ for (int i = 0 ; RAGEBAIT_PATTERNS [i ] != NULL ; i ++ ) {
676+ if (custom_strcasestr (text , RAGEBAIT_PATTERNS [i ])) {
677+ return 1 ;
678+ }
679+ }
680+
681+ return 0 ;
682+ }
683+
684+ int fossil_io_soap_detect_clickbait (const char * text ) {
685+ if (!text ) return 0 ;
484686
485687 for (int i = 0 ; CLICKBAIT_PATTERNS [i ] != NULL ; i ++ ) {
486688 if (custom_strcasestr (text , CLICKBAIT_PATTERNS [i ])) {
@@ -494,12 +696,6 @@ int fossil_io_soap_detect_clickbait(const char *text) {
494696int fossil_io_soap_detect_exaggeration (const char * text ) {
495697 if (!text ) return 0 ;
496698
497- static const char * EXAGGERATED_WORDS [] = {
498- "literally" , "always" , "never" , "every" , "everyone" , "nobody" ,
499- "forever" , "insane" , "unbelievable" , "outrageous" , "epic" , "mind-blowing" ,
500- NULL
501- };
502-
503699 for (int i = 0 ; EXAGGERATED_WORDS [i ] != NULL ; i ++ ) {
504700 if (custom_strcasestr (text , EXAGGERATED_WORDS [i ])) {
505701 return 1 ;
@@ -521,6 +717,19 @@ char *fossil_io_soap_filter_offensive(const char *text) {
521717 {"idiot" , "misguided" },
522718 {"moron" , "uninformed" },
523719 {"sucks" , "is not ideal" },
720+ {"fool" , "misguided" },
721+ {"jerk" , "unpleasant person" },
722+ {"loser" , "underperformer" },
723+ {"dork" , "awkward person" },
724+ {"lame" , "unsatisfactory" },
725+ {"crazy" , "unreasonable" },
726+ {"idiotic" , "poorly thought out" },
727+ {"dunce" , "uninformed individual" },
728+ {"nasty" , "unpleasant" },
729+ {"worthless" , "lacking value" },
730+ {"pathetic" , "disappointing" },
731+ {"dimwit" , "uninformed" },
732+ {"clueless" , "uninformed" },
524733 {NULL , NULL }
525734 };
526735
0 commit comments