Skip to content

Commit b6a538b

Browse files
Enhance slang and bait detection in soap.c
1 parent 27675f3 commit b6a538b

File tree

1 file changed

+231
-22
lines changed

1 file changed

+231
-22
lines changed

code/logic/soap.c

Lines changed: 231 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@ static const struct {
2626
const char *bad;
2727
const char *suggested;
2828
} FOSSIL_SOAP_SUGGESTIONS[] = {
29+
{"u", "you"},
30+
{"gonna", "going to"},
31+
{"ppl", "people"},
32+
{"funny", "laugh out loud"},
33+
{"lol", "laugh out loud"},
34+
{"idk", "I don't know"},
35+
{"wanna", "want to"},
2936
{"rizz", "charisma"},
3037
{"skibidi", "dance"},
3138
{"yeet", "throw"},
@@ -56,22 +63,46 @@ static const struct {
5663
{"zaddy", "attractive man"},
5764
{"drip", "fashion"},
5865
{"fire", "amazing"},
59-
{"lol", "funny"},
6066
{"omg", "surprising"},
6167
{"brb", "be right back"},
62-
{"idk", "I don't know"},
6368
{"imo", "in my opinion"},
6469
{"lmao", "laughing"},
6570
{"nvm", "never mind"},
6671
{"tbh", "to be honest"},
67-
{"tldr", "too long"},
72+
{"tldr", "too long; didn't read"},
6873
{"ttyl", "talk to you later"},
6974
{"wyd", "what are you doing"},
7075
{"wtf", "what the heck"},
7176
{"yolo", "you only live once"},
7277
{"rot-brain", "stupid"},
7378
{"rot brain", "stupid"},
7479
{"rotbrain", "stupid"},
80+
{"smh", "shaking my head"},
81+
{"fomo", "fear of missing out"},
82+
{"bff", "best friend forever"},
83+
{"irl", "in real life"},
84+
{"afaik", "as far as I know"},
85+
{"btw", "by the way"},
86+
{"omw", "on my way"},
87+
{"ikr", "I know right"},
88+
{"ikr", "I know, right"},
89+
{"tgif", "thank goodness it's Friday"},
90+
{"np", "no problem"},
91+
{"rofl", "rolling on the floor laughing"},
92+
{"lmk", "let me know"},
93+
{"dm", "direct message"},
94+
{"ikr", "I know, right"},
95+
{"rn", "right now"},
96+
{"ikr", "I know, right"},
97+
{"smh", "shaking my head"},
98+
{"yw", "you're welcome"},
99+
{"af", "very"},
100+
{"ftw", "for the win"},
101+
{"gg", "good game"},
102+
{"pov", "point of view"},
103+
{"omfg", "oh my goodness"},
104+
{"tl;dr", "too long; didn't read"},
105+
{"fwiw", "for what it's worth"},
75106
{NULL, NULL} // Sentinel to mark the end
76107
};
77108

@@ -84,9 +115,30 @@ static const struct {
84115
{"ain't", "isn't"},
85116
{"should of", "should have"},
86117
{"could of", "could have"},
118+
{"would of", "would have"},
87119
{"not never", "never"},
88120
{"free gift", "gift"},
89121
{"very unique", "unique"},
122+
{"actually true", "true"},
123+
{"past history", "history"},
124+
{"future plans", "plans"},
125+
{"true fact", "fact"},
126+
{"basic fundamentals", "fundamentals"},
127+
{"completely destroyed", "destroyed"},
128+
{"revert back", "revert"},
129+
{"each and every", "each"},
130+
{"end result", "result"},
131+
{"final outcome", "outcome"},
132+
{"unexpected surprise", "surprise"},
133+
{"advance planning", "planning"},
134+
{"close proximity", "proximity"},
135+
{"ATM machine", "ATM"},
136+
{"PIN number", "PIN"},
137+
{"ISBN number", "ISBN"},
138+
{"LCD display", "LCD"},
139+
{"HIV virus", "HIV"},
140+
{"true facts", "facts"},
141+
{"past experiences", "experiences"},
90142
{NULL, NULL} // Sentinel to mark the end
91143
};
92144

@@ -102,6 +154,23 @@ static const char *SARCASTIC_PHRASES[] = {
102154
"Brilliant",
103155
"Wonderful",
104156
"Perfect",
157+
"Oh, just what I needed",
158+
"Wow, amazing",
159+
"How original",
160+
"Incredible",
161+
"As if that will work",
162+
"Sure, that's smart",
163+
"Totally believable",
164+
"Oh, really?",
165+
"You're a genius",
166+
"Thanks a lot",
167+
"Couldn't be better",
168+
"That's exactly what I wanted",
169+
"Well, isn't that special",
170+
"Lovely",
171+
"Just perfect",
172+
"What could go wrong?",
173+
"Right, because that makes sense",
105174
NULL // Sentinel to mark the end
106175
};
107176

@@ -117,16 +186,150 @@ static const char *FORMAL_PHRASES[] = {
117186
"I am writing to",
118187
"Please find attached",
119188
"Thank you for your consideration",
189+
"I look forward to your response",
190+
"Kindly note",
191+
"Please be advised",
192+
"It is my pleasure to",
193+
"I would appreciate your assistance",
194+
"Should you require any further information",
195+
"I remain at your disposal",
196+
"With kind regards",
197+
"Thank you for your attention",
198+
"I am writing on behalf of",
199+
"Please accept my apologies",
200+
"I wish to inform you",
201+
"We would be grateful if",
202+
"I hope this message finds you well",
120203
NULL // Sentinel to mark the end
121204
};
122205

206+
/**
 * Phrases whose presence marks a text as clickbait.
 *
 * fossil_io_soap_detect_clickbait() passes every entry to
 * custom_strcasestr(), so each entry must be a plain literal phrase.
 * NOTE(review): custom_strcasestr() is assumed to be a case-insensitive
 * substring search with no pattern syntax -- confirm against its definition.
 *
 * Under that assumption the former "top [0-9]" entry could never match real
 * text, so it is spelled out as one literal per digit ("top 1" also covers
 * "top 10", "top 100", ...). The former "x things you should know" entry used
 * "x" as a stand-in for a number; the placeholder is dropped so the phrase
 * matches after any count.
 *
 * The list is terminated by a NULL sentinel.
 */
static const char *CLICKBAIT_PATTERNS[] = {
    "you won't believe",
    "shocking",
    "what happened next",
    /* Literal expansion of the digit class in the old "top [0-9]" entry. */
    "top 0",
    "top 1",
    "top 2",
    "top 3",
    "top 4",
    "top 5",
    "top 6",
    "top 7",
    "top 8",
    "top 9",
    "things you didn't know",
    "one weird trick",
    "will blow your mind",
    "can't handle this",
    "before you die",
    "this is why",
    "the reason is shocking",
    "you need to see",
    "never guess",
    "what they found",
    "will surprise you",
    "what no one tells you",
    "you'll never believe",
    "this changes everything",
    "things you should know",
    "you won't expect",
    "hidden secret",
    "finally revealed",
    "the truth about",
    "this is insane",
    "what happens next will amaze you",
    NULL // Sentinel to mark the end
};
234+
235+
/**
 * Phrases whose presence marks a text as ragebait.
 *
 * fossil_io_soap_detect_ragebait() passes every entry to custom_strcasestr()
 * and reports a hit on the first one found.
 *
 * "you won't believe" is intentionally not listed here: it is already the
 * first CLICKBAIT_PATTERNS entry, and keeping it in both tables made every
 * plain clickbait headline register as ragebait as well.
 *
 * The list is terminated by a NULL sentinel.
 */
static const char *RAGEBAIT_PATTERNS[] = {
    "infuriating",
    "makes me angry",
    "outrageous",
    "how dare they",
    "unbelievable",
    "ridiculous",
    "trigger warning",
    "enraging",
    "shocking injustice",
    "this will anger you",
    "prepare to be outraged",
    "makes no sense",
    "disgusting",
    "furious",
    "utterly unacceptable",
    "outrage",
    "you won't forgive",
    "shocking betrayal",
    "how could they",
    "beyond belief",
    "makes my blood boil",
    "appalling",
    "you'll be mad",
    "infuriated by",
    "outrageously unfair",
    "absurd",
    "scandalous",
    "unacceptable",
    "utter nonsense",
    "provoking anger",
    "makes me furious",
    NULL // Sentinel
};
270+
271+
/**
 * Words treated as signs of exaggeration.
 *
 * fossil_io_soap_detect_exaggeration() walks this list up to the NULL
 * sentinel and passes each word to custom_strcasestr().
 */
static const char *EXAGGERATED_WORDS[] = {
    "literally", "always", "never", "every", "everyone",
    "nobody", "forever", "insane", "unbelievable", "outrageous",
    "epic", "mind-blowing", "extremely", "completely", "totally",
    "absolutely", "massive", "huge", "gigantic", "tremendous",
    "incredible", "unreal", "astonishing", "stunning", "jaw-dropping",
    "ridiculous", "crazy", "fantastic", "amazing", "phenomenal",
    NULL // Sentinel to mark the end
};
304+
123305
/**
 * Fixed buffer of MAX_CUSTOM_FILTERS slots, 64 bytes each.
 * NOTE(review): presumably holds the text of user-registered filter words
 * that custom_filters refers to -- confirm against the registration code.
 */
static char custom_storage[MAX_CUSTOM_FILTERS][64];
124306
/** Table of MAX_CUSTOM_FILTERS string pointers; every slot starts out NULL. */
static const char *custom_filters[MAX_CUSTOM_FILTERS] = {0};
125307

126308
/**
 * Words that must be left alone because they would otherwise be
 * misdetected. The list is terminated by a NULL sentinel.
 */
static const char *SKIP_WORDS[] = {
    "limit", "size", "width", "height", "length", "depth",
    "volume", "capacity", "weight", "age", "year", "month",
    "day", "hour", "minute", "second", "ID", "serial",
    "version", "code", "label", "status", "level",
    NULL // Sentinel to mark the end
};
132335

@@ -466,21 +669,20 @@ char *fossil_io_soap_normalize_slang(const char *text) {
466669
return result;
467670
}
468671

469-
int fossil_io_soap_detect_clickbait(const char *text) {
672+
int fossil_io_soap_detect_ragebait(const char *text) {
470673
if (!text) return 0;
471674

472-
static const char *CLICKBAIT_PATTERNS[] = {
473-
"you won't believe",
474-
"shocking",
475-
"what happened next",
476-
"top [0-9]",
477-
"things you didn't know",
478-
"one weird trick",
479-
"will blow your mind",
480-
"can't handle this",
481-
"before you die",
482-
NULL
483-
};
675+
for (int i = 0; RAGEBAIT_PATTERNS[i] != NULL; i++) {
676+
if (custom_strcasestr(text, RAGEBAIT_PATTERNS[i])) {
677+
return 1;
678+
}
679+
}
680+
681+
return 0;
682+
}
683+
684+
int fossil_io_soap_detect_clickbait(const char *text) {
685+
if (!text) return 0;
484686

485687
for (int i = 0; CLICKBAIT_PATTERNS[i] != NULL; i++) {
486688
if (custom_strcasestr(text, CLICKBAIT_PATTERNS[i])) {
@@ -494,12 +696,6 @@ int fossil_io_soap_detect_clickbait(const char *text) {
494696
int fossil_io_soap_detect_exaggeration(const char *text) {
495697
if (!text) return 0;
496698

497-
static const char *EXAGGERATED_WORDS[] = {
498-
"literally", "always", "never", "every", "everyone", "nobody",
499-
"forever", "insane", "unbelievable", "outrageous", "epic", "mind-blowing",
500-
NULL
501-
};
502-
503699
for (int i = 0; EXAGGERATED_WORDS[i] != NULL; i++) {
504700
if (custom_strcasestr(text, EXAGGERATED_WORDS[i])) {
505701
return 1;
@@ -521,6 +717,19 @@ char *fossil_io_soap_filter_offensive(const char *text) {
521717
{"idiot", "misguided"},
522718
{"moron", "uninformed"},
523719
{"sucks", "is not ideal"},
720+
{"fool", "misguided"},
721+
{"jerk", "unpleasant person"},
722+
{"loser", "underperformer"},
723+
{"dork", "awkward person"},
724+
{"lame", "unsatisfactory"},
725+
{"crazy", "unreasonable"},
726+
{"idiotic", "poorly thought out"},
727+
{"dunce", "uninformed individual"},
728+
{"nasty", "unpleasant"},
729+
{"worthless", "lacking value"},
730+
{"pathetic", "disappointing"},
731+
{"dimwit", "uninformed"},
732+
{"clueless", "uninformed"},
524733
{NULL, NULL}
525734
};
526735

0 commit comments

Comments
 (0)