Skip to content

Commit bd2f122

Browse files
Merge pull request #71 from dreamer-coding/main
2 parents a2dae93 + cc0c423 commit bd2f122

File tree

3 files changed

+313
-7
lines changed

3 files changed

+313
-7
lines changed

code/logic/fossil/io/soap.h

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,54 @@ int fossil_io_soap_detect_offensive(const char *text);
136136
*/
137137
int fossil_io_soap_detect_neutral(const char *text);
138138

139+
/**
140+
* @brief Detect hype-related phrases in text.
141+
*
142+
* @param text Input text to scan.
143+
* @return Non-zero if hype phrases are detected, 0 otherwise.
144+
*/
145+
int fossil_io_soap_detect_hype(const char *text);
146+
147+
/**
148+
* @brief Detect low-quality filler phrases (vague or dismissive wording) in text.
149+
*
150+
* @param text Input text to scan.
151+
* @return Non-zero if low-quality phrases are detected, 0 otherwise.
152+
*/
153+
int fossil_io_soap_detect_quality(const char *text);
154+
155+
/**
156+
* @brief Detect political content in text.
157+
*
158+
* @param text Input text to scan.
159+
* @return Non-zero if political patterns are detected, 0 otherwise.
160+
*/
161+
int fossil_io_soap_detect_political(const char *text);
162+
163+
/**
164+
* @brief Detect conspiracy-related content in text.
165+
*
166+
* @param text Input text to scan.
167+
* @return Non-zero if conspiracy patterns are detected, 0 otherwise.
168+
*/
169+
int fossil_io_soap_detect_conspiracy(const char *text);
170+
171+
/**
172+
* @brief Detect marketing/jargon-heavy content in text.
173+
*
174+
* @param text Input text to scan.
175+
* @return Non-zero if marketing patterns are detected, 0 otherwise.
176+
*/
177+
int fossil_io_soap_detect_marketing(const char *text);
178+
179+
/**
180+
* @brief Detect technobabble or meaningless jargon in text.
181+
*
182+
* @param text Input text to scan.
183+
* @return Non-zero if technobabble patterns are detected, 0 otherwise.
184+
*/
185+
int fossil_io_soap_detect_technobabble(const char *text);
186+
139187
// filter functions
140188

141189
/**
@@ -357,6 +405,66 @@ namespace fossil {
357405
return fossil_io_soap_detect_neutral(text.c_str()) != 0;
358406
}
359407

408+
/**
409+
* @brief Detects if a given text contains hype-related content.
410+
*
411+
* @param text Input string to analyze
412+
* @return true if hype detected, false otherwise
413+
*/
414+
static bool is_hype(const std::string &text) {
415+
return fossil_io_soap_detect_hype(text.c_str()) != 0;
416+
}
417+
418+
/**
419+
* @brief Detects if a given text contains low-quality filler phrasing.
420+
*
421+
* @param text Input string to analyze
422+
* @return true if low-quality phrasing detected, false otherwise
423+
*/
424+
static bool is_quality(const std::string &text) {
425+
return fossil_io_soap_detect_quality(text.c_str()) != 0;
426+
}
427+
428+
/**
429+
* @brief Detects if a given text contains political content.
430+
*
431+
* @param text Input string to analyze
432+
* @return true if political content detected, false otherwise
433+
*/
434+
static bool is_political(const std::string &text) {
435+
return fossil_io_soap_detect_political(text.c_str()) != 0;
436+
}
437+
438+
/**
439+
* @brief Detects if a given text contains conspiracy-related content.
440+
*
441+
* @param text Input string to analyze
442+
* @return true if conspiracy content detected, false otherwise
443+
*/
444+
static bool is_conspiracy(const std::string &text) {
445+
return fossil_io_soap_detect_conspiracy(text.c_str()) != 0;
446+
}
447+
448+
/**
449+
* @brief Detects if a given text contains marketing/jargon-heavy content.
450+
*
451+
* @param text Input string to analyze
452+
* @return true if marketing jargon detected, false otherwise
453+
*/
454+
static bool is_marketing(const std::string &text) {
455+
return fossil_io_soap_detect_marketing(text.c_str()) != 0;
456+
}
457+
458+
/**
459+
* @brief Detects if a given text contains technobabble or meaningless jargon.
460+
*
461+
* @param text Input string to analyze
462+
* @return true if technobabble detected, false otherwise
463+
*/
464+
static bool is_technobabble(const std::string &text) {
465+
return fossil_io_soap_detect_technobabble(text.c_str()) != 0;
466+
}
467+
360468
/**
361469
* @brief Fix common grammar errors in input text.
362470
*

code/logic/soap.c

Lines changed: 115 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,84 @@ static const char *SOAP_NEUTRAL_PATTERNS[] = {
373373
"clear", "direct", "simple", "uncomplicated", NULL
374374
};
375375

376+
/**
 * Lookup table for hype phrases.
 *
 * The list is terminated by a real NULL pointer (not the string "NULL"):
 * consumers receive only the array pointer and stop at the first NULL entry.
 */
static const char *SOAP_HYPE_PATTERNS[] = {
    "game-changing","revolutionary","cutting-edge","disruptive","never before seen",
    "unprecedented","the ultimate","next-level","life-changing","epic","legendary",
    "the best ever","mind-blowing","world-class","exclusive","groundbreaking","innovative",
    "unparalleled","must-have","hot new","phenomenal","incredible","jaw-dropping",
    "guaranteed","proven results","miracle","unstoppable","the future of","beyond belief",
    "state-of-the-art","amazing","once in a lifetime","extraordinary","unbeatable","elite",
    "remarkable","top-tier","exceptional","stellar","premium","best-in-class","outstanding",
    "next big thing","ultimate solution","transformative","game-changer","reimagined","breakthrough",
    "trailblazing","legendary product","redefining",
    NULL
};
388+
389+
/**
 * Lookup table for politically charged phrases.
 *
 * The list is terminated by a real NULL pointer (not the string "NULL"):
 * consumers receive only the array pointer and stop at the first NULL entry.
 */
static const char *SOAP_POLITICAL_PATTERNS[] = {
    "left-wing","right-wing","liberal agenda","conservative values","fake news media",
    "mainstream media","deep state","radical","extremist","social justice warrior","tax and spend",
    "big government","free market","personal freedom","authoritarian","totalitarian","censorship",
    "political correctness","cancel culture","big brother","elitist","grassroots movement",
    "identity politics","partisan","special interests","lobbyist","corrupt politicians","vote rigging",
    "rigged system","polarized nation","propaganda","electoral fraud","media bias","government overreach",
    "legislation","policy reform","constitutional","impeachment","regulatory capture","voter suppression",
    "political scandal","public office","government shutdown","state control","national security",
    "campaign finance","party platform",
    NULL
};
401+
402+
/**
 * Lookup table for conspiracy-style phrases.
 *
 * The list is terminated by a real NULL pointer (not the string "NULL"):
 * consumers receive only the array pointer and stop at the first NULL entry.
 * Contractions are listed with both the typographic (U+2019) and the ASCII
 * apostrophe so plain-ASCII input is covered as well.
 */
static const char *SOAP_CONSPIRACY_PATTERNS[] = {
    "they don’t want you to know","they don't want you to know","hidden truth","cover up",
    "shadow government","mind control",
    "secret society","illuminati","new world order","false flag","plandemic","chemtrails",
    "crisis actors","fabricated evidence","hollow earth","flat earth","alien cover-up",
    "government lies","deep underground base","suppressed technology","mass surveillance",
    "thought police","fabricated by the media","controlled opposition","secret labs",
    "fake moon landing","reptilian","massive hoax","in on it","they control everything",
    "behind closed doors","manipulation","hidden agenda","classified files","covert operations",
    "black ops","unexplained phenomena","shadowy figures","secret programs","disinformation",
    "conspiratorial","hidden messages","mind games","secret experiments","underground network",
    NULL
};
415+
416+
/**
 * Lookup table for sales/marketing call-to-action phrases.
 *
 * The list is terminated by a real NULL pointer (not the string "NULL"):
 * consumers receive only the array pointer and stop at the first NULL entry.
 * Contractions are listed with both the typographic (U+2019) and the ASCII
 * apostrophe so plain-ASCII input is covered as well.
 */
static const char *SOAP_MARKETING_PATTERNS[] = {
    "limited time offer","act now","don’t miss out","don't miss out","guaranteed results","risk free",
    "sign up today","exclusive deal","free trial","buy one get one","special promotion",
    "limited stock","save big","lowest price ever","best deal","offer ends soon",
    "only for today","unlock savings","no obligation","instant access","money back guarantee",
    "redeem now","hot deal","flash sale","discounted rate","claim your spot","exclusive offer",
    "limited edition","join thousands","be the first","special savings","new arrival",
    "get yours now","best value","exclusive bonus","early bird","special launch","today only",
    "final chance","bonus included","premium package","offer valid","special offer",
    "don’t wait","don't wait",
    "hurry up","exclusive access","deal of the day",
    NULL
};
428+
429+
/**
 * Lookup table for technobabble / buzzword phrases.
 *
 * The list is terminated by a real NULL pointer (not the string "NULL"):
 * consumers receive only the array pointer and stop at the first NULL entry.
 */
static const char *SOAP_TECHNOBABBLE_PATTERNS[] = {
    "synergy","blockchain-enabled","AI-powered","machine learning solution",
    "next-gen","hyper scalable","paradigm shift","deep tech","seamless integration",
    "big data","cloud-native","cutting-edge platform","quantum leap","value proposition",
    "turnkey solution","innovative ecosystem","frictionless","low-hanging fruit",
    "mission critical","digital transformation","smart disruption","empower users",
    "end-to-end","future-proof","holistic approach","thought leadership","revolutionize",
    "strategic alignment","game-changer","intelligent automation","data-driven","disruptive innovation",
    "scalable architecture","AI-driven","cloud-first","next-generation","hyper convergence",
    "machine intelligence","tech stack","real-time analytics","dynamic workflow","intelligent design",
    "agile methodology",
    NULL
};
442+
443+
/**
 * Lookup table for low-quality signals (filler, vague or dismissive wording).
 *
 * The list is terminated by a real NULL pointer (not the string "NULL"):
 * consumers receive only the array pointer and stop at the first NULL entry.
 * Contractions are listed with both the typographic (U+2019) and the ASCII
 * apostrophe so plain-ASCII input is covered as well.
 */
static const char *SOAP_QUALITY_PATTERNS[] = {
    "just saying","you know","like literally","basically","whatever","stuff and things",
    "random nonsense","blah blah","and so on","thingy","meh","idk","not sure","somehow",
    "something like that","kind of","sort of","whatever works","in a way","obviously",
    "clearly","everyone knows","trust me","believe me","it is what it is","that kind of thing",
    "doesn’t matter","doesn't matter","whatever you think","unimportant","insignificant","minor detail",
    "whatever floats your boat","trivial","meaningless","small stuff","irrelevant",
    "empty words","hollow statement","noise","filler","pointless",
    NULL
};
453+
376454
/** Lookup table for words that need to be skipped due to misdetection */
377455
static const char *SKIP_WORDS[] = {
378456
"limit", "size", "width", "height", "length", "depth", "volume", "capacity", "weight",
@@ -691,13 +769,19 @@ const char *fossil_io_soap_detect_tone(const char *text) {
691769
{SOAP_CLICKBAIT_PATTERNS, "clickbait"},
692770
{SOAP_SPAM_PATTERNS, "spam"},
693771
{SOAP_WOKE_PATTERNS, "woke"},
694-
{SOAP_BOT_PATTERNS, "bot"},
695-
{SOAP_SARCASTIC_PATTERNS, "sarcastic"},
696-
{SOAP_SNOWFLAKE_PATTERNS, "snowflake"},
697-
{SOAP_FORMAL_PATTERNS, "formal"},
698-
{SOAP_OFFENSIVE_PATTERNS, "offensive"},
699-
{SOAP_NEUTRAL_PATTERNS, "neutral"},
700-
{NULL, NULL}
772+
{SOAP_BOT_PATTERNS, "bot"},
773+
{SOAP_SARCASTIC_PATTERNS, "sarcastic"},
774+
{SOAP_SNOWFLAKE_PATTERNS, "snowflake"},
775+
{SOAP_FORMAL_PATTERNS, "formal"},
776+
{SOAP_OFFENSIVE_PATTERNS, "offensive"},
777+
{SOAP_NEUTRAL_PATTERNS, "neutral"},
778+
{ SOAP_QUALITY_PATTERNS, "quality" },
779+
{ SOAP_HYPE_PATTERNS, "hype" },
780+
{ SOAP_POLITICAL_PATTERNS, "political" },
781+
{ SOAP_CONSPIRACY_PATTERNS, "conspiracy" },
782+
{ SOAP_MARKETING_PATTERNS, "marketing" },
783+
{ SOAP_TECHNOBABBLE_PATTERNS, "technobabble"},
784+
{ NULL, NULL }
701785
};
702786

703787
// Split text into words for more accurate matching
@@ -894,6 +978,30 @@ int fossil_io_soap_detect_neutral(const char *text) {
894978
return soap_detect_patterns(text, SOAP_NEUTRAL_PATTERNS);
895979
}
896980

981+
int fossil_io_soap_detect_hype(const char *text) {
982+
return soap_detect_patterns(text, SOAP_HYPE_PATTERNS);
983+
}
984+
985+
int fossil_io_soap_detect_quality(const char *text) {
986+
return soap_detect_patterns(text, SOAP_QUALITY_PATTERNS);
987+
}
988+
989+
int fossil_io_soap_detect_political(const char *text) {
990+
return soap_detect_patterns(text, SOAP_POLITICAL_PATTERNS);
991+
}
992+
993+
int fossil_io_soap_detect_conspiracy(const char *text) {
994+
return soap_detect_patterns(text, SOAP_CONSPIRACY_PATTERNS);
995+
}
996+
997+
int fossil_io_soap_detect_marketing(const char *text) {
998+
return soap_detect_patterns(text, SOAP_MARKETING_PATTERNS);
999+
}
1000+
1001+
int fossil_io_soap_detect_technobabble(const char *text) {
1002+
return soap_detect_patterns(text, SOAP_TECHNOBABBLE_PATTERNS);
1003+
}
1004+
8971005
/**
8981006
* @brief Filter text by replacing words/phrases matching any pattern (comma-separated) with '*'.
8991007
* Patterns support '*' and '?' wildcards, case-insensitive.

code/tests/cases/test_soap.c

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,84 @@ FOSSIL_TEST(c_test_io_soap_detect_offensive_false) {
151151
ASSUME_ITS_FALSE(result);
152152
}
153153

154+
// --- HYPE detection ---
155+
FOSSIL_TEST(c_test_io_soap_detect_hype_true) {
    // Sample stacks several hype-table phrases ("the ultimate", "revolutionary", ...).
    const char *sample = "This is the ultimate revolutionary game-changing breakthrough!";
    const int detected = fossil_io_soap_detect_hype(sample);
    ASSUME_ITS_TRUE(detected);
}
160+
161+
FOSSIL_TEST(c_test_io_soap_detect_hype_false) {
    // Plain wording with no phrase from the hype table.
    const char *sample = "This is a normal system update with minor improvements.";
    const int detected = fossil_io_soap_detect_hype(sample);
    ASSUME_ITS_FALSE(detected);
}
166+
167+
// --- QUALITY detection ---
168+
FOSSIL_TEST(c_test_io_soap_detect_quality_true) {
    // Sample includes the low-quality markers "everyone knows" and "clearly".
    const char *sample = "Everyone knows this method is reliable and clearly follows strict methodology.";
    const int detected = fossil_io_soap_detect_quality(sample);
    ASSUME_ITS_TRUE(detected);
}
173+
174+
FOSSIL_TEST(c_test_io_soap_detect_quality_false) {
    // Enthusiastic wording, but no phrase from the low-quality table.
    const char *sample = "Wow, this is amazing and unbelievable!";
    const int detected = fossil_io_soap_detect_quality(sample);
    ASSUME_ITS_FALSE(detected);
}
179+
180+
// --- POLITICAL detection ---
181+
FOSSIL_TEST(c_test_io_soap_detect_political_true) {
    // Sample uses "government overreach", "big government" and "personal freedom".
    const char *sample = "The government overreach and big government policies affect personal freedom.";
    const int detected = fossil_io_soap_detect_political(sample);
    ASSUME_ITS_TRUE(detected);
}
186+
187+
FOSSIL_TEST(c_test_io_soap_detect_political_false) {
    // Everyday hobby talk with no phrase from the political table.
    const char *sample = "I enjoy going on long hikes in the mountains and reading books.";
    const int detected = fossil_io_soap_detect_political(sample);
    ASSUME_ITS_FALSE(detected);
}
192+
193+
// --- CONSPIRACY detection ---
194+
FOSSIL_TEST(c_test_io_soap_detect_conspiracy_true) {
    // Sample opens with "Hidden truth" (capitalised), a conspiracy-table phrase.
    const char *sample = "Hidden truth and secret societies control world events.";
    const int detected = fossil_io_soap_detect_conspiracy(sample);
    ASSUME_ITS_TRUE(detected);
}
199+
200+
FOSSIL_TEST(c_test_io_soap_detect_conspiracy_false) {
    // Mentions "moon landing" and "phenomena" without the full table phrases
    // ("fake moon landing", "unexplained phenomena").
    const char *sample = "Astronomers study the moon landing and other space phenomena.";
    const int detected = fossil_io_soap_detect_conspiracy(sample);
    ASSUME_ITS_FALSE(detected);
}
205+
206+
// --- MARKETING detection ---
207+
FOSSIL_TEST(c_test_io_soap_detect_marketing_true) {
    // Sample opens with "Sign up today" (capitalised), a marketing-table phrase.
    const char *sample = "Sign up today for our exclusive limited-time offer!";
    const int detected = fossil_io_soap_detect_marketing(sample);
    ASSUME_ITS_TRUE(detected);
}
212+
213+
FOSSIL_TEST(c_test_io_soap_detect_marketing_false) {
    // Dry technical sentence with no phrase from the marketing table.
    const char *sample = "This is a technical description of a microcontroller circuit.";
    const int detected = fossil_io_soap_detect_marketing(sample);
    ASSUME_ITS_FALSE(detected);
}
218+
219+
// --- TECHNOBABBLE detection ---
220+
FOSSIL_TEST(c_test_io_soap_detect_technobabble_true) {
    // Sample uses "cloud-native", "AI-powered", "seamless integration" and "next-gen".
    const char *sample = "Our cloud-native AI-powered platform enables seamless integration and next-gen innovation.";
    const int detected = fossil_io_soap_detect_technobabble(sample);
    ASSUME_ITS_TRUE(detected);
}
225+
226+
FOSSIL_TEST(c_test_io_soap_detect_technobabble_false) {
    // Concrete technical statement with no phrase from the technobabble table.
    const char *sample = "The client connects to the server via a standard HTTPS request.";
    const int detected = fossil_io_soap_detect_technobabble(sample);
    ASSUME_ITS_FALSE(detected);
}
231+
154232
// filter cases
155233

156234
FOSSIL_TEST(c_test_io_soap_add_custom_filter) {
@@ -326,6 +404,18 @@ FOSSIL_TEST_GROUP(c_soap_tests) {
326404
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_snowflake_false);
327405
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_offensive_true);
328406
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_offensive_false);
407+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_hype_true);
408+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_hype_false);
409+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_quality_true);
410+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_quality_false);
411+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_political_true);
412+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_political_false);
413+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_conspiracy_true);
414+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_conspiracy_false);
415+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_marketing_true);
416+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_marketing_false);
417+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_technobabble_true);
418+
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_detect_technobabble_false);
329419

330420
// filter tests
331421
FOSSIL_TEST_ADD(c_soap_suite, c_test_io_soap_add_custom_filter);

0 commit comments

Comments
 (0)