@@ -373,6 +373,84 @@ static const char *SOAP_NEUTRAL_PATTERNS[] = {
373373 "clear" , "direct" , "simple" , "uncomplicated" , NULL
374374};
375375
/**
 * Lookup table of hype / overstatement phrases.
 *
 * The table is handed to soap_detect_patterns() without a length argument,
 * so the last entry must be a real NULL pointer (as in the other pattern
 * tables of this file), not the string literal "NULL".
 */
static const char *SOAP_HYPE_PATTERNS[] = {
    "game-changing", "revolutionary", "cutting-edge", "disruptive", "never before seen",
    "unprecedented", "the ultimate", "next-level", "life-changing", "epic", "legendary",
    "the best ever", "mind-blowing", "world-class", "exclusive", "groundbreaking", "innovative",
    "unparalleled", "must-have", "hot new", "phenomenal", "incredible", "jaw-dropping",
    "guaranteed", "proven results", "miracle", "unstoppable", "the future of", "beyond belief",
    "state-of-the-art", "amazing", "once in a lifetime", "extraordinary", "unbeatable", "elite",
    "remarkable", "top-tier", "exceptional", "stellar", "premium", "best-in-class", "outstanding",
    "next big thing", "ultimate solution", "transformative", "game-changer", "reimagined", "breakthrough",
    "trailblazing", "legendary product", "redefining",
    NULL /* sentinel: end of table */
};
388+
/**
 * Lookup table of politically charged phrases.
 *
 * The table is handed to soap_detect_patterns() without a length argument,
 * so the last entry must be a real NULL pointer (as in the other pattern
 * tables of this file), not the string literal "NULL".
 */
static const char *SOAP_POLITICAL_PATTERNS[] = {
    "left-wing", "right-wing", "liberal agenda", "conservative values", "fake news media",
    "mainstream media", "deep state", "radical", "extremist", "social justice warrior", "tax and spend",
    "big government", "free market", "personal freedom", "authoritarian", "totalitarian", "censorship",
    "political correctness", "cancel culture", "big brother", "elitist", "grassroots movement",
    "identity politics", "partisan", "special interests", "lobbyist", "corrupt politicians", "vote rigging",
    "rigged system", "polarized nation", "propaganda", "electoral fraud", "media bias", "government overreach",
    "legislation", "policy reform", "constitutional", "impeachment", "regulatory capture", "voter suppression",
    "political scandal", "public office", "government shutdown", "state control", "national security",
    "campaign finance", "party platform",
    NULL /* sentinel: end of table */
};
401+
/**
 * Lookup table of conspiracy-style phrases.
 *
 * The table is handed to soap_detect_patterns() without a length argument,
 * so the last entry must be a real NULL pointer (as in the other pattern
 * tables of this file), not the string literal "NULL".
 *
 * NOTE(review): the first entry contains a typographic apostrophe (U+2019),
 * not ASCII '\''; confirm the matcher normalizes apostrophes in its input.
 */
static const char *SOAP_CONSPIRACY_PATTERNS[] = {
    "they don’t want you to know", "hidden truth", "cover up", "shadow government", "mind control",
    "secret society", "illuminati", "new world order", "false flag", "plandemic", "chemtrails",
    "crisis actors", "fabricated evidence", "hollow earth", "flat earth", "alien cover-up",
    "government lies", "deep underground base", "suppressed technology", "mass surveillance",
    "thought police", "fabricated by the media", "controlled opposition", "secret labs",
    "fake moon landing", "reptilian", "massive hoax", "in on it", "they control everything",
    "behind closed doors", "manipulation", "hidden agenda", "classified files", "covert operations",
    "black ops", "unexplained phenomena", "shadowy figures", "secret programs", "disinformation",
    "conspiratorial", "hidden messages", "mind games", "secret experiments", "underground network",
    NULL /* sentinel: end of table */
};
415+
/**
 * Lookup table of sales / marketing call-to-action phrases.
 *
 * The table is handed to soap_detect_patterns() without a length argument,
 * so the last entry must be a real NULL pointer (as in the other pattern
 * tables of this file), not the string literal "NULL".
 *
 * NOTE(review): "don’t miss out" and "don’t wait" contain a typographic
 * apostrophe (U+2019), not ASCII '\''; confirm the matcher normalizes
 * apostrophes in its input.
 */
static const char *SOAP_MARKETING_PATTERNS[] = {
    "limited time offer", "act now", "don’t miss out", "guaranteed results", "risk free",
    "sign up today", "exclusive deal", "free trial", "buy one get one", "special promotion",
    "limited stock", "save big", "lowest price ever", "best deal", "offer ends soon",
    "only for today", "unlock savings", "no obligation", "instant access", "money back guarantee",
    "redeem now", "hot deal", "flash sale", "discounted rate", "claim your spot", "exclusive offer",
    "limited edition", "join thousands", "be the first", "special savings", "new arrival",
    "get yours now", "best value", "exclusive bonus", "early bird", "special launch", "today only",
    "final chance", "bonus included", "premium package", "offer valid", "special offer", "don’t wait",
    "hurry up", "exclusive access", "deal of the day",
    NULL /* sentinel: end of table */
};
428+
/**
 * Lookup table of technology buzzword phrases.
 *
 * The table is handed to soap_detect_patterns() without a length argument,
 * so the last entry must be a real NULL pointer (as in the other pattern
 * tables of this file), not the string literal "NULL".
 */
static const char *SOAP_TECHNOBABBLE_PATTERNS[] = {
    "synergy", "blockchain-enabled", "AI-powered", "machine learning solution",
    "next-gen", "hyper scalable", "paradigm shift", "deep tech", "seamless integration",
    "big data", "cloud-native", "cutting-edge platform", "quantum leap", "value proposition",
    "turnkey solution", "innovative ecosystem", "frictionless", "low-hanging fruit",
    "mission critical", "digital transformation", "smart disruption", "empower users",
    "end-to-end", "future-proof", "holistic approach", "thought leadership", "revolutionize",
    "strategic alignment", "game-changer", "intelligent automation", "data-driven", "disruptive innovation",
    "scalable architecture", "AI-driven", "cloud-first", "next-generation", "hyper convergence",
    "machine intelligence", "tech stack", "real-time analytics", "dynamic workflow", "intelligent design",
    "agile methodology",
    NULL /* sentinel: end of table */
};
442+
/**
 * Lookup table for low-quality signals (filler, hedging, dismissive phrases).
 *
 * The table is handed to soap_detect_patterns() without a length argument,
 * so the last entry must be a real NULL pointer (as in the other pattern
 * tables of this file), not the string literal "NULL".
 *
 * NOTE(review): "doesn’t matter" contains a typographic apostrophe (U+2019),
 * not ASCII '\''; confirm the matcher normalizes apostrophes in its input.
 */
static const char *SOAP_QUALITY_PATTERNS[] = {
    "just saying", "you know", "like literally", "basically", "whatever", "stuff and things",
    "random nonsense", "blah blah", "and so on", "thingy", "meh", "idk", "not sure", "somehow",
    "something like that", "kind of", "sort of", "whatever works", "in a way", "obviously",
    "clearly", "everyone knows", "trust me", "believe me", "it is what it is", "that kind of thing",
    "doesn’t matter", "whatever you think", "unimportant", "insignificant", "minor detail",
    "whatever floats your boat", "trivial", "meaningless", "small stuff", "irrelevant",
    "empty words", "hollow statement", "noise", "filler", "pointless",
    NULL /* sentinel: end of table */
};
453+
376454/** Lookup table for words that need to be skipped due to misdetection */
377455static const char * SKIP_WORDS [] = {
378456 "limit" , "size" , "width" , "height" , "length" , "depth" , "volume" , "capacity" , "weight" ,
@@ -691,13 +769,19 @@ const char *fossil_io_soap_detect_tone(const char *text) {
691769 {SOAP_CLICKBAIT_PATTERNS , "clickbait" },
692770 {SOAP_SPAM_PATTERNS , "spam" },
693771 {SOAP_WOKE_PATTERNS , "woke" },
694- {SOAP_BOT_PATTERNS , "bot" },
695- {SOAP_SARCASTIC_PATTERNS , "sarcastic" },
696- {SOAP_SNOWFLAKE_PATTERNS , "snowflake" },
697- {SOAP_FORMAL_PATTERNS , "formal" },
698- {SOAP_OFFENSIVE_PATTERNS , "offensive" },
699- {SOAP_NEUTRAL_PATTERNS , "neutral" },
700- {NULL , NULL }
772+ {SOAP_BOT_PATTERNS , "bot" },
773+ {SOAP_SARCASTIC_PATTERNS , "sarcastic" },
774+ {SOAP_SNOWFLAKE_PATTERNS , "snowflake" },
775+ {SOAP_FORMAL_PATTERNS , "formal" },
776+ {SOAP_OFFENSIVE_PATTERNS , "offensive" },
777+ {SOAP_NEUTRAL_PATTERNS , "neutral" },
778+ { SOAP_QUALITY_PATTERNS , "quality" },
779+ { SOAP_HYPE_PATTERNS , "hype" },
780+ { SOAP_POLITICAL_PATTERNS , "political" },
781+ { SOAP_CONSPIRACY_PATTERNS , "conspiracy" },
782+ { SOAP_MARKETING_PATTERNS , "marketing" },
783+ { SOAP_TECHNOBABBLE_PATTERNS , "technobabble" },
784+ { NULL , NULL }
701785 };
702786
703787 // Split text into words for more accurate matching
@@ -894,6 +978,30 @@ int fossil_io_soap_detect_neutral(const char *text) {
894978 return soap_detect_patterns (text , SOAP_NEUTRAL_PATTERNS );
895979}
896980
981+ int fossil_io_soap_detect_hype (const char * text ) {
982+ return soap_detect_patterns (text , SOAP_HYPE_PATTERNS );
983+ }
984+
985+ int fossil_io_soap_detect_quality (const char * text ) {
986+ return soap_detect_patterns (text , SOAP_QUALITY_PATTERNS );
987+ }
988+
989+ int fossil_io_soap_detect_political (const char * text ) {
990+ return soap_detect_patterns (text , SOAP_POLITICAL_PATTERNS );
991+ }
992+
993+ int fossil_io_soap_detect_conspiracy (const char * text ) {
994+ return soap_detect_patterns (text , SOAP_CONSPIRACY_PATTERNS );
995+ }
996+
997+ int fossil_io_soap_detect_marketing (const char * text ) {
998+ return soap_detect_patterns (text , SOAP_MARKETING_PATTERNS );
999+ }
1000+
1001+ int fossil_io_soap_detect_technobabble (const char * text ) {
1002+ return soap_detect_patterns (text , SOAP_TECHNOBABBLE_PATTERNS );
1003+ }
1004+
8971005/**
8981006 * @brief Filter text by replacing words/phrases matching any pattern (comma-separated) with '*'.
8991007 * Patterns support '*' and '?' wildcards, case-insensitive.
0 commit comments