@@ -57,22 +57,16 @@ static const char *FOSSIL_SOAP_OFFENSIVE[] = {
5757
// garbage words and phrases
// (slightly easier to maintain since it's just slang from social media spoken by people who need to touch grass)
//
// Each entry is matched case-insensitively. Keep entries unique: a repeated
// term only makes every lookup and sanitize pass scan the table longer.
static const char *FOSSIL_SOAP_ROTBRAIN[] = {
    "rizz", "skibidi", "yeet", "sus", "vibe", "lit", "no cap", "bet", "fam", "bruh",
    "flex", "ghost", "goat", "gucci", "hype", "janky", "lowkey", "mood", "salty", "shade",
    "slay", "snatched", "stan", "tea", "thirsty", "woke", "yolo", "zaddy", "drip", "fire",
    "lol", "omg", "brb"

    // Support for other terms can be added via PR to this repository
};
6768
68- // Fuzzy matching of synonyms (basic implementation)
69- const char * FOSSIL_SOAP_SYNONYMS [] = {
70- "rizz" , "charm" , "flirt" , "tease" ,
71- "sus" , "suspicious" , "shady"
72- };
73-
74-
75- static inline char * _custom_fossil_strdup (const char * str ) {
69+ static inline char * custom_strdup (const char * str ) {
7670 if (!str ) return NULL ; // Handle NULL pointer gracefully
7771
7872 size_t len = 0 ;
@@ -89,132 +83,56 @@ static inline char* _custom_fossil_strdup(const char* str) {
8983 return dup ;
9084}
9185
92- // Utility: Case-insensitive substring search
// Fallback implementation for platforms that don't support strcasestr.
//
// Returns a pointer to the first case-insensitive occurrence of `needle`
// inside `haystack`, or NULL when there is no occurrence or when either
// argument is NULL. An empty needle matches at the start of the haystack
// (even an empty haystack), mirroring strstr().
static char *custom_strcasestr(const char *haystack, const char *needle) {
    if (haystack == NULL || needle == NULL) {
        return NULL;
    }
    if (*needle == '\0') {
        return (char *)haystack;
    }

    for (; *haystack != '\0'; ++haystack) {
        size_t i = 0;
        // The unsigned char casts keep tolower() well-defined for bytes >= 0x80.
        while (needle[i] != '\0' &&
               tolower((unsigned char)haystack[i]) == tolower((unsigned char)needle[i])) {
            ++i;
        }
        // Reaching the needle's terminator means every character matched.
        if (needle[i] == '\0') {
            return (char *)haystack;
        }
    }
    return NULL;
}
106100
107- // Utility: Replace a substring in a string (case-insensitive)
// Replace every case-insensitive occurrence of `old_substr` in `str` with
// `new_substr`, editing the buffer in place.
//
// The capacity of `str` is not known here, so the string is never allowed to
// grow: when `new_substr` is longer than `old_substr`, only its first
// strlen(old_substr) characters are written. A shorter replacement shrinks
// the string and the remaining tail is shifted left.
//
// NULL arguments and an empty `old_substr` leave `str` untouched.
static void replace_substring_case_insensitive(char *str, const char *old_substr, const char *new_substr) {
    if (str == NULL || old_substr == NULL || new_substr == NULL) {
        return;
    }

    const size_t old_len = strlen(old_substr);
    if (old_len == 0) {
        return; // an empty pattern matches at every position and would never advance
    }

    size_t new_len = strlen(new_substr);
    if (new_len > old_len) {
        new_len = old_len; // growing in place could write past the caller's buffer
    }

    char *position = custom_strcasestr(str, old_substr);
    while (position != NULL) {
        if (new_len < old_len) {
            // Close the gap left by the shorter replacement; +1 moves the NUL too.
            memmove(position + new_len, position + old_len, strlen(position + old_len) + 1);
        }
        memcpy(position, new_substr, new_len);

        // Resume after the replacement so freshly written text is not rescanned.
        position = custom_strcasestr(position + new_len, old_substr);
    }
}
142121
143- // Synonym detection and replacement function (case-insensitive)
144- void replace_synonym_with_placeholder (char * input , const char * old_word , const char * new_word ) {
145- while (custom_strcasestr (input , old_word ) != NULL ) {
146- replace_substring_case_insensitive (input , old_word , new_word );
147- }
148- }
149-
150- // Run fuzzer and test the enhanced sanitization
151- void fossil_soap_fuzzer (const char * input ) {
152- const size_t fuzz_count = 10 ;
153- printf ("\n=== Fuzzer Test Cases ===\n" );
154- for (size_t i = 0 ; i < fuzz_count ; ++ i ) {
155- char fuzzed_input [512 ];
156- snprintf (fuzzed_input , sizeof (fuzzed_input ), "%s %c%s" ,
157- input ,
158- (rand () % 26 ) + 'A' ,
159- "!!" );
160- printf ("Fuzzed Input %lld: %s\n" , (long long int )(i + 1 ), fuzzed_input );
161- char sanitized_output [512 ];
162- strncpy (sanitized_output , fuzzed_input , sizeof (sanitized_output ));
163- fossil_soap_sanitize (sanitized_output );
164- printf ("Sanitized Output %lld: %s\n" , (long long int )(i + 1 ), sanitized_output );
165- }
166- }
167-
168- // Utility: Trim extra spaces and ensure proper sentence capitalization
169- void fossil_soap_correct_grammar (char * input ) {
170- if (input == NULL || * input == '\0' ) return ;
171-
172- char * dst = input ;
173- int capitalize = 1 ;
174-
175- for (char * src = input ; * src ; ++ src ) {
176- if (isspace (* src )) {
177- if (dst == input || isspace (* (dst - 1 ))) continue ; // Skip duplicate spaces
178- * dst ++ = ' ' ; // Single space
179- } else {
180- * dst ++ = capitalize ? toupper (* src ) : tolower (* src );
181- capitalize = ispunct (* src ) || isspace (* src );
182- }
183- }
184- if (dst > input && isspace (* (dst - 1 ))) dst -- ; // Remove trailing space
185- * dst = '\0' ;
186- }
187-
188- // Main sanitization function
189122void fossil_soap_sanitize (char * input ) {
190123 if (input == NULL || * input == '\0' ) return ;
191124
192- // Handle synonyms and offensive words
125+ // Perform single-threaded sanitization
193126 for (size_t i = 0 ; i < sizeof (FOSSIL_SOAP_OFFENSIVE ) / sizeof (FOSSIL_SOAP_OFFENSIVE [0 ]); ++ i ) {
194- while (custom_strcasestr (input , FOSSIL_SOAP_OFFENSIVE [i ]) != NULL ) {
195- replace_substring_case_insensitive (input , FOSSIL_SOAP_OFFENSIVE [i ], "***" );
196- }
127+ replace_substring_case_insensitive (input , FOSSIL_SOAP_OFFENSIVE [i ], "***" );
197128 }
198129
199- // Handle meme speak
200130 for (size_t i = 0 ; i < sizeof (FOSSIL_SOAP_ROTBRAIN ) / sizeof (FOSSIL_SOAP_ROTBRAIN [0 ]); ++ i ) {
201- while (custom_strcasestr (input , FOSSIL_SOAP_ROTBRAIN [i ]) != NULL ) {
202- replace_substring_case_insensitive (input , FOSSIL_SOAP_ROTBRAIN [i ], "[ROT]" );
203- }
204- }
205-
206- // Handle synonyms
207- for (size_t i = 0 ; i < sizeof (FOSSIL_SOAP_SYNONYMS ) / sizeof (FOSSIL_SOAP_SYNONYMS [0 ]); ++ i ) {
208- while (custom_strcasestr (input , FOSSIL_SOAP_SYNONYMS [i ]) != NULL ) {
209- replace_substring_case_insensitive (input , FOSSIL_SOAP_SYNONYMS [i ], "[SYNONYM]" );
210- }
131+ replace_substring_case_insensitive (input , FOSSIL_SOAP_ROTBRAIN [i ], "***" );
211132 }
212-
213- // Optional: Correct grammar after sanitizing
214- fossil_soap_correct_grammar (input );
215133}
216134
217- // Check if a word is offensive
135+ // Function to check if a word is an offensive word or phrase
218136int32_t fossil_soap_is_offensive (const char * word ) {
219137 if (word == NULL || * word == '\0' ) return EXIT_SUCCESS ;
220138
@@ -226,52 +144,50 @@ int32_t fossil_soap_is_offensive(const char *word) {
226144 return EXIT_SUCCESS ;
227145}
228146
229- // Check if a word is meme speak
230- int32_t fossil_soap_is_rotbrain (const char * word ) {
231- if (word == NULL || * word == '\0' ) return EXIT_SUCCESS ;
232-
233- for (size_t i = 0 ; i < sizeof (FOSSIL_SOAP_ROTBRAIN ) / sizeof (FOSSIL_SOAP_ROTBRAIN [0 ]); ++ i ) {
234- if (strcasecmp (word , FOSSIL_SOAP_ROTBRAIN [i ]) == 0 ) {
235- return EXIT_FAILURE ;
236- }
237- }
238- return EXIT_SUCCESS ;
239- }
240-
241- // Count offensive words in a string
// Function to get the number of offensive words found in a string.
//
// `input` is split into tokens on spaces and the punctuation characters
// ",.!?;:", and each token is passed to fossil_soap_is_offensive().
// Returns the number of flagged tokens; 0 for NULL or empty input, and 0 when
// custom_strdup() cannot provide a working copy.
int32_t fossil_soap_count_offensive(const char *input) {
    static const char delimiters[] = " ,.!?;:";

    if (input == NULL || *input == '\0') return 0;

    // Tokens must be NUL-terminated for the word check, so split a private copy.
    char *copy = custom_strdup(input);
    if (copy == NULL) return 0;

    int32_t count = 0;
    char *cursor = copy;

    // Scan with strspn/strcspn instead of strtok so that any strtok sequence
    // the caller has in progress is left undisturbed.
    for (;;) {
        cursor += strspn(cursor, delimiters); // skip the separators before the token
        if (*cursor == '\0') break;

        char *end = cursor + strcspn(cursor, delimiters);
        const int at_end = (*end == '\0');
        *end = '\0';

        if (fossil_soap_is_offensive(cursor) != EXIT_SUCCESS) {
            count++;
        }

        if (at_end) break;
        cursor = end + 1;
    }

    free(copy); // Free the memory allocated for the copy
    return count;
}
259165
260- // Count meme speak words in a string
166+ int32_t fossil_soap_is_rotbrain (const char * word ) {
167+ if (word == NULL || * word == '\0' ) return EXIT_SUCCESS ;
168+
169+ for (size_t i = 0 ; i < sizeof (FOSSIL_SOAP_ROTBRAIN ) / sizeof (FOSSIL_SOAP_ROTBRAIN [0 ]); ++ i ) {
170+ if (strcasecmp (word , FOSSIL_SOAP_ROTBRAIN [i ]) == 0 ) {
171+ return EXIT_FAILURE ;
172+ }
173+ }
174+ return EXIT_SUCCESS ;
175+ }
176+
// Function to get the number of rotbrain (slang) words found in a string.
//
// `input` is split into tokens on spaces and the punctuation characters
// ",.!?;:", and each token is passed to fossil_soap_is_rotbrain().
// Returns the number of flagged tokens; 0 for NULL or empty input, and 0 when
// custom_strdup() cannot provide a working copy.
int32_t fossil_soap_count_rotbrain(const char *input) {
    static const char delimiters[] = " ,.!?;:";

    if (input == NULL || *input == '\0') return 0;

    // Tokens must be NUL-terminated for the word check, so split a private copy.
    char *copy = custom_strdup(input);
    if (copy == NULL) return 0;

    int32_t count = 0;
    char *cursor = copy;

    // Scan with strspn/strcspn instead of strtok so that any strtok sequence
    // the caller has in progress is left undisturbed.
    for (;;) {
        cursor += strspn(cursor, delimiters); // skip the separators before the token
        if (*cursor == '\0') break;

        char *end = cursor + strcspn(cursor, delimiters);
        const int at_end = (*end == '\0');
        *end = '\0';

        if (fossil_soap_is_rotbrain(cursor) != EXIT_SUCCESS) {
            count++;
        }

        if (at_end) break;
        cursor = end + 1;
    }

    free(copy); // Free the memory allocated for the copy
    return count;
}
0 commit comments