Skip to content

Commit a23c1fc

Browse files
Merge pull request #3 from dreamer-coding/soap_upgrade
Soap upgrade
2 parents a971646 + 8010beb commit a23c1fc

File tree

4 files changed

+389
-27
lines changed

4 files changed

+389
-27
lines changed

code/logic/fossil/io/soap.h

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,29 +31,93 @@ extern "C" {
3131
void fossil_soap_sanitize(char *input);
3232

3333
/**
34-
* Check if a word is a curse word or racist phrase.
35-
* This function checks whether a given word is considered offensive.
36-
* It compares the word against a predefined list of offensive words and phrases
37-
* to determine its offensiveness.
38-
*
39-
* @param word The word to be checked for offensiveness.
40-
* @return True if the word is offensive, false otherwise.
34+
* Check if a word is an offensive word or phrase.
35+
* Returns EXIT_FAILURE if the word is considered offensive, EXIT_SUCCESS otherwise.
4136
*/
4237
int32_t fossil_soap_is_offensive(const char *word);
4338

4439
/**
45-
* Get the number of offensive words found in a string.
46-
* This function counts the number of offensive words found in the input string.
47-
* It scans the entire string and checks each word against a list of offensive words
48-
* to determine the count of offensive occurrences.
49-
*
50-
* @param input The input string to be checked for offensive words.
51-
* @return The number of offensive words found.
40+
* Check if a word is meme speak.
41+
* Returns EXIT_FAILURE if the word is considered meme speak, EXIT_SUCCESS otherwise.
42+
*/
43+
int32_t fossil_soap_is_rotbrain(const char *word);
44+
45+
/**
46+
* Count offensive words in a string.
47+
* Returns the number of offensive words found in the input string.
5248
*/
5349
int32_t fossil_soap_count_offensive(const char *input);
5450

51+
/**
52+
* Count meme speak words in a string.
53+
* Returns the number of meme speak words found in the input string.
54+
*/
55+
int32_t fossil_soap_count_rotbrain(const char *input);
56+
5557
#ifdef __cplusplus
5658
}
59+
60+
/**
61+
* C++ wrapper for the SOAP API.
62+
*/
63+
namespace fossil {
64+
65+
/**
66+
* Namespace for I/O operations.
67+
*/
68+
namespace io {
69+
/**
70+
* SOAP API for sanitizing strings.
71+
*/
72+
class Soap {
73+
public:
74+
/**
75+
* Sanitize a string by replacing offensive words and meme speak with asterisks.
76+
* This function scans the input string for offensive words and meme-speak terms and replaces each match with "***",
77+
* thereby making the string suitable for use in contexts where offensive language is not allowed.
78+
* The input string is modified in place.
79+
*
80+
* @param input The input string to be sanitized in-place.
81+
*/
82+
static void sanitize(char *input) {
83+
fossil_soap_sanitize(input);
84+
}
85+
86+
/**
87+
* Check if a word is an offensive word or phrase.
88+
* Returns EXIT_FAILURE if the word is considered offensive, EXIT_SUCCESS otherwise.
89+
*/
90+
static int32_t is_offensive(const char *word) {
91+
return fossil_soap_is_offensive(word);
92+
}
93+
94+
/**
95+
* Check if a word is meme speak.
96+
* Returns EXIT_FAILURE if the word is considered meme speak, EXIT_SUCCESS otherwise.
97+
*/
98+
static int32_t is_rotbrain(const char *word) {
99+
return fossil_soap_is_rotbrain(word);
100+
}
101+
102+
/**
103+
* Count offensive words in a string.
104+
* Returns the number of offensive words found in the input string.
105+
*/
106+
static int32_t count_offensive(const char *input) {
107+
return fossil_soap_count_offensive(input);
108+
}
109+
110+
/**
111+
* Count meme speak words in a string.
112+
* Returns the number of meme speak words found in the input string.
113+
*/
114+
static int32_t count_rotbrain(const char *input) {
115+
return fossil_soap_count_rotbrain(input);
116+
}
117+
};
118+
}
119+
}
120+
57121
#endif
58122

59123
#endif /* FOSSIL_IO_FRAMEWORK_H */

code/logic/soap.c

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#include <stdlib.h>
2323

2424
// List of offensive words and phrases (super hard to maintain this list as GitHub Copilot doesn't wanna help with this part of the SOAP API)
25-
static const char *offensive_words[] = {
25+
static const char *FOSSIL_SOAP_OFFENSIVE[] = {
2626
"curse1",
2727
"curse2",
2828
"racist_phrase1",
@@ -55,7 +55,18 @@ static const char *offensive_words[] = {
5555
// Support for other languages can be added via PR to this repository
5656
};
5757

58-
static inline char* _custom_fossil_strdup(const char* str) {
58+
// garbage words and phrases
59+
// (slightly easier to maintain since it's just slang from social media spoken by people who need to touch grass)
60+
static const char *FOSSIL_SOAP_ROTBRAIN[] = {
61+
"rizz", "skibidi", "yeet", "sus", "vibe", "lit", "no cap", "bet", "fam", "bruh",
62+
"flex", "ghost", "goat", "gucci", "hype", "janky", "lowkey", "mood", "salty", "shade",
63+
"slay", "snatched", "stan", "tea", "thirsty", "woke", "yolo", "zaddy", "drip", "fire",
64+
"lol", "omg", "brb", "sus"
65+
66+
// Support for other terms can be added via PR to this repository
67+
};
68+
69+
static inline char* custom_strdup(const char* str) {
5970
if (!str) return NULL; // Handle NULL pointer gracefully
6071

6172
size_t len = 0;
@@ -90,7 +101,7 @@ static char *custom_strcasestr(const char *haystack, const char *needle) {
90101
// Function to replace a substring in a string (case-insensitive)
91102
static void replace_substring_case_insensitive(char *str, const char *old_substr, const char *new_substr) {
92103
char *position = custom_strcasestr(str, old_substr);
93-
if (position != NULL) {
104+
while (position != NULL) {
94105
size_t old_len = strlen(old_substr);
95106
size_t new_len = strlen(new_substr);
96107
size_t tail_len = strlen(position + old_len);
@@ -100,28 +111,33 @@ static void replace_substring_case_insensitive(char *str, const char *old_substr
100111
memmove(position + new_len, position + old_len, tail_len + 1);
101112
} else {
102113
memmove(position + new_len, position + old_len, tail_len + 1);
103-
memcpy(position, new_substr, new_len);
104114
}
115+
memcpy(position, new_substr, new_len);
116+
117+
// Find the next occurrence
118+
position = custom_strcasestr(position + new_len, old_substr);
105119
}
106120
}
107121

108122
void fossil_soap_sanitize(char *input) {
109123
if (input == NULL || *input == '\0') return;
110124

111125
// Perform single-threaded sanitization
112-
for (size_t i = 0; i < sizeof(offensive_words) / sizeof(offensive_words[0]); ++i) {
113-
while (custom_strcasestr(input, offensive_words[i]) != NULL) {
114-
replace_substring_case_insensitive(input, offensive_words[i], "***");
115-
}
126+
for (size_t i = 0; i < sizeof(FOSSIL_SOAP_OFFENSIVE) / sizeof(FOSSIL_SOAP_OFFENSIVE[0]); ++i) {
127+
replace_substring_case_insensitive(input, FOSSIL_SOAP_OFFENSIVE[i], "***");
128+
}
129+
130+
for (size_t i = 0; i < sizeof(FOSSIL_SOAP_ROTBRAIN) / sizeof(FOSSIL_SOAP_ROTBRAIN[0]); ++i) {
131+
replace_substring_case_insensitive(input, FOSSIL_SOAP_ROTBRAIN[i], "***");
116132
}
117133
}
118134

119135
// Function to check if a word is an offensive word or phrase
120136
int32_t fossil_soap_is_offensive(const char *word) {
121137
if (word == NULL || *word == '\0') return EXIT_SUCCESS;
122138

123-
for (size_t i = 0; i < sizeof(offensive_words) / sizeof(offensive_words[0]); ++i) {
124-
if (strcasecmp(word, offensive_words[i]) == 0) {
139+
for (size_t i = 0; i < sizeof(FOSSIL_SOAP_OFFENSIVE) / sizeof(FOSSIL_SOAP_OFFENSIVE[0]); ++i) {
140+
if (strcasecmp(word, FOSSIL_SOAP_OFFENSIVE[i]) == 0) {
125141
return EXIT_FAILURE;
126142
}
127143
}
@@ -133,15 +149,44 @@ int32_t fossil_soap_count_offensive(const char *input) {
133149
if (input == NULL || *input == '\0') return 0;
134150

135151
int count = 0;
136-
char *copy = _custom_fossil_strdup(input);
152+
char *copy = custom_strdup(input);
137153
if (copy == NULL) return EXIT_SUCCESS;
138154

139-
char *token = strtok(copy, " "); // Tokenize the string by space
155+
char *token = strtok(copy, " ,.!?;:"); // Tokenize the string by space and punctuation
140156
while (token != NULL) {
141157
if (fossil_soap_is_offensive(token)) {
142158
count++;
143159
}
144-
token = strtok(NULL, " ");
160+
token = strtok(NULL, " ,.!?;:");
161+
}
162+
free(copy); // Free the memory allocated for the copy
163+
return count;
164+
}
165+
166+
int32_t fossil_soap_is_rotbrain(const char *word) {
167+
if (word == NULL || *word == '\0') return EXIT_SUCCESS;
168+
169+
for (size_t i = 0; i < sizeof(FOSSIL_SOAP_ROTBRAIN) / sizeof(FOSSIL_SOAP_ROTBRAIN[0]); ++i) {
170+
if (strcasecmp(word, FOSSIL_SOAP_ROTBRAIN[i]) == 0) {
171+
return EXIT_FAILURE;
172+
}
173+
}
174+
return EXIT_SUCCESS;
175+
}
176+
177+
int32_t fossil_soap_count_rotbrain(const char *input) {
178+
if (input == NULL || *input == '\0') return 0;
179+
180+
int count = 0;
181+
char *copy = custom_strdup(input);
182+
if (copy == NULL) return EXIT_SUCCESS;
183+
184+
char *token = strtok(copy, " ,.!?;:"); // Tokenize the string by space and punctuation
185+
while (token != NULL) {
186+
if (fossil_soap_is_rotbrain(token)) {
187+
count++;
188+
}
189+
token = strtok(NULL, " ,.!?;:");
145190
}
146191
free(copy); // Free the memory allocated for the copy
147192
return count;

code/tests/cases/test_soap.c

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,94 @@ FOSSIL_TEST_CASE(c_test_soap_count_offensive) {
6363
ASSUME_ITS_EQUAL_I32(2, fossil_soap_count_offensive(input));
6464
}
6565

66+
FOSSIL_TEST_CASE(c_test_soap_is_rotbrain) {
67+
ASSUME_ITS_TRUE(fossil_soap_is_rotbrain("lol"));
68+
ASSUME_ITS_TRUE(fossil_soap_is_rotbrain("brb"));
69+
ASSUME_ITS_FALSE(fossil_soap_is_rotbrain("hello"));
70+
}
71+
72+
FOSSIL_TEST_CASE(c_test_soap_count_rotbrain) {
73+
char input[] = "This is a test with lol and brb";
74+
ASSUME_ITS_EQUAL_I32(2, fossil_soap_count_rotbrain(input));
75+
}
76+
77+
FOSSIL_TEST_CASE(c_test_soap_sanitize_multiple_offensive) {
78+
char input[] = "curse1 curse2 racist_phrase1 racist_phrase2";
79+
char expected[] = "*** *** *** ***";
80+
81+
fossil_soap_sanitize(input);
82+
83+
ASSUME_ITS_EQUAL_CSTR(expected, input);
84+
}
85+
86+
FOSSIL_TEST_CASE(c_test_soap_sanitize_no_offensive) {
87+
char input[] = "This is a clean sentence.";
88+
char expected[] = "This is a clean sentence.";
89+
90+
fossil_soap_sanitize(input);
91+
92+
ASSUME_ITS_EQUAL_CSTR(expected, input);
93+
}
94+
95+
FOSSIL_TEST_CASE(c_test_soap_is_offensive_case_insensitive) {
96+
ASSUME_ITS_TRUE(fossil_soap_is_offensive("CuRsE1"));
97+
ASSUME_ITS_TRUE(fossil_soap_is_offensive("RaCiSt_PhrAsE2"));
98+
ASSUME_ITS_FALSE(fossil_soap_is_offensive("Non_Offensive_Word"));
99+
}
100+
101+
FOSSIL_TEST_CASE(c_test_soap_count_offensive_mixed_case) {
102+
char input[] = "This is a test with CuRsE1 and RaCiSt_PhrAsE1";
103+
ASSUME_ITS_EQUAL_I32(2, fossil_soap_count_offensive(input));
104+
}
105+
106+
FOSSIL_TEST_CASE(c_test_soap_is_rotbrain_case_insensitive) {
107+
ASSUME_ITS_TRUE(fossil_soap_is_rotbrain("LoL"));
108+
ASSUME_ITS_TRUE(fossil_soap_is_rotbrain("BrB"));
109+
ASSUME_ITS_FALSE(fossil_soap_is_rotbrain("Hello"));
110+
}
111+
112+
FOSSIL_TEST_CASE(c_test_soap_count_rotbrain_mixed_case) {
113+
char input[] = "This is a test with LoL and BrB";
114+
ASSUME_ITS_EQUAL_I32(2, fossil_soap_count_rotbrain(input));
115+
}
116+
117+
FOSSIL_TEST_CASE(c_test_soap_sanitize_synonyms) {
118+
char input[] = "This is a test with rizz and sus.";
119+
char expected[] = "This is a test with *** and ***.";
120+
121+
fossil_soap_sanitize(input);
122+
123+
ASSUME_ITS_EQUAL_CSTR(expected, input);
124+
}
125+
126+
FOSSIL_TEST_CASE(c_test_soap_sanitize_with_punctuation) {
127+
char input[] = "This is a test with curse1, and racist_phrase1!";
128+
char expected[] = "This is a test with ***, and ***!";
129+
130+
fossil_soap_sanitize(input);
131+
132+
ASSUME_ITS_EQUAL_CSTR(expected, input);
133+
}
134+
135+
FOSSIL_TEST_CASE(c_test_soap_count_offensive_with_punctuation) {
136+
char input[] = "This is a test with curse1, and racist_phrase1!";
137+
ASSUME_ITS_EQUAL_I32(2, fossil_soap_count_offensive(input));
138+
}
139+
140+
FOSSIL_TEST_CASE(c_test_soap_sanitize_rotbrain_with_punctuation) {
141+
char input[] = "This is a test with lol, and brb!";
142+
char expected[] = "This is a test with ***, and ***!";
143+
144+
fossil_soap_sanitize(input);
145+
146+
ASSUME_ITS_EQUAL_CSTR(expected, input);
147+
}
148+
149+
FOSSIL_TEST_CASE(c_test_soap_count_rotbrain_with_punctuation) {
150+
char input[] = "This is a test with lol, and brb!";
151+
ASSUME_ITS_EQUAL_I32(2, fossil_soap_count_rotbrain(input));
152+
}
153+
66154
// * * * * * * * * * * * * * * * * * * * * * * * *
67155
// * Fossil Logic Test Pool
68156
// * * * * * * * * * * * * * * * * * * * * * * * *
@@ -71,6 +159,19 @@ FOSSIL_TEST_GROUP(c_soap_tests) {
71159
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_sanitize);
72160
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_is_offensive);
73161
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_count_offensive);
162+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_is_rotbrain);
163+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_count_rotbrain);
164+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_sanitize_multiple_offensive);
165+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_sanitize_no_offensive);
166+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_is_offensive_case_insensitive);
167+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_count_offensive_mixed_case);
168+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_is_rotbrain_case_insensitive);
169+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_count_rotbrain_mixed_case);
170+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_sanitize_synonyms);
171+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_sanitize_with_punctuation);
172+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_count_offensive_with_punctuation);
173+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_sanitize_rotbrain_with_punctuation);
174+
FOSSIL_TEST_ADD(c_soap_suite, c_test_soap_count_rotbrain_with_punctuation);
74175

75176
FOSSIL_TEST_REGISTER(c_soap_suite);
76177
}

0 commit comments

Comments
 (0)