|
19 | 19 | #include <stdio.h>
|
20 | 20 | #include <math.h>
|
21 | 21 |
|
22 |
| -#define FOSSIL_JELLYFISH_HASH_SIZE 16 |
23 | 22 | #define MAX_CUSTOM_FILTERS 64
|
24 | 23 |
|
25 | 24 | /** Lookup table for rot-brain words and their suggested replacements */
|
@@ -131,48 +130,6 @@ static const char *SKIP_WORDS[] = {
|
131 | 130 | NULL // Sentinel to mark the end
|
132 | 131 | };
|
133 | 132 |
|
134 |
| -#define FNV_PRIME 0x01000193 |
135 |
| -#define FNV_BASIS 0x811c9dc5 |
136 |
| - |
137 |
| -void fossil_io_soap_jellyfish_hash(const char *input, const char *output, uint8_t *hash_out) { |
138 |
| - uint32_t hash = FNV_BASIS; |
139 |
| - size_t in_len = strlen(input); |
140 |
| - size_t out_len = strlen(output); |
141 |
| - |
142 |
| - // Mix lengths |
143 |
| - hash ^= in_len; |
144 |
| - hash *= FNV_PRIME; |
145 |
| - hash ^= out_len; |
146 |
| - hash *= FNV_PRIME; |
147 |
| - |
148 |
| - // Mix input string |
149 |
| - for (size_t i = 0; i < in_len; ++i) { |
150 |
| - hash ^= (uint8_t)input[i]; |
151 |
| - hash *= FNV_PRIME; |
152 |
| - hash ^= (hash >> 5); |
153 |
| - } |
154 |
| - |
155 |
| - // Mix output string |
156 |
| - for (size_t i = 0; i < out_len; ++i) { |
157 |
| - hash ^= (uint8_t)output[i]; |
158 |
| - hash *= FNV_PRIME; |
159 |
| - hash ^= (hash >> 5); |
160 |
| - } |
161 |
| - |
162 |
| - // Final avalanche |
163 |
| - hash ^= (hash << 7); |
164 |
| - hash ^= (hash >> 3); |
165 |
| - |
166 |
| - // Expand to fixed size |
167 |
| - uint32_t h = hash; |
168 |
| - for (size_t i = 0; i < FOSSIL_JELLYFISH_HASH_SIZE; ++i) { |
169 |
| - h ^= (h >> 13); |
170 |
| - h *= FNV_PRIME; |
171 |
| - h ^= (h << 11); |
172 |
| - hash_out[i] = (uint8_t)((h >> (8 * (i % 4))) & 0xFF); |
173 |
| - } |
174 |
| -} |
175 |
| - |
176 | 133 | static void hash_to_hex(const uint8_t *hash, size_t len, char *out_hex) {
|
177 | 134 | for (size_t i = 0; i < len; ++i) {
|
178 | 135 | sprintf(out_hex + i * 2, "%02x", hash[i]);
|
@@ -470,129 +427,130 @@ char *fossil_io_soap_normalize(const char *text) {
|
470 | 427 | return normalized;
|
471 | 428 | }
|
472 | 429 |
|
473 |
| -char *fossil_io_soap_generate_audit_block(const char *text) { |
474 |
| - if (!text) return NULL; |
475 |
| - |
476 |
| - char *sanitized = fossil_io_soap_sanitize(text); |
477 |
| - if (!sanitized) return NULL; |
478 | 430 |
|
479 |
| - uint8_t hash[FOSSIL_JELLYFISH_HASH_SIZE]; |
480 |
| - char hash_hex[FOSSIL_JELLYFISH_HASH_SIZE * 2 + 1]; |
| 431 | +char *fossil_io_soap_normalize_slang(const char *text) { |
| 432 | + if (!text) return NULL; |
481 | 433 |
|
482 |
| - fossil_io_soap_jellyfish_hash(text, sanitized, hash); |
483 |
| - hash_to_hex(hash, FOSSIL_JELLYFISH_HASH_SIZE, hash_hex); |
| 434 | + char *result = fossil_io_cstring_dup(text); |
| 435 | + if (!result) return NULL; |
484 | 436 |
|
485 |
| - char *audit_block = malloc(1024); |
486 |
| - if (!audit_block) { |
487 |
| - free(sanitized); |
488 |
| - return NULL; |
| 437 | + for (size_t i = 0; result[i]; i++) { |
| 438 | + result[i] = tolower(result[i]); |
489 | 439 | }
|
490 | 440 |
|
491 |
| - snprintf(audit_block, 1024, |
492 |
| - "{ \"original\": \"%s\", \"sanitized\": \"%s\", \"hash\": \"%s\" }", |
493 |
| - text, sanitized, hash_hex); |
494 |
| - |
495 |
| - free(sanitized); |
496 |
| - return audit_block; |
497 |
| -} |
| 441 | + for (size_t i = 0; FOSSIL_SOAP_SUGGESTIONS[i].bad != NULL; i++) { |
| 442 | + const char *bad = FOSSIL_SOAP_SUGGESTIONS[i].bad; |
| 443 | + const char *sugg = FOSSIL_SOAP_SUGGESTIONS[i].suggested; |
498 | 444 |
|
499 |
| -char *fossil_io_soap_diff_digest(const char *original, const char *transformed) { |
500 |
| - if (!original || !transformed) return NULL; |
| 445 | + char *found = NULL; |
| 446 | + while ((found = custom_strcasestr(result, bad)) != NULL) { |
| 447 | + size_t offset = found - result; |
| 448 | + size_t newlen = strlen(result) - strlen(bad) + strlen(sugg) + 1; |
501 | 449 |
|
502 |
| - size_t size = strlen(original) + strlen(transformed) + 2; |
503 |
| - char *combo = malloc(size); |
504 |
| - if (!combo) return NULL; |
| 450 | + char *temp = malloc(newlen); |
| 451 | + if (!temp) { |
| 452 | + free(result); |
| 453 | + return NULL; |
| 454 | + } |
505 | 455 |
|
506 |
| - snprintf(combo, size, "%s|%s", original, transformed); |
| 456 | + strncpy(temp, result, offset); |
| 457 | + temp[offset] = '\0'; |
| 458 | + strcat(temp, sugg); |
| 459 | + strcat(temp, found + strlen(bad)); |
507 | 460 |
|
508 |
| - char *digest = fossil_io_cstring_dup(fossil_hash_string(combo)); |
509 |
| - free(combo); |
510 |
| - return digest; |
511 |
| -} |
512 |
| - |
513 |
| -int fossil_io_soap_detect_sarcasm(const char *text) { |
514 |
| - if (!text) return 0; |
515 |
| - for (size_t i = 0; SARCASTIC_PHRASES[i]; i++) { |
516 |
| - if (custom_strcasestr(text, SARCASTIC_PHRASES[i])) return 1; |
| 461 | + free(result); |
| 462 | + result = temp; |
| 463 | + } |
517 | 464 | }
|
518 |
| - return 0; |
519 |
| -} |
520 | 465 |
|
521 |
| -const char *fossil_io_soap_detect_intent(const char *text) { |
522 |
| - if (!text) return "unknown"; |
523 |
| - if (fossil_io_soap_detect_sarcasm(text)) return "sarcastic"; |
524 |
| - if (custom_strcasestr(text, "please") || custom_strcasestr(text, "I would like")) |
525 |
| - return "formal"; |
526 |
| - if (strchr(text, '!') != NULL) return "emotional"; |
527 |
| - return "neutral"; |
| 466 | + return result; |
528 | 467 | }
|
529 | 468 |
|
530 |
| -char *fossil_io_soap_flag_ethics(const char *text) { |
531 |
| - if (!text) return NULL; |
532 |
| - |
533 |
| - // Simulate simple filter |
534 |
| - const char *bad_words[] = {"stupid", "idiot", "hate", "kill", NULL}; |
535 |
| - |
536 |
| - char *flagged = fossil_io_cstring_dup(text); |
537 |
| - if (!flagged) return NULL; |
/**
 * Report whether `text` contains a known clickbait phrase.
 *
 * Phrases are matched as case-insensitive substrings through
 * custom_strcasestr(). The "top <number>" listicle form is handled by an
 * explicit check, because the substring search takes patterns literally and
 * cannot interpret a character class such as "[0-9]".
 *
 * @param text NUL-terminated input string; NULL is treated as "no match".
 * @return 1 when a clickbait pattern is found, 0 otherwise.
 */
int fossil_io_soap_detect_clickbait(const char *text) {
    if (!text) return 0;

    static const char *CLICKBAIT_PATTERNS[] = {
        "you won't believe",
        "shocking",
        "what happened next",
        "things you didn't know",
        "one weird trick",
        "will blow your mind",
        "can't handle this",
        "before you die",
        NULL
    };

    for (size_t i = 0; CLICKBAIT_PATTERNS[i] != NULL; i++) {
        if (custom_strcasestr(text, CLICKBAIT_PATTERNS[i])) {
            return 1;
        }
    }

    /* "top " immediately followed by a decimal digit, e.g. "Top 10 ...". */
    static const char TOP_PREFIX[] = "top ";
    const size_t prefix_len = sizeof TOP_PREFIX - 1;
    const char *cursor = text;
    const char *hit = NULL;

    while ((hit = custom_strcasestr(cursor, TOP_PREFIX)) != NULL) {
        /* The match spans prefix_len characters, so this index is at most
         * the string terminator. */
        const char next = hit[prefix_len];
        const char prev = (hit > text) ? hit[-1] : '\0';
        const int prev_is_letter = (prev >= 'a' && prev <= 'z') ||
                                   (prev >= 'A' && prev <= 'Z');

        /* Require a word start so "stop 5" or "laptop 3" do not count. */
        if (!prev_is_letter && next >= '0' && next <= '9') {
            return 1;
        }
        cursor = hit + 1;
    }

    return 0;
}
|
548 | 493 |
|
549 |
| -char *fossil_io_soap_list_ethics_flags(const char *text) { |
550 |
| - if (!text) return NULL; |
/**
 * Report whether `text` contains any word from the exaggeration list.
 *
 * Each entry is looked up with custom_strcasestr(); the first hit ends the
 * search.
 *
 * @param text NUL-terminated input string; NULL is treated as "no match".
 * @return 1 when at least one listed word is found, 0 otherwise.
 */
int fossil_io_soap_detect_exaggeration(const char *text) {
    static const char *EXAGGERATED_WORDS[] = {
        "literally", "always", "never", "every", "everyone", "nobody",
        "forever", "insane", "unbelievable", "outrageous", "epic", "mind-blowing",
        NULL
    };

    if (text == NULL) {
        return 0;
    }

    /* Walk the NULL-terminated table by pointer rather than by index. */
    const char *const *word = EXAGGERATED_WORDS;
    while (*word != NULL) {
        if (custom_strcasestr(text, *word) != NULL) {
            return 1;
        }
        ++word;
    }

    return 0;
}
|
566 | 511 |
|
567 |
| -int fossil_io_soap_load_mindset_file(const char *filename) { |
568 |
| - FILE *f = fopen(filename, "r"); |
569 |
| - if (!f) return -1; |
570 |
| - |
571 |
| - char line[256]; |
572 |
| - while (fgets(line, sizeof(line), f)) { |
573 |
| - if (strncmp(line, "filter:", 7) == 0) { |
574 |
| - char *phrase = line + 7; |
575 |
| - while (*phrase == ' ') phrase++; |
576 |
| - phrase[strcspn(phrase, "\r\n")] = 0; |
577 |
| - fossil_io_soap_add_custom_filter(phrase); |
578 |
| - } |
579 |
| - } |
/**
 * Return a copy of `text` in which each listed offensive word is replaced by
 * a milder alternative.
 *
 * Words are located with custom_strcasestr(). Each table entry is applied in
 * a single left-to-right pass: after a substitution the scan resumes just
 * past the inserted text, so a replacement is never re-examined for the same
 * word and the loop always terminates.
 *
 * @param text NUL-terminated input string; NULL yields NULL.
 * @return Heap-allocated result string owned by the caller, or NULL when
 *         `text` is NULL or an allocation fails. Intermediate buffers are
 *         released here with free().
 */
char *fossil_io_soap_filter_offensive(const char *text) {
    if (!text) return NULL;

    static const struct {
        const char *offensive;
        const char *replacement;
    } OFFENSIVE_WORDS[] = {
        {"dumb", "uninformed"},
        {"stupid", "ill-advised"},
        {"idiot", "misguided"},
        {"moron", "uninformed"},
        {"sucks", "is not ideal"},
        {NULL, NULL}
    };

    char *result = fossil_io_cstring_dup(text);
    if (!result) return NULL;

    for (size_t i = 0; OFFENSIVE_WORDS[i].offensive != NULL; i++) {
        const char *bad = OFFENSIVE_WORDS[i].offensive;
        const char *good = OFFENSIVE_WORDS[i].replacement;
        const size_t bad_len = strlen(bad);
        const size_t good_len = strlen(good);
        size_t scan = 0; /* index in result where the next search starts */
        char *found = NULL;

        while ((found = custom_strcasestr(result + scan, bad)) != NULL) {
            const size_t offset = (size_t)(found - result);
            const size_t tail_len = strlen(found + bad_len);

            char *temp = malloc(offset + good_len + tail_len + 1);
            if (!temp) {
                free(result);
                return NULL;
            }

            /* prefix | replacement | remainder (including the terminator) */
            memcpy(temp, result, offset);
            memcpy(temp + offset, good, good_len);
            memcpy(temp + offset + good_len, found + bad_len, tail_len + 1);

            free(result);
            result = temp;
            scan = offset + good_len;
        }
    }

    return result;
}
|
0 commit comments