|
30 | 30 | #include <unistd.h> |
31 | 31 | #endif |
32 | 32 |
|
| 33 | +/* ============================================================ |
| 34 | + * Bitmask flags for detection |
| 35 | + * ============================================================ */ |
/*
 * Result flags of the string sanitizer. Every flag occupies its own bit so
 * several findings can be OR-ed into one int and tested with `&`.
 */
#define FOSSIL_SAN_OK       0         /* no bit set: nothing detected, nothing changed */
#define FOSSIL_SAN_MODIFIED (1 << 0)  /* 0x01: a character was replaced, or the arguments were invalid */
#define FOSSIL_SAN_SCRIPT   (1 << 1)  /* 0x02: a script-injection pattern was found */
#define FOSSIL_SAN_SQL      (1 << 2)  /* 0x04: an SQL pattern was found */
#define FOSSIL_SAN_SHELL    (1 << 3)  /* 0x08: a shell-command pattern was found */
#define FOSSIL_SAN_BASE64   (1 << 4)  /* 0x10: a long run of base64-alphabet characters was found */
#define FOSSIL_SAN_PATH     (1 << 5)  /* 0x20: a path-traversal pattern was found */
#define FOSSIL_SAN_BOT      (1 << 6)  /* 0x40: a bot/crawler signature was found */
#define FOSSIL_SAN_SPAM     (1 << 7)  /* 0x80: a spam phrase or URL scheme was found */
| 45 | + |
| 46 | +/* ============================================================ |
| 47 | + * Context enum — what's allowed depends on usage |
| 48 | + * ============================================================ */ |
/* Usage context of a sanitized string; it selects the character allow-list. */
typedef enum {
    FOSSIL_CTX_GENERIC  = 0, /* balanced approach (default) */
    FOSSIL_CTX_HTML     = 1, /* for rendering in HTML */
    FOSSIL_CTX_SQL      = 2, /* for SQL queries (still use params!) */
    FOSSIL_CTX_SHELL    = 3, /* for shell commands */
    FOSSIL_CTX_FILENAME = 4  /* for filenames */
} fossil_context_t;
| 56 | + |
| 57 | + |
33 | 58 | // Function to trim leading and trailing spaces from a string |
34 | 59 | void fossil_io_trim(char *str) { |
35 | 60 | if (str == NULL) return; |
@@ -131,33 +156,88 @@ char *fossil_io_gets_from_stream(char *buf, size_t size, fossil_fstream_t *input |
131 | 156 | return buf; |
132 | 157 | } |
133 | 158 |
|
134 | | -char *fossil_io_gets_from_stream_ex(char *buf, size_t size, fossil_fstream_t *input_stream, int *error_code) { |
135 | | - if (buf == NULL || size == 0 || input_stream == NULL || error_code == NULL) { |
136 | | - fossil_io_fprintf(FOSSIL_STDERR, "Error: Invalid buffer, stream, or error code.\n"); |
137 | | - return NULL; |
138 | | - } |
139 | | - |
140 | | - // Use fgets to get the input from the stream |
141 | | - if (fgets(buf, size, input_stream->file) == NULL) { |
142 | | - if (feof(input_stream->file)) { |
143 | | - *error_code = EOF; |
144 | | - return NULL; // End of file reached |
| 159 | +/* --- sanitizer --- */ |
| 160 | +int fossil_io_validate_sanitize_string_ctx(const char *input, |
| 161 | + char *output, |
| 162 | + size_t output_size, |
| 163 | + fossil_context_t ctx) { |
| 164 | + if (!input || !output || output_size == 0) { |
| 165 | + if (output && output_size > 0) output[0] = '\0'; |
| 166 | + return FOSSIL_SAN_MODIFIED; |
| 167 | + } |
| 168 | + |
| 169 | + size_t in_len = strnlen(input, 4096); /* cap scanning to 4k */ |
| 170 | + size_t out_i = 0; |
| 171 | + int flags = FOSSIL_SAN_OK; |
| 172 | + |
| 173 | + /* Context-specific allowed char filter */ |
| 174 | + int (*is_allowed)(char) = is_allowed_generic; |
| 175 | + switch (ctx) { |
| 176 | + case FOSSIL_CTX_HTML: is_allowed = is_allowed_html; break; |
| 177 | + case FOSSIL_CTX_SQL: is_allowed = is_allowed_sql; break; |
| 178 | + case FOSSIL_CTX_SHELL: is_allowed = is_allowed_shell; break; |
| 179 | + case FOSSIL_CTX_FILENAME: is_allowed = is_allowed_filename; break; |
| 180 | + default: is_allowed = is_allowed_generic; break; |
| 181 | + } |
| 182 | + |
| 183 | + /* Suspicious patterns */ |
| 184 | + const char *script_patterns[] = { |
| 185 | + "<script", "javascript:", "onerror=", "onload=", "onclick=", "eval(", NULL |
| 186 | + }; |
| 187 | + const char *sql_patterns[] = { |
| 188 | + "select ", "insert ", "update ", "delete ", "drop ", "union ", |
| 189 | + "--", ";--", "/*", "*/", "0x", NULL |
| 190 | + }; |
| 191 | + const char *shell_patterns[] = { |
| 192 | + "curl ", "wget ", "rm -rf", "powershell", "cmd.exe", |
| 193 | + "exec(", "system(", "|", "&&", "||", NULL |
| 194 | + }; |
| 195 | + const char *bot_patterns[] = { |
| 196 | + "bot", "crawler", "spider", "curl/", "python-requests", "scrapy", NULL |
| 197 | + }; |
| 198 | + const char *spam_patterns[] = { |
| 199 | + "viagra", "free money", "winner", "prize", "click here", |
| 200 | + "http://", "https://", "meta refresh", NULL |
| 201 | + }; |
| 202 | + const char *path_patterns[] = { |
| 203 | + "../", "..\\", "/etc/passwd", "C:\\", NULL |
| 204 | + }; |
| 205 | + |
| 206 | + /* Scan categories */ |
| 207 | + for (const char **p = script_patterns; *p; ++p) |
| 208 | + if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SCRIPT; |
| 209 | + |
| 210 | + for (const char **p = sql_patterns; *p; ++p) |
| 211 | + if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SQL; |
| 212 | + |
| 213 | + for (const char **p = shell_patterns; *p; ++p) |
| 214 | + if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SHELL; |
| 215 | + |
| 216 | + for (const char **p = bot_patterns; *p; ++p) |
| 217 | + if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_BOT; |
| 218 | + |
| 219 | + for (const char **p = spam_patterns; *p; ++p) |
| 220 | + if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SPAM; |
| 221 | + |
| 222 | + for (const char **p = path_patterns; *p; ++p) |
| 223 | + if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_PATH; |
| 224 | + |
| 225 | + if (long_base64_run(input, in_len, 80)) |
| 226 | + flags |= FOSSIL_SAN_BASE64; |
| 227 | + |
| 228 | + /* Sanitization pass */ |
| 229 | + for (size_t i = 0; i < in_len && out_i < output_size - 1; i++) { |
| 230 | + char c = input[i]; |
| 231 | + if (is_allowed(c)) { |
| 232 | + output[out_i++] = c; |
| 233 | + } else { |
| 234 | + output[out_i++] = '_'; /* neutralize */ |
| 235 | + flags |= FOSSIL_SAN_MODIFIED; |
145 | 236 | } |
146 | | - *error_code = ferror(input_stream->file); |
147 | | - fossil_io_fprintf(FOSSIL_STDERR, "Error: Failed to read from input stream.\n"); |
148 | | - return NULL; |
149 | 237 | } |
| 238 | + output[out_i] = '\0'; |
150 | 239 |
|
151 | | - // Ensure the string is null-terminated |
152 | | - size_t len = strlen(buf); |
153 | | - if (len > 0 && buf[len - 1] == '\n') { |
154 | | - buf[len - 1] = '\0'; // Remove the newline character |
155 | | - } |
156 | | - |
157 | | - // Trim any leading or trailing whitespace |
158 | | - fossil_io_trim(buf); |
159 | | - |
160 | | - return buf; |
| 240 | + return flags == 0 ? FOSSIL_SAN_OK : flags; |
161 | 241 | } |
162 | 242 |
|
163 | 243 | int fossil_io_scanf(const char *format, ...) { |
@@ -210,6 +290,150 @@ char *fossil_io_gets_utf8(char *buf, size_t size, fossil_fstream_t *input_stream |
210 | 290 | return buf; |
211 | 291 | } |
212 | 292 |
|
| 293 | +int fossil_io_validate_is_suspicious_user(const char *input) { |
| 294 | + if (input == NULL) return 0; |
| 295 | + |
| 296 | + size_t len = strlen(input); |
| 297 | + if (len == 0) return 0; |
| 298 | + |
| 299 | + // 1. Too long or too short |
| 300 | + if (len < 3 || len > 32) return 1; |
| 301 | + |
| 302 | + // 2. Check digit runs |
| 303 | + int digit_run = 0, max_digit_run = 0, digit_count = 0, alpha_count = 0; |
| 304 | + for (size_t i = 0; i < len; i++) { |
| 305 | + if (isdigit((unsigned char)input[i])) { |
| 306 | + digit_run++; |
| 307 | + digit_count++; |
| 308 | + if (digit_run > max_digit_run) max_digit_run = digit_run; |
| 309 | + } else { |
| 310 | + digit_run = 0; |
| 311 | + if (isalpha((unsigned char)input[i])) alpha_count++; |
| 312 | + } |
| 313 | + } |
| 314 | + if (max_digit_run >= 5) return 1; // suspicious long digit tail |
| 315 | + if ((float)digit_count / len > 0.5) return 1; // mostly digits |
| 316 | + |
| 317 | + // 3. Suspicious keywords |
| 318 | + const char *bad_keywords[] = {"bot", "test", "fake", "spam", "zzz", "null", "admin"}; |
| 319 | + size_t nkeys = sizeof(bad_keywords) / sizeof(bad_keywords[0]); |
| 320 | + for (size_t i = 0; i < nkeys; i++) { |
| 321 | + if (strcasestr(input, bad_keywords[i]) != NULL) { |
| 322 | + return 1; |
| 323 | + } |
| 324 | + } |
| 325 | + |
| 326 | + // 4. Very high entropy (simple Shannon estimate) |
| 327 | + int freq[256] = {0}; |
| 328 | + for (size_t i = 0; i < len; i++) freq[(unsigned char)input[i]]++; |
| 329 | + double entropy = 0.0; |
| 330 | + for (int i = 0; i < 256; i++) { |
| 331 | + if (freq[i] > 0) { |
| 332 | + double p = (double)freq[i] / len; |
| 333 | + entropy -= p * log2(p); |
| 334 | + } |
| 335 | + } |
| 336 | + if (entropy > 4.5) return 1; // suspiciously random-like |
| 337 | + |
| 338 | + return 0; // not flagged |
| 339 | +} |
| 340 | + |
/**
 * Check whether an e-mail address belongs to a known disposable-mail domain.
 *
 * The domain is taken to be everything after the LAST '@': a quoted local
 * part may itself contain '@', while a domain cannot, so the final '@' is
 * the real separator. The comparison is case-insensitive and must match the
 * whole domain (sub-domains of a listed domain are not matched).
 *
 * @param input  NUL-terminated e-mail address (may be NULL).
 * @return 1 if the domain is in the built-in list, 0 otherwise (including
 *         NULL input and input without '@').
 */
int fossil_io_validate_is_disposable_email(const char *input) {
    static const char *const disposable_domains[] = {
        "mailinator.com", "10minutemail.com", "guerrillamail.com",
        "tempmail.com", "trashmail.com", "yopmail.com"
    };

    if (input == NULL) {
        return 0;
    }

    const char *at = strrchr(input, '@');
    if (at == NULL) {
        return 0;
    }

    const char *domain = at + 1;
    for (size_t i = 0; i < sizeof disposable_domains / sizeof disposable_domains[0]; i++) {
        if (strcasecmp(domain, disposable_domains[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
| 359 | + |
/*
 * Case-insensitive substring test used by the bot-signature check.
 * Portable stand-in for strcasestr(), which is a GNU/BSD extension and is
 * not part of ISO C or POSIX. Returns 1 when `needle` occurs in `haystack`.
 */
static int fossil_io_agent_contains_nocase(const char *haystack, const char *needle) {
    for (; *haystack != '\0'; haystack++) {
        const char *h = haystack;
        const char *n = needle;
        /* A NUL in the haystack never equals a needle character, so this
         * loop cannot run past the end of either string. */
        while (*n != '\0' &&
               tolower((unsigned char)*h) == tolower((unsigned char)*n)) {
            h++;
            n++;
        }
        if (*n == '\0') {
            return 1;
        }
    }
    return 0;
}

/**
 * Check a string for well-known bot / HTTP-library signatures.
 *
 * The match is a case-insensitive substring search against a fixed list.
 *
 * @param input  NUL-terminated string to inspect (may be NULL).
 * @return 1 if any signature occurs in `input`, 0 otherwise (including NULL).
 */
int fossil_io_validate_is_suspicious_bot(const char *input) {
    static const char *const bot_signatures[] = {
        "bot", "crawl", "spider", "scrape", "httpclient", "libwww",
        "wget", "curl", "python-requests", "java", "go-http-client"
    };

    if (input == NULL) {
        return 0;
    }

    for (size_t i = 0; i < sizeof bot_signatures / sizeof bot_signatures[0]; i++) {
        if (fossil_io_agent_contains_nocase(input, bot_signatures[i])) {
            return 1;
        }
    }

    return 0;
}
| 377 | + |
/**
 * Apply basic strength checks to a password.
 *
 * A password is reported weak when it is NULL, shorter than 8 or longer
 * than 64 characters, uses fewer than three of the four character classes
 * (lower, upper, digit, other), equals a common password, consists of one
 * repeated character or a strictly ascending/descending byte sequence, or
 * equals the username or e-mail address (all comparisons against words are
 * case-insensitive).
 *
 * @param password  Candidate password (NULL counts as weak).
 * @param username  Account name to compare against; NULL or "" skips the check.
 * @param email     Account e-mail to compare against; NULL or "" skips the check.
 * @return 1 if the password is weak, 0 if it passed every check.
 */
int fossil_io_validate_is_weak_password(const char *password,
                                        const char *username,
                                        const char *email) {
    static const char *const common_passwords[] = {
        "password", "123456", "123456789", "qwerty", "abc123",
        "letmein", "111111", "123123", "iloveyou", "admin"
    };
    enum { HAS_LOWER = 1u, HAS_UPPER = 2u, HAS_DIGIT = 4u, HAS_SYMBOL = 8u };

    if (password == NULL) {
        return 1;
    }

    /* 1. Length window */
    const size_t length = strlen(password);
    if (length < 8 || length > 64) {
        return 1;
    }

    /* 2. Character-class diversity: need at least three of four classes */
    unsigned classes = 0;
    for (const char *p = password; *p != '\0'; ++p) {
        const unsigned char ch = (unsigned char)*p;
        if (islower(ch)) {
            classes |= HAS_LOWER;
        } else if (isupper(ch)) {
            classes |= HAS_UPPER;
        } else if (isdigit(ch)) {
            classes |= HAS_DIGIT;
        } else {
            classes |= HAS_SYMBOL;
        }
    }
    int class_count = 0;
    for (unsigned bits = classes; bits != 0; bits >>= 1) {
        class_count += (int)(bits & 1u);
    }
    if (class_count < 3) {
        return 1;
    }

    /* 3. Common passwords. Every entry is shorter than 8 characters or uses
     *    at most two classes, so the checks above already reject them; this
     *    comparison is kept as an explicit second line of defence. */
    for (size_t k = 0; k < sizeof common_passwords / sizeof common_passwords[0]; k++) {
        if (strcasecmp(password, common_passwords[k]) == 0) {
            return 1;
        }
    }

    /* 4. Whole-string patterns: one repeated byte, or bytes that step by
     *    exactly +1 / -1 from start to end. */
    int all_same = 1;
    int ascending = 1;
    int descending = 1;
    for (size_t pos = 1; pos < length; pos++) {
        const int prev = (unsigned char)password[pos - 1];
        const int cur = (unsigned char)password[pos];
        if (cur != prev) {
            all_same = 0;
        }
        if (cur != prev + 1) {
            ascending = 0;
        }
        if (cur != prev - 1) {
            descending = 0;
        }
    }
    if (all_same || ascending || descending) {
        return 1;
    }

    /* 5. The password must not simply repeat the username or e-mail */
    if (username != NULL && username[0] != '\0' &&
        strcasecmp(password, username) == 0) {
        return 1;
    }
    if (email != NULL && email[0] != '\0' &&
        strcasecmp(password, email) == 0) {
        return 1;
    }

    return 0; /* password passed basic strength checks */
}
| 436 | + |
213 | 437 | int fossil_io_validate_is_int(const char *input, int *output) { |
214 | 438 | if (input == NULL || output == NULL) { |
215 | 439 | return 0; |
@@ -295,17 +519,78 @@ int fossil_io_validate_is_length(const char *input, size_t max_length) { |
295 | 519 | return strlen(input) <= max_length; |
296 | 520 | } |
297 | 521 |
|
298 | | -int fossil_io_validate_sanitize_string(const char *input, char *output, size_t output_size) { |
299 | | - if (input == NULL || output == NULL || output_size == 0) { |
300 | | - return 0; |
| 522 | +/* ============================================================ |
| 523 | + * Helpers |
| 524 | + * ============================================================ */ |
| 525 | + |
/*
 * Allow-list for the generic context: alphanumerics (per isalnum) plus a
 * fixed set of punctuation. Returns 1 if `c` may pass through, 0 otherwise.
 */
static inline int is_allowed_generic(char c) {
    static const char extra_allowed[] = " _-.,:/\\@+=#%()[]";

    if (isalnum((unsigned char)c)) {
        return 1;
    }
    /* strchr() also matches the terminating NUL, which must stay rejected. */
    if (c == '\0') {
        return 0;
    }
    return strchr(extra_allowed, c) != NULL ? 1 : 0;
}
302 | 537 |
|
303 | | - // Copy the input string to the output buffer |
304 | | - strncpy(output, input, output_size); |
| 538 | +/* Allowed chars for specific contexts */ |
/*
 * Allow-list for the HTML context: alphanumerics (per isalnum), space,
 * '-', '_', '.' and ','. Returns 1 if `c` may pass through, 0 otherwise.
 */
static inline int is_allowed_html(char c) {
    switch (c) {
    case ' ':
    case '-':
    case '_':
    case '.':
    case ',':
        return 1;
    default:
        return isalnum((unsigned char)c) ? 1 : 0;
    }
}
305 | 542 |
|
306 | | - return 1; |
/*
 * Allow-list for the SQL context: alphanumerics (per isalnum), space, '_'
 * and '-'. Returns 1 if `c` may pass through, 0 otherwise.
 */
static inline int is_allowed_sql(char c) {
    switch (c) {
    case ' ':
    case '_':
    case '-':
        return 1;
    default:
        return isalnum((unsigned char)c) ? 1 : 0;
    }
}
| 546 | + |
/*
 * Allow-list for the shell context: alphanumerics (per isalnum), space,
 * '_', '-', '.' and '/'. Returns 1 if `c` may pass through, 0 otherwise.
 */
static inline int is_allowed_shell(char c) {
    switch (c) {
    case ' ':
    case '_':
    case '-':
    case '.':
    case '/':
        return 1;
    default:
        return isalnum((unsigned char)c) ? 1 : 0;
    }
}
| 550 | + |
/*
 * Allow-list for the filename context: alphanumerics (per isalnum), '_',
 * '-' and '.'. Returns 1 if `c` may pass through, 0 otherwise.
 */
static inline int is_allowed_filename(char c) {
    switch (c) {
    case '_':
    case '-':
    case '.':
        return 1;
    default:
        return isalnum((unsigned char)c) ? 1 : 0;
    }
}
308 | 554 |
|
| 555 | +/* Base64 heuristic */ |
/*
 * Base64 heuristic: report whether the first `len` bytes of `s` contain at
 * least `threshold` consecutive characters from the base64 alphabet
 * (A-Z, a-z, 0-9, '+', '/', '='). Returns 1 if such a run exists, else 0.
 */
static int long_base64_run(const char *s, size_t len, size_t threshold) {
    size_t streak = 0;
    size_t pos = 0;

    while (pos < len) {
        const unsigned char byte = (unsigned char)s[pos++];
        const int is_upper = byte >= 'A' && byte <= 'Z';
        const int is_lower = byte >= 'a' && byte <= 'z';
        const int is_digit = byte >= '0' && byte <= '9';
        const int is_extra = byte == '+' || byte == '/' || byte == '=';

        if (!(is_upper || is_lower || is_digit || is_extra)) {
            streak = 0; /* run broken by a non-alphabet byte */
            continue;
        }
        if (++streak >= threshold) {
            return 1;
        }
    }
    return 0;
}
| 572 | + |
| 573 | +/* Case-insensitive contains */ |
/*
 * Case-insensitive "contains" over a bounded haystack: returns 1 when
 * `needle` occurs entirely within the first `len` bytes of `haystack`,
 * 0 otherwise. An empty needle, or one longer than `len`, never matches.
 */
static int strncase_contains(const char *haystack, const char *needle, size_t len) {
    const size_t needle_len = strlen(needle);
    if (needle_len == 0 || needle_len > len) {
        return 0;
    }

    /* Last index at which the needle still fits inside the window. */
    const size_t last_start = len - needle_len;
    for (size_t start = 0; start <= last_start; start++) {
        size_t matched = 0;
        while (matched < needle_len &&
               tolower((unsigned char)haystack[start + matched]) ==
                   tolower((unsigned char)needle[matched])) {
            matched++;
        }
        if (matched == needle_len) {
            return 1;
        }
    }
    return 0;
}
| 588 | + |
| 589 | +/* ============================================================ |
| 590 | + * Sanitizer with bitmask + context |
| 591 | + * ============================================================ */ |
| 592 | + |
| 593 | + |
309 | 594 | int fossil_io_gets(char *buffer, size_t size) { |
310 | 595 | if (fgets(buffer, size, stdin) == NULL) { |
311 | 596 | return -1; // Error or EOF |
|
0 commit comments