|
17 | 17 | #include <stdarg.h> |
18 | 18 | #include "stream.h" |
19 | 19 |
|
| 20 | +/* Usage contexts: passed as the ctx argument of fossil_io_validate_sanitize_string_ctx(), where the context determines the allowed character set. */ |
| 21 | +typedef enum { |
| 22 | + FOSSIL_CTX_GENERIC, /* NOTE(review): presumably the context-neutral default -- confirm against the implementation */ |
| 23 | + FOSSIL_CTX_HTML, /* presumably text destined for HTML output -- confirm */ |
| 24 | + FOSSIL_CTX_SQL, /* presumably text destined for SQL statements -- confirm */ |
| 25 | + FOSSIL_CTX_SHELL, /* presumably text destined for a shell command line -- confirm */ |
| 26 | + FOSSIL_CTX_FILENAME /* presumably text used as a file name -- confirm */ |
| 27 | +} fossil_context_t; |
| 28 | + |
20 | 29 | #ifdef __cplusplus |
21 | 30 | extern "C" { |
22 | 31 | #endif |
@@ -134,14 +143,68 @@ int fossil_io_validate_is_email(const char *input); |
134 | 143 | int fossil_io_validate_is_length(const char *input, size_t max_length); |
135 | 144 |
|
136 | 145 | /** |
137 | | - * @brief Sanitizes the input string and stores the sanitized result in the output buffer. |
138 | | - * |
139 | | - * @param input The input string to sanitize. |
140 | | - * @param output The buffer where the sanitized string will be stored. |
141 | | - * @param output_size The size of the output buffer. |
142 | | - * @return A fossil_io_validate_error_t indicating the result of the sanitization process. |
| 146 | + * Check if password is weak or bad. |
| 147 | + * username and email may be NULL: the C++ wrapper in this header passes NULL for either one when it is empty. |
| 148 | + * Returns: |
| 149 | + * 1 if password is weak/bad |
| 150 | + * 0 if password passes the basic checks |
| 151 | + */ |
| 152 | +int fossil_io_validate_is_weak_password(const char *password, |
| 153 | + const char *username, |
| 154 | + const char *email); |
| 155 | + |
| 156 | +/** |
| 157 | + * Check if a user-agent string looks like a bot/crawler. The C++ wrapper in this header treats any nonzero result as a match. |
| 158 | + */ |
| 159 | +int fossil_io_validate_is_suspicious_bot(const char *input); |
| 160 | + |
| 161 | +/** |
| 162 | + * Check if an email belongs to a disposable / suspicious domain. The C++ wrapper in this header treats any nonzero result as a match. |
| 163 | + */ |
| 164 | +int fossil_io_validate_is_disposable_email(const char *input); |
| 165 | + |
| 166 | +/** |
| 167 | + * Check if a string looks like a bot-style username (the C++ wrapper in this header treats any nonzero result as suspicious): |
| 168 | + * - Too many digits in a row |
| 169 | + * - High ratio of digits to letters |
| 170 | + * - Contains suspicious words like "bot", "test", "fake" |
| 171 | + * - Looks like random noise (entropy check) |
| 172 | + */ |
| 173 | +int fossil_io_validate_is_suspicious_user(const char *input); |
| 174 | + |
| 175 | +/** |
| 176 | + * @brief Validate and sanitize a string according to a specified context. |
| 177 | + * |
| 178 | + * This function scans the input string for suspicious content (scripts, SQL injection, |
| 179 | + * shell commands, bots, spam, path traversal, or long base64 blobs) and performs |
| 180 | + * context-aware sanitization by replacing disallowed characters with underscores. |
| 181 | + * It also returns a bitmask indicating the types of issues detected. |
| 182 | + * |
| 183 | + * @param input The input string to be validated and sanitized. Must not be NULL. |
| 184 | + * @param output The buffer to receive the sanitized string. Must not be NULL. |
| 185 | + * @param output_size The size of the output buffer in bytes, including room for the terminating null. Must be greater than 0. |
| 186 | + * @param ctx The context in which the string will be used, which determines |
| 187 | + * the allowed character set and stricter rules for certain contexts. |
| 188 | + * |
| 189 | + * @return Bitmask of flags indicating results (each issue type has its own bit): |
| 190 | + * - FOSSIL_SAN_OK (0x00): No issues detected; string is clean. |
| 191 | + * - FOSSIL_SAN_MODIFIED (0x01): Input was modified during sanitization. |
| 192 | + * - FOSSIL_SAN_SCRIPT (0x02): Script or JavaScript patterns detected. |
| 193 | + * - FOSSIL_SAN_SQL (0x04): SQL injection patterns detected. |
| 194 | + * - FOSSIL_SAN_SHELL (0x08): Shell or command execution patterns detected. |
| 195 | + * - FOSSIL_SAN_BASE64 (0x10): Suspiciously long base64 sequences detected. |
| 196 | + * - FOSSIL_SAN_PATH (0x20): Path traversal or filesystem patterns detected. |
| 197 | + * - FOSSIL_SAN_BOT (0x40): Bot or automated agent patterns detected. |
| 198 | + * - FOSSIL_SAN_SPAM (0x80): Spam or suspicious marketing content detected. |
| 199 | + * NOTE(review): the FOSSIL_SAN_* constants are not declared in the visible part of this header -- confirm where they are defined. |
| 200 | + * @note The sanitized output is always null-terminated and will not exceed |
| 201 | + * output_size bytes. The function uses heuristics and is not a substitute |
| 202 | + * for context-specific escaping or prepared statements in SQL/HTML. |
143 | 203 | */ |
144 | | -int fossil_io_validate_sanitize_string(const char *input, char *output, size_t output_size); |
| 204 | +int fossil_io_validate_sanitize_string_ctx(const char *input, |
| 205 | + char *output, |
| 206 | + size_t output_size, |
| 207 | + fossil_context_t ctx); |
145 | 208 |
|
146 | 209 | /** |
147 | 210 | * Displays a menu of choices and returns the selected choice. |
@@ -425,15 +488,53 @@ namespace fossil { |
425 | 488 | } |
426 | 489 |
|
427 | 490 | /** |
428 | | - * @brief Sanitizes the input string and stores the sanitized result in the output buffer. |
429 | | - * |
430 | | - * @param input The input string to sanitize. |
431 | | - * @param output The buffer where the sanitized string will be stored. |
432 | | - * @param output_size The size of the output buffer. |
433 | | - * @return A fossil_io_validate_error_t indicating the result of the sanitization process. |
| 491 | + * Check if password is weak or bad. |
| 492 | + * Returns true if weak/bad, false otherwise. |
| 493 | + */ |
| 494 | + static bool is_weak_password(const std::string &password, |
| 495 | + const std::string &username = "", |
| 496 | + const std::string &email = "") { |
| 497 | + // Empty optional fields are forwarded to the C API as null pointers. |
| 498 | + const char *user_arg = username.empty() ? nullptr : username.c_str(); |
| 499 | + const char *mail_arg = email.empty() ? nullptr : email.c_str(); |
| 500 | + return 0 != fossil_io_validate_is_weak_password(password.c_str(), user_arg, mail_arg); |
| 501 | + } |
| 502 | + |
| 503 | + /** |
| 504 | + * Check if a user-agent string looks like a bot/crawler |
| 505 | + */ |
| 506 | + static bool is_suspicious_bot(const std::string &userAgent) { |
| 507 | + return 0 != fossil_io_validate_is_suspicious_bot(userAgent.c_str()); // any nonzero C result maps to true |
| 508 | + } |
| 509 | + |
| 510 | + /** |
| 511 | + * Check if an email belongs to a disposable / suspicious domain |
| 512 | + */ |
| 513 | + static bool is_disposable_email(const std::string &email) { |
| 514 | + return static_cast<bool>(fossil_io_validate_is_disposable_email(email.c_str())); // int to bool: nonzero becomes true |
| 515 | + } |
| 516 | + |
| 517 | + /** |
| 518 | + * Check if a string looks like a bot-style username |
| 519 | + */ |
| 520 | + static bool is_suspicious_user(const std::string &username) { |
| 521 | + return fossil_io_validate_is_suspicious_user(username.c_str()) ? true : false; // nonzero C result maps to true |
| 522 | + } |
| 523 | + |
| 524 | + /** |
| 525 | + * Sanitize a string according to context. |
| 526 | + * Returns the bitmask flags from the sanitizer. |
434 | 527 | */ |
435 | | - static int validate_sanitize_string(const char *input, char *output, size_t output_size) { |
436 | | - return fossil_io_validate_sanitize_string(input, output, output_size); |
| 528 | + static int validate_sanitize_string(std::string &input, fossil_context_t ctx) { |
| 529 | + // Scratch output buffer for the C sanitizer: input length plus one byte |
| 530 | + // for the terminating null, zero-filled so it always reads as a C string. |
| 531 | + std::string scratch(input.size() + 1, '\0'); |
| 532 | + const int result_flags = fossil_io_validate_sanitize_string_ctx( |
| 533 | + input.c_str(), &scratch[0], scratch.size(), ctx); |
| 534 | + // Replace the caller's string with the sanitized text, up to the first null. |
| 535 | + input.assign(scratch.c_str()); |
| 536 | + // Hand the sanitizer's bitmask back unchanged. |
| 537 | + return result_flags; |
437 | 538 | } |
438 | 539 |
|
439 | 540 | /** |
|
0 commit comments