1717#include < stdarg.h>
1818#include " stream.h"
1919
/**
 * Usage contexts accepted by the string sanitizer.
 *
 * A value of this type is passed as the `ctx` argument of
 * fossil_io_validate_sanitize_string_ctx(). The per-context rules are
 * implemented outside this header, so the enumerator names are the only
 * description available here.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the enumerators would receive implicitly (0 through 4).
 */
typedef enum {
    FOSSIL_CTX_GENERIC  = 0,
    FOSSIL_CTX_HTML     = 1,
    FOSSIL_CTX_SQL      = 2,
    FOSSIL_CTX_SHELL    = 3,
    FOSSIL_CTX_FILENAME = 4
} fossil_context_t;
28+
2029#ifdef __cplusplus
2130extern " C" {
2231#endif
@@ -29,6 +38,28 @@ extern "C" {
2938 */
3039int fossil_io_getc (fossil_fstream_t *input_stream);
3140
/**
 * @brief Strip whitespace from both ends of a string, editing it in place.
 *
 * Leading and trailing whitespace (spaces, tabs, newlines, and so on) is
 * removed by shifting the remaining characters inside the caller's buffer.
 * No memory is allocated, and the result is always null-terminated.
 *
 * @param str Null-terminated string to trim. Must not be NULL, and the
 *            buffer must be writable. A string that is empty or consists
 *            only of whitespace becomes the empty string ("").
 *
 * Example:
 * @code
 *     char s[] = "   hello world  \n";
 *     fossil_io_trim(s);
 *     // s now holds "hello world"
 * @endcode
 */
void fossil_io_trim(char *str);
62+
3263/* *
3364 * Reads a line from the input stream and stores it into the buffer pointed to by 'buf'.
3465 *
@@ -134,14 +165,68 @@ int fossil_io_validate_is_email(const char *input);
134165int fossil_io_validate_is_length (const char *input, size_t max_length);
135166
/**
 * @brief Report whether a password fails the basic strength checks.
 *
 * @param password Password to evaluate.
 * @param username Account name associated with the password.
 *                 NOTE(review): the C++ wrapper in this header passes NULL
 *                 when it has no username, so NULL appears to be accepted;
 *                 confirm against the implementation.
 * @param email    E-mail address associated with the password.
 *                 NOTE(review): same NULL remark as for `username`.
 *
 * @return 1 if the password is weak/bad, 0 if it passes the basic checks.
 */
int fossil_io_validate_is_weak_password(
    const char *password,
    const char *username,
    const char *email);
177+
/**
 * @brief Check whether a user-agent string looks like a bot or crawler.
 *
 * @param input User-agent string to inspect.
 * @return Non-zero when the string looks like a bot/crawler. The C++ wrapper
 *         in this header treats any non-zero result as "suspicious".
 */
int fossil_io_validate_is_suspicious_bot(const char *input);
182+
/**
 * @brief Check whether an e-mail address belongs to a disposable or
 *        otherwise suspicious domain.
 *
 * @param input E-mail address to inspect.
 * @return Non-zero when the address is considered disposable/suspicious.
 *         The C++ wrapper in this header treats any non-zero result as a
 *         match.
 */
int fossil_io_validate_is_disposable_email(const char *input);
187+
/**
 * @brief Check whether a string looks like a bot-style username.
 *
 * The heuristics cover:
 *   - too many digits in a row;
 *   - a high ratio of digits to letters;
 *   - suspicious words such as "bot", "test", or "fake";
 *   - text that looks like random noise (entropy check).
 *
 * @param input Username to inspect.
 * @return Non-zero when the name looks suspicious. The C++ wrapper in this
 *         header treats any non-zero result as a match.
 */
int fossil_io_validate_is_suspicious_user(const char *input);
196+
197+ /* *
198+ * @brief Validate and sanitize a string according to a specified context.
199+ *
200+ * This function scans the input string for suspicious content (scripts, SQL injection,
201+ * shell commands, bots, spam, path traversal, or long base64 blobs) and performs
202+ * context-aware sanitization by replacing disallowed characters with underscores.
203+ * It also returns a bitmask indicating the types of issues detected.
204+ *
205+ * @param input The input string to be validated and sanitized. Must not be NULL.
206+ * @param output The buffer to receive the sanitized string. Must not be NULL.
207+ * @param output_size The size of the output buffer. Must be greater than 0.
208+ * @param ctx The context in which the string will be used, which determines
209+ * the allowed character set and stricter rules for certain contexts.
210+ *
211+ * @return Bitmask of flags indicating results:
212+ * - FOSSIL_SAN_OK (0x00): No issues detected; string is clean.
213+ * - FOSSIL_SAN_MODIFIED (0x01): Input was modified during sanitization.
214+ * - FOSSIL_SAN_SCRIPT (0x02): Script or JavaScript patterns detected.
215+ * - FOSSIL_SAN_SQL (0x04): SQL injection patterns detected.
216+ * - FOSSIL_SAN_SHELL (0x08): Shell or command execution patterns detected.
217+ * - FOSSIL_SAN_BASE64 (0x10): Suspiciously long base64 sequences detected.
218+ * - FOSSIL_SAN_PATH (0x20): Path traversal or filesystem patterns detected.
219+ * - FOSSIL_SAN_BOT (0x40): Bot or automated agent patterns detected.
220+ * - FOSSIL_SAN_SPAM (0x80): Spam or suspicious marketing content detected.
221+ *
222+ * @note The sanitized output is always null-terminated and will not exceed
223+ * output_size bytes. The function uses heuristics and is not a substitute
224+ * for context-specific escaping or prepared statements in SQL/HTML.
225+ */
226+ int fossil_io_validate_sanitize_string_ctx (const char *input,
227+ char *output,
228+ size_t output_size,
229+ fossil_context_t ctx);
145230
146231/* *
147232 * Displays a menu of choices and returns the selected choice.
@@ -276,6 +361,30 @@ namespace fossil {
276361 return fossil_io_getc (input_stream);
277362 }
278363
364+ /* *
365+ * @brief Trim leading and trailing whitespace from a string in place.
366+ *
367+ * This function removes all whitespace characters (spaces, tabs, newlines, etc.)
368+ * from the beginning and end of the input string. The trimming is done **in place**,
369+ * modifying the original string, and the result is always null-terminated.
370+ *
371+ * @param str Pointer to the null-terminated string to be trimmed.
372+ * Must not be NULL. If the string is empty or contains only whitespace,
373+ * it will be converted to an empty string ("").
374+ *
375+ * @note This function does not allocate memory; it simply shifts characters within
376+ * the existing string buffer. The caller is responsible for ensuring that
377+ * the buffer is writable and large enough to hold the original string.
378+ *
379+ * @example
380+ * char s[] = " hello world \n";
381+ * fossil_io_trim(s);
382+ * // s now contains "hello world"
383+ */
384+ static void trim (char *str) {
385+ fossil_io_trim (str);
386+ }
387+
279388 /* *
280389 * Reads a line from the input stream and stores it into the buffer pointed to by 'buf'.
281390 *
@@ -425,15 +534,53 @@ namespace fossil {
425534 }
426535
427536 /* *
428- * @brief Sanitizes the input string and stores the sanitized result in the output buffer.
429- *
430- * @param input The input string to sanitize.
431- * @param output The buffer where the sanitized string will be stored.
432- * @param output_size The size of the output buffer.
433- * @return A fossil_io_validate_error_t indicating the result of the sanitization process.
537+ * Check if password is weak or bad.
538+ * Returns true if weak/bad, false otherwise.
539+ */
540+ static bool is_weak_password (const std::string &password,
541+ const std::string &username = " " ,
542+ const std::string &email = " " ) {
543+ return fossil_io_validate_is_weak_password (
544+ password.c_str (),
545+ username.empty () ? nullptr : username.c_str (),
546+ email.empty () ? nullptr : email.c_str ()) != 0 ;
547+ }
548+
549+ /* *
550+ * Check if a user-agent string looks like a bot/crawler
551+ */
552+ static bool is_suspicious_bot (const std::string &userAgent) {
553+ return fossil_io_validate_is_suspicious_bot (userAgent.c_str ()) != 0 ;
554+ }
555+
556+ /* *
557+ * Check if an email belongs to a disposable / suspicious domain
558+ */
559+ static bool is_disposable_email (const std::string &email) {
560+ return fossil_io_validate_is_disposable_email (email.c_str ()) != 0 ;
561+ }
562+
563+ /* *
564+ * Check if a string looks like a bot-style username
565+ */
566+ static bool is_suspicious_user (const std::string &username) {
567+ return fossil_io_validate_is_suspicious_user (username.c_str ()) != 0 ;
568+ }
569+
570+ /* *
571+ * Sanitize a string according to context.
572+ * Returns the bitmask flags from the sanitizer.
434573 */
435- static int validate_sanitize_string (const char *input, char *output, size_t output_size) {
436- return fossil_io_validate_sanitize_string (input, output, output_size);
574+ static int validate_sanitize_string (std::string &input, fossil_context_t ctx) {
575+ std::vector<char > buffer (input.size () + 1 );
576+ int flags = fossil_io_validate_sanitize_string_ctx (
577+ input.c_str (),
578+ buffer.data (),
579+ buffer.size (),
580+ ctx
581+ );
582+ input.assign (buffer.data ());
583+ return flags;
437584 }
438585
439586 /* *
0 commit comments