Skip to content

Commit d97b3dc

Browse files
Merge pull request #73 from dreamer-coding/extend_input_secerity
2 parents dd64d5e + b8af122 commit d97b3dc

File tree

2 files changed

+502
-49
lines changed

2 files changed

+502
-49
lines changed

code/logic/fossil/io/input.h

Lines changed: 162 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@
1717
#include <stdarg.h>
1818
#include "stream.h"
1919

20+
/* Contexts */
21+
/*
 * Usage context passed to fossil_io_validate_sanitize_string_ctx(). Per that
 * function's documentation, the context determines the allowed character set
 * and any stricter rules applied while sanitizing.
 *
 * The numeric values are pinned explicitly (identical to the implicit 0..4
 * numbering) so that inserting a new enumerator later cannot silently
 * renumber values already used by callers of the C API.
 */
typedef enum {
    FOSSIL_CTX_GENERIC  = 0, /* first enumerator; presumably "no specific target" - confirm in input.c */
    FOSSIL_CTX_HTML     = 1, /* presumably text destined for HTML - confirm in input.c */
    FOSSIL_CTX_SQL      = 2, /* presumably text destined for an SQL statement - confirm in input.c */
    FOSSIL_CTX_SHELL    = 3, /* presumably text destined for a shell command - confirm in input.c */
    FOSSIL_CTX_FILENAME = 4  /* presumably text used as a file name - confirm in input.c */
} fossil_context_t;
28+
2029
#ifdef __cplusplus
2130
extern "C" {
2231
#endif
@@ -29,6 +38,28 @@ extern "C" {
2938
*/
3039
int fossil_io_getc(fossil_fstream_t *input_stream);
3140

41+
/**
42+
* @brief Trim leading and trailing whitespace from a string in place.
43+
*
44+
* This function removes all whitespace characters (spaces, tabs, newlines, etc.)
45+
* from the beginning and end of the input string. The trimming is done **in place**,
46+
* modifying the original string, and the result is always null-terminated.
47+
*
48+
* @param str Pointer to the null-terminated string to be trimmed.
49+
* Must not be NULL. If the string is empty or contains only whitespace,
50+
* it will be converted to an empty string ("").
51+
*
52+
* @note This function does not allocate memory; it simply shifts characters within
53+
* the existing string buffer. The caller is responsible for ensuring that
54+
* the buffer is writable and large enough to hold the original string.
55+
*
56+
* @example
57+
* char s[] = " hello world \n";
58+
* fossil_io_trim(s);
59+
* // s now contains "hello world"
60+
*/
61+
void fossil_io_trim(char *str);
62+
3263
/**
3364
* Reads a line from the input stream and stores it into the buffer pointed to by 'buf'.
3465
*
@@ -134,14 +165,68 @@ int fossil_io_validate_is_email(const char *input);
134165
int fossil_io_validate_is_length(const char *input, size_t max_length);
135166

136167
/**
137-
* @brief Sanitizes the input string and stores the sanitized result in the output buffer.
138-
*
139-
* @param input The input string to sanitize.
140-
* @param output The buffer where the sanitized string will be stored.
141-
* @param output_size The size of the output buffer.
142-
* @return A fossil_io_validate_error_t indicating the result of the sanitization process.
168+
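* Check if password is weak or bad. `username` and `email` appear to be
* optional: the C++ wrapper passes NULL for either one when it is empty.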
169+
*
170+
* Returns:
171+
* 1 if password is weak/bad
172+
* 0 if password passes the basic checks
173+
*/
174+
int fossil_io_validate_is_weak_password(const char *password,
175+
const char *username,
176+
const char *email);
177+
178+
/**
179+
* Check if a user-agent string looks like a bot/crawler.
* Treat any non-zero return as "looks like a bot" (the C++ wrapper tests `!= 0`).
180+
*/
181+
int fossil_io_validate_is_suspicious_bot(const char *input);
182+
183+
/**
184+
* Check if an email belongs to a disposable / suspicious domain
185+
*/
186+
int fossil_io_validate_is_disposable_email(const char *input);
187+
188+
/**
189+
* Check if a string looks like a bot-style username:
190+
* - Too many digits in a row
191+
* - High ratio of digits to letters
192+
* - Contains suspicious words like "bot", "test", "fake"
193+
* - Looks like random noise (entropy check)
143194
*/
144-
int fossil_io_validate_sanitize_string(const char *input, char *output, size_t output_size);
195+
int fossil_io_validate_is_suspicious_user(const char *input);
196+
197+
/**
198+
* @brief Validate and sanitize a string according to a specified context.
199+
*
200+
* This function scans the input string for suspicious content (scripts, SQL injection,
201+
* shell commands, bots, spam, path traversal, or long base64 blobs) and performs
202+
* context-aware sanitization by replacing disallowed characters with underscores.
203+
* It also returns a bitmask indicating the types of issues detected.
204+
*
205+
* @param input The input string to be validated and sanitized. Must not be NULL.
206+
* @param output The buffer to receive the sanitized string. Must not be NULL.
207+
* @param output_size The size of the output buffer. Must be greater than 0.
208+
* @param ctx The context in which the string will be used, which determines
209+
* the allowed character set and stricter rules for certain contexts.
210+
*
211+
* @return Bitmask of flags indicating results:
212+
* - FOSSIL_SAN_OK (0x00): No issues detected; string is clean.
213+
* - FOSSIL_SAN_MODIFIED (0x01): Input was modified during sanitization.
214+
* - FOSSIL_SAN_SCRIPT (0x02): Script or JavaScript patterns detected.
215+
* - FOSSIL_SAN_SQL (0x04): SQL injection patterns detected.
216+
* - FOSSIL_SAN_SHELL (0x08): Shell or command execution patterns detected.
217+
* - FOSSIL_SAN_BASE64 (0x10): Suspiciously long base64 sequences detected.
218+
* - FOSSIL_SAN_PATH (0x20): Path traversal or filesystem patterns detected.
219+
* - FOSSIL_SAN_BOT (0x40): Bot or automated agent patterns detected.
220+
* - FOSSIL_SAN_SPAM (0x80): Spam or suspicious marketing content detected.
221+
*
222+
* @note The sanitized output is always null-terminated and will not exceed
223+
* output_size bytes. The function uses heuristics and is not a substitute
224+
* for context-specific escaping or prepared statements in SQL/HTML.
225+
*/
226+
int fossil_io_validate_sanitize_string_ctx(const char *input,
227+
char *output,
228+
size_t output_size,
229+
fossil_context_t ctx);
145230

146231
/**
147232
* Displays a menu of choices and returns the selected choice.
@@ -276,6 +361,30 @@ namespace fossil {
276361
return fossil_io_getc(input_stream);
277362
}
278363

364+
/**
365+
* @brief Trim leading and trailing whitespace from a string in place.
366+
*
367+
* This function removes all whitespace characters (spaces, tabs, newlines, etc.)
368+
* from the beginning and end of the input string. The trimming is done **in place**,
369+
* modifying the original string, and the result is always null-terminated.
370+
*
371+
* @param str Pointer to the null-terminated string to be trimmed.
372+
* Must not be NULL. If the string is empty or contains only whitespace,
373+
* it will be converted to an empty string ("").
374+
*
375+
* @note This function does not allocate memory; it simply shifts characters within
376+
* the existing string buffer. The caller is responsible for ensuring that
377+
* the buffer is writable and large enough to hold the original string.
378+
*
379+
* @example
380+
* char s[] = " hello world \n";
381+
* fossil_io_trim(s);
382+
* // s now contains "hello world"
383+
*/
384+
static void trim(char *str) {
    // C++ entry point only: the in-place trimming is done by the C routine,
    // which receives the caller's buffer unchanged.
    fossil_io_trim(str);
}
387+
279388
/**
280389
* Reads a line from the input stream and stores it into the buffer pointed to by 'buf'.
281390
*
@@ -425,15 +534,53 @@ namespace fossil {
425534
}
426535

427536
/**
428-
* @brief Sanitizes the input string and stores the sanitized result in the output buffer.
429-
*
430-
* @param input The input string to sanitize.
431-
* @param output The buffer where the sanitized string will be stored.
432-
* @param output_size The size of the output buffer.
433-
* @return A fossil_io_validate_error_t indicating the result of the sanitization process.
537+
* Check if password is weak or bad.
538+
* Returns true if weak/bad, false otherwise. An empty username or email is
* forwarded to the C API as a null pointer.
539+
*/
540+
static bool is_weak_password(const std::string &password,
541+
const std::string &username = "",
542+
const std::string &email = "") {
543+
return fossil_io_validate_is_weak_password(
544+
password.c_str(),
545+
username.empty() ? nullptr : username.c_str(),
546+
email.empty() ? nullptr : email.c_str()) != 0;
547+
}
548+
549+
/**
550+
* Check if a user-agent string looks like a bot/crawler
551+
*/
552+
static bool is_suspicious_bot(const std::string &userAgent) {
553+
return fossil_io_validate_is_suspicious_bot(userAgent.c_str()) != 0;
554+
}
555+
556+
/**
557+
* Check if an email belongs to a disposable / suspicious domain
558+
*/
559+
static bool is_disposable_email(const std::string &email) {
560+
return fossil_io_validate_is_disposable_email(email.c_str()) != 0;
561+
}
562+
563+
/**
564+
* Check if a string looks like a bot-style username
565+
*/
566+
static bool is_suspicious_user(const std::string &username) {
567+
return fossil_io_validate_is_suspicious_user(username.c_str()) != 0;
568+
}
569+
570+
/**
571+
* Sanitize a string in place according to context: `input` is overwritten
* with the sanitized text.
572+
* Returns the bitmask flags from the sanitizer.
434573
*/
435-
static int validate_sanitize_string(const char *input, char *output, size_t output_size) {
436-
return fossil_io_validate_sanitize_string(input, output, output_size);
574+
static int validate_sanitize_string(std::string &input, fossil_context_t ctx) {
575+
std::vector<char> buffer(input.size() + 1);
576+
int flags = fossil_io_validate_sanitize_string_ctx(
577+
input.c_str(),
578+
buffer.data(),
579+
buffer.size(),
580+
ctx
581+
);
582+
input.assign(buffer.data());
583+
return flags;
437584
}
438585

439586
/**

0 commit comments

Comments
 (0)