Skip to content

Commit c3ed4f6

Browse files
Update input.c
1 parent bd2f122 commit c3ed4f6

File tree

1 file changed

+315
-30
lines changed

1 file changed

+315
-30
lines changed

code/logic/input.c

Lines changed: 315 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,31 @@
3030
#include <unistd.h>
3131
#endif
3232

33+
/* ============================================================
34+
* Bitmask flags for detection
35+
* ============================================================ */
36+
/*
 * Result bits reported by the sanitizer. Each detection category owns one
 * bit so several findings can be OR-ed into a single int.
 */
#define FOSSIL_SAN_OK       0          /* no bit set: nothing detected      */
#define FOSSIL_SAN_MODIFIED (1 << 0)   /* input was altered or rejected     */
#define FOSSIL_SAN_SCRIPT   (1 << 1)   /* script-injection pattern found    */
#define FOSSIL_SAN_SQL      (1 << 2)   /* SQL-injection pattern found       */
#define FOSSIL_SAN_SHELL    (1 << 3)   /* shell-command pattern found       */
#define FOSSIL_SAN_BASE64   (1 << 4)   /* long base64-looking run found     */
#define FOSSIL_SAN_PATH     (1 << 5)   /* path-traversal pattern found      */
#define FOSSIL_SAN_BOT      (1 << 6)   /* bot/crawler signature found       */
#define FOSSIL_SAN_SPAM     (1 << 7)   /* spam marker found                 */
45+
46+
/* ============================================================
47+
* Context enum — what’s allowed depends on usage
48+
* ============================================================ */
49+
/*
 * Usage context for sanitized text. The sanitizer picks its set of
 * permitted characters from this value.
 */
typedef enum {
    FOSSIL_CTX_GENERIC  = 0,  /* default: balanced character set          */
    FOSSIL_CTX_HTML     = 1,  /* text that will be rendered inside HTML   */
    FOSSIL_CTX_SQL      = 2,  /* text headed for SQL (still use params!)  */
    FOSSIL_CTX_SHELL    = 3,  /* text headed for a shell command          */
    FOSSIL_CTX_FILENAME = 4   /* text used as a file name                 */
} fossil_context_t;
56+
57+
3358
// Function to trim leading and trailing spaces from a string
3459
void fossil_io_trim(char *str) {
3560
if (str == NULL) return;
@@ -131,33 +156,88 @@ char *fossil_io_gets_from_stream(char *buf, size_t size, fossil_fstream_t *input
131156
return buf;
132157
}
133158

134-
char *fossil_io_gets_from_stream_ex(char *buf, size_t size, fossil_fstream_t *input_stream, int *error_code) {
135-
if (buf == NULL || size == 0 || input_stream == NULL || error_code == NULL) {
136-
fossil_io_fprintf(FOSSIL_STDERR, "Error: Invalid buffer, stream, or error code.\n");
137-
return NULL;
138-
}
139-
140-
// Use fgets to get the input from the stream
141-
if (fgets(buf, size, input_stream->file) == NULL) {
142-
if (feof(input_stream->file)) {
143-
*error_code = EOF;
144-
return NULL; // End of file reached
159+
/* --- sanitizer --- */
160+
int fossil_io_validate_sanitize_string_ctx(const char *input,
161+
char *output,
162+
size_t output_size,
163+
fossil_context_t ctx) {
164+
if (!input || !output || output_size == 0) {
165+
if (output && output_size > 0) output[0] = '\0';
166+
return FOSSIL_SAN_MODIFIED;
167+
}
168+
169+
size_t in_len = strnlen(input, 4096); /* cap scanning to 4k */
170+
size_t out_i = 0;
171+
int flags = FOSSIL_SAN_OK;
172+
173+
/* Context-specific allowed char filter */
174+
int (*is_allowed)(char) = is_allowed_generic;
175+
switch (ctx) {
176+
case FOSSIL_CTX_HTML: is_allowed = is_allowed_html; break;
177+
case FOSSIL_CTX_SQL: is_allowed = is_allowed_sql; break;
178+
case FOSSIL_CTX_SHELL: is_allowed = is_allowed_shell; break;
179+
case FOSSIL_CTX_FILENAME: is_allowed = is_allowed_filename; break;
180+
default: is_allowed = is_allowed_generic; break;
181+
}
182+
183+
/* Suspicious patterns */
184+
const char *script_patterns[] = {
185+
"<script", "javascript:", "onerror=", "onload=", "onclick=", "eval(", NULL
186+
};
187+
const char *sql_patterns[] = {
188+
"select ", "insert ", "update ", "delete ", "drop ", "union ",
189+
"--", ";--", "/*", "*/", "0x", NULL
190+
};
191+
const char *shell_patterns[] = {
192+
"curl ", "wget ", "rm -rf", "powershell", "cmd.exe",
193+
"exec(", "system(", "|", "&&", "||", NULL
194+
};
195+
const char *bot_patterns[] = {
196+
"bot", "crawler", "spider", "curl/", "python-requests", "scrapy", NULL
197+
};
198+
const char *spam_patterns[] = {
199+
"viagra", "free money", "winner", "prize", "click here",
200+
"http://", "https://", "meta refresh", NULL
201+
};
202+
const char *path_patterns[] = {
203+
"../", "..\\", "/etc/passwd", "C:\\", NULL
204+
};
205+
206+
/* Scan categories */
207+
for (const char **p = script_patterns; *p; ++p)
208+
if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SCRIPT;
209+
210+
for (const char **p = sql_patterns; *p; ++p)
211+
if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SQL;
212+
213+
for (const char **p = shell_patterns; *p; ++p)
214+
if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SHELL;
215+
216+
for (const char **p = bot_patterns; *p; ++p)
217+
if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_BOT;
218+
219+
for (const char **p = spam_patterns; *p; ++p)
220+
if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_SPAM;
221+
222+
for (const char **p = path_patterns; *p; ++p)
223+
if (strncase_contains(input, *p, in_len)) flags |= FOSSIL_SAN_PATH;
224+
225+
if (long_base64_run(input, in_len, 80))
226+
flags |= FOSSIL_SAN_BASE64;
227+
228+
/* Sanitization pass */
229+
for (size_t i = 0; i < in_len && out_i < output_size - 1; i++) {
230+
char c = input[i];
231+
if (is_allowed(c)) {
232+
output[out_i++] = c;
233+
} else {
234+
output[out_i++] = '_'; /* neutralize */
235+
flags |= FOSSIL_SAN_MODIFIED;
145236
}
146-
*error_code = ferror(input_stream->file);
147-
fossil_io_fprintf(FOSSIL_STDERR, "Error: Failed to read from input stream.\n");
148-
return NULL;
149237
}
238+
output[out_i] = '\0';
150239

151-
// Ensure the string is null-terminated
152-
size_t len = strlen(buf);
153-
if (len > 0 && buf[len - 1] == '\n') {
154-
buf[len - 1] = '\0'; // Remove the newline character
155-
}
156-
157-
// Trim any leading or trailing whitespace
158-
fossil_io_trim(buf);
159-
160-
return buf;
240+
return flags == 0 ? FOSSIL_SAN_OK : flags;
161241
}
162242

163243
int fossil_io_scanf(const char *format, ...) {
@@ -210,6 +290,150 @@ char *fossil_io_gets_utf8(char *buf, size_t size, fossil_fstream_t *input_stream
210290
return buf;
211291
}
212292

293+
/*
 * Return 1 when `needle` (non-empty) occurs anywhere in `haystack`, comparing
 * bytes case-insensitively. Uses only ISO C so no GNU extension is required.
 */
static int suspicious_user_contains_nocase(const char *haystack, const char *needle) {
    for (; *haystack != '\0'; ++haystack) {
        size_t k = 0;
        /* A NUL in haystack never equals a needle byte, so this cannot overrun. */
        while (needle[k] != '\0' &&
               tolower((unsigned char)haystack[k]) == tolower((unsigned char)needle[k])) {
            ++k;
        }
        if (needle[k] == '\0') {
            return 1;
        }
    }
    return 0;
}

/**
 * Heuristically decide whether a user name looks automated or fake.
 *
 * @param input  NUL-terminated user name; may be NULL.
 * @return 1 when the name is flagged, 0 otherwise. NULL and the empty string
 *         are not flagged.
 *
 * A name is flagged when any of these holds:
 *   1. it is shorter than 3 or longer than 32 bytes;
 *   2. it contains a run of 5+ digits, or more than half of it is digits;
 *   3. it contains one of a fixed list of keywords (case-insensitive);
 *   4. its byte-level Shannon entropy exceeds 4.5 bits.
 */
int fossil_io_validate_is_suspicious_user(const char *input) {
    enum { USERNAME_MIN_LEN = 3, USERNAME_MAX_LEN = 32 };

    static const char *const bad_keywords[] = {
        "bot", "test", "fake", "spam", "zzz", "null", "admin"
    };

    /*
     * log2(n) for n = 0..USERNAME_MAX_LEN (entry 0 is unused). The length
     * check below bounds every count to this range, so a table is enough and
     * the function needs neither <math.h> nor libm. The table must be
     * extended if USERNAME_MAX_LEN is ever raised.
     */
    static const double log2_of[USERNAME_MAX_LEN + 1] = {
        0.0,
        0.0,                1.0,                1.5849625007211562, 2.0,
        2.321928094887362,  2.584962500721156,  2.807354922057604,  3.0,
        3.1699250014423126, 3.321928094887362,  3.4594316186372978, 3.584962500721156,
        3.700439718141092,  3.807354922057604,  3.9068905956085187, 4.0,
        4.087462841250339,  4.169925001442312,  4.247927513443585,  4.321928094887362,
        4.392317422778761,  4.459431618637297,  4.523561956057013,  4.584962500721156,
        4.643856189774724,  4.700439718141092,  4.754887502163468,  4.807354922057604,
        4.857980995127572,  4.906890595608519,  4.954196310386875,  5.0
    };

    if (input == NULL) return 0;

    const size_t len = strlen(input);
    if (len == 0) return 0;

    // 1. Too short or too long
    if (len < USERNAME_MIN_LEN || len > USERNAME_MAX_LEN) return 1;

    // 2. Digit runs and overall digit share
    size_t digit_run = 0, max_digit_run = 0, digit_count = 0;
    for (size_t i = 0; i < len; i++) {
        if (isdigit((unsigned char)input[i])) {
            digit_run++;
            digit_count++;
            if (digit_run > max_digit_run) max_digit_run = digit_run;
        } else {
            digit_run = 0;
        }
    }
    if (max_digit_run >= 5) return 1;     // suspicious long digit run
    if (digit_count * 2 > len) return 1;  // more than half digits

    // 3. Suspicious keywords
    for (size_t i = 0; i < sizeof bad_keywords / sizeof bad_keywords[0]; i++) {
        if (suspicious_user_contains_nocase(input, bad_keywords[i])) {
            return 1;
        }
    }

    // 4. Very high entropy (simple Shannon estimate):
    //    H = log2(len) - (sum of f * log2(f)) / len, f = per-byte counts
    int freq[256] = {0};
    for (size_t i = 0; i < len; i++) freq[(unsigned char)input[i]]++;

    double weighted = 0.0;
    for (int i = 0; i < 256; i++) {
        if (freq[i] > 0) {
            weighted += (double)freq[i] * log2_of[freq[i]];
        }
    }
    const double entropy = log2_of[len] - weighted / (double)len;
    if (entropy > 4.5) return 1;  // suspiciously random-like

    return 0;  // not flagged
}
340+
341+
/**
 * Check whether an e-mail address belongs to a known disposable-mail domain.
 *
 * @param input  NUL-terminated e-mail address; may be NULL.
 * @return 1 when the domain part equals (case-insensitively) one of the
 *         built-in disposable domains, 0 otherwise (including NULL input or
 *         an address without '@').
 */
int fossil_io_validate_is_disposable_email(const char *input) {
    static const char *const disposable_domains[] = {
        "mailinator.com", "10minutemail.com", "guerrillamail.com",
        "tempmail.com", "trashmail.com", "yopmail.com"
    };

    if (input == NULL) return 0;

    /*
     * The domain follows the LAST '@': a quoted local part may itself
     * contain '@' (e.g. "a@b"@yopmail.com), and splitting at the first one
     * would let such addresses slip through.
     */
    const char *at = strrchr(input, '@');
    if (at == NULL) return 0;

    const char *domain = at + 1;
    for (size_t i = 0; i < sizeof disposable_domains / sizeof disposable_domains[0]; i++) {
        if (strcasecmp(domain, disposable_domains[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
359+
360+
/*
 * Return 1 when `needle` (non-empty) occurs anywhere in `haystack`, comparing
 * bytes case-insensitively. Uses only ISO C so no GNU extension is required.
 */
static int bot_signature_contains_nocase(const char *haystack, const char *needle) {
    for (; *haystack != '\0'; ++haystack) {
        size_t k = 0;
        /* A NUL in haystack never equals a needle byte, so this cannot overrun. */
        while (needle[k] != '\0' &&
               tolower((unsigned char)haystack[k]) == tolower((unsigned char)needle[k])) {
            ++k;
        }
        if (needle[k] == '\0') {
            return 1;
        }
    }
    return 0;
}

/**
 * Check a string (typically a User-Agent value) for known bot signatures.
 *
 * @param input  NUL-terminated text to inspect; may be NULL.
 * @return 1 when any built-in signature occurs in `input`
 *         (case-insensitive substring match), 0 otherwise or for NULL.
 */
int fossil_io_validate_is_suspicious_bot(const char *input) {
    static const char *const bot_signatures[] = {
        "bot", "crawl", "spider", "scrape", "httpclient", "libwww",
        "wget", "curl", "python-requests", "java", "go-http-client"
    };

    if (input == NULL) return 0;

    for (size_t i = 0; i < sizeof bot_signatures / sizeof bot_signatures[0]; i++) {
        if (bot_signature_contains_nocase(input, bot_signatures[i])) {
            return 1;
        }
    }

    return 0;
}
377+
378+
/*
 * Return 1 when `needle` (non-empty) occurs anywhere in `haystack`, comparing
 * bytes case-insensitively.
 */
static int weak_password_contains_nocase(const char *haystack, const char *needle) {
    for (; *haystack != '\0'; ++haystack) {
        size_t k = 0;
        /* A NUL in haystack never equals a needle byte, so this cannot overrun. */
        while (needle[k] != '\0' &&
               tolower((unsigned char)haystack[k]) == tolower((unsigned char)needle[k])) {
            ++k;
        }
        if (needle[k] == '\0') {
            return 1;
        }
    }
    return 0;
}

/**
 * Basic password-strength check.
 *
 * @param password  NUL-terminated candidate password; NULL counts as weak.
 * @param username  Optional account name (NULL or "" to skip that check).
 * @param email     Optional e-mail address (NULL or "" to skip that check).
 * @return 1 when the password is considered weak, 0 when it passes.
 *
 * A password is weak when any of these holds:
 *   1. its length is below 8 or above 64 bytes;
 *   2. it uses fewer than three of: lowercase, uppercase, digit, other;
 *   3. it contains a well-known weak password (case-insensitive);
 *   4. it is one repeated byte or a strictly ascending/descending byte run;
 *   5. it equals the user name or the e-mail address (case-insensitive).
 */
int fossil_io_validate_is_weak_password(const char *password,
                                        const char *username,
                                        const char *email) {
    static const char *const weak_list[] = {
        "password", "123456", "123456789", "qwerty", "abc123",
        "letmein", "111111", "123123", "iloveyou", "admin"
    };

    if (password == NULL) return 1;

    const size_t len = strlen(password);

    // 1. Length check
    if (len < 8 || len > 64) {
        return 1;  // too short or unreasonably long
    }

    // 2. Character diversity
    int has_lower = 0, has_upper = 0, has_digit = 0, has_symbol = 0;
    for (size_t i = 0; i < len; i++) {
        const unsigned char ch = (unsigned char)password[i];
        if (islower(ch)) has_lower = 1;
        else if (isupper(ch)) has_upper = 1;
        else if (isdigit(ch)) has_digit = 1;
        else has_symbol = 1;
    }
    if (has_lower + has_upper + has_digit + has_symbol < 3) {
        return 1;  // not diverse enough
    }

    // 3. Common weak passwords.
    //    Every list entry uses fewer than three character classes, so an
    //    exact comparison could never succeed after step 2. Matching by
    //    containment catches decorated variants such as "Password123!".
    for (size_t i = 0; i < sizeof weak_list / sizeof weak_list[0]; i++) {
        if (weak_password_contains_nocase(password, weak_list[i])) {
            return 1;
        }
    }

    // 4. Sequential/repetitive patterns
    int seq_inc = 1, seq_dec = 1, same = 1;
    for (size_t i = 1; i < len; i++) {
        const int prev = (unsigned char)password[i - 1];
        const int cur = (unsigned char)password[i];
        if (cur != prev) same = 0;
        if (cur != prev + 1) seq_inc = 0;
        if (cur != prev - 1) seq_dec = 0;
    }
    if (same || seq_inc || seq_dec) {
        return 1;
    }

    // 5. Prevent reuse of username or email as password
    if (username && *username && strcasecmp(password, username) == 0) {
        return 1;
    }
    if (email && *email && strcasecmp(password, email) == 0) {
        return 1;
    }

    return 0;  // password passed basic strength checks
}
436+
213437
int fossil_io_validate_is_int(const char *input, int *output) {
214438
if (input == NULL || output == NULL) {
215439
return 0;
@@ -295,17 +519,78 @@ int fossil_io_validate_is_length(const char *input, size_t max_length) {
295519
return strlen(input) <= max_length;
296520
}
297521

298-
int fossil_io_validate_sanitize_string(const char *input, char *output, size_t output_size) {
299-
if (input == NULL || output == NULL || output_size == 0) {
300-
return 0;
522+
/* ============================================================
523+
* Helpers
524+
* ============================================================ */
525+
526+
/*
 * Generic-context filter: accepts alphanumerics plus a fixed set of
 * punctuation characters. Returns 1 when `c` is permitted, 0 otherwise.
 */
static inline int is_allowed_generic(char c) {
    static const char extra_allowed[] = " _-.,:/\\@+=#%()[]";

    if (isalnum((unsigned char)c)) {
        return 1;
    }
    /* strchr() would also match the terminator, so rule out NUL first. */
    return (c != '\0' && strchr(extra_allowed, c) != NULL) ? 1 : 0;
}
302537

303-
// Copy the input string to the output buffer
304-
strncpy(output, input, output_size);
538+
/* Allowed chars for specific contexts */
539+
/*
 * HTML-context filter: alphanumerics, space, '-', '_', '.' and ','.
 * Returns 1 when `c` is permitted, 0 otherwise.
 */
static inline int is_allowed_html(char c) {
    switch (c) {
        case ' ':
        case '-':
        case '_':
        case '.':
        case ',':
            return 1;
        default:
            return isalnum((unsigned char)c) != 0;
    }
}
305542

306-
return 1;
543+
/*
 * SQL-context filter: alphanumerics, space, '_' and '-'.
 * Returns 1 when `c` is permitted, 0 otherwise.
 */
static inline int is_allowed_sql(char c) {
    if (c == ' ' || c == '_' || c == '-') {
        return 1;
    }
    return isalnum((unsigned char)c) ? 1 : 0;
}
546+
547+
/*
 * Shell-context filter: alphanumerics, space, '_', '-', '.' and '/'.
 * Returns 1 when `c` is permitted, 0 otherwise.
 */
static inline int is_allowed_shell(char c) {
    if (isalnum((unsigned char)c)) {
        return 1;
    }
    switch (c) {
        case ' ':
        case '_':
        case '-':
        case '.':
        case '/':
            return 1;
        default:
            return 0;
    }
}
550+
551+
/*
 * File-name filter: alphanumerics, '_', '-' and '.' (no space, no slashes).
 * Returns 1 when `c` is permitted, 0 otherwise.
 */
static inline int is_allowed_filename(char c) {
    const int is_name_punct = (c == '_') || (c == '-') || (c == '.');

    if (is_name_punct) {
        return 1;
    }
    return isalnum((unsigned char)c) ? 1 : 0;
}
308554

555+
/* Base64 heuristic */
556+
/*
 * Base64 heuristic: report whether the first `len` bytes of `s` contain at
 * least `threshold` consecutive bytes from the set A-Z, a-z, 0-9, '+', '/',
 * '='. Returns 1 when such a run exists, 0 otherwise.
 */
static int long_base64_run(const char *s, size_t len, size_t threshold) {
    size_t streak = 0;
    size_t pos = 0;

    while (pos < len) {
        const unsigned char ch = (unsigned char)s[pos++];
        const int is_upper = ('A' <= ch && ch <= 'Z');
        const int is_lower = ('a' <= ch && ch <= 'z');
        const int is_digit = ('0' <= ch && ch <= '9');
        const int is_extra = (ch == '+' || ch == '/' || ch == '=');

        if (!(is_upper || is_lower || is_digit || is_extra)) {
            streak = 0;  /* run broken, start over */
            continue;
        }
        if (++streak >= threshold) {
            return 1;
        }
    }
    return 0;
}
572+
573+
/* Case-insensitive contains */
574+
/*
 * Case-insensitive substring test limited to the first `len` bytes of
 * `haystack`. Returns 1 when `needle` occurs there, 0 otherwise; an empty
 * needle, or one longer than `len`, never matches.
 */
static int strncase_contains(const char *haystack, const char *needle, size_t len) {
    const size_t needle_len = strlen(needle);

    if (needle_len == 0 || needle_len > len) {
        return 0;
    }

    const size_t last_start = len - needle_len;
    for (size_t start = 0; start <= last_start; ++start) {
        size_t matched = 0;
        while (matched < needle_len &&
               tolower((unsigned char)haystack[start + matched]) ==
                   tolower((unsigned char)needle[matched])) {
            ++matched;
        }
        if (matched == needle_len) {
            return 1;
        }
    }
    return 0;
}
588+
589+
/* ============================================================
590+
* Sanitizer with bitmask + context
591+
* ============================================================ */
592+
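/* The sanitizer itself, fossil_io_validate_sanitize_string_ctx(), is defined earlier in this file. */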
593+
309594
int fossil_io_gets(char *buffer, size_t size) {
310595
if (fgets(buffer, size, stdin) == NULL) {
311596
return -1; // Error or EOF

0 commit comments

Comments
 (0)