Skip to content

Commit ca90eaa

Browse files
add better scanning for sanitize method
1 parent ccb5f5d commit ca90eaa

File tree

2 files changed

+90
-16
lines changed

2 files changed

+90
-16
lines changed

code/logic/fossil/io/input.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ int fossil_io_validate_is_suspicious_user(const char *input);
223223
* output_size bytes. The function uses heuristics and is not a substitute
224224
* for context-specific escaping or prepared statements in SQL/HTML.
225225
*/
226-
int fossil_io_validate_sanitize_string_ctx(const char *input,
226+
int fossil_io_validate_sanitize_string(const char *input,
227227
char *output,
228228
size_t output_size,
229229
fossil_context_t ctx);
@@ -573,7 +573,7 @@ namespace fossil {
573573
*/
574574
static int validate_sanitize_string(std::string &input, fossil_context_t ctx) {
575575
std::vector<char> buffer(input.size() + 1);
576-
int flags = fossil_io_validate_sanitize_string_ctx(
576+
int flags = fossil_io_validate_sanitize_string(
577577
input.c_str(),
578578
buffer.data(),
579579
buffer.size(),

code/logic/input.c

Lines changed: 88 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ char *fossil_io_gets_from_stream(char *buf, size_t size, fossil_fstream_t *input
243243
}
244244

245245
/* --- sanitizer --- */
246-
int fossil_io_validate_sanitize_string_ctx(const char *input,
246+
int fossil_io_validate_sanitize_string(const char *input,
247247
char *output,
248248
size_t output_size,
249249
fossil_context_t ctx) {
@@ -268,25 +268,51 @@ int fossil_io_validate_sanitize_string_ctx(const char *input,
268268

269269
/* Suspicious patterns */
270270
const char *script_patterns[] = {
271-
"<script", "javascript:", "onerror=", "onload=", "onclick=", "eval(", NULL
271+
"<script", "javascript:", "onerror=", "onload=", "onclick=",
272+
"eval(", "document.cookie", "alert(", "src=", "iframe", "onmouseover=",
273+
"onfocus=", "onblur=", "onchange=", "oninput=", "onreset=", "onsubmit=",
274+
"onselect=", "onkeydown=", "onkeyup=", "onkeypress=", "onmousedown=",
275+
"onmouseup=", "onmousemove=", "onmouseenter=", "onmouseleave=", "onwheel=",
276+
"oncontextmenu=", "oncopy=", "oncut=", "onpaste=", "location.href",
277+
"window.open", "window.location", NULL
272278
};
273279
const char *sql_patterns[] = {
274280
"select ", "insert ", "update ", "delete ", "drop ", "union ",
275-
"--", ";--", "/*", "*/", "0x", NULL
281+
"--", ";--", "/*", "*/", "0x", "xp_", "exec ", "sp_", "information_schema",
282+
"truncate ", "alter ", "create ", "rename ", "grant ", "revoke ", "cast(",
283+
"convert(", "declare ", "fetch ", "open ", "close ", "rollback ", "commit ",
284+
"savepoint ", "release ", "begin ", "end ", NULL
276285
};
277286
const char *shell_patterns[] = {
278287
"curl ", "wget ", "rm -rf", "powershell", "cmd.exe",
279-
"exec(", "system(", "|", "&&", "||", NULL
288+
"exec(", "system(", "|", "&&", "||", "bash", "sh", "zsh", "fish", "scp ",
289+
"ssh ", "ftp ", "tftp ", "nc ", "netcat ", "nmap ", "chmod ", "chown ",
290+
"sudo ", "kill ", "pkill ", "ps ", "ls ", "cat ", "dd ", "mkfs ", "mount ",
291+
"umount ", "service ", "systemctl ", "init ", "reboot ", "shutdown ",
292+
"start ", "stop ", "restart ", NULL
280293
};
281294
const char *bot_patterns[] = {
282-
"bot", "crawler", "spider", "curl/", "python-requests", "scrapy", NULL
295+
"bot", "crawler", "spider", "curl/", "python-requests", "scrapy", "httpclient",
296+
"libwww", "wget", "java", "go-http-client", "phantomjs", "selenium", "headless",
297+
"robot", "checker", "monitor", "scan", "probe", "harvest", "grabber", "fetcher",
298+
"indexer", "parser", "api-client", "node-fetch", "axios", NULL
283299
};
284300
const char *spam_patterns[] = {
285301
"viagra", "free money", "winner", "prize", "click here",
286-
"http://", "https://", "meta refresh", NULL
302+
"http://", "https://", "meta refresh", "casino", "loan", "credit", "bitcoin",
303+
"crypto", "forex", "investment", "guaranteed", "risk-free", "unsubscribe",
304+
"buy now", "limited offer", "act now", "earn cash", "work from home", "miracle",
305+
"weight loss", "no prescription", "cheap", "discount", "deal", "promo", "bonus",
306+
"gift", "exclusive", "urgent", "clearance", "bargain", "order now", "trial",
307+
"winner!", "congratulations", "selected", "luxury", "get rich", "easy money",
308+
NULL
287309
};
288310
const char *path_patterns[] = {
289-
"../", "..\\", "/etc/passwd", "C:\\", NULL
311+
"../", "..\\", "/etc/passwd", "C:\\", "/proc/self/environ", "/proc/version",
312+
"/proc/cpuinfo", "/proc/meminfo", "/boot.ini", "/windows/", "/winnt/", "/system32/",
313+
"/sys/", "/dev/", "/bin/", "/sbin/", "/usr/", "/var/", "/tmp/", "/root/", "/home/",
314+
"/Users/", "/Documents/", "/AppData/", "/Local/", "/Roaming/", "/Program Files/",
315+
"/ProgramData/", "/Desktop/", "/Downloads/", NULL
290316
};
291317

292318
/* Scan categories */
@@ -385,34 +411,72 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
385411
// 1. Too long or too short
386412
if (len < 3 || len > 32) return 1;
387413

388-
// 2. Count digits, letters, and digit runs
414+
// 2. Count digits, letters, digit runs, and symbol runs
389415
int digit_run = 0, max_digit_run = 0, digit_count = 0, alpha_count = 0;
416+
int symbol_run = 0, max_symbol_run = 0, symbol_count = 0;
390417
for (size_t i = 0; i < len; i++) {
391418
if (isdigit((unsigned char)input[i])) {
392419
digit_run++;
393420
digit_count++;
394421
if (digit_run > max_digit_run) max_digit_run = digit_run;
422+
symbol_run = 0;
423+
} else if (isalpha((unsigned char)input[i])) {
424+
alpha_count++;
425+
digit_run = 0;
426+
symbol_run = 0;
395427
} else {
428+
symbol_run++;
429+
symbol_count++;
396430
digit_run = 0;
397-
if (isalpha((unsigned char)input[i])) alpha_count++;
431+
if (symbol_run > max_symbol_run) max_symbol_run = symbol_run;
398432
}
399433
}
400434

401-
// 3. Check for long digit runs or too few letters
435+
// 3. Check for long digit/symbol runs or too few letters
402436
if (max_digit_run >= 5) return 1; // suspicious long digit tail
437+
if (max_symbol_run >= 4) return 1; // suspicious long symbol run
403438
if ((float)digit_count / len > 0.5) return 1; // mostly digits
404439
if ((float)alpha_count / len < 0.3) return 1; // too few letters
405-
406-
// 4. Suspicious keywords
407-
const char *bad_keywords[] = {"bot", "test", "fake", "spam", "zzz", "null", "admin"};
440+
if ((float)symbol_count / len > 0.3) return 1; // too many symbols
441+
442+
// 4. Suspicious keywords and patterns
443+
const char *bad_keywords[] = {
444+
"bot", "test", "fake", "spam", "zzz", "null", "admin",
445+
"user", "guest", "demo", "temp", "unknown", "default", "root",
446+
"system", "anonymous", "trial", "sample", "password", "qwerty",
447+
"abc123", "123456", "login", "register", "support", "contact",
448+
"info", "webmaster", "help", "service", "account", "manager",
449+
"api", "sys", "operator", "mod", "moderator", "superuser",
450+
"owner", "master", "testuser", "tester", "dev", "developer",
451+
"backup", "restore", "error", "fail", "invalid", "void"
452+
};
408453
size_t nkeys = sizeof(bad_keywords) / sizeof(bad_keywords[0]);
409454
for (size_t i = 0; i < nkeys; i++) {
410455
if (fossil_io_cstring_case_search(input, bad_keywords[i]) != NULL) {
411456
return 1;
412457
}
413458
}
414459

415-
// 5. Very high entropy (simple Shannon estimate)
460+
// 5. Common suspicious patterns (repetitive, alternating, keyboard walks)
461+
int repetitive = 1, alternating = 1;
462+
for (size_t i = 1; i < len; i++) {
463+
if (input[i] != input[i - 1]) repetitive = 0;
464+
if (i > 1 && input[i] != input[i - 2]) alternating = 0;
465+
}
466+
if (repetitive || alternating) return 1;
467+
468+
// 6. Keyboard walk detection (e.g., "qwerty", "asdf", "zxcv")
469+
const char *keyboard_walks[] = {
470+
"qwerty", "asdf", "zxcv", "12345", "67890", "poiuy", "lkjhg", "mnbvc"
471+
};
472+
size_t nwalks = sizeof(keyboard_walks) / sizeof(keyboard_walks[0]);
473+
for (size_t i = 0; i < nwalks; i++) {
474+
if (fossil_io_cstring_case_search(input, keyboard_walks[i]) != NULL) {
475+
return 1;
476+
}
477+
}
478+
479+
// 7. Very high entropy (simple Shannon estimate)
416480
int freq[256] = {0};
417481
for (size_t i = 0; i < len; i++) freq[(unsigned char)input[i]]++;
418482
double entropy = 0.0;
@@ -424,6 +488,16 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
424488
}
425489
if (entropy > 4.5) return 1; // suspiciously random-like
426490

491+
// 8. Looks like an email or URL
492+
if (strchr(input, '@') || fossil_io_cstring_case_search(input, "http") != NULL) return 1;
493+
494+
// 9. Looks like a UUID or hex string
495+
int hex_count = 0;
496+
for (size_t i = 0; i < len; i++) {
497+
if (isxdigit((unsigned char)input[i])) hex_count++;
498+
}
499+
if (hex_count == len && len >= 16) return 1;
500+
427501
return 0; // not flagged
428502
}
429503

0 commit comments

Comments
 (0)