@@ -243,7 +243,7 @@ char *fossil_io_gets_from_stream(char *buf, size_t size, fossil_fstream_t *input
243243}
244244
245245/* --- sanitizer --- */
246- int fossil_io_validate_sanitize_string_ctx (const char * input ,
246+ int fossil_io_validate_sanitize_string (const char * input ,
247247 char * output ,
248248 size_t output_size ,
249249 fossil_context_t ctx ) {
@@ -268,25 +268,51 @@ int fossil_io_validate_sanitize_string_ctx(const char *input,
268268
269269 /* Suspicious patterns */
270270 const char * script_patterns [] = {
271- "<script" , "javascript:" , "onerror=" , "onload=" , "onclick=" , "eval(" , NULL
271+ "<script" , "javascript:" , "onerror=" , "onload=" , "onclick=" ,
272+ "eval(" , "document.cookie" , "alert(" , "src=" , "iframe" , "onmouseover=" ,
273+ "onfocus=" , "onblur=" , "onchange=" , "oninput=" , "onreset=" , "onsubmit=" ,
274+ "onselect=" , "onkeydown=" , "onkeyup=" , "onkeypress=" , "onmousedown=" ,
275+ "onmouseup=" , "onmousemove=" , "onmouseenter=" , "onmouseleave=" , "onwheel=" ,
276+ "oncontextmenu=" , "oncopy=" , "oncut=" , "onpaste=" , "location.href" ,
277+ "window.open" , "window.location" , NULL
272278 };
273279 const char * sql_patterns [] = {
274280 "select " , "insert " , "update " , "delete " , "drop " , "union " ,
275- "--" , ";--" , "/*" , "*/" , "0x" , NULL
281+ "--" , ";--" , "/*" , "*/" , "0x" , "xp_" , "exec " , "sp_" , "information_schema" ,
282+ "truncate " , "alter " , "create " , "rename " , "grant " , "revoke " , "cast(" ,
283+ "convert(" , "declare " , "fetch " , "open " , "close " , "rollback " , "commit " ,
284+ "savepoint " , "release " , "begin " , "end " , NULL
276285 };
277286 const char * shell_patterns [] = {
278287 "curl " , "wget " , "rm -rf" , "powershell" , "cmd.exe" ,
279- "exec(" , "system(" , "|" , "&&" , "||" , NULL
288+ "exec(" , "system(" , "|" , "&&" , "||" , "bash" , "sh" , "zsh" , "fish" , "scp " ,
289+ "ssh " , "ftp " , "tftp " , "nc " , "netcat " , "nmap " , "chmod " , "chown " ,
290+ "sudo " , "kill " , "pkill " , "ps " , "ls " , "cat " , "dd " , "mkfs " , "mount " ,
291+ "umount " , "service " , "systemctl " , "init " , "reboot " , "shutdown " ,
292+ "start " , "stop " , "restart " , NULL
280293 };
281294 const char * bot_patterns [] = {
282- "bot" , "crawler" , "spider" , "curl/" , "python-requests" , "scrapy" , NULL
295+ "bot" , "crawler" , "spider" , "curl/" , "python-requests" , "scrapy" , "httpclient" ,
296+ "libwww" , "wget" , "java" , "go-http-client" , "phantomjs" , "selenium" , "headless" ,
297+ "robot" , "checker" , "monitor" , "scan" , "probe" , "harvest" , "grabber" , "fetcher" ,
298+ "indexer" , "parser" , "api-client" , "node-fetch" , "axios" , NULL
283299 };
284300 const char * spam_patterns [] = {
285301 "viagra" , "free money" , "winner" , "prize" , "click here" ,
286- "http://" , "https://" , "meta refresh" , NULL
302+ "http://" , "https://" , "meta refresh" , "casino" , "loan" , "credit" , "bitcoin" ,
303+ "crypto" , "forex" , "investment" , "guaranteed" , "risk-free" , "unsubscribe" ,
304+ "buy now" , "limited offer" , "act now" , "earn cash" , "work from home" , "miracle" ,
305+ "weight loss" , "no prescription" , "cheap" , "discount" , "deal" , "promo" , "bonus" ,
306+ "gift" , "exclusive" , "urgent" , "clearance" , "bargain" , "order now" , "trial" ,
307+ "winner!" , "congratulations" , "selected" , "luxury" , "get rich" , "easy money" ,
308+ NULL
287309 };
288310 const char * path_patterns [] = {
289- "../" , "..\\" , "/etc/passwd" , "C:\\" , NULL
311+ "../" , "..\\" , "/etc/passwd" , "C:\\" , "/proc/self/environ" , "/proc/version" ,
312+ "/proc/cpuinfo" , "/proc/meminfo" , "/boot.ini" , "/windows/" , "/winnt/" , "/system32/" ,
313+ "/sys/" , "/dev/" , "/bin/" , "/sbin/" , "/usr/" , "/var/" , "/tmp/" , "/root/" , "/home/" ,
314+ "/Users/" , "/Documents/" , "/AppData/" , "/Local/" , "/Roaming/" , "/Program Files/" ,
315+ "/ProgramData/" , "/Desktop/" , "/Downloads/" , NULL
290316 };
291317
292318 /* Scan categories */
@@ -385,34 +411,72 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
385411 // 1. Too long or too short
386412 if (len < 3 || len > 32 ) return 1 ;
387413
388- // 2. Count digits, letters, and digit runs
414+ // 2. Count digits, letters, digit runs, and symbol runs
389415 int digit_run = 0 , max_digit_run = 0 , digit_count = 0 , alpha_count = 0 ;
416+ int symbol_run = 0 , max_symbol_run = 0 , symbol_count = 0 ;
390417 for (size_t i = 0 ; i < len ; i ++ ) {
391418 if (isdigit ((unsigned char )input [i ])) {
392419 digit_run ++ ;
393420 digit_count ++ ;
394421 if (digit_run > max_digit_run ) max_digit_run = digit_run ;
422+ symbol_run = 0 ;
423+ } else if (isalpha ((unsigned char )input [i ])) {
424+ alpha_count ++ ;
425+ digit_run = 0 ;
426+ symbol_run = 0 ;
395427 } else {
428+ symbol_run ++ ;
429+ symbol_count ++ ;
396430 digit_run = 0 ;
397- if (isalpha (( unsigned char ) input [ i ])) alpha_count ++ ;
431+ if (symbol_run > max_symbol_run ) max_symbol_run = symbol_run ;
398432 }
399433 }
400434
401- // 3. Check for long digit runs or too few letters
435+ // 3. Check for long digit/symbol runs or too few letters
402436 if (max_digit_run >= 5 ) return 1 ; // suspicious long digit tail
437+ if (max_symbol_run >= 4 ) return 1 ; // suspicious long symbol run
403438 if ((float )digit_count / len > 0.5 ) return 1 ; // mostly digits
404439 if ((float )alpha_count / len < 0.3 ) return 1 ; // too few letters
405-
406- // 4. Suspicious keywords
407- const char * bad_keywords [] = {"bot" , "test" , "fake" , "spam" , "zzz" , "null" , "admin" };
440+ if ((float )symbol_count / len > 0.3 ) return 1 ; // too many symbols
441+
442+ // 4. Suspicious keywords and patterns
443+ const char * bad_keywords [] = {
444+ "bot" , "test" , "fake" , "spam" , "zzz" , "null" , "admin" ,
445+ "user" , "guest" , "demo" , "temp" , "unknown" , "default" , "root" ,
446+ "system" , "anonymous" , "trial" , "sample" , "password" , "qwerty" ,
447+ "abc123" , "123456" , "login" , "register" , "support" , "contact" ,
448+ "info" , "webmaster" , "help" , "service" , "account" , "manager" ,
449+ "api" , "sys" , "operator" , "mod" , "moderator" , "superuser" ,
450+ "owner" , "master" , "testuser" , "tester" , "dev" , "developer" ,
451+ "backup" , "restore" , "error" , "fail" , "invalid" , "void"
452+ };
408453 size_t nkeys = sizeof (bad_keywords ) / sizeof (bad_keywords [0 ]);
409454 for (size_t i = 0 ; i < nkeys ; i ++ ) {
410455 if (fossil_io_cstring_case_search (input , bad_keywords [i ]) != NULL ) {
411456 return 1 ;
412457 }
413458 }
414459
415- // 5. Very high entropy (simple Shannon estimate)
460+ // 5. Common suspicious patterns (repetitive, alternating, keyboard walks)
461+ int repetitive = 1 , alternating = 1 ;
462+ for (size_t i = 1 ; i < len ; i ++ ) {
463+ if (input [i ] != input [i - 1 ]) repetitive = 0 ;
464+ if (i > 1 && input [i ] != input [i - 2 ]) alternating = 0 ;
465+ }
466+ if (repetitive || alternating ) return 1 ;
467+
468+ // 6. Keyboard walk detection (e.g., "qwerty", "asdf", "zxcv")
469+ const char * keyboard_walks [] = {
470+ "qwerty" , "asdf" , "zxcv" , "12345" , "67890" , "poiuy" , "lkjhg" , "mnbvc"
471+ };
472+ size_t nwalks = sizeof (keyboard_walks ) / sizeof (keyboard_walks [0 ]);
473+ for (size_t i = 0 ; i < nwalks ; i ++ ) {
474+ if (fossil_io_cstring_case_search (input , keyboard_walks [i ]) != NULL ) {
475+ return 1 ;
476+ }
477+ }
478+
479+ // 7. Very high entropy (simple Shannon estimate)
416480 int freq [256 ] = {0 };
417481 for (size_t i = 0 ; i < len ; i ++ ) freq [(unsigned char )input [i ]]++ ;
418482 double entropy = 0.0 ;
@@ -424,6 +488,16 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
424488 }
425489 if (entropy > 4.5 ) return 1 ; // suspiciously random-like
426490
491+ // 8. Looks like an email or URL
492+ if (strchr (input , '@' ) || fossil_io_cstring_case_search (input , "http" ) != NULL ) return 1 ;
493+
494+ // 9. Looks like a UUID or hex string
495+ int hex_count = 0 ;
496+ for (size_t i = 0 ; i < len ; i ++ ) {
497+ if (isxdigit ((unsigned char )input [i ])) hex_count ++ ;
498+ }
499+ if (hex_count == len && len >= 16 ) return 1 ;
500+
427501 return 0 ; // not flagged
428502}
429503
0 commit comments