@@ -127,20 +127,6 @@ static int strncase_contains(const char *haystack, const char *needle, size_t le
127127 return 0 ;
128128}
129129
130- /* ============================================================
131- * Bitmask flags for detection
132- * ============================================================ */
133- #define FOSSIL_SAN_OK 0x00
134- #define FOSSIL_SAN_MODIFIED 0x01
135- #define FOSSIL_SAN_SCRIPT 0x02
136- #define FOSSIL_SAN_SQL 0x04
137- #define FOSSIL_SAN_SHELL 0x08
138- #define FOSSIL_SAN_BASE64 0x10
139- #define FOSSIL_SAN_PATH 0x20
140- #define FOSSIL_SAN_BOT 0x40
141- #define FOSSIL_SAN_SPAM 0x80
142-
143-
144130// Function to trim leading and trailing spaces from a string
145131void fossil_io_trim (char * str ) {
146132 if (str == NULL ) return ;
@@ -243,7 +229,7 @@ char *fossil_io_gets_from_stream(char *buf, size_t size, fossil_fstream_t *input
243229}
244230
245231/* --- sanitizer --- */
246- int fossil_io_validate_sanitize_string_ctx (const char * input ,
232+ int fossil_io_validate_sanitize_string (const char * input ,
247233 char * output ,
248234 size_t output_size ,
249235 fossil_context_t ctx ) {
@@ -268,25 +254,51 @@ int fossil_io_validate_sanitize_string_ctx(const char *input,
268254
269255 /* Suspicious patterns */
270256 const char * script_patterns [] = {
271- "<script" , "javascript:" , "onerror=" , "onload=" , "onclick=" , "eval(" , NULL
257+ "<script" , "javascript:" , "onerror=" , "onload=" , "onclick=" ,
258+ "eval(" , "document.cookie" , "alert(" , "src=" , "iframe" , "onmouseover=" ,
259+ "onfocus=" , "onblur=" , "onchange=" , "oninput=" , "onreset=" , "onsubmit=" ,
260+ "onselect=" , "onkeydown=" , "onkeyup=" , "onkeypress=" , "onmousedown=" ,
261+ "onmouseup=" , "onmousemove=" , "onmouseenter=" , "onmouseleave=" , "onwheel=" ,
262+ "oncontextmenu=" , "oncopy=" , "oncut=" , "onpaste=" , "location.href" ,
263+ "window.open" , "window.location" , NULL
272264 };
273265 const char * sql_patterns [] = {
274266 "select " , "insert " , "update " , "delete " , "drop " , "union " ,
275- "--" , ";--" , "/*" , "*/" , "0x" , NULL
267+ "--" , ";--" , "/*" , "*/" , "0x" , "xp_" , "exec " , "sp_" , "information_schema" ,
268+ "truncate " , "alter " , "create " , "rename " , "grant " , "revoke " , "cast(" ,
269+ "convert(" , "declare " , "fetch " , "open " , "close " , "rollback " , "commit " ,
270+ "savepoint " , "release " , "begin " , "end " , NULL
276271 };
277272 const char * shell_patterns [] = {
278273 "curl " , "wget " , "rm -rf" , "powershell" , "cmd.exe" ,
279- "exec(" , "system(" , "|" , "&&" , "||" , NULL
274+ "exec(" , "system(" , "|" , "&&" , "||" , "bash" , "sh" , "zsh" , "fish" , "scp " ,
275+ "ssh " , "ftp " , "tftp " , "nc " , "netcat " , "nmap " , "chmod " , "chown " ,
276+ "sudo " , "kill " , "pkill " , "ps " , "ls " , "cat " , "dd " , "mkfs " , "mount " ,
277+ "umount " , "service " , "systemctl " , "init " , "reboot " , "shutdown " ,
278+ "start " , "stop " , "restart " , NULL
280279 };
281280 const char * bot_patterns [] = {
282- "bot" , "crawler" , "spider" , "curl/" , "python-requests" , "scrapy" , NULL
281+ "bot" , "crawler" , "spider" , "curl/" , "python-requests" , "scrapy" , "httpclient" ,
282+ "libwww" , "wget" , "java" , "go-http-client" , "phantomjs" , "selenium" , "headless" ,
283+ "robot" , "checker" , "monitor" , "scan" , "probe" , "harvest" , "grabber" , "fetcher" ,
284+ "indexer" , "parser" , "api-client" , "node-fetch" , "axios" , NULL
283285 };
284286 const char * spam_patterns [] = {
285287 "viagra" , "free money" , "winner" , "prize" , "click here" ,
286- "http://" , "https://" , "meta refresh" , NULL
288+ "http://" , "https://" , "meta refresh" , "casino" , "loan" , "credit" , "bitcoin" ,
289+ "crypto" , "forex" , "investment" , "guaranteed" , "risk-free" , "unsubscribe" ,
290+ "buy now" , "limited offer" , "act now" , "earn cash" , "work from home" , "miracle" ,
291+ "weight loss" , "no prescription" , "cheap" , "discount" , "deal" , "promo" , "bonus" ,
292+ "gift" , "exclusive" , "urgent" , "clearance" , "bargain" , "order now" , "trial" ,
293+ "winner!" , "congratulations" , "selected" , "luxury" , "get rich" , "easy money" ,
294+ NULL
287295 };
288296 const char * path_patterns [] = {
289- "../" , "..\\" , "/etc/passwd" , "C:\\" , NULL
297+ "../" , "..\\" , "/etc/passwd" , "C:\\" , "/proc/self/environ" , "/proc/version" ,
298+ "/proc/cpuinfo" , "/proc/meminfo" , "/boot.ini" , "/windows/" , "/winnt/" , "/system32/" ,
299+ "/sys/" , "/dev/" , "/bin/" , "/sbin/" , "/usr/" , "/var/" , "/tmp/" , "/root/" , "/home/" ,
300+ "/Users/" , "/Documents/" , "/AppData/" , "/Local/" , "/Roaming/" , "/Program Files/" ,
301+ "/ProgramData/" , "/Desktop/" , "/Downloads/" , NULL
290302 };
291303
292304 /* Scan categories */
@@ -385,34 +397,73 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
385397 // 1. Too long or too short
386398 if (len < 3 || len > 32 ) return 1 ;
387399
388- // 2. Count digits, letters, and digit runs
400+ // 2. Count digits, letters, digit runs, and symbol runs
389401 int digit_run = 0 , max_digit_run = 0 , digit_count = 0 , alpha_count = 0 ;
402+ int symbol_run = 0 , max_symbol_run = 0 , symbol_count = 0 ;
390403 for (size_t i = 0 ; i < len ; i ++ ) {
391404 if (isdigit ((unsigned char )input [i ])) {
392405 digit_run ++ ;
393406 digit_count ++ ;
394407 if (digit_run > max_digit_run ) max_digit_run = digit_run ;
408+ symbol_run = 0 ;
409+ } else if (isalpha ((unsigned char )input [i ])) {
410+ alpha_count ++ ;
411+ digit_run = 0 ;
412+ symbol_run = 0 ;
395413 } else {
414+ symbol_run ++ ;
415+ symbol_count ++ ;
396416 digit_run = 0 ;
397- if (isalpha (( unsigned char ) input [ i ])) alpha_count ++ ;
417+ if (symbol_run > max_symbol_run ) max_symbol_run = symbol_run ;
398418 }
399419 }
400420
401- // 3. Check for long digit runs or too few letters
421+ // 3. Check for long digit/symbol runs or too few letters
402422 if (max_digit_run >= 5 ) return 1 ; // suspicious long digit tail
403- if ((float )digit_count / len > 0.5 ) return 1 ; // mostly digits
423+ if (max_symbol_run >= 4 ) return 1 ; // suspicious long symbol run
424+ if (digit_count >= 8 ) return 1 ; // many digits (new: covers user1234567890)
425+ if ((float )digit_count / len > 0.45 ) return 1 ; // high digit ratio (new: covers a1b2c3d4e5f6g7h8i9j0)
404426 if ((float )alpha_count / len < 0.3 ) return 1 ; // too few letters
405-
406- // 4. Suspicious keywords
407- const char * bad_keywords [] = {"bot" , "test" , "fake" , "spam" , "zzz" , "null" , "admin" };
427+ if ((float )symbol_count / len > 0.3 ) return 1 ; // too many symbols
428+
429+ // 4. Suspicious keywords and patterns
430+ const char * bad_keywords [] = {
431+ "bot" , "test" , "fake" , "spam" , "zzz" , "null" , "admin" ,
432+ "user" , "guest" , "demo" , "temp" , "unknown" , "default" , "root" ,
433+ "system" , "anonymous" , "trial" , "sample" , "password" , "qwerty" ,
434+ "abc123" , "123456" , "login" , "register" , "support" , "contact" ,
435+ "info" , "webmaster" , "help" , "service" , "account" , "manager" ,
436+ "api" , "sys" , "operator" , "mod" , "moderator" , "superuser" ,
437+ "owner" , "master" , "testuser" , "tester" , "dev" , "developer" ,
438+ "backup" , "restore" , "error" , "fail" , "invalid" , "void"
439+ };
408440 size_t nkeys = sizeof (bad_keywords ) / sizeof (bad_keywords [0 ]);
409441 for (size_t i = 0 ; i < nkeys ; i ++ ) {
410442 if (fossil_io_cstring_case_search (input , bad_keywords [i ]) != NULL ) {
411443 return 1 ;
412444 }
413445 }
414446
415- // 5. Very high entropy (simple Shannon estimate)
447+ // 5. Degenerate patterns: every character identical, or a strict two-character alternation
448+ int repetitive = 1 , alternating = 1 ;
449+ for (size_t i = 1 ; i < len ; i ++ ) {
450+ if (input [i ] != input [i - 1 ]) repetitive = 0 ;
451+ if (i > 1 && input [i ] != input [i - 2 ]) alternating = 0 ;
452+ }
453+ if (repetitive || alternating ) return 1 ;
454+
455+ // 6. Keyboard walk detection (e.g., "qwerty", "asdf", "zxcv")
456+ const char * keyboard_walks [] = {
457+ "qwerty" , "asdf" , "zxcv" , "12345" , "67890" , "poiuy" , "lkjhg" , "mnbvc"
458+ };
459+ size_t nwalks = sizeof (keyboard_walks ) / sizeof (keyboard_walks [0 ]);
460+ for (size_t i = 0 ; i < nwalks ; i ++ ) {
461+ if (fossil_io_cstring_case_search (input , keyboard_walks [i ]) != NULL ) {
462+ return 1 ;
463+ }
464+ }
465+
466+ // 7. Very high entropy (simple Shannon estimate)
416467 int freq [256 ] = {0 };
417468 for (size_t i = 0 ; i < len ; i ++ ) freq [(unsigned char )input [i ]]++ ;
418469 double entropy = 0.0 ;
@@ -422,7 +473,17 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
422473 entropy -= p * log2 (p );
423474 }
424475 }
425- if (entropy > 4.5 ) return 1 ; // suspiciously random-like
476+ if (entropy > 4.2 ) return 1 ; // slightly lower threshold for suspicious randomness
477+
478+ // 8. Looks like an email or URL
479+ if (strchr (input , '@' ) || fossil_io_cstring_case_search (input , "http" ) != NULL ) return 1 ;
480+
481+ // 9. Looks like a UUID or hex string
482+ size_t hex_count = 0 ;
483+ for (size_t i = 0 ; i < len ; i ++ ) {
484+ if (isxdigit ((unsigned char )input [i ])) hex_count ++ ;
485+ }
486+ if (hex_count == len && len >= 16 ) return 1 ;
426487
427488 return 0 ; // not flagged
428489}
0 commit comments