Skip to content

Commit d29ca65

Browse files
Merge pull request #74 from dreamer-coding/main
2 parents ccb5f5d + 619b7c0 commit d29ca65

File tree

4 files changed

+319
-35
lines changed

4 files changed

+319
-35
lines changed

code/logic/fossil/io/input.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,21 @@ typedef enum {
2323
FOSSIL_CTX_HTML,
2424
FOSSIL_CTX_SQL,
2525
FOSSIL_CTX_SHELL,
26-
FOSSIL_CTX_FILENAME
26+
FOSSIL_CTX_FILENAME,
27+
FOSSIL_CTX_NONE
2728
} fossil_context_t;
2829

30+
/*
 * Bitmask flags for string sanitization results.
 *
 * Every value other than FOSSIL_SAN_OK occupies its own bit (0x01 .. 0x80),
 * so several findings can be OR-ed into a single int and tested one at a
 * time with `flags & FOSSIL_SAN_xxx`.
 *
 * FOSSIL_SAN_OK is zero, so it must be compared with `==`;
 * `flags & FOSSIL_SAN_OK` always evaluates to 0.
 */
31+
#define FOSSIL_SAN_OK 0x00 /* No issues detected; string is clean */
32+
#define FOSSIL_SAN_MODIFIED 0x01 /* Input was modified during sanitization */
33+
#define FOSSIL_SAN_SCRIPT 0x02 /* Script or JavaScript patterns detected */
34+
#define FOSSIL_SAN_SQL 0x04 /* SQL injection patterns detected */
35+
#define FOSSIL_SAN_SHELL 0x08 /* Shell or command execution patterns detected */
36+
#define FOSSIL_SAN_BASE64 0x10 /* Suspiciously long base64 sequences detected */
37+
#define FOSSIL_SAN_PATH 0x20 /* Path traversal or filesystem patterns detected */
38+
#define FOSSIL_SAN_BOT 0x40 /* Bot or automated agent patterns detected */
39+
#define FOSSIL_SAN_SPAM 0x80 /* Spam or suspicious marketing content detected */
40+
2941
#ifdef __cplusplus
3042
extern "C" {
3143
#endif
@@ -223,7 +235,7 @@ int fossil_io_validate_is_suspicious_user(const char *input);
223235
* output_size bytes. The function uses heuristics and is not a substitute
224236
* for context-specific escaping or prepared statements in SQL/HTML.
225237
*/
226-
int fossil_io_validate_sanitize_string_ctx(const char *input,
238+
int fossil_io_validate_sanitize_string(const char *input,
227239
char *output,
228240
size_t output_size,
229241
fossil_context_t ctx);
@@ -573,7 +585,7 @@ namespace fossil {
573585
*/
574586
static int validate_sanitize_string(std::string &input, fossil_context_t ctx) {
575587
std::vector<char> buffer(input.size() + 1);
576-
int flags = fossil_io_validate_sanitize_string_ctx(
588+
int flags = fossil_io_validate_sanitize_string(
577589
input.c_str(),
578590
buffer.data(),
579591
buffer.size(),

code/logic/input.c

Lines changed: 91 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -127,20 +127,6 @@ static int strncase_contains(const char *haystack, const char *needle, size_t le
127127
return 0;
128128
}
129129

130-
/* ============================================================
131-
* Bitmask flags for detection
132-
* ============================================================ */
133-
#define FOSSIL_SAN_OK 0x00
134-
#define FOSSIL_SAN_MODIFIED 0x01
135-
#define FOSSIL_SAN_SCRIPT 0x02
136-
#define FOSSIL_SAN_SQL 0x04
137-
#define FOSSIL_SAN_SHELL 0x08
138-
#define FOSSIL_SAN_BASE64 0x10
139-
#define FOSSIL_SAN_PATH 0x20
140-
#define FOSSIL_SAN_BOT 0x40
141-
#define FOSSIL_SAN_SPAM 0x80
142-
143-
144130
// Function to trim leading and trailing spaces from a string
145131
void fossil_io_trim(char *str) {
146132
if (str == NULL) return;
@@ -243,7 +229,7 @@ char *fossil_io_gets_from_stream(char *buf, size_t size, fossil_fstream_t *input
243229
}
244230

245231
/* --- sanitizer --- */
246-
int fossil_io_validate_sanitize_string_ctx(const char *input,
232+
int fossil_io_validate_sanitize_string(const char *input,
247233
char *output,
248234
size_t output_size,
249235
fossil_context_t ctx) {
@@ -268,25 +254,51 @@ int fossil_io_validate_sanitize_string_ctx(const char *input,
268254

269255
/* Suspicious patterns */
270256
const char *script_patterns[] = {
271-
"<script", "javascript:", "onerror=", "onload=", "onclick=", "eval(", NULL
257+
"<script", "javascript:", "onerror=", "onload=", "onclick=",
258+
"eval(", "document.cookie", "alert(", "src=", "iframe", "onmouseover=",
259+
"onfocus=", "onblur=", "onchange=", "oninput=", "onreset=", "onsubmit=",
260+
"onselect=", "onkeydown=", "onkeyup=", "onkeypress=", "onmousedown=",
261+
"onmouseup=", "onmousemove=", "onmouseenter=", "onmouseleave=", "onwheel=",
262+
"oncontextmenu=", "oncopy=", "oncut=", "onpaste=", "location.href",
263+
"window.open", "window.location", NULL
272264
};
273265
const char *sql_patterns[] = {
274266
"select ", "insert ", "update ", "delete ", "drop ", "union ",
275-
"--", ";--", "/*", "*/", "0x", NULL
267+
"--", ";--", "/*", "*/", "0x", "xp_", "exec ", "sp_", "information_schema",
268+
"truncate ", "alter ", "create ", "rename ", "grant ", "revoke ", "cast(",
269+
"convert(", "declare ", "fetch ", "open ", "close ", "rollback ", "commit ",
270+
"savepoint ", "release ", "begin ", "end ", NULL
276271
};
277272
const char *shell_patterns[] = {
278273
"curl ", "wget ", "rm -rf", "powershell", "cmd.exe",
279-
"exec(", "system(", "|", "&&", "||", NULL
274+
"exec(", "system(", "|", "&&", "||", "bash", "sh", "zsh", "fish", "scp ",
275+
"ssh ", "ftp ", "tftp ", "nc ", "netcat ", "nmap ", "chmod ", "chown ",
276+
"sudo ", "kill ", "pkill ", "ps ", "ls ", "cat ", "dd ", "mkfs ", "mount ",
277+
"umount ", "service ", "systemctl ", "init ", "reboot ", "shutdown ",
278+
"start ", "stop ", "restart ", NULL
280279
};
281280
const char *bot_patterns[] = {
282-
"bot", "crawler", "spider", "curl/", "python-requests", "scrapy", NULL
281+
"bot", "crawler", "spider", "curl/", "python-requests", "scrapy", "httpclient",
282+
"libwww", "wget", "java", "go-http-client", "phantomjs", "selenium", "headless",
283+
"robot", "checker", "monitor", "scan", "probe", "harvest", "grabber", "fetcher",
284+
"indexer", "parser", "api-client", "node-fetch", "axios", NULL
283285
};
284286
const char *spam_patterns[] = {
285287
"viagra", "free money", "winner", "prize", "click here",
286-
"http://", "https://", "meta refresh", NULL
288+
"http://", "https://", "meta refresh", "casino", "loan", "credit", "bitcoin",
289+
"crypto", "forex", "investment", "guaranteed", "risk-free", "unsubscribe",
290+
"buy now", "limited offer", "act now", "earn cash", "work from home", "miracle",
291+
"weight loss", "no prescription", "cheap", "discount", "deal", "promo", "bonus",
292+
"gift", "exclusive", "urgent", "clearance", "bargain", "order now", "trial",
293+
"winner!", "congratulations", "selected", "luxury", "get rich", "easy money",
294+
NULL
287295
};
288296
const char *path_patterns[] = {
289-
"../", "..\\", "/etc/passwd", "C:\\", NULL
297+
"../", "..\\", "/etc/passwd", "C:\\", "/proc/self/environ", "/proc/version",
298+
"/proc/cpuinfo", "/proc/meminfo", "/boot.ini", "/windows/", "/winnt/", "/system32/",
299+
"/sys/", "/dev/", "/bin/", "/sbin/", "/usr/", "/var/", "/tmp/", "/root/", "/home/",
300+
"/Users/", "/Documents/", "/AppData/", "/Local/", "/Roaming/", "/Program Files/",
301+
"/ProgramData/", "/Desktop/", "/Downloads/", NULL
290302
};
291303

292304
/* Scan categories */
@@ -385,34 +397,73 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
385397
// 1. Too long or too short
386398
if (len < 3 || len > 32) return 1;
387399

388-
// 2. Count digits, letters, and digit runs
400+
// 2. Count digits, letters, digit runs, and symbol runs
389401
int digit_run = 0, max_digit_run = 0, digit_count = 0, alpha_count = 0;
402+
int symbol_run = 0, max_symbol_run = 0, symbol_count = 0;
390403
for (size_t i = 0; i < len; i++) {
391404
if (isdigit((unsigned char)input[i])) {
392405
digit_run++;
393406
digit_count++;
394407
if (digit_run > max_digit_run) max_digit_run = digit_run;
408+
symbol_run = 0;
409+
} else if (isalpha((unsigned char)input[i])) {
410+
alpha_count++;
411+
digit_run = 0;
412+
symbol_run = 0;
395413
} else {
414+
symbol_run++;
415+
symbol_count++;
396416
digit_run = 0;
397-
if (isalpha((unsigned char)input[i])) alpha_count++;
417+
if (symbol_run > max_symbol_run) max_symbol_run = symbol_run;
398418
}
399419
}
400420

401-
// 3. Check for long digit runs or too few letters
421+
// 3. Check for long digit/symbol runs or too few letters
402422
if (max_digit_run >= 5) return 1; // suspicious long digit tail
403-
if ((float)digit_count / len > 0.5) return 1; // mostly digits
423+
if (max_symbol_run >= 4) return 1; // suspicious long symbol run
424+
if (digit_count >= 8) return 1; // many digits (new: covers user1234567890)
425+
if ((float)digit_count / len > 0.45) return 1; // high digit ratio (new: covers a1b2c3d4e5f6g7h8i9j0)
404426
if ((float)alpha_count / len < 0.3) return 1; // too few letters
405-
406-
// 4. Suspicious keywords
407-
const char *bad_keywords[] = {"bot", "test", "fake", "spam", "zzz", "null", "admin"};
427+
if ((float)symbol_count / len > 0.3) return 1; // too many symbols
428+
429+
// 4. Suspicious keywords and patterns
430+
const char *bad_keywords[] = {
431+
"bot", "test", "fake", "spam", "zzz", "null", "admin",
432+
"user", "guest", "demo", "temp", "unknown", "default", "root",
433+
"system", "anonymous", "trial", "sample", "password", "qwerty",
434+
"abc123", "123456", "login", "register", "support", "contact",
435+
"info", "webmaster", "help", "service", "account", "manager",
436+
"api", "sys", "operator", "mod", "moderator", "superuser",
437+
"owner", "master", "testuser", "tester", "dev", "developer",
438+
"backup", "restore", "error", "fail", "invalid", "void"
439+
};
408440
size_t nkeys = sizeof(bad_keywords) / sizeof(bad_keywords[0]);
409441
for (size_t i = 0; i < nkeys; i++) {
410442
if (fossil_io_cstring_case_search(input, bad_keywords[i]) != NULL) {
411443
return 1;
412444
}
413445
}
414446

415-
// 5. Very high entropy (simple Shannon estimate)
447+
// 5. Common suspicious patterns (repetitive, alternating, keyboard walks)
448+
int repetitive = 1, alternating = 1;
449+
for (size_t i = 1; i < len; i++) {
450+
if (input[i] != input[i - 1]) repetitive = 0;
451+
if (i > 1 && input[i] != input[i - 2]) alternating = 0;
452+
}
453+
if (repetitive || alternating) return 1;
454+
455+
// 6. Keyboard walk detection (e.g., "qwerty", "asdf", "zxcv")
456+
const char *keyboard_walks[] = {
457+
"qwerty", "asdf", "zxcv", "12345", "67890", "poiuy", "lkjhg", "mnbvc"
458+
};
459+
size_t nwalks = sizeof(keyboard_walks) / sizeof(keyboard_walks[0]);
460+
for (size_t i = 0; i < nwalks; i++) {
461+
if (fossil_io_cstring_case_search(input, keyboard_walks[i]) != NULL) {
462+
return 1;
463+
}
464+
}
465+
466+
// 7. Very high entropy (simple Shannon estimate)
416467
int freq[256] = {0};
417468
for (size_t i = 0; i < len; i++) freq[(unsigned char)input[i]]++;
418469
double entropy = 0.0;
@@ -422,7 +473,17 @@ int fossil_io_validate_is_suspicious_user(const char *input) {
422473
entropy -= p * log2(p);
423474
}
424475
}
425-
if (entropy > 4.5) return 1; // suspiciously random-like
476+
if (entropy > 4.2) return 1; // slightly lower threshold for suspicious randomness
477+
478+
// 8. Looks like an email or URL
479+
if (strchr(input, '@') || fossil_io_cstring_case_search(input, "http") != NULL) return 1;
480+
481+
// 9. Looks like a UUID or hex string
482+
size_t hex_count = 0;
483+
for (size_t i = 0; i < len; i++) {
484+
if (isxdigit((unsigned char)input[i])) hex_count++;
485+
}
486+
if (hex_count == len && len >= 16) return 1;
426487

427488
return 0; // not flagged
428489
}

code/tests/cases/test_input.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,112 @@ FOSSIL_TEST(c_test_io_clear_keybindings_removes_all) {
266266
ASSUME_ITS_EQUAL_I32(0, (int)count);
267267
}
268268

269+
/*
 * fossil_io_validate_is_weak_password must report "password123" as weak
 * when checked together with the account's user name and e-mail address.
 *
 * NOTE(review): the e-mail literal below reads "[email protected]", which
 * looks like a web e-mail-obfuscation placeholder rather than a real
 * address -- confirm the intended value against the repository.
 */
FOSSIL_TEST(c_test_io_validate_is_weak_password_bad) {
    const char *account_name = "user";
    const char *account_mail = "[email protected]";
    const char *candidate = "password123";

    const int is_weak =
        fossil_io_validate_is_weak_password(candidate, account_name, account_mail);

    ASSUME_ITS_TRUE(is_weak);
}
276+
277+
/*
 * fossil_io_validate_is_weak_password must accept "S3cure!Passw0rd"
 * (mixed case, digits and punctuation) for the given user name and e-mail.
 *
 * NOTE(review): the e-mail literal below reads "[email protected]", which
 * looks like a web e-mail-obfuscation placeholder rather than a real
 * address -- confirm the intended value against the repository.
 */
FOSSIL_TEST(c_test_io_validate_is_weak_password_good) {
    const char *account_name = "user";
    const char *account_mail = "[email protected]";
    const char *candidate = "S3cure!Passw0rd";

    const int is_weak =
        fossil_io_validate_is_weak_password(candidate, account_name, account_mail);

    ASSUME_ITS_FALSE(is_weak);
}
284+
285+
/* A Googlebot user-agent string must be classified as a bot. */
FOSSIL_TEST(c_test_io_validate_is_suspicious_bot_true) {
    const char *user_agent =
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";

    const int flagged = fossil_io_validate_is_suspicious_bot(user_agent);

    ASSUME_ITS_TRUE(flagged);
}
290+
291+
/* A plain desktop-browser user-agent string must not be classified as a bot. */
FOSSIL_TEST(c_test_io_validate_is_suspicious_bot_false) {
    const char *user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)";

    const int flagged = fossil_io_validate_is_suspicious_bot(user_agent);

    ASSUME_ITS_FALSE(flagged);
}
296+
297+
/*
 * An address on a disposable-mail domain must be reported as disposable.
 *
 * NOTE(review): the literal below reads "[email protected]", which looks
 * like a web e-mail-obfuscation placeholder, not an address. The sibling
 * *_disposable_email_false test feeds the very same literal and expects the
 * opposite result, so both cannot pass as written -- restore the intended
 * disposable-domain address from the repository.
 */
FOSSIL_TEST(c_test_io_validate_is_disposable_email_true) {
    const char *address = "[email protected]";

    const int is_disposable = fossil_io_validate_is_disposable_email(address);

    ASSUME_ITS_TRUE(is_disposable);
}
302+
303+
/*
 * An address on a regular mail domain must not be reported as disposable.
 *
 * NOTE(review): the literal below reads "[email protected]", which looks
 * like a web e-mail-obfuscation placeholder, not an address. The sibling
 * *_disposable_email_true test feeds the very same literal and expects the
 * opposite result, so both cannot pass as written -- restore the intended
 * regular-domain address from the repository.
 */
FOSSIL_TEST(c_test_io_validate_is_disposable_email_false) {
    const char *address = "[email protected]";

    const int is_disposable = fossil_io_validate_is_disposable_email(address);

    ASSUME_ITS_FALSE(is_disposable);
}
308+
309+
/* A user name made of "bot" followed by a run of six digits must be flagged. */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_true) {
    const char *user_name = "bot123456";

    const int flagged = fossil_io_validate_is_suspicious_user(user_name);

    ASSUME_ITS_TRUE(flagged);
}
314+
315+
/* An ordinary-looking user name ("john_doe") must not be flagged. */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_false) {
    const char *user_name = "john_doe";

    const int flagged = fossil_io_validate_is_suspicious_user(user_name);

    ASSUME_ITS_FALSE(flagged);
}
320+
321+
FOSSIL_TEST(c_test_io_validate_sanitize_string_script) {
322+
const char *input = "<script>alert('xss')</script>";
323+
char output[64];
324+
int flags = fossil_io_validate_sanitize_string(input, output, sizeof(output), FOSSIL_CTX_HTML);
325+
ASSUME_ITS_TRUE(flags & FOSSIL_SAN_SCRIPT);
326+
ASSUME_ITS_TRUE(flags & FOSSIL_SAN_MODIFIED);
327+
}
328+
329+
FOSSIL_TEST(c_test_io_validate_sanitize_string_sql) {
330+
const char *input = "SELECT * FROM users WHERE name='admin' --";
331+
char output[64];
332+
int flags = fossil_io_validate_sanitize_string(input, output, sizeof(output), FOSSIL_CTX_SQL);
333+
ASSUME_ITS_TRUE(flags & FOSSIL_SAN_SQL);
334+
ASSUME_ITS_TRUE(flags & FOSSIL_SAN_MODIFIED);
335+
}
336+
337+
FOSSIL_TEST(c_test_io_validate_sanitize_string_clean) {
338+
const char *input = "SafeString123";
339+
char output[64];
340+
int flags = fossil_io_validate_sanitize_string(input, output, sizeof(output), FOSSIL_CTX_GENERIC);
341+
ASSUME_ITS_EQUAL_I32(FOSSIL_SAN_OK, flags);
342+
ASSUME_ITS_EQUAL_CSTR(input, output);
343+
}
344+
345+
/*
 * Exercises the total-digit-count rule of
 * fossil_io_validate_is_suspicious_user (digit_count >= 8).
 *
 * The name holds eight digits but never more than two in a row, and it has
 * no symbols, so the earlier "digit run of 5 or more" and symbol-run rules
 * cannot fire first and mask the rule under test. The previous input,
 * "user1234567890", ended in a ten-digit run and was rejected by the run
 * rule before the digit count was ever inspected.
 */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_many_digits) {
    const char *input = "ab12cd34ef56gh78";
    int result = fossil_io_validate_is_suspicious_user(input);
    ASSUME_ITS_TRUE(result);
}
350+
351+
/*
 * Exercises the digit-ratio rule of fossil_io_validate_is_suspicious_user
 * (digit_count / len > 0.45).
 *
 * "a1b2c3d4" is 4 digits out of 8 characters (ratio 0.5) with no digit run
 * longer than one and fewer than eight digits in total, so neither the
 * digit-run rule nor the digit-count rule can return first. The previous
 * input, "a1b2c3d4e5f6g7h8i9j0", contained ten digits and was rejected by
 * the digit-count rule before the ratio was computed.
 */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_high_digit_ratio) {
    const char *input = "a1b2c3d4";
    int result = fossil_io_validate_is_suspicious_user(input);
    ASSUME_ITS_TRUE(result);
}
356+
357+
/* A user name containing the keyword "test" must be flagged. */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_contains_test) {
    const char *user_name = "testuser";

    const int flagged = fossil_io_validate_is_suspicious_user(user_name);

    ASSUME_ITS_TRUE(flagged);
}
362+
363+
/* A user name containing the keyword "fake" must be flagged. */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_contains_fake) {
    const char *user_name = "fakeaccount";

    const int flagged = fossil_io_validate_is_suspicious_user(user_name);

    ASSUME_ITS_TRUE(flagged);
}
368+
369+
/*
 * Exercises the high-entropy rule of fossil_io_validate_is_suspicious_user
 * (entropy > 4.2).
 *
 * The name is 20 distinct lowercase letters: no digits, no symbols, none of
 * the listed keywords or keyboard walks, and neither repetitive nor
 * alternating, so no earlier rule returns first. Assuming the estimate is
 * the Shannon entropy of the per-byte frequencies (as the freq[256] table
 * in the implementation suggests), 20 equally frequent characters give
 * log2(20) ~= 4.32 bits, above the threshold.
 *
 * The previous input, "x7q9z2w8v5r1t3y6u0", contained nine digits and was
 * rejected by the digit-count rule; with only 18 distinct characters its
 * entropy (log2(18) ~= 4.17) could not have exceeded 4.2 anyway.
 */
FOSSIL_TEST(c_test_io_validate_is_suspicious_user_entropy) {
    const char *input = "kqzjxwvbghpcyulnrmdf";
    int result = fossil_io_validate_is_suspicious_user(input);
    ASSUME_ITS_TRUE(result);
}
374+
269375
// * * * * * * * * * * * * * * * * * * * * * * * *
270376
// * Fossil Logic Test Pool
271377
// * * * * * * * * * * * * * * * * * * * * * * * *
@@ -290,6 +396,23 @@ FOSSIL_TEST_GROUP(c_input_tests) {
290396
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_length_invalid);
291397
FOSSIL_TEST_ADD(c_input_suite, c_test_io_getc);
292398

399+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_weak_password_bad);
400+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_weak_password_good);
401+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_bot_true);
402+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_bot_false);
403+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_disposable_email_true);
404+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_disposable_email_false);
405+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_true);
406+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_false);
407+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_sanitize_string_script);
408+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_sanitize_string_sql);
409+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_sanitize_string_clean);
410+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_many_digits);
411+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_high_digit_ratio);
412+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_contains_test);
413+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_contains_fake);
414+
FOSSIL_TEST_ADD(c_input_suite, c_test_io_validate_is_suspicious_user_entropy);
415+
293416
FOSSIL_TEST_ADD(c_input_suite, c_test_io_register_keybinding_success);
294417
FOSSIL_TEST_ADD(c_input_suite, c_test_io_register_keybinding_duplicate);
295418
FOSSIL_TEST_ADD(c_input_suite, c_test_io_process_keybinding_no_binding);

0 commit comments

Comments
 (0)