#define _GNU_SOURCE
#include <arpa/inet.h>
#include <dlfcn.h>
#include <errno.h>
#include <netdb.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Pointers to the next definitions of connect()/getaddrinfo() in the link
 * chain. They start out NULL and are filled in on first use via
 * dlsym(RTLD_NEXT, ...). */
static int (*real_connect)(int, const struct sockaddr *, socklen_t) = NULL;
static int (*real_getaddrinfo)(const char *, const char *,
                               const struct addrinfo *, struct addrinfo **) = NULL;

/* Per-thread copy of the most recent non-NULL node name handed to
 * getaddrinfo(), truncated to fit the buffer. */
static __thread char last_resolved_host[256] = {0};

/* Per-thread flag: 1 once a host name has been checked and allowed,
 * 0 otherwise. */
static __thread int last_host_checked = 0;

/**
 * Decide whether `target` is permitted by a comma-separated rule list.
 *
 * Every rule is trimmed of surrounding spaces/tabs and compiled as a POSIX
 * extended regular expression (case-insensitive, unanchored). A rule that
 * starts with "!=" is a deny rule: if its pattern (the text after "!=")
 * matches, the target is rejected immediately, regardless of any allow rule
 * seen before or after it. Any other rule is an allow rule. Rules that fail
 * to compile are reported on stderr and ignored; empty rules are skipped.
 *
 * @param target  Text to test (host name or textual IP). NULL is denied.
 * @param env_val Rule list. NULL or empty means "no rules" and denies.
 * @return 1 if at least one allow rule matched and no deny rule matched,
 *         0 otherwise (including on allocation failure).
 */
static int is_allowed_by_env(const char *target, const char *env_val) {
    if (!target) {
        return 0;
    }
    if (!env_val || !*env_val) {
        fprintf(stderr, "[sandbox] ❌ No allow rules set — deny all by default\n");
        return 0;
    }

    /* Scratch buffer for one NUL-terminated rule; no single rule can be
     * longer than the whole list. Scanning env_val directly avoids strtok()
     * and its hidden global state. */
    char *rule = malloc(strlen(env_val) + 1);
    if (!rule) {
        /* Fail closed: without the rules nothing can be proven allowed. */
        fprintf(stderr, "[sandbox] ❌ Out of memory while parsing rules — deny\n");
        return 0;
    }

    int allowed = 0;
    int denied = 0;
    const char *cursor = env_val;

    while (!denied) {
        const char *comma = strchr(cursor, ',');
        const char *begin = cursor;
        const char *end = comma ? comma : cursor + strlen(cursor);

        /* Trim spaces and tabs on both sides of the rule. */
        while (begin < end && (*begin == ' ' || *begin == '\t')) {
            begin++;
        }
        while (end > begin && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }

        if (end > begin) {
            size_t len = (size_t)(end - begin);
            memcpy(rule, begin, len);
            rule[len] = '\0';

            int is_deny = strncmp(rule, "!=", 2) == 0;
            const char *pattern = is_deny ? rule + 2 : rule;

            regex_t regex;
            if (regcomp(&regex, pattern, REG_EXTENDED | REG_NOSUB | REG_ICASE) != 0) {
                fprintf(stderr, "[sandbox] ⚠️ Invalid regex ignored: %s\n", pattern);
            } else {
                int hit = regexec(&regex, target, 0, NULL, 0) == 0;
                regfree(&regex);
                if (hit && is_deny) {
                    fprintf(stderr, "[sandbox] ❌ Deny %s (matched deny /%s/)\n", target, pattern);
                    denied = 1; /* a deny match overrides every allow rule */
                } else if (hit) {
                    allowed = 1; /* keep scanning: a later deny rule may still veto */
                }
            }
        }

        if (!comma) {
            break;
        }
        cursor = comma + 1;
    }

    free(rule);
    return denied ? 0 : allowed;
}

/**
 * Check `host` against the rule list stored in the environment variable
 * SANDBOX_ALLOW_HOSTS_REGEXES.
 *
 * The variable is read on every call, so the decision always reflects the
 * current environment.
 *
 * @param host Host name or textual IP address to test.
 * @return Whatever is_allowed_by_env() returns for `host` and the variable's
 *         current value (which may be NULL when the variable is unset).
 */
static int check_host(const char *host) {
    const char *env = getenv("SANDBOX_ALLOW_HOSTS_REGEXES");
    return is_allowed_by_env(host, env);
}

/**
 * Interposed getaddrinfo(): vets host names before they are resolved.
 *
 * For a non-NULL `node` the name is remembered in last_resolved_host and
 * last_host_checked is cleared. If `node` is not an IPv4/IPv6 literal it is
 * passed to check_host(); a rejected name fails the lookup with EAI_FAIL,
 * an accepted one sets last_host_checked to 1. IP literals are not checked
 * here and are left to the connect() hook. A NULL `node` leaves the
 * per-thread state untouched.
 *
 * @return EAI_FAIL for a blocked name, EAI_SYSTEM (errno = ENOSYS) if the
 *         next getaddrinfo() cannot be located, otherwise the result of the
 *         next getaddrinfo() in the link chain.
 */
int getaddrinfo(const char *node, const char *service,
                const struct addrinfo *hints, struct addrinfo **res) {
    if (!real_getaddrinfo) {
        real_getaddrinfo = dlsym(RTLD_NEXT, "getaddrinfo");
        if (!real_getaddrinfo) {
            /* Never call through a NULL pointer; report a system error. */
            fprintf(stderr, "[sandbox] ❌ Cannot resolve real getaddrinfo()\n");
            errno = ENOSYS;
            return EAI_SYSTEM;
        }
    }

    if (node) {
        /* snprintf always NUL-terminates and truncates over-long names. */
        snprintf(last_resolved_host, sizeof last_resolved_host, "%s", node);
        last_host_checked = 0;

        /* IP literals skip the name check. */
        struct in_addr ipv4;
        struct in6_addr ipv6;
        int is_ip = inet_pton(AF_INET, node, &ipv4) == 1 ||
                    inet_pton(AF_INET6, node, &ipv6) == 1;

        if (!is_ip) {
            if (!check_host(node)) {
                fprintf(stderr, "[sandbox] 🚫 Blocked DNS lookup for %s\n", node);
                return EAI_FAIL;
            }
            last_host_checked = 1; /* checked and allowed */
        }
    }

    return real_getaddrinfo(node, service, hints, res);
}

101+ /** 拦截 connect() — 检查 IP(仅当没检查过域名) */
102+ int connect (int sockfd , const struct sockaddr * addr , socklen_t addrlen ) {
103+ if (!real_connect )
104+ real_connect = dlsym (RTLD_NEXT , "connect" );
65105
66106 char ip [INET6_ADDRSTRLEN ] = {0 };
67- if (addr -> sa_family == AF_INET ) {
107+
108+ if (addr -> sa_family == AF_INET )
68109 inet_ntop (AF_INET , & ((struct sockaddr_in * )addr )-> sin_addr , ip , sizeof (ip ));
69- } else if (addr -> sa_family == AF_INET6 ) {
110+ else if (addr -> sa_family == AF_INET6 )
70111 inet_ntop (AF_INET6 , & ((struct sockaddr_in6 * )addr )-> sin6_addr , ip , sizeof (ip ));
112+
113+ // 如果域名已经检查通过,则跳过 IP 检查
114+ if (last_host_checked ) {
115+ return real_connect (sockfd , addr , addrlen );
71116 }
72117
73- if (match_env_patterns (ip , banned_env )) {
74- fprintf (stderr , "Access to host %s is banned for sandbox\n" , ip );
118+ // 没有检查过域名(可能是 IP 直连,如 curl)
119+ if (!check_host (ip )) {
120+ fprintf (stderr , "[sandbox] 🚫 Blocked connect to %s (no domain check)\n" , ip );
75121 return -1 ;
76122 }
77123
78124 return real_connect (sockfd , addr , addrlen );
79125}
80-
81- /**
82- * 拦截 getaddrinfo() —— 屏蔽域名解析
83- */
84- int getaddrinfo (const char * node , const char * service ,
85- const struct addrinfo * hints , struct addrinfo * * res ) {
86- static int (* real_getaddrinfo )(const char * , const char * ,
87- const struct addrinfo * , struct addrinfo * * ) = NULL ;
88- if (!real_getaddrinfo )
89- real_getaddrinfo = dlsym (RTLD_NEXT , "getaddrinfo" );
90-
91- const char * banned_env = getenv (ENV_NAME );
92- if (banned_env && node && match_env_patterns (node , banned_env )) {
93- fprintf (stderr , "Access to host %s is banned for sandbox\n" , node );
94- return EAI_FAIL ; // 模拟 DNS 失败
95- }
96-
97- return real_getaddrinfo (node , service , hints , res );
98- }
0 commit comments