Skip to content

Commit 3707d2f

Browse files
committed
Merge branch 'gfwlist'
2 parents 65635cc + 7cbc3f6 commit 3707d2f

File tree

5 files changed

+179
-5
lines changed

5 files changed

+179
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Output
2+
gfwlist.txt
23
release-info.md
34
*.pac
45
dist/

README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,22 @@
22

33
PAC scripts for proxies
44

5+
## 特点
6+
7+
基于 IP 地址白名单设计,位于白名单中的 IP 地址走直连,白名单以外的 IP 地址走代理(暂不支持 IPv6)。
8+
9+
另有 GFWList 版本从 [gfwlist/gfwlist](https://github.com/gfwlist/gfwlist) 获取域名及 URL 列表,优先匹配列表中的黑白名单,有效防止 DNS 污染。
10+
11+
每周六 12:00 (UTC) 会自动使用 GitHub Actions 运行[生成脚本](build.py)从数据源获取 IP 地址列表并生成 PAC 文件。
12+
513
## 使用
614

715
获取方式:[本仓库的 Releases](https://github.com/iBug/pac/releases/latest)
816

9-
每周六 12:00 (UTC) 会自动使用 GitHub Actions 运行[生成脚本](build.py)从数据源获取 IP 地址列表并生成 PAC 文件。
17+
- `pac-<name>.txt` 包含从数据源 `<name>` 获取的 IP 地址列表(白名单)
18+
- `pac-gfwlist-<name>.txt` 在 IP 白名单的基础上添加了 GFWList 的匹配
1019

11-
本代码是为 Shadowsocks Windows 4.1.9 及以上版本设计的,若要在旧版本中使用请手动修改一处字符串,详情见 [shadowsocks-windows#2761](https://github.com/shadowsocks/shadowsocks-windows/issues/2761)
20+
本代码是为 Shadowsocks Windows 4.1.9 及以上版本设计的,若要在旧版本中使用(或使用其他代理软件)请手动修改代码第 4 行 `__PROXY__` 为你的代理地址,详情见 [shadowsocks-windows#2761](https://github.com/shadowsocks/shadowsocks-windows/issues/2761)
1221

1322
## 贡献
1423

build.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@
66
import requests
77
from requests.exceptions import RequestException, HTTPError
88

9+
import gfwlist
10+
911

1012
SOURCES = {
1113
'ipdeny.com': 'http://www.ipdeny.com/ipblocks/data/aggregated/cn-aggregated.zone',
1214
'17mon': 'https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt',
1315
}
1416
OUT_DIR = "dist"
1517

18+
# Stub content to disable GFWList check
19+
GFWLIST_STUB = "var DOMAINS = {};\nvar BLACKPAT = [];\nvar WHITEPAT = [];\n"
20+
1621

1722
def fetch_and_convert(src):
1823
response = requests.get(src)
@@ -36,6 +41,9 @@ def main():
3641
code = f.read()
3742
code = code.replace("@@TIME@@", now.isoformat()[:-7])
3843

44+
gfwlist_part = gfwlist.generate_pac_partial()
45+
gfwlist_stub = GFWLIST_STUB
46+
3947
os.makedirs(OUT_DIR, mode=0o755, exist_ok=True)
4048
for key in SOURCES:
4149
print(f"Generating PAC script from source {key}")
@@ -45,10 +53,19 @@ def main():
4553
continue
4654
except HTTPError:
4755
continue
56+
4857
filename = f"pac-{key}.txt"
58+
filename_gfwlist = f"pac-gfwlist-{key}.txt"
4959
with open(os.path.join(OUT_DIR, filename), "w") as f:
5060
f.write(code)
5161
f.write(data)
62+
f.write("\n")
63+
f.write(gfwlist_stub)
64+
with open(os.path.join(OUT_DIR, filename_gfwlist), "w") as f:
65+
f.write(code)
66+
f.write(data)
67+
f.write("\n")
68+
f.write(gfwlist_part)
5269

5370

5471
if __name__ == '__main__':

code.js

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
// Author: iBug <ibugone.com>
22
// Time: @@TIME@@
33

4+
var proxy = __PROXY__;
5+
var direct = "DIRECT";
6+
47
function belongsToSubnet(host, list) {
58
var ip = host.split(".").map(Number);
69
ip = 0x1000000 * ip[0] + 0x10000 * ip[1] + 0x100 * ip[2] + ip[3];
@@ -23,6 +26,40 @@ function belongsToSubnet(host, list) {
2326
return (masked ^ list[x][0]) == 0;
2427
}
2528

29+
// Return true as soon as `text` matches one of the shell-style globs in
// `patterns` (tested in order with shExpMatch); false if none match.
function hasMatchedPattern(text, patterns) {
    var index = 0;
    while (index < patterns.length) {
        if (shExpMatch(text, patterns[index])) {
            return true;
        }
        index += 1;
    }
    return false;
}
36+
37+
function checkDomainType(host) {
    // Check if a domain is blacklisted or whitelisted.
    //
    // DOMAINS is a tree keyed label by label, starting from the TLD. A node
    // is either a number (the type of that domain and everything below it)
    // or an object whose optional "@" entry holds the type of the domain
    // itself. The function returns the type of the most specific ancestor
    // found, or undefined when the host is not covered by the tree.
    var own = Object.prototype.hasOwnProperty;

    // A fully-qualified name ("example.com.") would otherwise yield an empty
    // first label and never match anything in the tree.
    var name = host.charAt(host.length - 1) === "." ? host.slice(0, -1) : host;
    var segments = name.split(".").reverse();
    var ptr = DOMAINS;
    var type = DOMAINS["@"];
    for (var i = 0; i < segments.length; i++) {
        var segment = segments[i];
        // Only follow the node's own keys: a plain `ptr[segment]` lookup also
        // sees inherited members, so a host such as "length.constructor"
        // would resolve to Object.length (1) and be reported as whitelisted.
        if (!own.call(ptr, segment))
            break;
        ptr = ptr[segment];
        if (typeof ptr === "number")
            return ptr;
        if (own.call(ptr, "@"))
            type = ptr["@"];
    }
    return type;
}
54+
55+
// True when `url` matches any glob in WHITEPAT.
function hasWhitelistedPattern(url) {
    var whitelisted = hasMatchedPattern(url, WHITEPAT);
    return whitelisted;
}
58+
59+
// True when `url` matches any glob in BLACKPAT.
function hasBlacklistedPattern(url) {
    var blacklisted = hasMatchedPattern(url, BLACKPAT);
    return blacklisted;
}
62+
2663
// True when `host` falls inside one of the subnets listed in CHINA.
function isChina(host) {
    var inChina = belongsToSubnet(host, CHINA);
    return inChina;
}
@@ -31,10 +68,21 @@ function isLan(host) {
3168
return belongsToSubnet(host, LAN);
3269
}
3370

34-
var proxy = __PROXY__;
35-
var direct = "DIRECT";
36-
3771
function FindProxyForURL(url, host) {
72+
if (hasWhitelistedPattern(url)) {
73+
return direct;
74+
}
75+
if (hasBlacklistedPattern(url)) {
76+
return proxy;
77+
}
78+
var domainType = checkDomainType(host);
79+
if (domainType === 0) {
80+
return proxy;
81+
} else if (domainType === 1) {
82+
return direct;
83+
}
84+
85+
// Fallback to IP whitelist
3886
var remote = dnsResolve(host);
3987
if (!remote || remote.indexOf(":") !== -1) {
4088
// resolution failed or is IPv6 addr

gfwlist.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#!/usr/bin/python3
2+
3+
import os
4+
import base64
5+
import json
6+
import urllib.parse
7+
import requests
8+
9+
10+
GFWLIST_FILE = "gfwlist.txt"
11+
GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
12+
13+
14+
def get_gfwlist():
    """Return the decoded, plain-text GFWList rules.

    A local copy at ``GFWLIST_FILE`` is used when it exists; otherwise the
    list is downloaded from ``GFWLIST_URL``. In both cases the payload is
    Base64 text, which is decoded as UTF-8 with trailing newlines removed.

    Raises:
        requests.exceptions.RequestException: the download failed, timed
            out, or returned an HTTP error status.
    """
    if os.path.isfile(GFWLIST_FILE):
        # Pin the encoding so reading does not depend on the platform locale.
        with open(GFWLIST_FILE, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        # Without a timeout a stalled connection would hang the build forever.
        r = requests.get(GFWLIST_URL, timeout=30)
        r.raise_for_status()
        text = r.text
    return base64.b64decode(text).decode("utf-8").rstrip("\n")
23+
24+
25+
def update_domains(domains, host, mode=0):
    """Record *host* with type *mode* in the nested domain tree *domains*.

    The tree is keyed label by label starting from the TLD, so
    ``www.example.com`` lives at ``domains["com"]["example"]["www"]``. The
    type of a node is stored under its ``"@"`` key. *domains* is modified in
    place and nothing is returned.

    Args:
        domains: Root dict of the tree.
        host: Domain name; surrounding whitespace and leading/trailing dots
            are ignored and the name is lower-cased.
        mode: Value stored for the domain (defaults to 0).
    """
    # Hostnames are case-insensitive, so store them in one canonical form.
    host = host.strip().strip(".").lower()
    if not host:
        # An empty name would create a meaningless "" node in the tree.
        return

    node = domains
    for segment in reversed(host.split(".")):
        node = node.setdefault(segment, {})
    node["@"] = mode
34+
35+
36+
def postproc_domains(domains):
    """Collapse leaf nodes of the domain tree in place to save some text.

    Every child that consists solely of a type marker (``{"@": 1}``) is
    replaced by the bare value (``1``); other children are processed
    recursively. Running the function again on an already-processed tree is
    a no-op.

    Args:
        domains: Dict node of the domain tree; modified in place.
    """
    # Iterate over a snapshot because entries are reassigned while looping.
    for key, node in list(domains.items()):
        if key == "@" or not isinstance(node, dict):
            # "@" is this node's own marker; a non-dict child has already
            # been collapsed to its bare value.
            continue
        if len(node) == 1 and "@" in node:
            domains[key] = node["@"]
        else:
            postproc_domains(node)
47+
48+
49+
def parse_gfwlist(text):
    """Parse decoded GFWList text into a domain tree and URL glob patterns.

    Rules prefixed with ``@@`` are whitelist rules (mode 1); all others are
    blacklist rules (mode 0). ``||domain`` rules go into the domain tree,
    ``/regex/`` rules are ignored, and every remaining rule becomes a glob
    pattern in which a single ``|`` at either end anchors that end and a
    missing ``|`` is replaced by a ``*`` wildcard.

    Args:
        text: The plain-text rule list, one rule per line.

    Returns:
        A tuple ``(domains, blackpat, whitepat)``: the post-processed domain
        tree, the blacklisted patterns and the whitelisted patterns.
    """
    domains = {}
    blackpat = []  # blacklisted patterns
    whitepat = []  # whitelisted patterns

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("!"):
            continue  # ignore comments and empty lines
        if line.startswith("[") and line.endswith("]"):
            # Header such as "[AutoProxy 0.2.9]". Recognising it by shape,
            # rather than dropping line 1 blindly, keeps the first rule of a
            # header-less list.
            continue

        mode = 0  # default to blacklist
        if line.startswith("@@"):
            mode = 1  # now it's whitelist
            line = line[2:]

        if line.startswith("||"):
            # domain prefix
            host = line[2:]
            if host.strip("."):
                update_domains(domains, host, mode)
        elif line.startswith("/"):
            # regex, can't handle yet
            continue
        else:
            # Keyword pattern
            # Single vertical line at either side means string boundary
            if line.startswith("|"):
                line = line[1:]
                head = ""
            else:
                head = "*"
            if line.endswith("|"):
                line = line[:-1]
                tail = ""
            else:
                tail = "*"
            if not line:
                # A bare "|" or "@@" would become a match-everything glob.
                continue
            target = whitepat if mode else blackpat
            target.append(head + line + tail)
    postproc_domains(domains)
    return domains, blackpat, whitepat
86+
87+
88+
def generate_pac_partial():
    """Return JavaScript source that defines the GFWList lookup tables.

    The rule list is fetched and parsed, then each of the three results is
    serialised as indented JSON and assigned to the ``DOMAINS``,
    ``BLACKPAT`` and ``WHITEPAT`` variables respectively.
    """
    template = "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n"
    rules_text = get_gfwlist()
    domains, blackpat, whitepat = parse_gfwlist(rules_text)
    rendered = [
        json.dumps(table, indent=2)
        for table in (domains, blackpat, whitepat)
    ]
    return template.format(*rendered)
96+
97+
98+
if __name__ == '__main__':
99+
print(generate_pac_partial())

0 commit comments

Comments
 (0)