|
| 1 | +#!/usr/bin/python3 |
| 2 | + |
| 3 | +import os |
| 4 | +import base64 |
| 5 | +import json |
| 6 | +import urllib.parse |
| 7 | +import requests |
| 8 | + |
| 9 | + |
# Local copy of the base64-encoded list; when this file exists it is read
# instead of downloading.
GFWLIST_FILE = "gfwlist.txt"
# Location the base64-encoded list is downloaded from when no local copy exists.
GFWLIST_URL = "https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt"
| 12 | + |
| 13 | + |
def get_gfwlist(timeout=30):
    """Return the decoded GFWList rule text.

    The base64 payload is read from ``GFWLIST_FILE`` when that file exists;
    otherwise it is downloaded from ``GFWLIST_URL``.

    Args:
        timeout: Seconds to wait for the download before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        str: The base64-decoded payload, decoded as UTF-8, with trailing
        newlines removed.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an HTTP error status.
        ValueError: If the payload is not valid base64 / UTF-8.
    """
    if os.path.isfile(GFWLIST_FILE):
        # Explicit encoding so the read does not depend on the locale.
        with open(GFWLIST_FILE, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        r = requests.get(GFWLIST_URL, timeout=timeout)
        r.raise_for_status()
        text = r.text
    return base64.b64decode(text).decode("utf-8").rstrip("\n")
| 23 | + |
| 24 | + |
def update_domains(domains, host, mode=0):
    """Record *host* in the nested *domains* dict, tagged with *mode*.

    Leading and trailing dots are stripped from *host*, which is then split
    on "." and walked from the last label to the first, creating nested
    dicts as needed. The "@" key of the final node is set to *mode*
    (the parser in this file passes 0 for blacklist, 1 for whitelist).

    Args:
        domains: Root dict of the label tree; modified in place.
        host: Dotted host name.
        mode: Value stored under the final node's "@" key.
    """
    node = domains
    for label in reversed(host.strip(".").split(".")):
        # Reuse the existing child for this label, or create an empty one.
        node = node.setdefault(label, {})
    node["@"] = mode
| 34 | + |
| 35 | + |
def postproc_domains(domains):
    """Collapse marker-only nodes of the domain tree in place.

    Every child dict that contains nothing but the "@" key is replaced by
    the value stored under "@" (e.g. ``{"@": 1}`` becomes ``1``), which
    shortens the serialized output. Children with further entries are
    processed recursively. The "@" entry of *domains* itself is left alone.

    Args:
        domains: Nested dict as built by ``update_domains``; modified in
            place.
    """
    # Iterate over a snapshot because values are reassigned during the walk.
    for label, child in list(domains.items()):
        if label == "@":
            continue
        if len(child) == 1 and "@" in child:
            domains[label] = child["@"]
        else:
            postproc_domains(child)
| 47 | + |
| 48 | + |
def parse_gfwlist(text):
    """Parse decoded GFWList text into a domain tree and keyword patterns.

    The first line of *text* is always skipped (presumably the list's header
    line). Blank lines and lines starting with "!" are ignored. A leading
    "@@" marks a rule as whitelist (mode 1); otherwise it is blacklist
    (mode 0). Rules are then handled by prefix:

    * "||"  -> the remainder is recorded in the domain tree.
    * "/"   -> treated as a regex rule and ignored (not supported yet).
    * other -> keyword pattern. A single "|" at either end marks a string
      boundary and is removed; an end without one gets a "*" wildcard.

    Args:
        text: The decoded list, one rule per line.

    Returns:
        tuple: ``(domains, blackpat, whitepat)`` - the post-processed domain
        tree, the blacklisted patterns and the whitelisted patterns.
    """
    domains = {}
    blackpat = []  # blacklisted patterns
    whitepat = []  # whitelisted patterns

    for rule in text.splitlines()[1:]:
        # Comments and empty lines carry no rule.
        if rule.startswith("!") or not rule.strip():
            continue

        # Blacklist (0) unless the rule carries the "@@" whitelist prefix.
        mode = 0
        if rule.startswith("@@"):
            mode = 1
            rule = rule[2:]

        if rule.startswith("||"):
            # Domain prefix rule.
            update_domains(domains, rule[2:], mode)
            continue
        if rule.startswith("/"):
            # Regex rule, can't handle yet.
            continue

        # Keyword pattern: resolve the leading end first, then the trailing
        # end of the already-adjusted string (the order matters for "|").
        pattern = rule[1:] if rule.startswith("|") else "*" + rule
        pattern = pattern[:-1] if pattern.endswith("|") else pattern + "*"
        target = blackpat if mode == 0 else whitepat
        target.append(pattern)

    postproc_domains(domains)
    return domains, blackpat, whitepat
| 86 | + |
| 87 | + |
def generate_pac_partial():
    """Build the JavaScript variable declarations derived from GFWList.

    Returns:
        str: Three ``var`` statements (``DOMAINS``, ``BLACKPAT``,
        ``WHITEPAT``) whose values are the parsed structures serialized as
        JSON with a two-space indent.
    """
    template = "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n"
    domains, blackpat, whitepat = parse_gfwlist(get_gfwlist())
    rendered = [
        json.dumps(part, indent=2) for part in (domains, blackpat, whitepat)
    ]
    return template.format(*rendered)
| 96 | + |
| 97 | + |
# When run as a script, write the generated PAC fragment to standard output.
if __name__ == "__main__":
    print(generate_pac_partial())
0 commit comments