|
| 1 | +"""Fetch Chinese Buddhist temples from Amap POI API — v2: search by city for full coverage. |
| 2 | +
|
| 3 | +V1 searched by province and hit 500-result caps in populous provinces. |
| 4 | +V2 searches by prefecture-level city (~340 cities) to avoid truncation. |
| 5 | +
|
| 6 | +Output: data/amap_temples_v2.json |
| 7 | +""" |
import json
import math
import os
import time
import urllib.parse
import urllib.request
| 13 | + |
# NOTE(review): API key is hardcoded and committed to source — rotate it and
# load from an environment variable / secrets store before publishing.
AMAP_KEY = "7971e9b134c4684c3b43b6e442475d0e"
# Amap (Gaode) web-service endpoints: keyword place search and district tree.
AMAP_URL = "https://restapi.amap.com/v3/place/text"
AMAP_DISTRICT_URL = "https://restapi.amap.com/v3/config/district"
USER_AGENT = "FoJinBot/1.0"
OUTPUT = "data/amap_temples_v2.json"

# Buddhist-temple search terms (temple, nunnery, Zen temple, Buddhist temple,
# Buddhism, vihara, Buddha hall). Each keyword is searched per city.
KEYWORDS = ["寺", "庵", "禅寺", "佛寺", "佛教", "精舍", "佛堂"]

# A POI whose name contains any of these substrings is discarded: other
# religions' sites (mosques, churches, Taoist temples), folk shrines
# (Guandi, Mazu, city-god, Confucius temples), and unrelated venues
# (funeral homes, hotels, parking, malls, ...) that match the broad keywords.
SKIP_WORDS = ["清真", "教堂", "基督", "天主", "道观", "道教", "伊斯兰",
              "关帝", "妈祖", "城隍", "土地庙", "孔庙", "文庙",
              "殡仪", "墓", "陵园", "酒店", "宾馆", "饭店", "餐厅",
              "停车", "公厕", "超市", "药店", "医院", "学校",
              "公园", "广场", "商场", "写字楼", "小区", "花园"]
| 27 | + |
| 28 | + |
| 29 | +def _transform_lat(x, y): |
| 30 | + ret = -100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y + 0.2 * math.sqrt(abs(x)) |
| 31 | + ret += (20.0 * math.sin(6.0 * x * math.pi) + 20.0 * math.sin(2.0 * x * math.pi)) * 2.0 / 3.0 |
| 32 | + ret += (20.0 * math.sin(y * math.pi) + 40.0 * math.sin(y / 3.0 * math.pi)) * 2.0 / 3.0 |
| 33 | + ret += (160.0 * math.sin(y / 12.0 * math.pi) + 320.0 * math.sin(y * math.pi / 30.0)) * 2.0 / 3.0 |
| 34 | + return ret |
| 35 | + |
| 36 | + |
| 37 | +def _transform_lon(x, y): |
| 38 | + ret = 300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y + 0.1 * math.sqrt(abs(x)) |
| 39 | + ret += (20.0 * math.sin(6.0 * x * math.pi) + 20.0 * math.sin(2.0 * x * math.pi)) * 2.0 / 3.0 |
| 40 | + ret += (20.0 * math.sin(x * math.pi) + 40.0 * math.sin(x / 3.0 * math.pi)) * 2.0 / 3.0 |
| 41 | + ret += (150.0 * math.sin(x / 12.0 * math.pi) + 300.0 * math.sin(x / 30.0 * math.pi)) * 2.0 / 3.0 |
| 42 | + return ret |
| 43 | + |
| 44 | + |
| 45 | +def gcj02_to_wgs84(lng, lat): |
| 46 | + a = 6378245.0 |
| 47 | + ee = 0.00669342162296594323 |
| 48 | + dlat = _transform_lat(lng - 105.0, lat - 35.0) |
| 49 | + dlng = _transform_lon(lng - 105.0, lat - 35.0) |
| 50 | + radlat = lat / 180.0 * math.pi |
| 51 | + magic = math.sin(radlat) |
| 52 | + magic = 1 - ee * magic * magic |
| 53 | + sqrtmagic = math.sqrt(magic) |
| 54 | + dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * math.pi) |
| 55 | + dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * math.pi) |
| 56 | + return lng - dlng, lat - dlat |
| 57 | + |
| 58 | + |
def get_cities() -> list[dict]:
    """Fetch all prefecture-level cities from the Amap district API.

    Queries the district tree for 中国 with two subdistrict levels
    (country -> province -> city) and flattens it into records of the form
    {"adcode": ..., "name": ..., "province": ...}.

    Raises:
        RuntimeError: if the API reports a non-success status (bad key,
            quota exhausted, ...) — previously this failed silently and the
            caller proceeded with an empty city list.
    """
    params = urllib.parse.urlencode({
        "key": AMAP_KEY,
        "keywords": "中国",
        "subdistrict": 2,
        "extensions": "base",
    })
    url = f"{AMAP_DISTRICT_URL}?{params}"
    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(req, timeout=30) as resp:
        data = json.loads(resp.read())

    # Amap signals success with status == "1"; fail loudly on anything else.
    if data.get("status") != "1":
        raise RuntimeError(f"Amap district API error: {data.get('info', 'unknown')}")

    # Guard: "districts" present but empty would make a bare [0] raise IndexError.
    countries = data.get("districts") or [{}]
    cities = []
    for province in countries[0].get("districts", []):
        pname = province.get("name", "")
        sub_cities = province.get("districts", [])
        for city in sub_cities:
            cities.append({
                "adcode": city.get("adcode", ""),
                "name": city.get("name", ""),
                "province": pname,
            })
        # Direct-administered municipalities have no city level in the tree:
        # the province entry itself stands in as the city.
        if not sub_cities:
            cities.append({
                "adcode": province.get("adcode", ""),
                "name": pname,
                "province": pname,
            })
    return cities
| 89 | + |
| 90 | + |
def amap_search(keyword: str, city_code: str, page: int = 1) -> dict:
    """Run one Amap place-text search restricted to a single city.

    Returns the parsed JSON response as a dict.
    """
    query = {
        "key": AMAP_KEY,
        "keywords": keyword,
        "city": city_code,
        "citylimit": "true",   # do not spill results into neighbouring cities
        "offset": 25,          # results per page
        "page": page,
        "output": "json",
        "extensions": "base",
    }
    request = urllib.request.Request(
        f"{AMAP_URL}?{urllib.parse.urlencode(query)}",
        headers={"User-Agent": USER_AGENT},
    )
    with urllib.request.urlopen(request, timeout=15) as response:
        return json.loads(response.read())
| 106 | + |
| 107 | + |
def _poi_record(poi: dict, city: dict):
    """Convert one raw Amap POI into an output record, or return None to skip it.

    Skips POIs missing id/name/location, names matching SKIP_WORDS, and
    unparseable coordinates. Coordinates are converted from GCJ-02 to WGS-84.
    """
    pid = poi.get("id", "")
    name = poi.get("name", "")
    location = poi.get("location", "")
    if not pid or not name or not location:
        return None
    if any(w in name for w in SKIP_WORDS):
        return None
    try:
        lng_gcj, lat_gcj = (float(x) for x in location.split(","))
        lng_wgs, lat_wgs = gcj02_to_wgs84(lng_gcj, lat_gcj)
    except (ValueError, IndexError):
        return None
    return {
        "amap_id": pid,
        "name": name,
        "latitude": round(lat_wgs, 7),
        "longitude": round(lng_wgs, 7),
        "address": poi.get("address", ""),
        # Prefer the POI's own admin fields; fall back to the searched city.
        "province": poi.get("pname", city["province"]),
        "city": poi.get("cityname", city["name"]),
        "district": poi.get("adname", ""),
        "type": poi.get("type", ""),
        "typecode": poi.get("typecode", ""),
    }


def main():
    """Harvest temple POIs city-by-city and write them to OUTPUT as JSON."""
    print("Fetching city list...")
    cities = get_cities()
    print(f"Got {len(cities)} cities")

    all_pois: dict[str, dict] = {}  # keyed by Amap POI id for dedup across keywords/cities
    total_requests = 0

    for ci, city in enumerate(cities):
        city_before = len(all_pois)
        for keyword in KEYWORDS:
            page = 1
            # Amap text search caps results at 500 per query (20 pages x 25).
            while page <= 20:
                try:
                    data = amap_search(keyword, city["adcode"], page)
                    total_requests += 1
                    time.sleep(0.25)  # stay under the API rate limit
                except Exception as e:
                    # Best-effort: log, back off, move to the next keyword.
                    print(f"  ERR {city['name']}/{keyword}: {e}")
                    time.sleep(2)
                    break

                if data.get("status") != "1":
                    break

                pois = data.get("pois", [])
                if not pois:
                    break

                for poi in pois:
                    record = _poi_record(poi, city)
                    if record is not None and record["amap_id"] not in all_pois:
                        all_pois[record["amap_id"]] = record

                # Stop once the reported total is exhausted or the 500-cap hit.
                # "or 0" guards against an empty-string count crashing int().
                count = int(data.get("count", 0) or 0)
                if page * 25 >= count or page * 25 >= 500:
                    break
                page += 1

        city_new = len(all_pois) - city_before
        if (ci + 1) % 20 == 0 or ci == len(cities) - 1:
            print(f"[{ci+1}/{len(cities)}] {city['province']}/{city['name']}: +{city_new} (total: {len(all_pois)}, reqs: {total_requests})")

    # Save — create the output directory first so a multi-hour harvest is not
    # lost to a FileNotFoundError at the very end.
    os.makedirs(os.path.dirname(OUTPUT) or ".", exist_ok=True)
    result = list(all_pois.values())
    with open(OUTPUT, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"\n{'='*60}")
    print(f"Total: {len(result)} POIs, {total_requests} API requests")
    print(f"Saved to {OUTPUT}")
    print(f"{'='*60}")
| 183 | + |
| 184 | + |
| 185 | +if __name__ == "__main__": |
| 186 | + main() |
0 commit comments