Skip to content

Commit 3bb28cd

Browse files
authored
Merge pull request #20 from Kanahiro/async
fix: migrate to httpx asyncio
2 parents: 64275aa + 7bcb1f0; commit: 3bb28cd

File tree

3 files changed: 212 additions (+212) and 128 deletions (-128)

pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,12 @@ version = "0.4.3"
44
description = "Tile download utility - easily download xyz-tile data"
55
readme = "README.md"
66
requires-python = ">= 3.14"
7-
dependencies = ["shapely>=2.0.3", "tiletanic>=1.1.0", "pyproj>=3.6.1"]
7+
dependencies = [
8+
"shapely>=2.0.3",
9+
"tiletanic>=1.1.0",
10+
"pyproj>=3.6.1",
11+
"httpx>=0.28",
12+
]
813

914
[dependency-groups]
1015
dev = ["ruff>=0.3.7", "ty>=0.0.13"]

tileget/__main__.py

Lines changed: 146 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,110 @@
1+
import asyncio
12
import os
23
import sqlite3
3-
import time
4-
import urllib.request
54

5+
import httpx
66
import tiletanic
77

88
from tileget.arg import parse_arg
99

1010

11-
def fetch_data(url: str, timeout: int = 5000) -> bytes:
11+
async def fetch_data(
12+
client: httpx.AsyncClient, url: str, timeout: int = 5000
13+
) -> bytes | None:
1214
print("downloading: " + url)
13-
data = None
14-
while True:
15-
try:
16-
data = urllib.request.urlopen(url, timeout=timeout / 1000)
17-
break
18-
except urllib.error.HTTPError as e:
19-
raise Exception(str(e) + ":" + url)
20-
except Exception as e:
21-
if (
22-
str(e.args)
23-
== "(timeout('_ssl.c:1091: The handshake operation timed out'),)"
24-
):
25-
print("timeout, retrying... :" + url)
26-
else:
27-
raise Exception(str(e) + ":" + url)
15+
try:
16+
response = await client.get(url, timeout=timeout / 1000)
17+
response.raise_for_status()
18+
return response.content
19+
except httpx.HTTPStatusError as e:
20+
print(f"{e.response.status_code}: {url}")
21+
return None
22+
except httpx.TimeoutException:
23+
print(f"timeout: {url}")
24+
return None
25+
except Exception as e:
26+
print(f"{e}: {url}")
27+
return None
28+
29+
30+
async def download_dir(
31+
client: httpx.AsyncClient,
32+
semaphore: asyncio.Semaphore,
33+
tile: tiletanic.Tile,
34+
tileurl: str,
35+
output_path: str,
36+
timeout: int = 5000,
37+
overwrite: bool = False,
38+
):
39+
async with semaphore:
40+
ext = os.path.splitext(tileurl.split("?")[0])[-1]
41+
42+
write_dir = os.path.join(output_path, str(tile.z), str(tile.x))
43+
write_filepath = os.path.join(write_dir, str(tile.y) + ext)
44+
45+
if os.path.exists(write_filepath) and not overwrite:
46+
return
47+
48+
url = (
49+
tileurl.replace(r"{x}", str(tile.x))
50+
.replace(r"{y}", str(tile.y))
51+
.replace(r"{z}", str(tile.z))
52+
)
53+
54+
data = await fetch_data(client, url, timeout)
55+
if data is None:
56+
return
57+
58+
os.makedirs(write_dir, exist_ok=True)
59+
with open(write_filepath, mode="wb") as f:
60+
f.write(data)
61+
62+
63+
async def download_mbtiles(
64+
client: httpx.AsyncClient,
65+
semaphore: asyncio.Semaphore,
66+
conn: sqlite3.Connection,
67+
tile: tiletanic.Tile,
68+
tileurl: str,
69+
timeout: int = 5000,
70+
overwrite: bool = False,
71+
tms: bool = False,
72+
):
73+
async with semaphore:
74+
if tms:
75+
ty = tile.y
76+
else:
77+
ty = (1 << tile.z) - 1 - tile.y
78+
79+
c = conn.cursor()
80+
c.execute(
81+
"SELECT tile_data FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
82+
(tile.z, tile.x, ty),
83+
)
84+
if c.fetchone() is not None and not overwrite:
85+
return
86+
87+
url = (
88+
tileurl.replace(r"{x}", str(tile.x))
89+
.replace(r"{y}", str(tile.y))
90+
.replace(r"{z}", str(tile.z))
91+
)
2892

29-
return data.read()
93+
data = await fetch_data(client, url, timeout)
94+
if data is None:
95+
return
96+
97+
if overwrite:
98+
c.execute(
99+
"DELETE FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
100+
(tile.z, tile.x, ty),
101+
)
102+
103+
c.execute(
104+
"INSERT INTO tiles (zoom_level, tile_column, tile_row, tile_data) VALUES (?, ?, ?, ?)",
105+
(tile.z, tile.x, ty, data),
106+
)
107+
conn.commit()
30108

31109

32110
def create_mbtiles(output_file: str):
@@ -62,108 +140,19 @@ def create_mbtiles(output_file: str):
62140
return output_file
63141

64142

65-
def download_dir(
66-
tile: tiletanic.Tile,
67-
tileurl: str,
68-
output_path: str,
69-
timeout: int = 5000,
70-
overwrite: bool = False,
71-
):
72-
# detect file extension from tileurl
73-
# tileurl = https://path/to/{z}/{x}/{y}.ext?foo=bar...&hoge=fuga.json
74-
ext = os.path.splitext(tileurl.split("?")[0])[-1]
75-
76-
write_dir = os.path.join(output_path, str(tile.z), str(tile.x))
77-
write_filepath = os.path.join(write_dir, str(tile.y) + ext)
78-
79-
if os.path.exists(write_filepath) and not overwrite:
80-
# skip if already exists when not-overwrite mode
81-
return
82-
83-
url = (
84-
tileurl.replace(r"{x}", str(tile.x))
85-
.replace(r"{y}", str(tile.y))
86-
.replace(r"{z}", str(tile.z))
87-
)
88-
89-
try:
90-
data = fetch_data(url, timeout)
91-
except Exception as e:
92-
print(e)
93-
return
94-
95-
os.makedirs(write_dir, exist_ok=True)
96-
with open(write_filepath, mode="wb") as f:
97-
f.write(data)
98-
99-
100-
def download_mbtiles(
101-
conn: sqlite3.Connection,
102-
tile: tiletanic.Tile,
103-
tileurl: str,
104-
timeout: int = 5000,
105-
overwrite: bool = False,
106-
tms: bool = False,
107-
):
108-
if tms:
109-
ty = tile.y
110-
else:
111-
# flip y: xyz -> tms
112-
ty = (1 << tile.z) - 1 - tile.y
113-
114-
c = conn.cursor()
115-
c.execute(
116-
"SELECT tile_data FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
117-
(tile.z, tile.x, ty),
118-
)
119-
if c.fetchone() is not None and not overwrite:
120-
return
121-
122-
url = (
123-
tileurl.replace(r"{x}", str(tile.x))
124-
.replace(r"{y}", str(tile.y))
125-
.replace(r"{z}", str(tile.z))
126-
)
127-
try:
128-
data = fetch_data(url, timeout)
129-
except Exception as e:
130-
print(e)
131-
return
132-
133-
if overwrite:
134-
c.execute(
135-
"DELETE FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
136-
(tile.z, tile.x, ty),
137-
)
138-
139-
c.execute(
140-
"INSERT INTO tiles (zoom_level, tile_column, tile_row, tile_data) VALUES (?, ?, ?, ?)",
141-
(tile.z, tile.x, ty, data),
142-
)
143-
conn.commit()
144-
145-
146-
def main():
143+
async def run():
147144
params = parse_arg()
148145

149-
if params.mode == "dir":
146+
concurrency = max(1, 1000 // params.interval)
147+
semaphore = asyncio.Semaphore(concurrency)
150148

151-
def _download(tile):
152-
download_dir(
153-
tile,
154-
params.tileurl,
155-
params.output_path,
156-
params.timeout,
157-
params.overwrite,
158-
)
159-
time.sleep(params.interval / 1000)
160-
elif params.mode == "mbtiles":
149+
conn = None
150+
if params.mode == "mbtiles":
161151
if not os.path.exists(params.output_path):
162152
create_mbtiles(params.output_path)
163153

164-
conn = sqlite3.connect(params.output_path)
154+
conn = sqlite3.connect(params.output_path, check_same_thread=False)
165155

166-
# write metadata
167156
c = conn.cursor()
168157
c.execute(
169158
"INSERT INTO metadata (name, value) VALUES (?, ?)",
@@ -184,30 +173,60 @@ def _download(tile):
184173
"INSERT INTO metadata (name, value) VALUES (?, ?)",
185174
("maxzoom", params.maxzoom),
186175
)
187-
188176
conn.commit()
189177

190-
def _download(tile):
191-
download_mbtiles(
192-
conn, tile, params.tileurl, params.timeout, params.overwrite, params.tms
193-
)
194-
time.sleep(params.interval / 1000)
195-
196178
tilescheme = (
197179
tiletanic.tileschemes.WebMercatorBL()
198180
if params.tms
199181
else tiletanic.tileschemes.WebMercator()
200182
)
201183

202-
for zoom in range(params.minzoom, params.maxzoom + 1):
203-
generator = tiletanic.tilecover.cover_geometry(
204-
tilescheme, params.geometry, zoom
205-
)
206-
for tile in generator:
207-
_download(tile)
184+
async with httpx.AsyncClient() as client:
185+
for zoom in range(params.minzoom, params.maxzoom + 1):
186+
tiles = list(
187+
tiletanic.tilecover.cover_geometry(tilescheme, params.geometry, zoom)
188+
)
189+
190+
if params.mode == "dir":
191+
tasks = [
192+
download_dir(
193+
client,
194+
semaphore,
195+
tile,
196+
params.tileurl,
197+
params.output_path,
198+
params.timeout,
199+
params.overwrite,
200+
)
201+
for tile in tiles
202+
]
203+
else:
204+
assert conn is not None
205+
tasks = [
206+
download_mbtiles(
207+
client,
208+
semaphore,
209+
conn,
210+
tile,
211+
params.tileurl,
212+
params.timeout,
213+
params.overwrite,
214+
params.tms,
215+
)
216+
for tile in tiles
217+
]
218+
219+
await asyncio.gather(*tasks)
220+
221+
if conn is not None:
222+
conn.close()
208223

209224
print("finished")
210225

211226

227+
def main():
228+
asyncio.run(run())
229+
230+
212231
if __name__ == "__main__":
213232
main()

0 commit comments

Comments (0)