7 changes: 6 additions & 1 deletion pyproject.toml
@@ -4,7 +4,12 @@ version = "0.4.3"
description = "Tile download utility - easily download xyz-tile data"
readme = "README.md"
requires-python = ">= 3.14"
dependencies = ["shapely>=2.0.3", "tiletanic>=1.1.0", "pyproj>=3.6.1"]
dependencies = [
"shapely>=2.0.3",
"tiletanic>=1.1.0",
"pyproj>=3.6.1",
"httpx>=0.28",
]

[dependency-groups]
dev = ["ruff>=0.3.7", "ty>=0.0.13"]
273 changes: 146 additions & 127 deletions tileget/__main__.py
@@ -1,32 +1,110 @@
import asyncio
import os
import sqlite3
import time
import urllib.request

import httpx
import tiletanic

from tileget.arg import parse_arg


def fetch_data(url: str, timeout: int = 5000) -> bytes:
async def fetch_data(
client: httpx.AsyncClient, url: str, timeout: int = 5000
) -> bytes | None:
print("downloading: " + url)
data = None
while True:
try:
data = urllib.request.urlopen(url, timeout=timeout / 1000)
break
except urllib.error.HTTPError as e:
raise Exception(str(e) + ":" + url)
except Exception as e:
if (
str(e.args)
== "(timeout('_ssl.c:1091: The handshake operation timed out'),)"
):
print("timeout, retrying... :" + url)
else:
raise Exception(str(e) + ":" + url)
try:
response = await client.get(url, timeout=timeout / 1000)
response.raise_for_status()
return response.content
except httpx.HTTPStatusError as e:
print(f"{e.response.status_code}: {url}")
return None
except httpx.TimeoutException:
print(f"timeout: {url}")
return None
except Exception as e:
print(f"{e}: {url}")
return None


async def download_dir(
client: httpx.AsyncClient,
semaphore: asyncio.Semaphore,
tile: tiletanic.Tile,
tileurl: str,
output_path: str,
timeout: int = 5000,
overwrite: bool = False,
):
async with semaphore:
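# detect file extension from tileurl
# tileurl = https://path/to/{z}/{x}/{y}.ext?foo=bar...&hoge=fuga.json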
ext = os.path.splitext(tileurl.split("?")[0])[-1]

write_dir = os.path.join(output_path, str(tile.z), str(tile.x))
write_filepath = os.path.join(write_dir, str(tile.y) + ext)

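# skip if already exists when not-overwrite mode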
if os.path.exists(write_filepath) and not overwrite:
return

url = (
tileurl.replace(r"{x}", str(tile.x))
.replace(r"{y}", str(tile.y))
.replace(r"{z}", str(tile.z))
)

data = await fetch_data(client, url, timeout)
if data is None:
return

os.makedirs(write_dir, exist_ok=True)
with open(write_filepath, mode="wb") as f:
f.write(data)


async def download_mbtiles(
client: httpx.AsyncClient,
semaphore: asyncio.Semaphore,
conn: sqlite3.Connection,
tile: tiletanic.Tile,
tileurl: str,
timeout: int = 5000,
overwrite: bool = False,
tms: bool = False,
):
async with semaphore:
if tms:
ty = tile.y
else:
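# flip y: xyz -> tms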
ty = (1 << tile.z) - 1 - tile.y
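# e.g. at z=3, xyz row y=1 becomes tms row (1 << 3) - 1 - 1 = 6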

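# skip tiles that are already stored unless overwrite is requested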
c = conn.cursor()
c.execute(
"SELECT tile_data FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
(tile.z, tile.x, ty),
)
if c.fetchone() is not None and not overwrite:
return

url = (
tileurl.replace(r"{x}", str(tile.x))
.replace(r"{y}", str(tile.y))
.replace(r"{z}", str(tile.z))
)

return data.read()
data = await fetch_data(client, url, timeout)
if data is None:
return

if overwrite:
c.execute(
"DELETE FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
(tile.z, tile.x, ty),
)

c.execute(
"INSERT INTO tiles (zoom_level, tile_column, tile_row, tile_data) VALUES (?, ?, ?, ?)",
(tile.z, tile.x, ty, data),
)
conn.commit()


def create_mbtiles(output_file: str):
@@ -62,108 +140,19 @@ def create_mbtiles(output_file: str):
return output_file


def download_dir(
tile: tiletanic.Tile,
tileurl: str,
output_path: str,
timeout: int = 5000,
overwrite: bool = False,
):
# detect file extension from tileurl
# tileurl = https://path/to/{z}/{x}/{y}.ext?foo=bar...&hoge=fuga.json
ext = os.path.splitext(tileurl.split("?")[0])[-1]

write_dir = os.path.join(output_path, str(tile.z), str(tile.x))
write_filepath = os.path.join(write_dir, str(tile.y) + ext)

if os.path.exists(write_filepath) and not overwrite:
# skip if already exists when not-overwrite mode
return

url = (
tileurl.replace(r"{x}", str(tile.x))
.replace(r"{y}", str(tile.y))
.replace(r"{z}", str(tile.z))
)

try:
data = fetch_data(url, timeout)
except Exception as e:
print(e)
return

os.makedirs(write_dir, exist_ok=True)
with open(write_filepath, mode="wb") as f:
f.write(data)


def download_mbtiles(
conn: sqlite3.Connection,
tile: tiletanic.Tile,
tileurl: str,
timeout: int = 5000,
overwrite: bool = False,
tms: bool = False,
):
if tms:
ty = tile.y
else:
# flip y: xyz -> tms
ty = (1 << tile.z) - 1 - tile.y

c = conn.cursor()
c.execute(
"SELECT tile_data FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
(tile.z, tile.x, ty),
)
if c.fetchone() is not None and not overwrite:
return

url = (
tileurl.replace(r"{x}", str(tile.x))
.replace(r"{y}", str(tile.y))
.replace(r"{z}", str(tile.z))
)
try:
data = fetch_data(url, timeout)
except Exception as e:
print(e)
return

if overwrite:
c.execute(
"DELETE FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
(tile.z, tile.x, ty),
)

c.execute(
"INSERT INTO tiles (zoom_level, tile_column, tile_row, tile_data) VALUES (?, ?, ?, ?)",
(tile.z, tile.x, ty, data),
)
conn.commit()


def main():
async def run():
params = parse_arg()

if params.mode == "dir":
concurrency = max(1, 1000 // params.interval)
semaphore = asyncio.Semaphore(concurrency)

def _download(tile):
download_dir(
tile,
params.tileurl,
params.output_path,
params.timeout,
params.overwrite,
)
time.sleep(params.interval / 1000)
elif params.mode == "mbtiles":
conn = None
if params.mode == "mbtiles":
if not os.path.exists(params.output_path):
create_mbtiles(params.output_path)

conn = sqlite3.connect(params.output_path)
conn = sqlite3.connect(params.output_path, check_same_thread=False)
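# the single connection is shared by all tasks; check_same_thread=False relaxes sqlite3's same-thread guard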

# write metadata
c = conn.cursor()
c.execute(
"INSERT INTO metadata (name, value) VALUES (?, ?)",
@@ -184,30 +173,60 @@ def _download(tile):
"INSERT INTO metadata (name, value) VALUES (?, ?)",
("maxzoom", params.maxzoom),
)

conn.commit()

def _download(tile):
download_mbtiles(
conn, tile, params.tileurl, params.timeout, params.overwrite, params.tms
)
time.sleep(params.interval / 1000)

tilescheme = (
tiletanic.tileschemes.WebMercatorBL()
if params.tms
else tiletanic.tileschemes.WebMercator()
)

for zoom in range(params.minzoom, params.maxzoom + 1):
generator = tiletanic.tilecover.cover_geometry(
tilescheme, params.geometry, zoom
)
for tile in generator:
_download(tile)
async with httpx.AsyncClient() as client:
for zoom in range(params.minzoom, params.maxzoom + 1):
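# materialize this zoom level's tiles so a download task can be created for each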
tiles = list(
tiletanic.tilecover.cover_geometry(tilescheme, params.geometry, zoom)
)

if params.mode == "dir":
tasks = [
download_dir(
client,
semaphore,
tile,
params.tileurl,
params.output_path,
params.timeout,
params.overwrite,
)
for tile in tiles
]
else:
assert conn is not None
tasks = [
download_mbtiles(
client,
semaphore,
conn,
tile,
params.tileurl,
params.timeout,
params.overwrite,
params.tms,
)
for tile in tiles
]

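# run all downloads for this zoom level concurrently, bounded by the semaphore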
await asyncio.gather(*tasks)

if conn is not None:
conn.close()

print("finished")


def main():
asyncio.run(run())


if __name__ == "__main__":
main()
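
For reference, a minimal standalone sketch (not part of the change) of the pattern the new __main__.py relies on: one shared httpx.AsyncClient plus an asyncio.Semaphore that caps how many downloads run at once, with tasks joined by asyncio.gather. The tile URL and the limit of 10 below are placeholders for illustration only.

import asyncio

import httpx


async def fetch(client: httpx.AsyncClient, sem: asyncio.Semaphore, url: str) -> bytes | None:
    async with sem:  # the semaphore caps concurrent requests
        try:
            response = await client.get(url, timeout=5.0)
            response.raise_for_status()
            return response.content
        except httpx.HTTPError:  # covers status errors, timeouts and transport failures
            return None


async def demo() -> None:
    sem = asyncio.Semaphore(10)  # placeholder concurrency cap
    urls = ["https://tile.openstreetmap.org/0/0/0.png"]  # placeholder tile URL
    async with httpx.AsyncClient() as client:
        results = await asyncio.gather(*(fetch(client, sem, u) for u in urls))
    print([len(r) if r is not None else None for r in results])


if __name__ == "__main__":
    asyncio.run(demo())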