Skip to content

Commit 12042c3

Browse files
Merge pull request #27 from developmentseed/feature/add-vsifile-support
add vsifile IO support
2 parents dd3eaf2 + 6494f0e commit 12042c3

File tree

9 files changed

+275
-64
lines changed

9 files changed

+275
-64
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.15.0 (2025-02-27)
2+
3+
* add support for `VSIFile` backend (https://github.com/developmentseed/tilebench/pull/27)
4+
15
## 0.14.0 (2025-01-06)
26

37
* remove `python 3.8` support

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ test = [
3535
"pytest-cov",
3636
"pytest-asyncio",
3737
"requests",
38+
"vsifile",
3839
]
3940
dev = [
4041
"pre-commit",

tests/test_middleware.py

Lines changed: 76 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
"""Tests for tilebench."""
22

3+
import rasterio
34
from fastapi import FastAPI
45
from rio_tiler.io import Reader
56
from starlette.testclient import TestClient
7+
from vsifile.rasterio import opener
68

79
from tilebench.middleware import NoCacheMiddleware, VSIStatsMiddleware
810

@@ -33,26 +35,77 @@ def tile():
3335
def skip():
3436
return "I've been skipped"
3537

36-
client = TestClient(app)
37-
38-
response = client.get("/info")
39-
assert response.status_code == 200
40-
assert response.headers["content-type"] == "application/json"
41-
assert response.headers["Cache-Control"] == "no-cache"
42-
assert response.headers["VSI-Stats"]
43-
stats = response.headers["VSI-Stats"]
44-
assert "head;count=" in stats
45-
assert "get;count=" in stats
46-
47-
response = client.get("/tile")
48-
assert response.status_code == 200
49-
assert response.headers["content-type"] == "application/json"
50-
assert response.headers["VSI-Stats"]
51-
stats = response.headers["VSI-Stats"]
52-
assert "head;count=" in stats
53-
assert "get;count=" in stats
54-
55-
response = client.get("/skip")
56-
assert response.status_code == 200
57-
assert response.headers["content-type"] == "application/json"
58-
assert "VSI-Stats" not in response.headers
38+
with TestClient(app) as client:
39+
response = client.get("/info")
40+
assert response.status_code == 200
41+
assert response.headers["content-type"] == "application/json"
42+
assert response.headers["Cache-Control"] == "no-cache"
43+
assert response.headers["VSI-Stats"]
44+
stats = response.headers["VSI-Stats"]
45+
assert "head;count=" in stats
46+
assert "get;count=" in stats
47+
48+
response = client.get("/tile")
49+
assert response.status_code == 200
50+
assert response.headers["content-type"] == "application/json"
51+
assert response.headers["VSI-Stats"]
52+
stats = response.headers["VSI-Stats"]
53+
assert "head;count=" in stats
54+
assert "get;count=" in stats
55+
56+
response = client.get("/skip")
57+
assert response.status_code == 200
58+
assert response.headers["content-type"] == "application/json"
59+
assert "VSI-Stats" not in response.headers
60+
61+
62+
def test_middleware_vsifile():
    """Exercise NoCache/VSIStats middlewares with the ``vsifile`` IO backend."""
    app = FastAPI()
    app.add_middleware(NoCacheMiddleware)
    app.add_middleware(
        VSIStatsMiddleware,
        config={},
        exclude_paths=["/skip"],
        io="vsifile",
    )

    def _check_stats_header(resp):
        """Assert the VSI-Stats header is set and reports HEAD and GET counts."""
        assert resp.headers["VSI-Stats"]
        header = resp.headers["VSI-Stats"]
        assert "head;count=" in header
        assert "get;count=" in header

    @app.get("/info")
    def head():
        """Get info."""
        # The dataset is opened through the vsifile opener, then handed to
        # rio-tiler as an already-open dataset.
        with rasterio.open(COG_PATH, opener=opener) as src, Reader(
            None, dataset=src
        ) as cog:
            cog.info()
            return "I got info"

    @app.get("/tile")
    def tile():
        """Read tile."""
        with rasterio.open(COG_PATH, opener=opener) as src, Reader(
            None, dataset=src
        ) as cog:
            cog.tile(36460, 52866, 17)
            return "I got tile"

    @app.get("/skip")
    def skip():
        return "I've been skipped"

    with TestClient(app) as client:
        info_resp = client.get("/info")
        assert info_resp.status_code == 200
        assert info_resp.headers["content-type"] == "application/json"
        assert info_resp.headers["Cache-Control"] == "no-cache"
        _check_stats_header(info_resp)

        tile_resp = client.get("/tile")
        assert tile_resp.status_code == 200
        assert tile_resp.headers["content-type"] == "application/json"
        _check_stats_header(tile_resp)

        # "/skip" is listed in ``exclude_paths`` so no stats header is expected.
        skip_resp = client.get("/skip")
        assert skip_resp.status_code == 200
        assert skip_resp.headers["content-type"] == "application/json"
        assert "VSI-Stats" not in skip_resp.headers

tests/test_reader.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Tests for tilebench."""
22

3+
import rasterio
34
from rio_tiler.io import Reader
5+
from vsifile.rasterio import opener
46

57
from tilebench import profile as profiler
68

@@ -41,3 +43,28 @@ def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
4143
assert stats.get("GET")
4244
assert stats.get("Timing")
4345
assert stats.get("WarpKernels")
46+
47+
48+
def test_vsifile():
    """Check the profiler output when the tile is read through the vsifile opener."""

    @profiler(
        kernels=True,
        add_to_return=True,
        quiet=True,
        config={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"},
        io="vsifile",
    )
    def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
        with rasterio.open(src_path, opener=opener) as src, Reader(
            None, dataset=src
        ) as cog:
            return cog.tile(x, y, z, tilesize=tilesize)

    # ``add_to_return=True`` makes the decorated call return (result, stats).
    tile_result, stats = _read_tile(COG_PATH, 36460, 52866, 17)
    data, mask = tile_result
    assert data.shape
    assert mask.shape
    assert stats
    assert "HEAD" in stats
    assert stats.get("GET")
    assert stats.get("Timing")
    assert "WarpKernels" in stats

tests/test_viz.py

Lines changed: 76 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
"""Tests for tilebench."""
22

3+
import attr
4+
import rasterio
5+
from rio_tiler.io import Reader
36
from starlette.testclient import TestClient
7+
from vsifile.rasterio import opener
48

59
from tilebench.viz import TileDebug
610

@@ -17,26 +21,75 @@ def test_viz():
1721
assert app.endpoint == "http://127.0.0.1:8080"
1822
assert app.template_url == "http://127.0.0.1:8080"
1923

20-
client = TestClient(app.app)
21-
22-
response = client.get("/tiles/17/36460/52866")
23-
assert response.status_code == 200
24-
assert response.headers["content-type"] == "application/json"
25-
assert response.headers["Cache-Control"] == "no-cache"
26-
assert response.headers["VSI-Stats"]
27-
stats = response.headers["VSI-Stats"]
28-
assert "head;count=" in stats
29-
assert "get;count=" in stats
30-
31-
response = client.get("/info.geojson")
32-
assert response.status_code == 200
33-
assert response.headers["content-type"] == "application/geo+json"
34-
assert "VSI-Stats" not in response.headers
35-
36-
response = client.get("/tiles.geojson?ovr_level=0")
37-
assert response.status_code == 200
38-
assert response.headers["content-type"] == "application/geo+json"
39-
40-
response = client.get("/tiles.geojson?ovr_level=1")
41-
assert response.status_code == 200
42-
assert response.headers["content-type"] == "application/geo+json"
24+
with TestClient(app.app) as client:
25+
response = client.get("/tiles/17/36460/52866")
26+
assert response.status_code == 200
27+
assert response.headers["content-type"] == "application/json"
28+
assert response.headers["Cache-Control"] == "no-cache"
29+
assert response.headers["VSI-Stats"]
30+
stats = response.headers["VSI-Stats"]
31+
assert "head;count=" in stats
32+
assert "get;count=" in stats
33+
34+
response = client.get("/info.geojson")
35+
assert response.status_code == 200
36+
assert response.headers["content-type"] == "application/geo+json"
37+
assert "VSI-Stats" not in response.headers
38+
39+
response = client.get("/tiles.geojson?ovr_level=0")
40+
assert response.status_code == 200
41+
assert response.headers["content-type"] == "application/geo+json"
42+
43+
response = client.get("/tiles.geojson?ovr_level=1")
44+
assert response.status_code == 200
45+
assert response.headers["content-type"] == "application/geo+json"
46+
47+
48+
def test_viz_vsifile():
    """Build a TileDebug app backed by a vsifile-based reader and hit its routes."""

    @attr.s
    class VSIReader(Reader):
        """Rasterio Reader with VSIFILE opener."""

        dataset = attr.ib(default=None, init=False)  # type: ignore

        def __attrs_post_init__(self):
            """Use vsifile.rasterio.opener as Python file opener."""
            # Open the dataset ourselves (registered on the reader's context
            # stack) before the parent post-init runs.
            opened = rasterio.open(self.input, opener=opener)
            self.dataset = self._ctx_stack.enter_context(opened)
            super().__attrs_post_init__()

    app = TileDebug(
        src_path=COG_PATH,
        config={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"},
        reader=VSIReader,
        io_backend="vsifile",
    )
    assert app.port == 8080
    assert app.endpoint == "http://127.0.0.1:8080"
    assert app.template_url == "http://127.0.0.1:8080"

    with TestClient(app.app) as client:
        tile_resp = client.get("/tiles/17/36460/52866")
        assert tile_resp.status_code == 200
        assert tile_resp.headers["content-type"] == "application/json"
        assert tile_resp.headers["Cache-Control"] == "no-cache"
        assert tile_resp.headers["VSI-Stats"]
        vsi_stats = tile_resp.headers["VSI-Stats"]
        assert "head;count=" in vsi_stats
        assert "get;count=" in vsi_stats

        info_resp = client.get("/info.geojson")
        assert info_resp.status_code == 200
        assert info_resp.headers["content-type"] == "application/geo+json"
        assert "VSI-Stats" not in info_resp.headers

        for url in ("/tiles.geojson?ovr_level=0", "/tiles.geojson?ovr_level=1"):
            geojson_resp = client.get(url)
            assert geojson_resp.status_code == 200
            assert geojson_resp.headers["content-type"] == "application/geo+json"

tilebench/__init__.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
log.add(sys.stderr, format=fmt)
2020

2121

22-
def parse_logs(logs: List[str]) -> Dict[str, Any]:
22+
def parse_rasterio_io_logs(logs: List[str]) -> Dict[str, Any]:
2323
"""Parse Rasterio and CURL logs."""
2424
# HEAD
2525
head_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: HEAD" in line])
@@ -53,25 +53,63 @@ def parse_logs(logs: List[str]) -> Dict[str, Any]:
5353
}
5454

5555

56+
def parse_vsifile_io_logs(logs: List[str]) -> Dict[str, Any]:
    """Parse VSIFILE IO logs.

    Args:
        logs: captured log lines, one entry per line.

    Returns:
        A dict with three keys:
            - ``"HEAD"``: ``{"count": <lines containing "VSIFILE_INFO: HEAD">}``
            - ``"GET"``: ``{"count": <lines containing "VSIFILE_INFO: GET">,
              "bytes": <sum of the sizes of all downloaded ranges>,
              "ranges": <list of "start-end" strings>}``
            - ``"WarpKernels"``: for each line containing ``"GDALWarpKernel"``,
              its last two space-separated tokens.

    Raises:
        ValueError: if a downloaded range is not of the form ``<int>-<int>``.

    """
    download_marker = "VSIFILE: Downloading: "

    # HEAD
    head_summary = {
        "count": sum(1 for line in logs if "VSIFILE_INFO: HEAD" in line),
    }

    # GET
    get_count = sum(1 for line in logs if "VSIFILE_INFO: GET" in line)

    # A single "Downloading" line may list several comma-separated ranges.
    ranges: List[str] = []
    for line in logs:
        if download_marker in line:
            ranges.extend(line.split(download_marker)[1].split(", "))

    # Ranges are inclusive on both ends, hence the ``+ 1``.
    data_transfer = 0
    for byte_range in ranges:
        start, end = map(int, byte_range.split("-"))
        data_transfer += end - start + 1

    get_summary = {
        "count": get_count,
        "bytes": data_transfer,
        "ranges": ranges,
    }

    warp_kernel = [line.split(" ")[-2:] for line in logs if "GDALWarpKernel" in line]

    return {
        "HEAD": head_summary,
        "GET": get_summary,
        "WarpKernels": warp_kernel,
    }
89+
90+
5691
def profile(
5792
kernels: bool = False,
5893
add_to_return: bool = False,
5994
quiet: bool = False,
6095
raw: bool = False,
6196
cprofile: bool = False,
6297
config: Optional[Dict] = None,
98+
io="rasterio",
6399
):
64100
"""Profiling."""
101+
if io not in ["rasterio", "vsifile"]:
102+
raise ValueError(f"Unsupported {io} IO backend")
65103

66104
def wrapper(func: Callable):
67105
"""Wrap a function."""
68106

69107
def wrapped_f(*args, **kwargs):
70108
"""Wrapped function."""
71-
rio_stream = StringIO()
72-
logger = logging.getLogger("rasterio")
109+
io_stream = StringIO()
110+
logger = logging.getLogger(io)
73111
logger.setLevel(logging.DEBUG)
74-
handler = logging.StreamHandler(rio_stream)
112+
handler = logging.StreamHandler(io_stream)
75113
logger.addHandler(handler)
76114

77115
gdal_config = config or {}
@@ -88,10 +126,15 @@ def wrapped_f(*args, **kwargs):
88126
logger.removeHandler(handler)
89127
handler.close()
90128

91-
logs = rio_stream.getvalue().splitlines()
129+
logs = io_stream.getvalue().splitlines()
92130
profile_lines = [p for p in profile_stream.getvalue().splitlines() if p]
93131

94-
results = parse_logs(logs)
132+
results = {}
133+
if io == "vsifile":
134+
results.update(parse_vsifile_io_logs(logs))
135+
else:
136+
results.update(parse_rasterio_io_logs(logs))
137+
95138
results["Timing"] = t.elapsed
96139

97140
if cprofile:

0 commit comments

Comments
 (0)