Skip to content

Commit 1c97e8b

Browse files
committed
Support loading from remote resource and support loading from folders
1 parent 5a91780 commit 1c97e8b

24 files changed

+233
-173
lines changed

pyproject.toml

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,27 @@ classifiers = [
2020
"Programming Language :: Python :: 3.13",
2121
]
2222
requires-python = ">=3.10"
23+
# This is a duplicate; it must also be changed below in the pixi configuration
24+
dependencies = [
25+
"pyyaml>=6.0,<7.0",
26+
"click>=8.1,<9.0",
27+
"geopandas>=1.0.0,<2.0",
28+
"requests>=2.30,<3.0",
29+
"shapely>=2.1,<3.0",
30+
# numpy is restricted to <2.2.0 because later releases require Python 3.11 or newer, and we still support Python 3.10
31+
"numpy>=2.0,<2.2",
32+
"pyarrow>=21.0,<22.0",
33+
"py7zr>=1.0,<2.0",
34+
"fsspec==2025.7.0",
35+
"jsonschema[format]>=4.20,<5.0",
36+
"aiohttp>=3.9,<4.0",
37+
"yarl>=1.20,<2.0",
38+
"rarfile>=4.0,<5.0",
39+
"semantic-version>=2.10.0,<3.0",
40+
"json-stream>=2.3.0,<3.0",
41+
"flatdict>=4.0,<5.0",
42+
"loguru==0.7.3",
43+
]
2344

2445
[project.scripts]
2546
vec = "vecorel_cli:vecorel_cli"
@@ -49,15 +70,6 @@ pyarrow = ">=21.0,<22.0"
4970
py7zr = ">=1.0,<2.0"
5071

5172
[tool.pixi.pypi-dependencies]
52-
fsspec = "==2025.7.0"
53-
jsonschema = {version = ">=4.20,<5.0", extras = ["format"]}
54-
aiohttp = ">=3.9,<4.0"
55-
rarfile = ">=4.0,<5.0"
56-
semantic-version = ">=2.10.0,<3.0"
57-
json-stream = ">=2.3.0,<3.0"
58-
flatdict = ">=4.0,<5.0"
59-
loguru = "==0.7.3"
60-
6173
# Editable install of the project itself
6274
vecorel-cli = {path = ".", editable = true}
6375

tests/test_encoding_auto.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
# non-existing encoding
1717
("invalid.txt", None),
1818
("invalid", None),
19-
(None, None),
2019
]
2120

2221

tests/test_encoding_geojson.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ def test_init_paths(tmp_folder):
1010
fpath = str(tmp_folder / "test.json")
1111
ppath = Path(fpath)
1212

13-
assert GeoJSON(fpath).file == ppath
14-
assert GeoJSON(ppath).file == ppath
13+
assert GeoJSON(fpath).uri == ppath
14+
assert GeoJSON(ppath).uri == ppath
1515

1616

1717
def test_get_datatypes_uri():

tests/test_encoding_geoparquet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ def test_init_paths(tmp_folder):
88
fpath = str(tmp_folder / "test.parquet")
99
ppath = Path(fpath)
1010

11-
assert GeoParquet(fpath).file == ppath
12-
assert GeoParquet(ppath).file == ppath
11+
assert GeoParquet(fpath).uri == ppath
12+
assert GeoParquet(ppath).uri == ppath
1313

1414

1515
def test_get_format():

vecorel_cli/basecommand.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def run(self, *args, **kwargs):
3939
f"{Registry.cli_title} {Registry.get_version()} - {self.cmd_title}",
4040
end="\n\n",
4141
style="bold",
42+
color="cyan",
4243
)
4344

4445
# Detect method to run

vecorel_cli/cli/logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def _format_data(self, value: dict, depth=0, max_depth=1, strlen=50):
100100
if depth > 0:
101101
output += "\n"
102102
for key, value in value.items():
103-
output += f"{prefix}<yellow>{key}</>: "
103+
output += f"{prefix}<cyan>{key}</>: "
104104
output += self._format_data(
105105
value, depth=depth + 1, max_depth=max_depth, strlen=strlen
106106
)

vecorel_cli/cli/options.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import click
22

33
from ..const import COMPRESSION_METHODS, GEOPARQUET_DEFAULT_VERSION, GEOPARQUET_VERSIONS
4-
from .util import valid_schemas_for_cli, valid_vecorel_file, valid_vecorel_files
4+
from ..registry import Registry
5+
from .path_url import PathOrURL
6+
from .util import valid_schemas_for_cli
57

68

79
def CRS(default_value):
@@ -62,16 +64,15 @@ def CRS(default_value):
6264

6365
VECOREL_FILES_ARG = click.argument(
6466
"source",
65-
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
67+
type=PathOrURL(multiple=True, extensions=Registry.get_vecorel_extensions()),
6668
nargs=-1,
67-
callback=valid_vecorel_files,
69+
callback=PathOrURL.flatten_tuples,
6870
)
6971

7072
VECOREL_FILE_ARG = click.argument(
7173
"source",
72-
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
74+
type=PathOrURL(extensions=Registry.get_vecorel_extensions()),
7375
nargs=1,
74-
callback=valid_vecorel_file,
7576
)
7677

7778

vecorel_cli/cli/path_url.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import click
2+
import pathlib
3+
4+
from ..vecorel.util import is_url, get_fs
5+
from yarl import URL
6+
7+
IGNORE_FILES = ["collection.json", "catalog.json"] # likely STAC
8+
9+
class PathOrURL(click.ParamType):
    """Click parameter type that accepts either a local path or a remote URL.

    Values that look like a URL are checked for existence through the
    filesystem returned by ``get_fs`` and are converted to ``yarl.URL``.
    Everything else is validated as an existing local path via ``click.Path``
    and converted to ``pathlib.Path``.

    When ``multiple`` is enabled, a directory may be passed as well. It is
    expanded (non-recursively) into a tuple of the contained entries that
    match the configured extensions; entries listed in ``IGNORE_FILES`` are
    skipped. Use ``flatten_tuples`` as the parameter callback to merge those
    tuples into a single flat list.
    """

    name = "path_or_url"

    @staticmethod
    def flatten_tuples(ctx, param, value) -> list:
        """Click callback that flattens one level of nested tuples/lists.

        Directory arguments are converted to tuples of paths, so a
        ``nargs=-1`` argument can yield a mix of single values and tuples.

        :param ctx: The click context (unused).
        :param param: The click parameter (unused).
        :param value: The converted values of the parameter, may be empty or None.
        :return: A flat list of all values; an empty list if nothing was given.
        """
        if not value:
            return []
        flat = []
        for item in value:
            if isinstance(item, (tuple, list)):
                flat.extend(item)
            else:
                flat.append(item)
        return flat

    def __init__(self, *, multiple: bool = False, extensions: list[str] | None = None):
        """Create the parameter type.

        :param multiple: If True, directories are accepted and expanded into
            the matching entries they contain.
        :param extensions: Allowed file suffixes as returned by
            ``pathlib.Path.suffix`` in lower-case (e.g. ``".json"``).
            None or an empty list disables the extension check.
        """
        # Copy to avoid sharing a mutable default or the caller's list.
        self.extensions = list(extensions) if extensions is not None else []
        self.multiple = multiple
        self.path_type = click.Path(
            exists=True,
            dir_okay=multiple,
            resolve_path=True,
            allow_dash=False,
            path_type=pathlib.Path,
        )

    def convert(self, value, param, ctx):
        """Validate and convert a single command line value.

        :param value: The raw string from the command line, or a value that
            has already been converted (``pathlib.Path`` or ``yarl.URL``).
        :param param: The click parameter, used for error reporting.
        :param ctx: The click context, used for error reporting.
        :return: A ``yarl.URL`` for URLs, a ``pathlib.Path`` for files or a
            tuple of ``pathlib.Path`` for directories.
        :raises click.BadParameter: (via ``self.fail``) if the URL or path
            does not exist or a file has an unsupported extension.
        """
        # click may pass values that are already of the target type
        # (e.g. defaults), so URLs are normalized back to a string first.
        if isinstance(value, URL):
            value = str(value)

        # Check if it's a URL; non-string values can only be local paths.
        if isinstance(value, str) and "://" in value and is_url(value):
            fs = get_fs(value)
            if not fs.exists(value):
                self.fail(
                    f"URL '{value}' does not exist or is currently unavailable.",
                    param,
                    ctx,
                )
            return URL(value)

        # Otherwise, validate as a local path
        filepath = self.path_type.convert(value, param, ctx)
        if filepath.is_dir():
            # Sorted so that the processing order does not depend on the
            # arbitrary order in which the OS lists directory entries.
            return tuple(
                sorted(
                    entry
                    for entry in filepath.iterdir()
                    if self._check_extension(entry) and entry.name not in IGNORE_FILES
                )
            )

        if not self._check_extension(filepath):
            self.fail(
                f"File '{filepath}' must have one of the following extensions: {', '.join(self.extensions)}",
                param,
                ctx,
            )
        return filepath

    def _check_extension(self, filepath: pathlib.Path) -> bool:
        """Return True if no extensions are configured or the suffix of
        ``filepath`` (compared in lower-case) is one of them."""
        return not self.extensions or filepath.suffix.lower() in self.extensions

    def shell_complete(self, ctx, param, incomplete):
        """Offer local path completion; URLs cannot be completed.

        :param ctx: The click context.
        :param param: The click parameter being completed.
        :param incomplete: The partial value typed so far.
        :return: A list of completion items (empty for URLs).
        """
        if "://" in incomplete:
            return []
        # The base ParamType offers no completions, so delegate to the
        # wrapped click.Path to get file and directory completion.
        return self.path_type.shell_complete(ctx, param, incomplete)

vecorel_cli/cli/util.py

Lines changed: 4 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,90 +1,10 @@
1-
import os
21
from pathlib import Path
32
from typing import Optional
4-
from urllib.parse import urlparse
53

64
import click
75
import pandas as pd
86

9-
from ..const import SUPPORTED_PROTOCOLS
10-
from ..registry import Registry
11-
from ..vecorel.util import name_from_uri
12-
13-
14-
def is_valid_file_uri(uri, extensions=[]):
15-
"""Determine if the input is a file path or a URL and handle it."""
16-
if not isinstance(uri, str):
17-
raise click.BadParameter("Input must be a string representing a file path or URL")
18-
elif len(extensions) > 0 and not uri.endswith(tuple(extensions)):
19-
raise click.BadParameter(
20-
f"File '{uri}' must have one of the following extensions: {', '.join(extensions)}"
21-
)
22-
elif os.path.exists(uri):
23-
return uri
24-
elif is_valid_url(uri):
25-
return uri
26-
else:
27-
raise click.BadParameter(
28-
"Input must be an existing local file or a URL with protocol: "
29-
+ ",".join(SUPPORTED_PROTOCOLS)
30-
)
31-
32-
33-
def is_valid_url(url):
34-
"""Check if a URL is valid."""
35-
try:
36-
result = urlparse(url)
37-
return all([result.scheme in SUPPORTED_PROTOCOLS, result.netloc])
38-
except ValueError:
39-
return False
40-
41-
42-
def get_files(value, extensions=[]):
43-
files = []
44-
extensions = tuple(extensions)
45-
for v in value:
46-
v = is_valid_file_uri(v)
47-
if os.path.isdir(v):
48-
for f in os.listdir(v):
49-
if len(extensions) > 0 and not f.endswith(extensions):
50-
continue
51-
if f == "collection.json" or f == "catalog.json": # likely STAC
52-
continue
53-
files.append(os.path.join(v, f))
54-
else:
55-
files.append(v)
56-
return files
57-
58-
59-
def valid_file(ctx, param, value):
60-
return is_valid_file_uri(value)
61-
62-
63-
def valid_vecorel_file(ctx, param, value) -> Path:
64-
ext = Registry.get_vecorel_extensions()
65-
return is_valid_file_uri(value, extensions=ext)
66-
67-
68-
def valid_vecorel_files(ctx, param, value) -> list[Path]:
69-
ext = Registry.get_vecorel_extensions()
70-
71-
files = []
72-
if isinstance(value, str):
73-
files = [value]
74-
elif isinstance(value, tuple):
75-
files = list(value)
76-
elif isinstance(value, list):
77-
files = value
78-
79-
if len(files) == 0:
80-
raise click.BadParameter("No files provided.")
81-
82-
actual_files = []
83-
for file in files:
84-
if is_valid_file_uri(file, extensions=ext):
85-
actual_files.append(Path(file))
86-
87-
return actual_files
7+
from ..vecorel.util import is_url, name_from_uri
888

899

9010
def parse_converter_input_files(ctx, param, value):
@@ -130,14 +50,14 @@ def valid_schemas_for_cli(value: tuple[str]) -> dict[str, Path]:
13050

13151
if len(part) != 2:
13252
raise click.BadParameter(
133-
"Schema must be a URL and a local file path separated by a comma character."
53+
"Schema must be a URL and a local file path, separated by a comma character."
13454
)
135-
if not is_valid_url(part[0]):
55+
if not is_url(part[0]):
13656
raise click.BadParameter(f"Schema URL '{part[0]}' is not a valid URL.")
13757

13858
p = Path(part[1])
13959
if not p.exists():
140-
raise click.BadParameter(f"Local schema file '{p}' does not exist.")
60+
raise click.BadParameter(f"Local schema file '{p.resolve()}' does not exist.")
14161

14262
map_[part[0]] = p
14363

vecorel_cli/create_geojson.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Optional, Union
33

44
import click
5+
from yarl import URL
56

67
from .basecommand import BaseCommand, runnable
78
from .cli.options import JSON_INDENT, PROPERTIES, VECOREL_FILE_ARG, VECOREL_TARGET
@@ -43,7 +44,7 @@ def get_cli_args():
4344
@runnable
4445
def create(
4546
self,
46-
source: Union[Path, str],
47+
source: Union[Path, URL, str],
4748
target: Union[Path, str],
4849
properties: Optional[Union[tuple[str], list[str]]] = None,
4950
split: bool = False,

0 commit comments

Comments
 (0)