Skip to content

Commit 5ed6ae6

Browse files
ivorbosloperm-mohrCopilot
authored
Use zstd with compression 15 by default (#16)
* Use zstd with compression 15 by default, fixes #12
* Update vecorel_cli/cli/options.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestions from code review
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Matthias Mohr <matthias@mohr.ws>
Co-authored-by: Matthias Mohr <webmaster@mamo-net.de>
Co-authored-by: Matthias Mohr <m.mohr@moregeo.it>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 419f2f6 commit 5ed6ae6

File tree

5 files changed

+30
-12
lines changed

5 files changed

+30
-12
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
88
## [Unreleased]
99

1010
- Replace flatdict from PyPI with a local version to avoid pkg_resources install issues
11+
- Change default compression to zstd
12+
- Add option to set compression level, zstd defaults to 15
1113

1214
## [v0.2.12] - 2025-12-08
1315

vecorel_cli/cli/options.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,16 @@ def CRS(default_value):
3131
type=click.Choice(COMPRESSION_METHODS),
3232
help="GeoParquet only: Compression method",
3333
show_default=True,
34-
default="brotli",
34+
default="zstd",
35+
)
36+
37+
GEOPARQUET_COMPRESSION_LEVEL = click.option(
38+
"--compression_level",
39+
"-pcl",
40+
type=click.IntRange(min=1, max=22),
41+
help="GeoParquet only: Compression level",
42+
show_default=True,
43+
default=None,
3544
)
3645

3746
GEOPARQUET_VERSION = click.option(

vecorel_cli/conversion/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ def convert(
298298
input_files=None,
299299
variant=None,
300300
compression=None,
301+
compression_level: Optional[int] = None,
301302
geoparquet_version=None,
302303
original_geometries=False,
303304
**kwargs,
@@ -425,6 +426,7 @@ def convert(
425426
gdf,
426427
properties=columns,
427428
compression=compression,
429+
compression_level=compression_level,
428430
geoparquet_version=geoparquet_version,
429431
)
430432

vecorel_cli/convert.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
import click
22

33
from .basecommand import BaseCommand, runnable
4-
from .cli.options import GEOPARQUET_COMPRESSION, GEOPARQUET_VERSION, PY_PACKAGE, VECOREL_TARGET
4+
from .cli.options import (
5+
GEOPARQUET_COMPRESSION,
6+
GEOPARQUET_COMPRESSION_LEVEL,
7+
GEOPARQUET_VERSION,
8+
PY_PACKAGE,
9+
VECOREL_TARGET,
10+
)
511
from .cli.util import parse_converter_input_files
612
from .converters import Converters
713
from .registry import Registry
@@ -61,6 +67,7 @@ def get_cli_args():
6167
default=None,
6268
),
6369
"compression": GEOPARQUET_COMPRESSION,
70+
"compression_level": GEOPARQUET_COMPRESSION_LEVEL,
6471
"geoparquet_version": GEOPARQUET_VERSION,
6572
"mapping_file": click.option(
6673
"--mapping-file",

vecorel_cli/encoding/geoparquet.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -131,22 +131,22 @@ def get_compression(self) -> Optional[str]:
131131
# geoparquet_version: str, optional, default None
132132
# GeoParquet version to write. Values not listed in GEOPARQUET_VERSIONS
133133
# fall back to GEOPARQUET_DEFAULT_VERSION.
134-
# compression: str, optional, default "brotli"
135-
# Compression algorithm to use, defaults to "brotli".
136-
# Other options are "snappy", "gzip", "lz4", "zstd", etc.
134+
# compression: str, optional, default "zstd"
135+
# Compression algorithm to use, defaults to "zstd".
136+
# Other options are "snappy", "gzip", "lz4", "brotli", etc.
137137
def write(
138138
self,
139139
data: GeoDataFrame,
140140
properties: Optional[list[str]] = None,
141141
schema_map: SchemaMapping = {},
142142
dehydrate: bool = True,
143-
compression: Optional[str] = None,
143+
compression: Optional[str] = "zstd",
144+
compression_level: Optional[int] = None, # default level for compression
144145
geoparquet_version: Optional[str] = None,
145146
**kwargs, # capture unknown arguments
146147
) -> bool:
147-
if compression is None:
148-
compression = "brotli"
149-
148+
if compression == "zstd" and compression_level is None:
149+
compression_level = 15
150150
if geoparquet_version not in GEOPARQUET_VERSIONS:
151151
geoparquet_version = GEOPARQUET_DEFAULT_VERSION
152152
self.uri.parent.mkdir(parents=True, exist_ok=True)
@@ -240,9 +240,6 @@ def write(
240240
}
241241
)
242242

243-
if compression is None:
244-
compression = "brotli"
245-
246243
# Write the data to the Parquet file
247244
to_parquet(
248245
data,
@@ -254,6 +251,7 @@ def write(
254251
schema_version=geoparquet_version,
255252
row_group_size=self.row_group_size,
256253
write_covering_bbox=bool(geoparquet_version != "1.0.0"),
254+
compression_level=compression_level,
257255
)
258256

259257
return True

0 commit comments

Comments
 (0)