Skip to content

Commit 2f78809

Browse files
committed
feat(cli,core): allow compression (and other options)
1 parent 96ec314 commit 2f78809

File tree

5 files changed

+68
-5
lines changed

5 files changed

+68
-5
lines changed

cli/Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,21 @@ categories = ["science", "data-structures"]
1414
default = ["gdal", "geoparquet", "pgstac"]
1515
duckdb = ["dep:stac-duckdb"]
1616
gdal = ["stac/gdal"]
17-
geoparquet = ["dep:bytes", "stac/geoparquet-compression"]
17+
geoparquet = [
18+
"dep:bytes",
19+
"dep:geoarrow",
20+
"stac/geoparquet-compression",
21+
"parquet",
22+
]
1823
pgstac = ["stac-server/pgstac"]
1924
python = ["dep:pyo3", "pgstac", "duckdb", "stac-duckdb/bundled", "geoparquet"]
2025

2126
[dependencies]
2227
axum = "0.7"
2328
bytes = { version = "1", optional = true }
2429
clap = { version = "4", features = ["derive"] }
30+
geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "6b877486bf98f280bc04b589eb7ce25b20e629f0", optional = true }
31+
parquet = { version = "52", optional = true }
2532
pyo3 = { version = "0.22", optional = true }
2633
reqwest = "0.12"
2734
serde = "1"

cli/src/args.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ pub struct Args {
1919
#[arg(short, long, value_enum)]
2020
pub output_format: Option<Format>,
2121

22+
/// The type of geoparquet compression to use.
23+
///
24+
/// Possible values: uncompressed (default), snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), lz4_raw
25+
#[arg(long, value_enum)]
26+
#[cfg(feature = "geoparquet")]
27+
pub geoparquet_compression: Option<parquet::basic::Compression>,
28+
2229
/// The subcommand to run.
2330
#[command(subcommand)]
2431
pub subcommand: Subcommand,

cli/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ pub type Result<T> = std::result::Result<T, Error>;
6767
/// compact: false,
6868
/// input_format: None,
6969
/// output_format: None,
70+
/// #[cfg(feature = "geoparquet")]
71+
/// geoparquet_compression: None,
7072
/// subcommand: Subcommand::Sort(sort_args),
7173
/// };
7274
/// # tokio_test::block_on(async {
@@ -82,6 +84,8 @@ pub async fn run(args: Args) -> Result<()> {
8284
compact: args.compact,
8385
input_format,
8486
output_format,
87+
#[cfg(feature = "geoparquet")]
88+
geoparquet_compression: args.geoparquet_compression,
8589
writer,
8690
buffer: 100,
8791
};

cli/src/runner.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ where
1616
/// The output format.
1717
pub output_format: Format,
1818

19+
/// Geoparquet compression.
20+
#[cfg(feature = "geoparquet")]
21+
pub geoparquet_compression: Option<parquet::basic::Compression>,
22+
1923
/// The output writeable stream.
2024
pub writer: W,
2125

@@ -49,7 +53,19 @@ where
4953
#[cfg(feature = "geoparquet")]
5054
Format::Parquet => {
5155
if let Some(value) = value.to_stac() {
52-
stac::geoparquet::to_writer(&mut self.writer, value)?;
56+
let mut options = geoarrow::io::parquet::GeoParquetWriterOptions::default();
57+
if let Some(compression) = self.geoparquet_compression {
58+
let writer_properites =
59+
parquet::file::properties::WriterProperties::builder()
60+
.set_compression(compression)
61+
.build();
62+
options.writer_properties = Some(writer_properites);
63+
}
64+
stac::geoparquet::to_writer_with_options(
65+
&mut self.writer,
66+
value,
67+
&options,
68+
)?;
5369
} else {
5470
writeln!(self.writer, "{}", value)?;
5571
}

core/src/geoparquet.rs

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ pub fn has_extension(href: &str) -> bool {
2020
}
2121

2222
#[cfg(feature = "geoparquet")]
23-
pub use has_feature::{from_reader, to_writer};
23+
pub use has_feature::{from_reader, to_writer, to_writer_with_options};
2424

2525
#[cfg(feature = "geoparquet")]
2626
mod has_feature {
2727
use crate::{Error, ItemCollection, Result, Value};
28-
use geoarrow::io::parquet::GeoParquetRecordBatchReaderBuilder;
28+
use geoarrow::io::parquet::{GeoParquetRecordBatchReaderBuilder, GeoParquetWriterOptions};
2929
use parquet::file::reader::ChunkReader;
3030
use std::io::Write;
3131

@@ -46,6 +46,35 @@ mod has_feature {
4646
/// stac::geoparquet::to_writer(&mut cursor, item.into()).unwrap();
4747
/// ```
4848
pub fn to_writer<W>(writer: W, value: Value) -> Result<()>
49+
where
50+
W: Write + Send,
51+
{
52+
to_writer_with_options(writer, value, &Default::default())
53+
}
54+
55+
/// Writes a [Value] to a [std::io::Write] as
56+
/// [stac-geoparquet](https://github.com/stac-utils/stac-geoparquet) with the provided options.
57+
///
58+
/// # Examples
59+
///
60+
/// ```
61+
/// use std::io::Cursor;
62+
/// use stac::Item;
63+
/// use geoarrow::io::parquet::GeoParquetWriterOptions;
64+
/// use parquet::{basic::Compression, file::properties::WriterProperties};
65+
///
66+
/// let item: Item = stac::read("examples/simple-item.json").unwrap();
67+
/// let mut cursor = Cursor::new(Vec::new());
68+
/// let mut options = GeoParquetWriterOptions::default();
69+
/// let writer_properties = WriterProperties::builder().set_compression(Compression::SNAPPY).build();
70+
/// options.writer_properties = Some(writer_properties);
71+
/// stac::geoparquet::to_writer_with_options(&mut cursor, item.into(), &options).unwrap();
72+
/// ```
73+
pub fn to_writer_with_options<W>(
74+
writer: W,
75+
value: Value,
76+
options: &GeoParquetWriterOptions,
77+
) -> Result<()>
4978
where
5079
W: Write + Send,
5180
{
@@ -55,7 +84,7 @@ mod has_feature {
5584
geoarrow::io::parquet::write_geoparquet(
5685
table.into_record_batch_reader(),
5786
writer,
58-
&Default::default(),
87+
options,
5988
)
6089
.map_err(Error::from)
6190
}

0 commit comments

Comments
 (0)