From 144b9aa34f1aaf0873679570df0494db31cd3f9f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Sun, 16 Feb 2025 12:03:11 -0700 Subject: [PATCH 1/6] feat: add config args to duckdb client --- Cargo.lock | 95 +++++++++++++++++--------------------------- src/duckdb.rs | 11 +++-- stacrs.pyi | 12 ++++++ tests/test_duckdb.py | 4 ++ 4 files changed, 61 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a86a83b..6662d10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -565,9 +565,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.12" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" +checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" dependencies = [ "jobserver", "libc", @@ -624,12 +624,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.3" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "unicode-segmentation", "unicode-width", ] @@ -762,9 +761,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -825,7 +824,7 @@ dependencies = [ "num-integer", "rust_decimal", "smallvec", - "strum 0.25.0", + "strum", ] [[package]] @@ -858,9 +857,9 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" @@ -1814,9 +1813,9 @@ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libduckdb-sys" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac2de5219db852597558df5dcd617ffccd5cbd7b9f5402ccbf899aca6cb6047" +checksum = "dc4020eaf07df4927b5205cd200ca2a5ed0798b49652dec22e09384ba8efa163" dependencies = [ "autocfg", "cc", @@ -1915,9 +1914,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" dependencies = [ "adler2", ] @@ -2503,9 +2502,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" +checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" dependencies = [ "cfg_aliases", "libc", @@ -2696,15 +2695,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" dependencies = [ "cc", "cfg-if", "getrandom", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -2807,9 +2805,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.22" +version = "0.23.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" dependencies = [ "once_cell", "ring", @@ -3038,9 +3036,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "snafu" @@ -3091,12 +3089,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -3106,7 +3098,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stac" version = "0.12.0" -source = "git+https://github.com/stac-utils/stac-rs#53ec8703d4cafc9284efa0bb7cda3f291be3a3fa" +source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" dependencies = [ "arrow-array", "arrow-cast", @@ -3134,7 +3126,7 @@ dependencies = [ [[package]] name = "stac-api" version = "0.7.1" -source = "git+https://github.com/stac-utils/stac-rs#53ec8703d4cafc9284efa0bb7cda3f291be3a3fa" +source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" dependencies = [ "async-stream", "chrono", @@ -3159,7 +3151,7 @@ dependencies = [ [[package]] name = "stac-derive" version = "0.2.0" -source = "git+https://github.com/stac-utils/stac-rs#53ec8703d4cafc9284efa0bb7cda3f291be3a3fa" +source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" dependencies = [ "quote", "syn 2.0.98", @@ -3168,7 +3160,7 @@ dependencies = [ [[package]] name = "stac-duckdb" version = "0.1.1" -source = "git+https://github.com/stac-utils/stac-rs#53ec8703d4cafc9284efa0bb7cda3f291be3a3fa" +source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" dependencies = [ "arrow", "chrono", @@ -3213,15 +3205,9 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.3", + "strum_macros", ] -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum_macros" version = "0.25.3" @@ -3235,19 +3221,6 @@ dependencies = [ "syn 2.0.98", ] -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.98", -] - [[package]] name = "subtle" version = "2.6.1" @@ -3481,9 +3454,9 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.22.23" +version = "0.22.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" +checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" dependencies = [ "indexmap", "toml_datetime", @@ -3582,6 +3555,12 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.2.0" @@ -3912,9 +3891,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86e376c75f4f43f44db463cf729e0d3acbf954d13e22c51e26e4c264b4ab545f" +checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603" dependencies = [ "memchr", ] diff --git a/src/duckdb.rs b/src/duckdb.rs index 63c9c7d..6221249 100644 --- a/src/duckdb.rs +++ b/src/duckdb.rs @@ -5,7 +5,7 @@ use pyo3::{ types::{PyDict, PyList}, }; use stac_api::python::{StringOrDict, StringOrList}; -use stac_duckdb::Client; +use stac_duckdb::{Client, Config}; use std::sync::Mutex; #[pyclass(frozen)] @@ -14,8 +14,13 @@ pub struct DuckdbClient(Mutex); #[pymethods] impl DuckdbClient { #[new] - fn new() -> Result { - let client = Client::new()?; + #[pyo3(signature = (use_s3_credential_chain=true, use_hive_partitioning=false))] + fn new(use_s3_credential_chain: bool, use_hive_partitioning: bool) -> Result { + let config = Config { + use_s3_credential_chain, + use_hive_partitioning, + }; + let client = Client::with_config(config)?; Ok(DuckdbClient(Mutex::new(client))) } diff --git a/stacrs.pyi b/stacrs.pyi index 8f81b4b..6fde95b 100644 --- a/stacrs.pyi +++ b/stacrs.pyi @@ -3,6 +3,18 @@ from typing import Any, Optional, Tuple class DuckdbClient: """A client for querying stac-geoparquet with DuckDB.""" + def __init__( + self, use_s3_credential_chain: bool = True, use_hive_partitioning: bool = False + ) -> None: + """Creates a new duckdb client. + + Args: + use_s3_credential_chain: If true, configures DuckDB to correctly + handle s3:// urls. + use_hive_partitioning: If true, enables queries on hive partitioned + geoparquet files. + """ + def search( self, href: str, diff --git a/tests/test_duckdb.py b/tests/test_duckdb.py index 3173e8e..9381dc0 100644 --- a/tests/test_duckdb.py +++ b/tests/test_duckdb.py @@ -33,3 +33,7 @@ def test_search_offset(client: DuckdbClient) -> None: def test_get_collections(client: DuckdbClient) -> None: collections = client.get_collections("data/100-sentinel-2-items.parquet") assert len(collections) == 1 + + +def test_init_with_config() -> None: + DuckdbClient(use_s3_credential_chain=True, use_hive_partitioning=True) From 43d0c2db5010920ec446cf2fe8aa33a6b428929a Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Sun, 16 Feb 2025 20:10:18 -0500 Subject: [PATCH 2/6] fix: pin duckdb --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 86e018b..ae66814 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ default = ["duckdb-bundled"] duckdb-bundled = ["duckdb/bundled"] [dependencies] -duckdb = { version = "1.1.1" } +duckdb = { version = "=1.1.1" } geojson = "0.24.1" pyo3 = { version = "0.23.4", features = ["abi3-py310"] } pyo3-async-runtimes = { version = "0.23.0", features = [ From 45ef410a9a6eaac3edce4a51e07d1bddf6ad5c3d Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Sun, 16 Feb 2025 20:30:14 -0500 Subject: [PATCH 3/6] feat: separate out build from sync --- .github/workflows/ci.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 64c4ba2..805008b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -27,7 +27,9 @@ jobs: - uses: astral-sh/setup-uv@v3 - uses: Swatinem/rust-cache@v2 - name: Sync - run: uv sync + run: uv sync --no-install-project + - name: Build directly with maturin + run: maturin dev --uv - name: Lint run: scripts/lint - name: Test From 4164cb5804a25066caa8617e72d0b08775e8678f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Sun, 16 Feb 2025 20:32:14 -0500 Subject: [PATCH 4/6] fix: update stac-rs dep --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6662d10..4e51d8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3098,7 +3098,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stac" version = "0.12.0" -source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" +source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" dependencies = [ "arrow-array", "arrow-cast", @@ -3126,7 +3126,7 @@ dependencies = [ [[package]] name = "stac-api" version = "0.7.1" -source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" +source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" dependencies = [ "async-stream", "chrono", @@ -3151,7 +3151,7 @@ dependencies = [ [[package]] name = "stac-derive" version = "0.2.0" -source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" +source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" dependencies = [ "quote", "syn 2.0.98", @@ -3160,7 +3160,7 @@ dependencies = [ [[package]] name = "stac-duckdb" version = "0.1.1" -source = "git+https://github.com/stac-utils/stac-rs#c54d559a26b77c1b4f01898be167fa5b5f88e864" +source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" dependencies = [ "arrow", "chrono", From 6557240c57247d0d71430ff138ba11add1d2ba1e Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Sun, 16 Feb 2025 20:33:55 -0500 Subject: [PATCH 5/6] fix: uv run --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 805008b..b4529bc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,7 +29,7 @@ jobs: - name: Sync run: uv sync --no-install-project - name: Build directly with maturin - run: maturin dev --uv + run: uv run maturin dev --uv - name: Lint run: scripts/lint - name: Test From df768dcb7e43989b2e32ecb0538a9358d230211e Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Sun, 16 Feb 2025 20:56:57 -0500 Subject: [PATCH 6/6] fix: try forcing libduckdb-sys too --- Cargo.lock | 13 +++++++------ Cargo.toml | 1 + src/lib.rs | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4e51d8d..36fe584 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1813,9 +1813,9 @@ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libduckdb-sys" -version = "1.2.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4020eaf07df4927b5205cd200ca2a5ed0798b49652dec22e09384ba8efa163" +checksum = "eac2de5219db852597558df5dcd617ffccd5cbd7b9f5402ccbf899aca6cb6047" dependencies = [ "autocfg", "cc", @@ -3098,7 +3098,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stac" version = "0.12.0" -source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" +source = "git+https://github.com/stac-utils/stac-rs#bc89452934d68a93f8772e9fc7492aef5fc0c70f" dependencies = [ "arrow-array", "arrow-cast", @@ -3126,7 +3126,7 @@ dependencies = [ [[package]] name = "stac-api" version = "0.7.1" -source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" +source = "git+https://github.com/stac-utils/stac-rs#bc89452934d68a93f8772e9fc7492aef5fc0c70f" dependencies = [ "async-stream", "chrono", @@ -3151,7 +3151,7 @@ dependencies = [ [[package]] name = "stac-derive" version = "0.2.0" -source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" +source = "git+https://github.com/stac-utils/stac-rs#bc89452934d68a93f8772e9fc7492aef5fc0c70f" dependencies = [ "quote", "syn 2.0.98", @@ -3160,7 +3160,7 @@ dependencies = [ [[package]] name = "stac-duckdb" version = "0.1.1" -source = "git+https://github.com/stac-utils/stac-rs#ea6efd1b9b64e2dcbf97777126a1f62e6cbc7b48" +source = "git+https://github.com/stac-utils/stac-rs#bc89452934d68a93f8772e9fc7492aef5fc0c70f" dependencies = [ "arrow", "chrono", @@ -3181,6 +3181,7 @@ version = "0.5.3" dependencies = [ "duckdb", "geojson", + "libduckdb-sys", "pyo3", "pyo3-async-runtimes", "pyo3-log", diff --git a/Cargo.toml b/Cargo.toml index ae66814..6ea83f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ duckdb-bundled = ["duckdb/bundled"] [dependencies] duckdb = { version = "=1.1.1" } +libduckdb-sys = { version = "=1.1.1" } geojson = "0.24.1" pyo3 = { version = "0.23.4", features = ["abi3-py310"] } pyo3-async-runtimes = { version = "0.23.0", features = [ diff --git a/src/lib.rs b/src/lib.rs index cfc0db4..09edb8b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ mod write; use ::duckdb as _; use error::Error; +use libduckdb_sys as _; use pyo3::prelude::*; type Result = std::result::Result;