@@ -1,7 +1,7 @@
 //! Basic DuckLake query example with snapshot isolation
 //!
 //! This example demonstrates how to:
-//! 1. Create a DuckLake catalog from a DuckDB catalog file
+//! 1. Create a DuckLake catalog from DuckDB or PostgreSQL
 //! 2. Bind the catalog to a specific snapshot for query consistency
 //! 3. Register it with DataFusion
 //! 4. Execute a simple SELECT query
@@ -16,14 +16,24 @@
 //! To query data at different points in time, create separate catalogs bound to
 //! different snapshot IDs.
 //!
-//! To run this example, you need:
-//! - A DuckDB database file with DuckLake tables
-//! - Parquet data files referenced by the catalog
+//! ## Usage
 //!
-//! Usage: cargo run --example basic_query <catalog.db> <sql>
+//! With DuckDB catalog:
+//! ```bash
+//! cargo run --example basic_query catalog.db "SELECT * FROM main.users"
+//! ```
+//!
+//! With PostgreSQL catalog (requires --features metadata-postgres):
+//! ```bash
+//! cargo run --example basic_query --features metadata-postgres \
+//!     "postgresql://user:password@localhost:5432/postgres" \
+//!     "SELECT * FROM main.users"
+//! ```

 use datafusion::execution::runtime_env::RuntimeEnv;
 use datafusion::prelude::*;
+#[cfg(feature = "metadata-postgres")]
+use datafusion_ducklake::PostgresMetadataProvider;
 use datafusion_ducklake::{
     DuckLakeCatalog, DuckdbMetadataProvider, MetadataProvider, register_ducklake_functions,
 };
@@ -38,25 +48,51 @@ use url::Url;
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let args: Vec<String> = env::args().collect();
     if args.len() < 3 {
-        eprintln!("Usage: cargo run --example basic_query catalog.db sql");
+        eprintln!("Usage:");
+        eprintln!("  DuckDB:     cargo run --example basic_query catalog.db \"SQL\"");
+        eprintln!(
+            "  PostgreSQL: cargo run --example basic_query --features metadata-postgres \"postgresql://...\" \"SQL\""
+        );
         exit(1);
     }
-    let catalog_path = &args[1];
+    let catalog_source = &args[1];
     let sql = &args[2];

-    // // Path to your DuckLake catalog database
-    // let catalog_path = "test_catalog.db";
+    // Detect provider type based on input
+    let is_postgres = catalog_source.starts_with("postgresql://");

-    println!("Connecting to DuckLake catalog: {}", catalog_path);
+    if is_postgres {
+        #[cfg(not(feature = "metadata-postgres"))]
+        {
+            eprintln!("Error: PostgreSQL support requires the 'metadata-postgres' feature");
+            eprintln!("Run with: cargo run --example basic_query --features metadata-postgres");
+            exit(1);
+        }

-    // Create the metadata provider
-    let provider = Arc::new(DuckdbMetadataProvider::new(catalog_path)?);
+        #[cfg(feature = "metadata-postgres")]
+        {
+            println!("Connecting to PostgreSQL catalog: {}", catalog_source);
+            let provider = Arc::new(PostgresMetadataProvider::new(catalog_source).await?);
+            let snapshot_id = provider.get_current_snapshot()?;
+            println!("Current snapshot ID: {}", snapshot_id);
+            run_query(provider, snapshot_id, sql).await?;
+        }
+    } else {
+        println!("Connecting to DuckDB catalog: {}", catalog_source);
+        let provider = Arc::new(DuckdbMetadataProvider::new(catalog_source)?);
+        let snapshot_id = provider.get_current_snapshot()?;
+        println!("Current snapshot ID: {}", snapshot_id);
+        run_query(provider, snapshot_id, sql).await?;
+    }

-    // Get the current snapshot ID
-    // This ensures query consistency - all metadata lookups will use this snapshot
-    let snapshot_id = provider.get_current_snapshot()?;
-    println!("Current snapshot ID: {}", snapshot_id);
+    Ok(())
+}

+async fn run_query(
+    provider: Arc<dyn MetadataProvider>,
+    snapshot_id: i64,
+    sql: &str,
+) -> Result<(), Box<dyn std::error::Error>> {
     // Create runtime and register object stores
     // For MinIO or S3, register the object store with the runtime
     let runtime = Arc::new(RuntimeEnv::default());
@@ -77,11 +113,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Create the DuckLake catalog bound to the snapshot
     // This ensures all queries through this catalog see consistent data
     // from this specific snapshot, even if the underlying data changes
-    let ducklake_catalog = DuckLakeCatalog::with_snapshot(provider, snapshot_id)?;
-
-    // Alternative: Use the backward-compatible constructor that automatically
-    // binds to the current snapshot:
-    // let ducklake_catalog = DuckLakeCatalog::new(DuckdbMetadataProvider::new(catalog_path)?)?;
+    let ducklake_catalog = DuckLakeCatalog::with_snapshot(provider.clone(), snapshot_id)?;

     println!("✓ Connected to DuckLake catalog");

@@ -90,9 +122,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Create DataFusion session context
     let ctx = SessionContext::new_with_config_rt(config, runtime.clone());

-    // Get the provider before moving the catalog
-    let provider = ducklake_catalog.provider();
-
     // Register the DuckLake catalog (standard DataFusion pattern)
     ctx.register_catalog("ducklake", Arc::new(ducklake_catalog));

@@ -121,17 +150,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     }

-    // Example query (adjust schema and table names to match your data)
-    // Uncomment and modify this once you have actual DuckLake data:
-
+    // Execute the query
     println!("\nExecuting query...");
     let df = ctx.sql(sql).await?;

     // Show the query results
     df.show().await?;

     println!("\n✓ Example completed successfully!");
-    println!("\nTo run a query, create a DuckLake database and uncomment the query section.");

     Ok(())
 }
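The doc comment notes that querying data at different points in time is done by binding separate catalogs to different snapshot IDs. A minimal sketch of that pattern, using only the APIs visible in this diff (`DuckdbMetadataProvider::new`, `get_current_snapshot`, `DuckLakeCatalog::with_snapshot`, `register_catalog`); the catalog path, catalog names, and the older snapshot ID `42` are placeholders, not values from a real catalog:

```rust
use std::sync::Arc;

use datafusion::prelude::SessionContext;
use datafusion_ducklake::{DuckLakeCatalog, DuckdbMetadataProvider, MetadataProvider};

// Sketch: register the same DuckLake catalog twice, bound to two different snapshots,
// so queries can compare "now" against an earlier point in time.
fn register_two_snapshots(ctx: &SessionContext) -> Result<(), Box<dyn std::error::Error>> {
    let provider = Arc::new(DuckdbMetadataProvider::new("catalog.db")?);

    // Latest snapshot: whatever the catalog currently points at.
    let current = provider.get_current_snapshot()?;
    ctx.register_catalog(
        "ducklake_now",
        Arc::new(DuckLakeCatalog::with_snapshot(provider.clone(), current)?),
    );

    // Earlier snapshot: 42 is a placeholder ID; use a real snapshot from your catalog.
    ctx.register_catalog(
        "ducklake_v42",
        Arc::new(DuckLakeCatalog::with_snapshot(provider, 42)?),
    );

    // Queries can now address either version, e.g.
    //   SELECT count(*) FROM ducklake_now.main.users;
    //   SELECT count(*) FROM ducklake_v42.main.users;
    Ok(())
}
```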
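The comment in `run_query` ("For MinIO or S3, register the object store with the runtime") is not expanded in the visible hunks. A hedged sketch of what that registration typically looks like with the `object_store` crate's S3 builder and `RuntimeEnv::register_object_store`; the endpoint, bucket name, and credentials are placeholders:

```rust
use std::sync::Arc;

use datafusion::execution::runtime_env::RuntimeEnv;
use object_store::aws::AmazonS3Builder;
use url::Url;

// Sketch: attach a MinIO/S3 bucket to the RuntimeEnv so that s3:// data paths
// referenced by the DuckLake catalog can be read. All values below are placeholders.
fn register_minio(runtime: &RuntimeEnv) -> Result<(), Box<dyn std::error::Error>> {
    let store = AmazonS3Builder::new()
        .with_endpoint("http://localhost:9000") // MinIO endpoint
        .with_bucket_name("ducklake")           // bucket referenced by the catalog
        .with_access_key_id("minioadmin")
        .with_secret_access_key("minioadmin")
        .with_allow_http(true)                  // MinIO without TLS
        .build()?;

    // All s3://ducklake/... URLs resolved by DataFusion now go through this store.
    runtime.register_object_store(&Url::parse("s3://ducklake")?, Arc::new(store));
    Ok(())
}
```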