hotdata-dev
diff --git a/‎rust-toolchain.toml‎
Lines changed: 3 additions & 0 deletions b/‎rust-toolchain.toml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎rustfmt.toml‎
Lines changed: 18 additions & 0 deletions b/‎rustfmt.toml‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎src/catalog.rs‎
Lines changed: 3 additions & 3 deletions b/‎src/catalog.rs‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/delete_filter.rs‎
Lines changed: 23 additions & 39 deletions b/‎src/delete_filter.rs‎
Lines changed: 23 additions & 39 deletions
diff --git a/‎src/metadata_provider.rs‎
Lines changed: 12 additions & 4 deletions b/‎src/metadata_provider.rs‎
Lines changed: 12 additions & 4 deletions
diff --git a/‎src/metadata_provider_duckdb.rs‎
Lines changed: 52 additions & 42 deletions b/‎src/metadata_provider_duckdb.rs‎
Lines changed: 52 additions & 42 deletions
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "stable"
+components = ["rustfmt", "clippy"]
@@ -0,0 +1,18 @@
+# Rustfmt configuration for datafusion-ducklake
+# Some features require nightly: cargo +nightly fmt
+
+# Set max width for the whole line
+max_width = 100
+
+# Function call style - prefer vertical layout for readability
+fn_call_width = 60
+
+# Use small item threshold to prefer vertical layout
+# "Off" = more aggressive vertical formatting
+use_small_heuristics = "Off"
+
+# Chain width - when to break method chains
+chain_width = 60
+
+# Add a trailing comma after block-bodied match arms (`}` becomes `},`)
+match_block_trailing_comma = true
@@ -3,10 +3,10 @@
 use std::any::Any;
 use std::sync::Arc;
 
+use crate::Result;
 use crate::metadata_provider::MetadataProvider;
 use crate::path_resolver::parse_object_store_url;
 use crate::schema::DuckLakeSchema;
-use crate::Result;
 use datafusion::catalog::{CatalogProvider, SchemaProvider};
 use datafusion::datasource::object_store::ObjectStoreUrl;
 
@@ -88,11 +88,11 @@ impl CatalogProvider for DuckLakeCatalog {
                     meta.schema_id,
                     meta.schema_name,
                     Arc::clone(&self.provider),
-                    snapshot_id,  // Propagate snapshot_id
+                    snapshot_id, // Propagate snapshot_id
                     self.object_store_url.clone(),
                     schema_path,
                 )) as Arc<dyn SchemaProvider>)
-            }
+            },
             _ => None,
         }
     }
 
@@ -13,9 +13,7 @@ use arrow::datatypes::SchemaRef;
 use arrow::record_batch::{RecordBatch, RecordBatchOptions};
 use datafusion::error::{DataFusionError, Result as DataFusionResult};
 use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
-use datafusion::physical_plan::{
-    DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
-};
+use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
 use futures::Stream;
 
 /// Custom execution plan that filters out deleted rows
@@ -59,15 +57,15 @@ impl DisplayAs for DeleteFilterExec {
                     self.file_path,
                     self.deleted_positions.len()
                 )
-            }
+            },
             DisplayFormatType::TreeRender => {
                 write!(
                     f,
                     "DeleteFilterExec: file={}, deletes={}",
                     self.file_path,
                     self.deleted_positions.len()
                 )
-            }
+            },
         }
     }
 }
@@ -142,10 +140,10 @@ impl Stream for DeleteFilterStream {
                         // Update row offset for next batch
                         self.row_offset += batch.num_rows() as i64;
                         Poll::Ready(Some(Ok(filtered_batch)))
-                    }
+                    },
                     Err(e) => Poll::Ready(Some(Err(e))),
                 }
-            }
+            },
             Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
             Poll::Ready(None) => Poll::Ready(None),
             Poll::Pending => Poll::Pending,
@@ -180,20 +178,15 @@ impl DeleteFilterStream {
         if batch.num_columns() == 0 {
             let mut options = RecordBatchOptions::new();
             options = options.with_row_count(Some(keep_indices.len()));
-            return RecordBatch::try_new_with_options(
-                batch.schema(),
-                vec![],
-                &options,
-            )
-            .map_err(|e| DataFusionError::ArrowError(Box::new(e), None));
+            return RecordBatch::try_new_with_options(batch.schema(), vec![], &options)
+                .map_err(|e| DataFusionError::ArrowError(Box::new(e), None));
         }
 
         // Use Arrow's take kernel to select rows
         use arrow::array::UInt32Array;
         use arrow::compute::take;
 
-        let indices =
-            UInt32Array::from(keep_indices.iter().map(|&i| i as u32).collect::<Vec<_>>());
+        let indices = UInt32Array::from(keep_indices.iter().map(|&i| i as u32).collect::<Vec<_>>());
 
         let filtered_columns: DataFusionResult<Vec<_>> = batch
             .columns()
@@ -225,16 +218,12 @@ mod tests {
     #[test]
     fn test_filter_batch_ignores_out_of_bounds_positions() {
         // Create a simple RecordBatch with 4 rows
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-        ]));
+        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
 
         let id_array = Int32Array::from(vec![1, 2, 3, 4]);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(id_array) as Arc<dyn Array>],
-        )
-        .unwrap();
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(id_array) as Arc<dyn Array>])
+                .unwrap();
 
         // Create delete positions: 1 (valid), 1000, 2000, 5000 (all out of bounds)
         // Only position 1 should actually delete a row (the row with id=2)
@@ -275,16 +264,12 @@ mod tests {
     #[test]
     fn test_filter_batch_all_out_of_bounds_positions() {
         // Test the edge case where ALL delete positions are beyond the file
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-        ]));
+        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
 
         let id_array = Int32Array::from(vec![10, 20, 30]);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(id_array) as Arc<dyn Array>],
-        )
-        .unwrap();
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(id_array) as Arc<dyn Array>])
+                .unwrap();
 
         // All positions are way beyond the 3-row file
         let deleted_positions: HashSet<i64> = [1000, 2000, 3000, 9999].into_iter().collect();
@@ -317,16 +302,15 @@ mod tests {
     #[test]
     fn test_filter_batch_with_row_offset() {
         // Test that row_offset is correctly considered when checking positions
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("value", DataType::Int32, false),
-        ]));
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "value",
+            DataType::Int32,
+            false,
+        )]));
 
         let array = Int32Array::from(vec![100, 200, 300, 400]);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(array) as Arc<dyn Array>],
-        )
-        .unwrap();
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(array) as Arc<dyn Array>]).unwrap();
 
         // Delete position 11 and 1000 (way out of bounds)
         // With row_offset=10, this batch contains global positions [10, 11, 12, 13]
 
@@ -57,8 +57,7 @@ pub const SQL_GET_TABLE_BY_NAME: &str =
        AND ? >= begin_snapshot
        AND (? < end_snapshot OR end_snapshot IS NULL)";
 
-pub const SQL_TABLE_EXISTS: &str =
-    "SELECT EXISTS(
+pub const SQL_TABLE_EXISTS: &str = "SELECT EXISTS(
        SELECT 1 FROM ducklake_table
        WHERE schema_id = ?
          AND table_name = ?
@@ -184,7 +183,11 @@ pub trait MetadataProvider: Send + Sync + std::fmt::Debug {
     fn get_table_structure(&self, table_id: i64) -> Result<Vec<DuckLakeTableColumn>>;
 
     /// Get table files for a specific snapshot
-    fn get_table_files_for_select(&self, table_id: i64, snapshot_id: i64) -> Result<Vec<DuckLakeTableFile>>;
+    fn get_table_files_for_select(
+        &self,
+        table_id: i64,
+        snapshot_id: i64,
+    ) -> Result<Vec<DuckLakeTableFile>>;
     //     todo: support select with file pruning
 
     // Dynamic lookup methods for on-demand metadata retrieval
@@ -193,7 +196,12 @@ pub trait MetadataProvider: Send + Sync + std::fmt::Debug {
     fn get_schema_by_name(&self, name: &str, snapshot_id: i64) -> Result<Option<SchemaMetadata>>;
 
     /// Get table by name for a specific snapshot
-    fn get_table_by_name(&self, schema_id: i64, name: &str, snapshot_id: i64) -> Result<Option<TableMetadata>>;
+    fn get_table_by_name(
+        &self,
+        schema_id: i64,
+        name: &str,
+        snapshot_id: i64,
+    ) -> Result<Option<TableMetadata>>;
 
     /// Check if table exists for a specific snapshot
     fn table_exists(&self, schema_id: i64, name: &str, snapshot_id: i64) -> Result<bool>;
 
@@ -6,7 +6,7 @@ use crate::metadata_provider::{
     TableMetadata,
 };
 use duckdb::AccessMode::ReadOnly;
-use duckdb::{params, Config, Connection};
+use duckdb::{Config, Connection, params};
 
 /// DuckDB metadata provider
 ///
@@ -49,11 +49,11 @@ impl DuckdbMetadataProvider {
                     "DuckDB file likely already open in write mode. Cannot connect"
                 );
                 Err(DuckLakeError::DuckDb(msg))
-            }
+            },
             Err(msg) => {
                 tracing::error!(error = %msg, "Failed to open DuckDB catalog");
                 Err(DuckLakeError::DuckDb(msg))
-            }
+            },
         }
     }
 }
@@ -135,54 +135,59 @@ impl MetadataProvider for DuckdbMetadataProvider {
         Ok(columns)
     }
 
-    fn get_table_files_for_select(&self, table_id: i64, snapshot_id: i64) -> crate::Result<Vec<DuckLakeTableFile>> {
+    fn get_table_files_for_select(
+        &self,
+        table_id: i64,
+        snapshot_id: i64,
+    ) -> crate::Result<Vec<DuckLakeTableFile>> {
         let conn = self.open_connection()?;
         let mut stmt = conn.prepare(SQL_GET_DATA_FILES)?;
 
         let files = stmt
-            .query_map(
-                [table_id, snapshot_id, snapshot_id, table_id],
-                |row| {
-                    // Parse data file (columns 0-4)
-                    let _data_file_id: i64 = row.get(0)?;
-                    let data_file = DuckLakeFileData {
-                        path: row.get(1)?,
-                        path_is_relative: row.get(2)?,
-                        file_size_bytes: row.get(3)?,
-                        footer_size: row.get(4)?,
-                        encryption_key: String::new(), // TODO: handle encryption
-                    };
-
-                    // Parse delete file (columns 5-10) if exists
-                    let delete_file = if let Ok(Some(_)) = row.get::<_, Option<i64>>(5) {
-                        Some(DuckLakeFileData {
-                            path: row.get(6)?,
-                            path_is_relative: row.get(7)?,
-                            file_size_bytes: row.get(8)?,
-                            footer_size: row.get(9)?,
-                            encryption_key: String::new(),
-                        })
-                    } else {
-                        None
-                    };
-
-                    let _delete_count: Option<i64> = row.get(10)?;
-
-                    Ok(DuckLakeTableFile {
-                        file: data_file,
-                        delete_file,
-                        row_id_start: None,
-                        snapshot_id: Some(snapshot_id),
-                        max_row_count: None,  // Set to None until we have actual row count from data file metadata
+            .query_map([table_id, snapshot_id, snapshot_id, table_id], |row| {
+                // Parse data file (columns 0-4)
+                let _data_file_id: i64 = row.get(0)?;
+                let data_file = DuckLakeFileData {
+                    path: row.get(1)?,
+                    path_is_relative: row.get(2)?,
+                    file_size_bytes: row.get(3)?,
+                    footer_size: row.get(4)?,
+                    encryption_key: String::new(), // TODO: handle encryption
+                };
+
+                // Parse delete file (columns 5-10) if exists
+                let delete_file = if let Ok(Some(_)) = row.get::<_, Option<i64>>(5) {
+                    Some(DuckLakeFileData {
+                        path: row.get(6)?,
+                        path_is_relative: row.get(7)?,
+                        file_size_bytes: row.get(8)?,
+                        footer_size: row.get(9)?,
+                        encryption_key: String::new(),
                     })
-                },
-            )?
+                } else {
+                    None
+                };
+
+                let _delete_count: Option<i64> = row.get(10)?;
+
+                Ok(DuckLakeTableFile {
+                    file: data_file,
+                    delete_file,
+                    row_id_start: None,
+                    snapshot_id: Some(snapshot_id),
+                    max_row_count: None, // Set to None until we have actual row count from data file metadata
+                })
+            })?
             .collect::<Result<Vec<_>, _>>()?;
 
         Ok(files)
     }
 
-    fn get_schema_by_name(&self, name: &str, snapshot_id: i64) -> crate::Result<Option<SchemaMetadata>> {
+    fn get_schema_by_name(
+        &self,
+        name: &str,
+        snapshot_id: i64,
+    ) -> crate::Result<Option<SchemaMetadata>> {
         let conn = self.open_connection()?;
         let mut stmt = conn.prepare(SQL_GET_SCHEMA_BY_NAME)?;
 
@@ -204,7 +209,12 @@ impl MetadataProvider for DuckdbMetadataProvider {
         }
     }
 
-    fn get_table_by_name(&self, schema_id: i64, name: &str, snapshot_id: i64) -> crate::Result<Option<TableMetadata>> {
+    fn get_table_by_name(
+        &self,
+        schema_id: i64,
+        name: &str,
+        snapshot_id: i64,
+    ) -> crate::Result<Option<TableMetadata>> {
         let conn = self.open_connection()?;
         let mut stmt = conn.prepare(SQL_GET_TABLE_BY_NAME)?;
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[toolchain]`
	`2`	`+channel = "stable"`
	`3`	`+components = ["rustfmt", "clippy"]`