Commit d9ac176

apply cargo fmt
1 parent 3baece9 commit d9ac176

13 files changed (+383, -259 lines)

rust-toolchain.toml

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "stable"
+components = ["rustfmt", "clippy"]

rustfmt.toml

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+# Rustfmt configuration for datafusion-ducklake
+# Some features require nightly: cargo +nightly fmt
+
+# Set max width for the whole line
+max_width = 100
+
+# Function call style - prefer vertical layout for readability
+fn_call_width = 60
+
+# Use small item threshold to prefer vertical layout
+# "Off" = more aggressive vertical formatting
+use_small_heuristics = "Off"
+
+# Chain width - when to break method chains
+chain_width = 60
+
+# Preserve vertical spacing in match arms
+match_block_trailing_comma = true
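
To make the intent of these settings concrete, here is a small hypothetical snippet (not taken from this repository) written by hand in roughly the shape this configuration favors. With use_small_heuristics = "Off" and chain_width = 60, rustfmt keeps method chains vertical, and match_block_trailing_comma = true adds a comma after block-bodied match arms, which is what most of the hunks below boil down to. Exact output can vary by rustfmt version.

// Illustrative only: a made-up function, formatted in the style this
// rustfmt configuration tends to produce.
fn describe(len: usize) -> String {
    let label = match len {
        0 => "empty".to_string(),
        n if n > 60 => {
            // Block-bodied arm: match_block_trailing_comma adds the comma below.
            format!("long ({n} bytes)")
        },
        _ => "short".to_string(),
    };

    // With use_small_heuristics = "Off", chains like this stay vertical.
    label
        .chars()
        .filter(|c| c.is_ascii_alphanumeric())
        .collect::<String>()
}

fn main() {
    assert_eq!(describe(0), "empty");
    assert_eq!(describe(100), "long100bytes");
}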

src/catalog.rs

Lines changed: 3 additions & 3 deletions
@@ -3,10 +3,10 @@
 use std::any::Any;
 use std::sync::Arc;
 
+use crate::Result;
 use crate::metadata_provider::MetadataProvider;
 use crate::path_resolver::parse_object_store_url;
 use crate::schema::DuckLakeSchema;
-use crate::Result;
 use datafusion::catalog::{CatalogProvider, SchemaProvider};
 use datafusion::datasource::object_store::ObjectStoreUrl;
 
@@ -88,11 +88,11 @@ impl CatalogProvider for DuckLakeCatalog {
                    meta.schema_id,
                    meta.schema_name,
                    Arc::clone(&self.provider),
-                   snapshot_id, // Propagate snapshot_id
+                   snapshot_id, // Propagate snapshot_id
                    self.object_store_url.clone(),
                    schema_path,
                )) as Arc<dyn SchemaProvider>)
-            }
+            },
             _ => None,
         }
     }

src/delete_filter.rs

Lines changed: 23 additions & 39 deletions
@@ -13,9 +13,7 @@ use arrow::datatypes::SchemaRef;
 use arrow::record_batch::{RecordBatch, RecordBatchOptions};
 use datafusion::error::{DataFusionError, Result as DataFusionResult};
 use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
-use datafusion::physical_plan::{
-    DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
-};
+use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
 use futures::Stream;
 
 /// Custom execution plan that filters out deleted rows
@@ -59,15 +57,15 @@ impl DisplayAs for DeleteFilterExec {
                    self.file_path,
                    self.deleted_positions.len()
                )
-            }
+            },
             DisplayFormatType::TreeRender => {
                 write!(
                     f,
                     "DeleteFilterExec: file={}, deletes={}",
                     self.file_path,
                     self.deleted_positions.len()
                 )
-            }
+            },
         }
     }
 }
@@ -142,10 +140,10 @@ impl Stream for DeleteFilterStream {
                         // Update row offset for next batch
                         self.row_offset += batch.num_rows() as i64;
                         Poll::Ready(Some(Ok(filtered_batch)))
-                    }
+                    },
                     Err(e) => Poll::Ready(Some(Err(e))),
                 }
-            }
+            },
             Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
             Poll::Ready(None) => Poll::Ready(None),
             Poll::Pending => Poll::Pending,
@@ -180,20 +178,15 @@ impl DeleteFilterStream {
         if batch.num_columns() == 0 {
             let mut options = RecordBatchOptions::new();
             options = options.with_row_count(Some(keep_indices.len()));
-            return RecordBatch::try_new_with_options(
-                batch.schema(),
-                vec![],
-                &options,
-            )
-            .map_err(|e| DataFusionError::ArrowError(Box::new(e), None));
+            return RecordBatch::try_new_with_options(batch.schema(), vec![], &options)
+                .map_err(|e| DataFusionError::ArrowError(Box::new(e), None));
         }
 
         // Use Arrow's take kernel to select rows
         use arrow::array::UInt32Array;
         use arrow::compute::take;
 
-        let indices =
-            UInt32Array::from(keep_indices.iter().map(|&i| i as u32).collect::<Vec<_>>());
+        let indices = UInt32Array::from(keep_indices.iter().map(|&i| i as u32).collect::<Vec<_>>());
 
         let filtered_columns: DataFusionResult<Vec<_>> = batch
             .columns()
@@ -225,16 +218,12 @@ mod tests {
     #[test]
     fn test_filter_batch_ignores_out_of_bounds_positions() {
         // Create a simple RecordBatch with 4 rows
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-        ]));
+        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
 
         let id_array = Int32Array::from(vec![1, 2, 3, 4]);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(id_array) as Arc<dyn Array>],
-        )
-        .unwrap();
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(id_array) as Arc<dyn Array>])
+                .unwrap();
 
         // Create delete positions: 1 (valid), 1000, 2000, 5000 (all out of bounds)
         // Only position 1 should actually delete a row (the row with id=2)
@@ -275,16 +264,12 @@
     #[test]
     fn test_filter_batch_all_out_of_bounds_positions() {
         // Test the edge case where ALL delete positions are beyond the file
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-        ]));
+        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
 
         let id_array = Int32Array::from(vec![10, 20, 30]);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(id_array) as Arc<dyn Array>],
-        )
-        .unwrap();
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(id_array) as Arc<dyn Array>])
+                .unwrap();
 
         // All positions are way beyond the 3-row file
         let deleted_positions: HashSet<i64> = [1000, 2000, 3000, 9999].into_iter().collect();
@@ -317,16 +302,15 @@
     #[test]
     fn test_filter_batch_with_row_offset() {
         // Test that row_offset is correctly considered when checking positions
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("value", DataType::Int32, false),
-        ]));
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "value",
+            DataType::Int32,
+            false,
+        )]));
 
         let array = Int32Array::from(vec![100, 200, 300, 400]);
-        let batch = RecordBatch::try_new(
-            schema.clone(),
-            vec![Arc::new(array) as Arc<dyn Array>],
-        )
-        .unwrap();
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(array) as Arc<dyn Array>]).unwrap();
 
         // Delete position 11 and 1000 (way out of bounds)
         // With row_offset=10, this batch contains global positions [10, 11, 12, 13]
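
As a side note for readers following the filter_batch changes above: the reflowed code builds a UInt32Array of row indices and applies arrow's take kernel to every column of the batch. The standalone sketch below shows that pattern in isolation; it is not code from this repository, and the keep_rows helper name is invented for illustration.

use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, UInt32Array};
use arrow::compute::take;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

/// Keep only the rows at `keep_indices`, applying the take kernel per column.
fn keep_rows(batch: &RecordBatch, keep_indices: &[u32]) -> Result<RecordBatch, ArrowError> {
    let indices = UInt32Array::from(keep_indices.to_vec());
    let columns = batch
        .columns()
        .iter()
        .map(|col| take(col.as_ref(), &indices, None))
        .collect::<Result<Vec<ArrayRef>, ArrowError>>()?;
    RecordBatch::try_new(batch.schema(), columns)
}

fn main() -> Result<(), ArrowError> {
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let ids = Int32Array::from(vec![1, 2, 3, 4]);
    let batch = RecordBatch::try_new(schema, vec![Arc::new(ids) as ArrayRef])?;

    // Drop the row at position 1 (the value 2), keeping positions 0, 2 and 3.
    let filtered = keep_rows(&batch, &[0, 2, 3])?;
    assert_eq!(filtered.num_rows(), 3);
    Ok(())
}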

src/metadata_provider.rs

Lines changed: 12 additions & 4 deletions
@@ -57,8 +57,7 @@ pub const SQL_GET_TABLE_BY_NAME: &str =
     AND ? >= begin_snapshot
     AND (? < end_snapshot OR end_snapshot IS NULL)";
 
-pub const SQL_TABLE_EXISTS: &str =
-    "SELECT EXISTS(
+pub const SQL_TABLE_EXISTS: &str = "SELECT EXISTS(
     SELECT 1 FROM ducklake_table
     WHERE schema_id = ?
     AND table_name = ?
@@ -184,7 +183,11 @@ pub trait MetadataProvider: Send + Sync + std::fmt::Debug {
     fn get_table_structure(&self, table_id: i64) -> Result<Vec<DuckLakeTableColumn>>;
 
     /// Get table files for a specific snapshot
-    fn get_table_files_for_select(&self, table_id: i64, snapshot_id: i64) -> Result<Vec<DuckLakeTableFile>>;
+    fn get_table_files_for_select(
+        &self,
+        table_id: i64,
+        snapshot_id: i64,
+    ) -> Result<Vec<DuckLakeTableFile>>;
     // todo: support select with file pruning
 
     // Dynamic lookup methods for on-demand metadata retrieval
@@ -193,7 +196,12 @@ pub trait MetadataProvider: Send + Sync + std::fmt::Debug {
     fn get_schema_by_name(&self, name: &str, snapshot_id: i64) -> Result<Option<SchemaMetadata>>;
 
     /// Get table by name for a specific snapshot
-    fn get_table_by_name(&self, schema_id: i64, name: &str, snapshot_id: i64) -> Result<Option<TableMetadata>>;
+    fn get_table_by_name(
+        &self,
+        schema_id: i64,
+        name: &str,
+        snapshot_id: i64,
+    ) -> Result<Option<TableMetadata>>;
 
     /// Check if table exists for a specific snapshot
     fn table_exists(&self, schema_id: i64, name: &str, snapshot_id: i64) -> Result<bool>;

src/metadata_provider_duckdb.rs

Lines changed: 52 additions & 42 deletions
@@ -6,7 +6,7 @@ use crate::metadata_provider::{
     TableMetadata,
 };
 use duckdb::AccessMode::ReadOnly;
-use duckdb::{params, Config, Connection};
+use duckdb::{Config, Connection, params};
 
 /// DuckDB metadata provider
 ///
@@ -49,11 +49,11 @@ impl DuckdbMetadataProvider {
                    "DuckDB file likely already open in write mode. Cannot connect"
                );
                Err(DuckLakeError::DuckDb(msg))
-            }
+            },
             Err(msg) => {
                 tracing::error!(error = %msg, "Failed to open DuckDB catalog");
                 Err(DuckLakeError::DuckDb(msg))
-            }
+            },
         }
     }
 }
@@ -135,54 +135,59 @@ impl MetadataProvider for DuckdbMetadataProvider {
         Ok(columns)
     }
 
-    fn get_table_files_for_select(&self, table_id: i64, snapshot_id: i64) -> crate::Result<Vec<DuckLakeTableFile>> {
+    fn get_table_files_for_select(
+        &self,
+        table_id: i64,
+        snapshot_id: i64,
+    ) -> crate::Result<Vec<DuckLakeTableFile>> {
         let conn = self.open_connection()?;
         let mut stmt = conn.prepare(SQL_GET_DATA_FILES)?;
 
         let files = stmt
-            .query_map(
-                [table_id, snapshot_id, snapshot_id, table_id],
-                |row| {
-                    // Parse data file (columns 0-4)
-                    let _data_file_id: i64 = row.get(0)?;
-                    let data_file = DuckLakeFileData {
-                        path: row.get(1)?,
-                        path_is_relative: row.get(2)?,
-                        file_size_bytes: row.get(3)?,
-                        footer_size: row.get(4)?,
-                        encryption_key: String::new(), // TODO: handle encryption
-                    };
-
-                    // Parse delete file (columns 5-10) if exists
-                    let delete_file = if let Ok(Some(_)) = row.get::<_, Option<i64>>(5) {
-                        Some(DuckLakeFileData {
-                            path: row.get(6)?,
-                            path_is_relative: row.get(7)?,
-                            file_size_bytes: row.get(8)?,
-                            footer_size: row.get(9)?,
-                            encryption_key: String::new(),
-                        })
-                    } else {
-                        None
-                    };
-
-                    let _delete_count: Option<i64> = row.get(10)?;
-
-                    Ok(DuckLakeTableFile {
-                        file: data_file,
-                        delete_file,
-                        row_id_start: None,
-                        snapshot_id: Some(snapshot_id),
-                        max_row_count: None, // Set to None until we have actual row count from data file metadata
+            .query_map([table_id, snapshot_id, snapshot_id, table_id], |row| {
+                // Parse data file (columns 0-4)
+                let _data_file_id: i64 = row.get(0)?;
+                let data_file = DuckLakeFileData {
+                    path: row.get(1)?,
+                    path_is_relative: row.get(2)?,
+                    file_size_bytes: row.get(3)?,
+                    footer_size: row.get(4)?,
+                    encryption_key: String::new(), // TODO: handle encryption
+                };
+
+                // Parse delete file (columns 5-10) if exists
+                let delete_file = if let Ok(Some(_)) = row.get::<_, Option<i64>>(5) {
+                    Some(DuckLakeFileData {
+                        path: row.get(6)?,
+                        path_is_relative: row.get(7)?,
+                        file_size_bytes: row.get(8)?,
+                        footer_size: row.get(9)?,
+                        encryption_key: String::new(),
                     })
-                },
-            )?
+                } else {
+                    None
+                };
+
+                let _delete_count: Option<i64> = row.get(10)?;
+
+                Ok(DuckLakeTableFile {
+                    file: data_file,
+                    delete_file,
+                    row_id_start: None,
+                    snapshot_id: Some(snapshot_id),
+                    max_row_count: None, // Set to None until we have actual row count from data file metadata
+                })
+            })?
             .collect::<Result<Vec<_>, _>>()?;
 
         Ok(files)
     }
 
-    fn get_schema_by_name(&self, name: &str, snapshot_id: i64) -> crate::Result<Option<SchemaMetadata>> {
+    fn get_schema_by_name(
+        &self,
+        name: &str,
+        snapshot_id: i64,
+    ) -> crate::Result<Option<SchemaMetadata>> {
         let conn = self.open_connection()?;
         let mut stmt = conn.prepare(SQL_GET_SCHEMA_BY_NAME)?;
 
@@ -204,7 +209,12 @@ impl MetadataProvider for DuckdbMetadataProvider {
         }
     }
 
-    fn get_table_by_name(&self, schema_id: i64, name: &str, snapshot_id: i64) -> crate::Result<Option<TableMetadata>> {
+    fn get_table_by_name(
+        &self,
+        schema_id: i64,
+        name: &str,
+        snapshot_id: i64,
+    ) -> crate::Result<Option<TableMetadata>> {
         let conn = self.open_connection()?;
         let mut stmt = conn.prepare(SQL_GET_TABLE_BY_NAME)?;
 
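One more aside on the query_map call reflowed in this file: the duckdb crate exposes a rusqlite-style prepare / query_map / get API. Below is a minimal, self-contained sketch of that pattern, assuming only the duckdb crate; the in-memory table and query are invented for illustration and are unrelated to the DuckLake catalog schema.

use duckdb::{Connection, Result, params};

fn main() -> Result<()> {
    // In-memory database purely for illustration.
    let conn = Connection::open_in_memory()?;
    conn.execute_batch(
        "CREATE TABLE files (id BIGINT, path VARCHAR);
         INSERT INTO files VALUES (1, 'a.parquet'), (2, 'b.parquet');",
    )?;

    let mut stmt = conn.prepare("SELECT id, path FROM files WHERE id >= ?")?;
    // query_map binds the parameters, runs the query, and maps each row
    // through the closure, yielding an iterator of Result<(i64, String)>.
    let rows = stmt
        .query_map(params![1_i64], |row| {
            let id: i64 = row.get(0)?;
            let path: String = row.get(1)?;
            Ok((id, path))
        })?
        .collect::<Result<Vec<_>>>()?;

    assert_eq!(rows.len(), 2);
    Ok(())
}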