Skip to content

Commit 644567d

Browse files
committed
fix: arrow schema not constructed with metadata in unrelated test
1 parent e24406b commit 644567d

File tree

5 files changed

+37
-7
lines changed

5 files changed

+37
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/iceberg/src/arrow/value.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
436436
"The schema partner is not a struct type",
437437
));
438438
}
439+
439440
Ok(schema_partner)
440441
}
441442

@@ -453,6 +454,7 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
453454
"The struct partner is not a struct array",
454455
)
455456
})?;
457+
456458
let field_pos = struct_array
457459
.fields()
458460
.iter()
@@ -467,6 +469,7 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
467469
format!("Field id {} not found in struct array", field.id),
468470
)
469471
})?;
472+
470473
Ok(struct_array.column(field_pos))
471474
}
472475

crates/iceberg/src/writer/base_writer/data_file_writer.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,13 @@ impl<B: FileWriterBuilder> CurrentFileStatus for DataFileWriter<B> {
9898

9999
#[cfg(test)]
100100
mod test {
101+
use std::collections::HashMap;
101102
use std::sync::Arc;
102103

103104
use arrow_array::{Int32Array, StringArray};
104105
use arrow_schema::{DataType, Field};
105106
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
107+
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
106108
use parquet::file::properties::WriterProperties;
107109
use tempfile::TempDir;
108110

@@ -145,8 +147,14 @@ mod test {
145147
let mut data_file_writer = DataFileWriterBuilder::new(pw, None).build().await.unwrap();
146148

147149
let arrow_schema = arrow_schema::Schema::new(vec![
148-
Field::new("foo", DataType::Int32, false),
149-
Field::new("bar", DataType::Utf8, false),
150+
Field::new("foo", DataType::Int32, false).with_metadata(HashMap::from([(
151+
PARQUET_FIELD_ID_META_KEY.to_string(),
152+
3.to_string(),
153+
)])),
154+
Field::new("bar", DataType::Utf8, false).with_metadata(HashMap::from([(
155+
PARQUET_FIELD_ID_META_KEY.to_string(),
156+
4.to_string(),
157+
)])),
150158
]);
151159
let batch = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![
152160
Arc::new(Int32Array::from(vec![1, 2, 3])),
@@ -216,8 +224,14 @@ mod test {
216224
.await?;
217225

218226
let arrow_schema = arrow_schema::Schema::new(vec![
219-
Field::new("id", DataType::Int32, false),
220-
Field::new("name", DataType::Utf8, false),
227+
Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
228+
PARQUET_FIELD_ID_META_KEY.to_string(),
229+
5.to_string(),
230+
)])),
231+
Field::new("name", DataType::Utf8, false).with_metadata(HashMap::from([(
232+
PARQUET_FIELD_ID_META_KEY.to_string(),
233+
6.to_string(),
234+
)])),
221235
]);
222236
let batch = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![
223237
Arc::new(Int32Array::from(vec![1, 2, 3])),

crates/integrations/datafusion/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@ tokio = { workspace = true }
4242
[dev-dependencies]
4343
iceberg-catalog-memory = { workspace = true }
4444
tempfile = { workspace = true }
45+
parquet = { workspace = true }

crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -220,20 +220,31 @@ fn scalar_value_to_datum(value: &ScalarValue) -> Option<Datum> {
220220

221221
#[cfg(test)]
222222
mod tests {
223+
use std::collections::HashMap;
224+
223225
use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
224226
use datafusion::common::DFSchema;
225227
use datafusion::logical_expr::utils::split_conjunction;
226228
use datafusion::prelude::{Expr, SessionContext};
227229
use iceberg::expr::{Predicate, Reference};
228230
use iceberg::spec::Datum;
231+
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
229232

230233
use super::convert_filters_to_predicate;
231234

232235
fn create_test_schema() -> DFSchema {
233236
let arrow_schema = Schema::new(vec![
234-
Field::new("foo", DataType::Int32, true),
235-
Field::new("bar", DataType::Utf8, true),
236-
Field::new("ts", DataType::Timestamp(TimeUnit::Second, None), true),
237+
Field::new("foo", DataType::Int32, true).with_metadata(HashMap::from([(
238+
PARQUET_FIELD_ID_META_KEY.to_string(),
239+
"1".to_string(),
240+
)])),
241+
Field::new("bar", DataType::Utf8, true).with_metadata(HashMap::from([(
242+
PARQUET_FIELD_ID_META_KEY.to_string(),
243+
"2".to_string(),
244+
)])),
245+
Field::new("ts", DataType::Timestamp(TimeUnit::Second, None), true).with_metadata(
246+
HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())]),
247+
),
237248
]);
238249
DFSchema::try_from_qualified_schema("my_table", &arrow_schema).unwrap()
239250
}

0 commit comments

Comments (0)