Skip to content

Commit a33869d

Browse files
committed
match mode
1 parent 54bc5df commit a33869d

File tree

1 file changed

+28
-36
lines changed

1 file changed

+28
-36
lines changed

crates/iceberg/src/arrow/value.rs

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use arrow_array::{
2121
LargeListArray, LargeStringArray, ListArray, MapArray, StringArray, StructArray,
2222
Time64MicrosecondArray, TimestampMicrosecondArray, TimestampNanosecondArray,
2323
};
24-
use arrow_schema::{DataType, Schema as ArrowSchema};
24+
use arrow_schema::{DataType, FieldRef, Schema as ArrowSchema};
2525
use uuid::Uuid;
2626

2727
use super::{get_field_id, schema_to_arrow_schema};
@@ -425,38 +425,40 @@ impl SchemaWithPartnerVisitor<ArrayRef> for ArrowArrayToIcebergStructConverter {
425425
}
426426
}
427427

428+
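/// Strategy for deciding whether an arrow field corresponds to an iceberg field: `Id` compares the field id read from the arrow field via `get_field_id`, `Name` compares the field names.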
429+
pub enum FieldMatchMode {
430+
Id,
431+
Name,
432+
}
433+
434+
impl FieldMatchMode {
435+
pub fn match_field(&self, arrow_field: &FieldRef, iceberg_field: &NestedField) -> bool {
436+
match self {
437+
FieldMatchMode::Id => get_field_id(arrow_field)
438+
.map(|id| id == iceberg_field.id)
439+
.unwrap_or(false),
440+
FieldMatchMode::Name => arrow_field.name() == &iceberg_field.name,
441+
}
442+
}
443+
}
444+
428445
/// Partner type representing accessing and walking arrow arrays alongside iceberg schema
429446
pub struct ArrowArrayAccessor {
430-
arrow_schema: Option<ArrowSchema>,
447+
match_mode: FieldMatchMode,
431448
}
432449

433450
impl ArrowArrayAccessor {
434451
/// Creates a new instance of ArrowArrayAccessor that matches fields by field id (`FieldMatchMode::Id`)
435-
pub fn new() -> Result<Self> {
436-
Ok(Self { arrow_schema: None })
452+
pub fn new() -> Self {
453+
Self {
454+
match_mode: FieldMatchMode::Id,
455+
}
437456
}
438457

439458
/// Creates a new instance of ArrowArrayAccessor that matches arrow fields to iceberg fields
440459
/// using the given match mode (by field id or by field name)
441-
pub fn new_with_table_schema(table_schema: &Schema) -> Result<Self> {
442-
Ok(Self {
443-
arrow_schema: Some(schema_to_arrow_schema(table_schema)?),
444-
})
445-
}
446-
447-
/// Check if an arrow field matches the target field ID, either directly or through schema lookup
448-
fn arrow_field_matches_id(&self, arrow_field: &arrow_schema::Field, target_id: i32) -> bool {
449-
// First try direct match via field metadata
450-
if let Ok(id) = get_field_id(arrow_field) {
451-
id == target_id
452-
} else {
453-
// Only if direct match fails, try fallback via schema lookup
454-
self.arrow_schema
455-
.as_ref()
456-
.and_then(|schema| schema.field_with_name(arrow_field.name()).ok())
457-
.and_then(|field_from_schema| get_field_id(field_from_schema).ok())
458-
.is_some_and(|id| id == target_id)
459-
}
460+
pub fn new_with_match_mode(match_mode: FieldMatchMode) -> Self {
461+
Self { match_mode }
460462
}
461463
}
462464

@@ -493,21 +495,11 @@ impl PartnerAccessor<ArrayRef> for ArrowArrayAccessor {
493495
let field_pos = struct_array
494496
.fields()
495497
.iter()
496-
.position(|arrow_field| self.arrow_field_matches_id(arrow_field, field.id))
498+
.position(|arrow_field| self.match_mode.match_field(arrow_field, field))
497499
.ok_or_else(|| {
498500
Error::new(
499501
ErrorKind::DataInvalid,
500-
format!(
501-
"Field with id={} or name={} not found in struct array. Available fields: [{}]",
502-
field.id,
503-
field.name,
504-
struct_array
505-
.fields()
506-
.iter()
507-
.map(|f| f.name().as_str())
508-
.collect::<Vec<_>>()
509-
.join(", ")
510-
),
502+
format!("Field id {} not found in struct array", field.id),
511503
)
512504
})?;
513505

@@ -590,7 +582,7 @@ pub fn arrow_struct_to_literal(
590582
ty,
591583
struct_array,
592584
&mut ArrowArrayToIcebergStructConverter,
593-
&ArrowArrayAccessor::new()?,
585+
&ArrowArrayAccessor::new(),
594586
)
595587
}
596588

0 commit comments

Comments
 (0)