Skip to content

Commit e3ef617

Browse files
authored
feat: Infer partition values from bounds (#1079)
## Which issue does this PR close? - Part of #1035. ## What changes are included in this PR? Added an API for creating a partition struct from column statistics (lower/upper bounds). ## Are these changes tested? Tests will be added in a follow-up PR that integrates this with the `add_parquet_file` API.
1 parent db8ceac commit e3ef617

File tree

1 file changed

+52
-3
lines changed

1 file changed

+52
-3
lines changed

crates/iceberg/src/writer/file_writer/parquet_writer.rs

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,11 @@ use crate::arrow::{
4444
};
4545
use crate::io::{FileIO, FileWrite, OutputFile};
4646
use crate::spec::{
47-
visit_schema, DataContentType, DataFileBuilder, DataFileFormat, Datum, ListType, MapType,
48-
NestedFieldRef, PrimitiveType, Schema, SchemaRef, SchemaVisitor, Struct, StructType,
49-
TableMetadata, Type,
47+
visit_schema, DataContentType, DataFileBuilder, DataFileFormat, Datum, ListType, Literal,
48+
MapType, NestedFieldRef, PartitionSpec, PrimitiveType, Schema, SchemaRef, SchemaVisitor,
49+
Struct, StructType, TableMetadata, Type,
5050
};
51+
use crate::transform::create_transform_function;
5152
use crate::writer::{CurrentFileStatus, DataFile};
5253
use crate::{Error, ErrorKind, Result};
5354

@@ -458,6 +459,54 @@ impl ParquetWriter {
458459

459460
Ok(builder)
460461
}
462+
463+
#[allow(dead_code)]
464+
fn partition_value_from_bounds(
465+
table_spec: Arc<PartitionSpec>,
466+
lower_bounds: &HashMap<i32, Datum>,
467+
upper_bounds: &HashMap<i32, Datum>,
468+
) -> Result<Struct> {
469+
let mut partition_literals: Vec<Option<Literal>> = Vec::new();
470+
471+
for field in table_spec.fields() {
472+
if let (Some(lower), Some(upper)) = (
473+
lower_bounds.get(&field.source_id),
474+
upper_bounds.get(&field.source_id),
475+
) {
476+
if !field.transform.preserves_order() {
477+
return Err(Error::new(
478+
ErrorKind::DataInvalid,
479+
format!(
480+
"cannot infer partition value for non linear partition field (needs to preserve order): {} with transform {}",
481+
field.name, field.transform
482+
),
483+
));
484+
}
485+
486+
if lower != upper {
487+
return Err(Error::new(
488+
ErrorKind::DataInvalid,
489+
format!(
490+
"multiple partition values for field {}: lower: {:?}, upper: {:?}",
491+
field.name, lower, upper
492+
),
493+
));
494+
}
495+
496+
let transform_fn = create_transform_function(&field.transform)?;
497+
let transform_literal =
498+
Literal::from(transform_fn.transform_literal_result(lower)?);
499+
500+
partition_literals.push(Some(transform_literal));
501+
} else {
502+
partition_literals.push(None);
503+
}
504+
}
505+
506+
let partition_struct = Struct::from_iter(partition_literals);
507+
508+
Ok(partition_struct)
509+
}
461510
}
462511

463512
impl FileWriter for ParquetWriter {

0 commit comments

Comments
 (0)