Skip to content

Commit da243bf

Browse files
committed
Address spec_id and serde feedback. Need to think about the name mapping.
1 parent f481c66 commit da243bf

File tree

6 files changed

+24
-31
lines changed

6 files changed

+24
-31
lines changed

crates/iceberg/src/arrow/delete_filter.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,6 @@ pub(crate) mod tests {
340340
predicate: None,
341341
deletes: vec![pos_del_1, pos_del_2.clone()],
342342
partition: None,
343-
partition_spec_id: None,
344343
partition_spec: None,
345344
},
346345
FileScanTask {
@@ -354,7 +353,6 @@ pub(crate) mod tests {
354353
predicate: None,
355354
deletes: vec![pos_del_3],
356355
partition: None,
357-
partition_spec_id: None,
358356
partition_spec: None,
359357
},
360358
];

crates/iceberg/src/arrow/reader.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1960,7 +1960,6 @@ message schema {
19601960
predicate: Some(predicate.bind(schema, true).unwrap()),
19611961
deletes: vec![],
19621962
partition: None,
1963-
partition_spec_id: None,
19641963
partition_spec: None,
19651964
})]
19661965
.into_iter(),
@@ -2281,7 +2280,6 @@ message schema {
22812280
predicate: None,
22822281
deletes: vec![],
22832282
partition: None,
2284-
partition_spec_id: None,
22852283
partition_spec: None,
22862284
};
22872285

@@ -2297,7 +2295,6 @@ message schema {
22972295
predicate: None,
22982296
deletes: vec![],
22992297
partition: None,
2300-
partition_spec_id: None,
23012298
partition_spec: None,
23022299
};
23032300

@@ -2429,7 +2426,6 @@ message schema {
24292426
predicate: None,
24302427
deletes: vec![],
24312428
partition: None,
2432-
partition_spec_id: None,
24332429
partition_spec: None,
24342430
})]
24352431
.into_iter(),
@@ -2600,7 +2596,6 @@ message schema {
26002596
equality_ids: None,
26012597
}],
26022598
partition: None,
2603-
partition_spec_id: None,
26042599
partition_spec: None,
26052600
};
26062601

@@ -2818,7 +2813,6 @@ message schema {
28182813
equality_ids: None,
28192814
}],
28202815
partition: None,
2821-
partition_spec_id: None,
28222816
partition_spec: None,
28232817
};
28242818

@@ -3029,7 +3023,6 @@ message schema {
30293023
equality_ids: None,
30303024
}],
30313025
partition: None,
3032-
partition_spec_id: None,
30333026
partition_spec: None,
30343027
};
30353028

@@ -3132,7 +3125,6 @@ message schema {
31323125
predicate: None,
31333126
deletes: vec![],
31343127
partition: None,
3135-
partition_spec_id: None,
31363128
partition_spec: None,
31373129
})]
31383130
.into_iter(),
@@ -3229,7 +3221,6 @@ message schema {
32293221
predicate: None,
32303222
deletes: vec![],
32313223
partition: None,
3232-
partition_spec_id: None,
32333224
partition_spec: None,
32343225
})]
32353226
.into_iter(),
@@ -3315,7 +3306,6 @@ message schema {
33153306
predicate: None,
33163307
deletes: vec![],
33173308
partition: None,
3318-
partition_spec_id: None,
33193309
partition_spec: None,
33203310
})]
33213311
.into_iter(),
@@ -3415,7 +3405,6 @@ message schema {
34153405
predicate: None,
34163406
deletes: vec![],
34173407
partition: None,
3418-
partition_spec_id: None,
34193408
partition_spec: None,
34203409
})]
34213410
.into_iter(),
@@ -3544,7 +3533,6 @@ message schema {
35443533
predicate: None,
35453534
deletes: vec![],
35463535
partition: None,
3547-
partition_spec_id: None,
35483536
partition_spec: None,
35493537
})]
35503538
.into_iter(),
@@ -3640,7 +3628,6 @@ message schema {
36403628
predicate: None,
36413629
deletes: vec![],
36423630
partition: None,
3643-
partition_spec_id: None,
36443631
partition_spec: None,
36453632
})]
36463633
.into_iter(),
@@ -3749,7 +3736,6 @@ message schema {
37493736
predicate: Some(predicate.bind(schema, true).unwrap()),
37503737
deletes: vec![],
37513738
partition: None,
3752-
partition_spec_id: None,
37533739
partition_spec: None,
37543740
})]
37553741
.into_iter(),
@@ -3888,7 +3874,6 @@ message schema {
38883874
predicate: None,
38893875
deletes: vec![],
38903876
partition: Some(partition_data),
3891-
partition_spec_id: Some(0),
38923877
partition_spec: Some(partition_spec),
38933878
})]
38943879
.into_iter(),

crates/iceberg/src/arrow/record_batch_transformer.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ impl RecordBatchTransformer {
218218
///
219219
/// To enable this fix, the following fields were added to `FileScanTask`:
220220
/// - `partition: Option<Struct>` - The partition data for this file
221-
/// - `partition_spec_id: Option<i32>` - The spec ID for the partition
222221
/// - `partition_spec: Option<Arc<PartitionSpec>>` - The actual partition spec
223222
///
224223
/// These fields should be populated by any system that reads Iceberg tables and provides

crates/iceberg/src/scan/context.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,8 @@ impl ManifestEntryContext {
129129

130130
deletes,
131131

132-
// Include partition data and spec ID from manifest entry
132+
// Include partition data and spec from manifest entry
133133
partition: Some(self.manifest_entry.data_file.partition.clone()),
134-
partition_spec_id: Some(self.partition_spec_id),
135134
// TODO: Pass actual PartitionSpec through context chain for native flow
136135
partition_spec: None,
137136
})

crates/iceberg/src/scan/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,7 +1778,6 @@ pub mod tests {
17781778
data_file_format: DataFileFormat::Parquet,
17791779
deletes: vec![],
17801780
partition: None,
1781-
partition_spec_id: None,
17821781
partition_spec: None,
17831782
};
17841783
test_fn(task);
@@ -1795,7 +1794,6 @@ pub mod tests {
17951794
data_file_format: DataFileFormat::Avro,
17961795
deletes: vec![],
17971796
partition: None,
1798-
partition_spec_id: None,
17991797
partition_spec: None,
18001798
};
18011799
test_fn(task);

crates/iceberg/src/scan/task.rs

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
use std::sync::Arc;
1919

2020
use futures::stream::BoxStream;
21-
use serde::{Deserialize, Serialize};
21+
use serde::{Deserialize, Serialize, Serializer};
2222

2323
use crate::Result;
2424
use crate::expr::BoundPredicate;
@@ -29,6 +29,24 @@ use crate::spec::{
2929
/// A stream of [`FileScanTask`].
3030
pub type FileScanTaskStream = BoxStream<'static, Result<FileScanTask>>;
3131

32+
/// Serialization helper that always returns NotImplementedError.
33+
/// Used for fields that should not be serialized but we want to be explicit about it.
34+
fn serialize_not_implemented<S, T>(_: &T, _: S) -> std::result::Result<S::Ok, S::Error>
35+
where S: Serializer {
36+
Err(serde::ser::Error::custom(
37+
"Serialization not implemented for this field",
38+
))
39+
}
40+
41+
/// Deserialization helper that always returns NotImplementedError.
42+
/// Used for fields that should not be deserialized but we want to be explicit about it.
43+
fn deserialize_not_implemented<'de, D, T>(_: D) -> std::result::Result<T, D::Error>
44+
where D: serde::Deserializer<'de> {
45+
Err(serde::de::Error::custom(
46+
"Deserialization not implemented for this field",
47+
))
48+
}
49+
3250
/// A task to scan part of file.
3351
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
3452
pub struct FileScanTask {
@@ -62,19 +80,15 @@ pub struct FileScanTask {
6280
/// Partition data from the manifest entry, used to identify which columns can use
6381
/// constant values from partition metadata vs. reading from the data file.
6482
/// Per the Iceberg spec, only identity-transformed partition fields should use constants.
65-
#[serde(skip)]
83+
#[serde(serialize_with = "serialize_not_implemented")]
84+
#[serde(deserialize_with = "deserialize_not_implemented")]
6685
pub partition: Option<Struct>,
6786

68-
/// The partition spec ID for this file, required to look up the correct
69-
/// partition spec and determine which fields are identity-transformed.
70-
/// Not serialized as partition data is runtime-only and populated from manifest entries.
71-
#[serde(skip)]
72-
pub partition_spec_id: Option<i32>,
73-
7487
/// The partition spec for this file, used to distinguish identity transforms
7588
/// (which use partition metadata constants) from non-identity transforms like
7689
/// bucket/truncate (which must read source columns from the data file).
77-
#[serde(skip)]
90+
#[serde(serialize_with = "serialize_not_implemented")]
91+
#[serde(deserialize_with = "deserialize_not_implemented")]
7892
pub partition_spec: Option<Arc<PartitionSpec>>,
7993
}
8094

0 commit comments

Comments
 (0)