Skip to content

Commit 7e5076f

Browse files
AdamGS and alamb authored
Replace serde with serde_core when possible (#8558)
# Which issue does this PR close? - Closes #8451. With this change, it's possible to compile the core crate without pulling in `serde_derive`, which will only be required for `arrow-avro` and `arrow-schema/serde`. # Rationale for this change Improve compile time and reduce the number of dependencies and the binary size in some cases. # What changes are included in this PR? 1. Use `serde_core` when possible 2. Manually implement `Serialize/Deserialize` for canonical extension type metadata. # Are these changes tested? Covered by existing tests # Are there any user-facing changes? No --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 60fe5fa commit 7e5076f

File tree

11 files changed

+509
-26
lines changed

11 files changed

+509
-26
lines changed

arrow-integration-testing/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ arrow-integration-test = { path = "../arrow-integration-test", default-features
4040
clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] }
4141
futures = { version = "0.3", default-features = false }
4242
prost = { version = "0.14.1", default-features = false }
43-
serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
4443
serde_json = { version = "1.0", default-features = false, features = ["std"] }
4544
tokio = { version = "1.0", default-features = false, features = [ "rt-multi-thread"] }
4645
tonic = { version = "0.14.1", default-features = false }

arrow-json/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ arrow-schema = { workspace = true }
4444
half = { version = "2.1", default-features = false }
4545
indexmap = { version = "2.0", default-features = false, features = ["std"] }
4646
num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
47-
serde = { version = "1.0", default-features = false }
47+
serde_core = { version = "1.0", default-features = false }
4848
serde_json = { version = "1.0", default-features = false, features = ["std"] }
4949
chrono = { workspace = true }
5050
lexical-core = { version = "1.0", default-features = false}

arrow-json/src/reader/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ use std::io::BufRead;
138138
use std::sync::Arc;
139139

140140
use chrono::Utc;
141-
use serde::Serialize;
141+
use serde_core::Serialize;
142142

143143
use arrow_array::timezone::Tz;
144144
use arrow_array::types::*;
@@ -613,6 +613,8 @@ impl Decoder {
613613
/// ```
614614
///
615615
/// Note: this ignores any batch size setting, and always decodes all rows
616+
///
617+
/// [serde]: https://docs.rs/serde/latest/serde/
616618
pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
617619
self.tape_decoder.serialize(rows)
618620
}

arrow-json/src/reader/serializer.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
use crate::reader::tape::TapeElement;
1919
use lexical_core::FormattedSize;
20-
use serde::ser::{
20+
use serde_core::ser::{
2121
Impossible, SerializeMap, SerializeSeq, SerializeStruct, SerializeTuple, SerializeTupleStruct,
2222
};
23-
use serde::{Serialize, Serializer};
23+
use serde_core::{Serialize, Serializer};
2424

2525
#[derive(Debug)]
2626
pub struct SerializerError(String);
@@ -33,7 +33,7 @@ impl std::fmt::Display for SerializerError {
3333
}
3434
}
3535

36-
impl serde::ser::Error for SerializerError {
36+
impl serde_core::ser::Error for SerializerError {
3737
fn custom<T>(msg: T) -> Self
3838
where
3939
T: std::fmt::Display,

arrow-json/src/reader/tape.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
use crate::reader::serializer::TapeSerializer;
1919
use arrow_schema::ArrowError;
2020
use memchr::memchr2;
21-
use serde::Serialize;
21+
use serde_core::Serialize;
2222
use std::fmt::Write;
2323

2424
/// We decode JSON to a flattened tape representation,

arrow-json/src/writer/encoder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use arrow_cast::display::{ArrayFormatter, FormatOptions};
2626
use arrow_schema::{ArrowError, DataType, FieldRef};
2727
use half::f16;
2828
use lexical_core::FormattedSize;
29-
use serde::Serializer;
29+
use serde_core::Serializer;
3030

3131
/// Configuration options for the JSON encoder.
3232
#[derive(Debug, Clone, Default)]

arrow-schema/Cargo.toml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,30 @@ name = "arrow_schema"
3333
bench = false
3434

3535
[dependencies]
36-
serde = { version = "1.0", default-features = false, features = [
37-
"derive",
36+
serde_core = { version = "1.0", default-features = false, features = [
3837
"std",
3938
"rc",
4039
], optional = true }
40+
serde = { version = "1.0", default-features = false, features = [
41+
"derive",
42+
], optional = true }
4143
bitflags = { version = "2.0.0", default-features = false, optional = true }
4244
serde_json = { version = "1.0", optional = true }
4345

4446
[features]
45-
canonical_extension_types = ["dep:serde", "dep:serde_json"]
47+
canonical_extension_types = ["dep:serde_core", "dep:serde_json"]
4648
# Enable ffi support
4749
ffi = ["bitflags"]
48-
serde = ["dep:serde"]
50+
serde = ["dep:serde_core", "dep:serde"]
4951

5052
[package.metadata.docs.rs]
5153
all-features = true
5254

5355
[dev-dependencies]
54-
bincode = { version = "2.0.1", default-features = false, features = ["std", "serde"] }
56+
bincode = { version = "2.0.1", default-features = false, features = [
57+
"std",
58+
"serde",
59+
] }
5560
criterion = { version = "0.5", default-features = false }
5661
insta = "1.43.1"
5762

arrow-schema/src/extension/canonical/fixed_shape_tensor.rs

Lines changed: 144 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
//!
2020
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#fixed-shape-tensor>
2121
22-
use serde::{Deserialize, Serialize};
22+
use serde_core::de::{self, MapAccess, Visitor};
23+
use serde_core::ser::SerializeStruct;
24+
use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
25+
use std::fmt;
2326

2427
use crate::{ArrowError, DataType, extension::ExtensionType};
2528

@@ -129,7 +132,7 @@ impl FixedShapeTensor {
129132
}
130133

131134
/// Extension type metadata for [`FixedShapeTensor`].
132-
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
135+
#[derive(Debug, Clone, PartialEq)]
133136
pub struct FixedShapeTensorMetadata {
134137
/// The physical shape of the contained tensors.
135138
shape: Vec<usize>,
@@ -141,6 +144,143 @@ pub struct FixedShapeTensorMetadata {
141144
permutations: Option<Vec<usize>>,
142145
}
143146

147+
impl Serialize for FixedShapeTensorMetadata {
148+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
149+
where
150+
S: Serializer,
151+
{
152+
let mut state = serializer.serialize_struct("FixedShapeTensorMetadata", 3)?;
153+
state.serialize_field("shape", &self.shape)?;
154+
state.serialize_field("dim_names", &self.dim_names)?;
155+
state.serialize_field("permutations", &self.permutations)?;
156+
state.end()
157+
}
158+
}
159+
160+
#[derive(Debug)]
161+
enum MetadataField {
162+
Shape,
163+
DimNames,
164+
Permutations,
165+
}
166+
167+
struct MetadataFieldVisitor;
168+
169+
impl<'de> Visitor<'de> for MetadataFieldVisitor {
170+
type Value = MetadataField;
171+
172+
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
173+
formatter.write_str("`shape`, `dim_names`, or `permutations`")
174+
}
175+
176+
fn visit_str<E>(self, value: &str) -> Result<MetadataField, E>
177+
where
178+
E: de::Error,
179+
{
180+
match value {
181+
"shape" => Ok(MetadataField::Shape),
182+
"dim_names" => Ok(MetadataField::DimNames),
183+
"permutations" => Ok(MetadataField::Permutations),
184+
_ => Err(de::Error::unknown_field(
185+
value,
186+
&["shape", "dim_names", "permutations"],
187+
)),
188+
}
189+
}
190+
}
191+
192+
impl<'de> Deserialize<'de> for MetadataField {
193+
fn deserialize<D>(deserializer: D) -> Result<MetadataField, D::Error>
194+
where
195+
D: Deserializer<'de>,
196+
{
197+
deserializer.deserialize_identifier(MetadataFieldVisitor)
198+
}
199+
}
200+
201+
struct FixedShapeTensorMetadataVisitor;
202+
203+
impl<'de> Visitor<'de> for FixedShapeTensorMetadataVisitor {
204+
type Value = FixedShapeTensorMetadata;
205+
206+
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
207+
formatter.write_str("struct FixedShapeTensorMetadata")
208+
}
209+
210+
fn visit_seq<V>(self, mut seq: V) -> Result<FixedShapeTensorMetadata, V::Error>
211+
where
212+
V: de::SeqAccess<'de>,
213+
{
214+
let shape = seq
215+
.next_element()?
216+
.ok_or_else(|| de::Error::invalid_length(0, &self))?;
217+
let dim_names = seq
218+
.next_element()?
219+
.ok_or_else(|| de::Error::invalid_length(1, &self))?;
220+
let permutations = seq
221+
.next_element()?
222+
.ok_or_else(|| de::Error::invalid_length(2, &self))?;
223+
Ok(FixedShapeTensorMetadata {
224+
shape,
225+
dim_names,
226+
permutations,
227+
})
228+
}
229+
230+
fn visit_map<V>(self, mut map: V) -> Result<FixedShapeTensorMetadata, V::Error>
231+
where
232+
V: MapAccess<'de>,
233+
{
234+
let mut shape = None;
235+
let mut dim_names = None;
236+
let mut permutations = None;
237+
238+
while let Some(key) = map.next_key()? {
239+
match key {
240+
MetadataField::Shape => {
241+
if shape.is_some() {
242+
return Err(de::Error::duplicate_field("shape"));
243+
}
244+
shape = Some(map.next_value()?);
245+
}
246+
MetadataField::DimNames => {
247+
if dim_names.is_some() {
248+
return Err(de::Error::duplicate_field("dim_names"));
249+
}
250+
dim_names = Some(map.next_value()?);
251+
}
252+
MetadataField::Permutations => {
253+
if permutations.is_some() {
254+
return Err(de::Error::duplicate_field("permutations"));
255+
}
256+
permutations = Some(map.next_value()?);
257+
}
258+
}
259+
}
260+
261+
let shape = shape.ok_or_else(|| de::Error::missing_field("shape"))?;
262+
263+
Ok(FixedShapeTensorMetadata {
264+
shape,
265+
dim_names,
266+
permutations,
267+
})
268+
}
269+
}
270+
271+
impl<'de> Deserialize<'de> for FixedShapeTensorMetadata {
272+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
273+
where
274+
D: Deserializer<'de>,
275+
{
276+
deserializer.deserialize_struct(
277+
"FixedShapeTensorMetadata",
278+
&["shape", "dim_names", "permutations"],
279+
FixedShapeTensorMetadataVisitor,
280+
)
281+
}
282+
}
283+
144284
impl FixedShapeTensorMetadata {
145285
/// Returns metadata for a fixed shape tensor extension type.
146286
///
@@ -377,9 +517,8 @@ mod tests {
377517
}
378518

379519
#[test]
380-
#[should_panic(
381-
expected = "FixedShapeTensor metadata deserialization failed: missing field `shape`"
382-
)]
520+
#[should_panic(expected = "FixedShapeTensor metadata deserialization failed: \
521+
unknown field `not-shape`, expected one of `shape`, `dim_names`, `permutations`")]
383522
fn invalid_metadata() {
384523
let fixed_shape_tensor =
385524
FixedShapeTensor::try_new(DataType::Float32, [100, 200, 500], None, None).unwrap();

arrow-schema/src/extension/canonical/json.rs

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
//!
2020
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
2121
22-
use serde::{Deserialize, Serialize};
22+
use serde_core::de::{self, MapAccess, Visitor};
23+
use serde_core::ser::SerializeStruct;
24+
use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
25+
use std::fmt;
2326

2427
use crate::{ArrowError, DataType, extension::ExtensionType};
2528

@@ -42,10 +45,78 @@ use crate::{ArrowError, DataType, extension::ExtensionType};
4245
pub struct Json(JsonMetadata);
4346

4447
/// Empty object
45-
#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Serialize)]
46-
#[serde(deny_unknown_fields)]
48+
#[derive(Debug, Clone, Copy, PartialEq)]
4749
struct Empty {}
4850

51+
impl Serialize for Empty {
52+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
53+
where
54+
S: Serializer,
55+
{
56+
let state = serializer.serialize_struct("Empty", 0)?;
57+
state.end()
58+
}
59+
}
60+
61+
struct EmptyVisitor;
62+
63+
impl<'de> Visitor<'de> for EmptyVisitor {
64+
type Value = Empty;
65+
66+
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
67+
formatter.write_str("struct Empty")
68+
}
69+
70+
fn visit_seq<A>(self, mut _seq: A) -> Result<Self::Value, A::Error>
71+
where
72+
A: de::SeqAccess<'de>,
73+
{
74+
Ok(Empty {})
75+
}
76+
77+
fn visit_map<V>(self, mut map: V) -> Result<Empty, V::Error>
78+
where
79+
V: MapAccess<'de>,
80+
{
81+
if let Some(key) = map.next_key::<String>()? {
82+
return Err(de::Error::unknown_field(&key, EMPTY_FIELDS));
83+
}
84+
Ok(Empty {})
85+
}
86+
87+
fn visit_u64<E>(self, _v: u64) -> Result<Self::Value, E>
88+
where
89+
E: de::Error,
90+
{
91+
Err(de::Error::unknown_field("", EMPTY_FIELDS))
92+
}
93+
94+
fn visit_str<E>(self, _v: &str) -> Result<Self::Value, E>
95+
where
96+
E: de::Error,
97+
{
98+
Err(de::Error::unknown_field("", EMPTY_FIELDS))
99+
}
100+
101+
fn visit_bytes<E>(self, _v: &[u8]) -> Result<Self::Value, E>
102+
where
103+
E: de::Error,
104+
{
105+
Err(de::Error::unknown_field("", EMPTY_FIELDS))
106+
}
107+
}
108+
109+
static EMPTY_FIELDS: &[&str] = &[];
110+
111+
impl<'de> Deserialize<'de> for Empty {
112+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
113+
where
114+
D: Deserializer<'de>,
115+
{
116+
deserializer.deserialize_struct("Empty", EMPTY_FIELDS, EmptyVisitor)
117+
}
118+
}
119+
49120
/// Extension type metadata for [`Json`].
50121
#[derive(Debug, Default, Clone, PartialEq)]
51122
pub struct JsonMetadata(Option<Empty>);

0 commit comments

Comments
 (0)