Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion arrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ arrow-integration-test = { path = "../arrow-integration-test", default-features
clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] }
futures = { version = "0.3", default-features = false }
prost = { version = "0.14.1", default-features = false }
serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
tokio = { version = "1.0", default-features = false, features = [ "rt-multi-thread"] }
tonic = { version = "0.14.1", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ arrow-schema = { workspace = true }
half = { version = "2.1", default-features = false }
indexmap = { version = "2.0", default-features = false, features = ["std"] }
num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
serde = { version = "1.0", default-features = false }
serde_core = { version = "1.0", default-features = false }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
chrono = { workspace = true }
lexical-core = { version = "1.0", default-features = false}
Expand Down
4 changes: 3 additions & 1 deletion arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ use std::io::BufRead;
use std::sync::Arc;

use chrono::Utc;
use serde::Serialize;
use serde_core::Serialize;

use arrow_array::timezone::Tz;
use arrow_array::types::*;
Expand Down Expand Up @@ -613,6 +613,8 @@ impl Decoder {
/// ```
///
/// Note: this ignores any batch size setting, and always decodes all rows
///
/// [serde]: https://docs.rs/serde/latest/serde/
pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
// Hand the rows straight to the tape decoder; its result is returned unchanged.
self.tape_decoder.serialize(rows)
}
Expand Down
6 changes: 3 additions & 3 deletions arrow-json/src/reader/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

use crate::reader::tape::TapeElement;
use lexical_core::FormattedSize;
use serde::ser::{
use serde_core::ser::{
Impossible, SerializeMap, SerializeSeq, SerializeStruct, SerializeTuple, SerializeTupleStruct,
};
use serde::{Serialize, Serializer};
use serde_core::{Serialize, Serializer};

/// Error reported by the tape serializer; wraps the error message as a `String`.
#[derive(Debug)]
pub struct SerializerError(String);
Expand All @@ -33,7 +33,7 @@ impl std::fmt::Display for SerializerError {
}
}

impl serde::ser::Error for SerializerError {
impl serde_core::ser::Error for SerializerError {
fn custom<T>(msg: T) -> Self
where
T: std::fmt::Display,
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/src/reader/tape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::reader::serializer::TapeSerializer;
use arrow_schema::ArrowError;
use memchr::memchr2;
use serde::Serialize;
use serde_core::Serialize;
use std::fmt::Write;

/// We decode JSON to a flattened tape representation,
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use arrow_cast::display::{ArrayFormatter, FormatOptions};
use arrow_schema::{ArrowError, DataType, FieldRef};
use half::f16;
use lexical_core::FormattedSize;
use serde::Serializer;
use serde_core::Serializer;

/// Configuration options for the JSON encoder.
#[derive(Debug, Clone, Default)]
Expand Down
15 changes: 10 additions & 5 deletions arrow-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,30 @@ name = "arrow_schema"
bench = false

[dependencies]
serde = { version = "1.0", default-features = false, features = [
"derive",
serde_core = { version = "1.0", default-features = false, features = [
"std",
"rc",
], optional = true }
serde = { version = "1.0", default-features = false, features = [
"derive",
], optional = true }
bitflags = { version = "2.0.0", default-features = false, optional = true }
serde_json = { version = "1.0", optional = true }

[features]
canonical_extension_types = ["dep:serde", "dep:serde_json"]
canonical_extension_types = ["dep:serde_core", "dep:serde_json"]
# Enable ffi support
ffi = ["bitflags"]
serde = ["dep:serde"]
serde = ["dep:serde_core", "dep:serde"]

[package.metadata.docs.rs]
all-features = true

[dev-dependencies]
bincode = { version = "2.0.1", default-features = false, features = ["std", "serde"] }
bincode = { version = "2.0.1", default-features = false, features = [
"std",
"serde",
] }
criterion = { version = "0.5", default-features = false }
insta = "1.43.1"

Expand Down
149 changes: 144 additions & 5 deletions arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
//!
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#fixed-shape-tensor>

use serde::{Deserialize, Serialize};
use serde_core::de::{self, MapAccess, Visitor};
use serde_core::ser::SerializeStruct;
use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
use std::fmt;

use crate::{ArrowError, DataType, extension::ExtensionType};

Expand Down Expand Up @@ -129,7 +132,7 @@ impl FixedShapeTensor {
}

/// Extension type metadata for [`FixedShapeTensor`].
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[derive(Debug, Clone, PartialEq)]
pub struct FixedShapeTensorMetadata {
/// The physical shape of the contained tensors.
shape: Vec<usize>,
Expand All @@ -141,6 +144,143 @@ pub struct FixedShapeTensorMetadata {
permutations: Option<Vec<usize>>,
}

/// Serializes the metadata as a struct named `FixedShapeTensorMetadata` with
/// the fields `shape`, `dim_names` and `permutations`, written in that order.
impl Serialize for FixedShapeTensorMetadata {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Number of fields announced to the serializer up front.
        const FIELD_COUNT: usize = 3;

        let mut out = serializer.serialize_struct("FixedShapeTensorMetadata", FIELD_COUNT)?;
        out.serialize_field("shape", &self.shape)?;
        out.serialize_field("dim_names", &self.dim_names)?;
        out.serialize_field("permutations", &self.permutations)?;
        out.end()
    }
}

/// Identifier for one of the fields of [`FixedShapeTensorMetadata`], produced
/// while deserializing the field names of its map representation.
#[derive(Debug)]
enum MetadataField {
/// The `shape` field.
Shape,
/// The `dim_names` field.
DimNames,
/// The `permutations` field.
Permutations,
}

/// Visitor that turns a field-name string into a [`MetadataField`].
struct MetadataFieldVisitor;

impl<'de> Visitor<'de> for MetadataFieldVisitor {
    type Value = MetadataField;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("`shape`, `dim_names`, or `permutations`")
    }

    /// Maps a known field name to its variant; any other name is reported as
    /// an unknown field together with the list of accepted names.
    fn visit_str<E>(self, value: &str) -> Result<MetadataField, E>
    where
        E: de::Error,
    {
        const KNOWN_FIELDS: &[&str] = &["shape", "dim_names", "permutations"];

        let field = match value {
            "shape" => MetadataField::Shape,
            "dim_names" => MetadataField::DimNames,
            "permutations" => MetadataField::Permutations,
            other => return Err(E::unknown_field(other, KNOWN_FIELDS)),
        };
        Ok(field)
    }
}

/// Deserializes a [`MetadataField`] from an identifier by delegating to
/// [`MetadataFieldVisitor`].
impl<'de> Deserialize<'de> for MetadataField {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = MetadataFieldVisitor;
        deserializer.deserialize_identifier(visitor)
    }
}

/// Visitor that builds a [`FixedShapeTensorMetadata`] from either a sequence
/// or a map representation.
struct FixedShapeTensorMetadataVisitor;

impl<'de> Visitor<'de> for FixedShapeTensorMetadataVisitor {
    type Value = FixedShapeTensorMetadata;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("struct FixedShapeTensorMetadata")
    }

    /// Positional form: `shape`, `dim_names`, `permutations`, in that order.
    ///
    /// All three elements must be present; a missing element is reported as
    /// an invalid-length error carrying the index of the missing element.
    fn visit_seq<V>(self, mut seq: V) -> Result<FixedShapeTensorMetadata, V::Error>
    where
        V: de::SeqAccess<'de>,
    {
        let shape = seq
            .next_element()?
            .ok_or_else(|| de::Error::invalid_length(0, &self))?;
        let dim_names = seq
            .next_element()?
            .ok_or_else(|| de::Error::invalid_length(1, &self))?;
        let permutations = seq
            .next_element()?
            .ok_or_else(|| de::Error::invalid_length(2, &self))?;
        Ok(FixedShapeTensorMetadata {
            shape,
            dim_names,
            permutations,
        })
    }

    /// Keyed form: `shape` is required, `dim_names` and `permutations` are
    /// optional. Duplicate keys are rejected.
    fn visit_map<V>(self, mut map: V) -> Result<FixedShapeTensorMetadata, V::Error>
    where
        V: MapAccess<'de>,
    {
        let mut shape = None;
        // The outer `Option` tracks whether the key has been seen (duplicate
        // detection); the inner `Option` is the decoded value. Decoding the
        // value as an `Option` lets an explicit null be accepted, which is
        // what the `Serialize` impl emits when these fields are `None`.
        let mut dim_names: Option<Option<_>> = None;
        let mut permutations: Option<Option<_>> = None;

        while let Some(key) = map.next_key()? {
            match key {
                MetadataField::Shape => {
                    if shape.is_some() {
                        return Err(de::Error::duplicate_field("shape"));
                    }
                    shape = Some(map.next_value()?);
                }
                MetadataField::DimNames => {
                    if dim_names.is_some() {
                        return Err(de::Error::duplicate_field("dim_names"));
                    }
                    dim_names = Some(map.next_value()?);
                }
                MetadataField::Permutations => {
                    if permutations.is_some() {
                        return Err(de::Error::duplicate_field("permutations"));
                    }
                    permutations = Some(map.next_value()?);
                }
            }
        }

        let shape = shape.ok_or_else(|| de::Error::missing_field("shape"))?;

        Ok(FixedShapeTensorMetadata {
            shape,
            // An absent key and an explicit null both collapse to `None`.
            dim_names: dim_names.flatten(),
            permutations: permutations.flatten(),
        })
    }
}

/// Deserializes the metadata as a struct with the fields `shape`,
/// `dim_names` and `permutations`, driven by
/// [`FixedShapeTensorMetadataVisitor`].
impl<'de> Deserialize<'de> for FixedShapeTensorMetadata {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        const STRUCT_NAME: &str = "FixedShapeTensorMetadata";
        const FIELD_NAMES: &[&str] = &["shape", "dim_names", "permutations"];

        deserializer.deserialize_struct(STRUCT_NAME, FIELD_NAMES, FixedShapeTensorMetadataVisitor)
    }
}

impl FixedShapeTensorMetadata {
/// Returns metadata for a fixed shape tensor extension type.
///
Expand Down Expand Up @@ -377,9 +517,8 @@ mod tests {
}

#[test]
#[should_panic(
expected = "FixedShapeTensor metadata deserialization failed: missing field `shape`"
)]
#[should_panic(expected = "FixedShapeTensor metadata deserialization failed: \
unknown field `not-shape`, expected one of `shape`, `dim_names`, `permutations`")]
fn invalid_metadata() {
let fixed_shape_tensor =
FixedShapeTensor::try_new(DataType::Float32, [100, 200, 500], None, None).unwrap();
Expand Down
77 changes: 74 additions & 3 deletions arrow-schema/src/extension/canonical/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
//!
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>

use serde::{Deserialize, Serialize};
use serde_core::de::{self, MapAccess, Visitor};
use serde_core::ser::SerializeStruct;
use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
use std::fmt;

use crate::{ArrowError, DataType, extension::ExtensionType};

Expand All @@ -42,10 +45,78 @@ use crate::{ArrowError, DataType, extension::ExtensionType};
pub struct Json(JsonMetadata);

/// Empty object
#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
#[derive(Debug, Clone, Copy, PartialEq)]
struct Empty {}

/// Serializes [`Empty`] as a struct named `Empty` that has no fields.
impl Serialize for Empty {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Open the zero-field struct and close it immediately.
        serializer.serialize_struct("Empty", 0)?.end()
    }
}

struct EmptyVisitor;

impl<'de> Visitor<'de> for EmptyVisitor {
type Value = Empty;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct Empty")
}

fn visit_seq<A>(self, mut _seq: A) -> Result<Self::Value, A::Error>
where
A: de::SeqAccess<'de>,
{
Ok(Empty {})
}

fn visit_map<V>(self, mut map: V) -> Result<Empty, V::Error>
where
V: MapAccess<'de>,
{
if let Some(key) = map.next_key::<String>()? {
return Err(de::Error::unknown_field(&key, EMPTY_FIELDS));
}
Ok(Empty {})
}

fn visit_u64<E>(self, _v: u64) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(de::Error::unknown_field("", EMPTY_FIELDS))
}

fn visit_str<E>(self, _v: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(de::Error::unknown_field("", EMPTY_FIELDS))
}

fn visit_bytes<E>(self, _v: &[u8]) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(de::Error::unknown_field("", EMPTY_FIELDS))
}
}

/// Field names accepted when deserializing `Empty`: none, because the struct has no fields.
static EMPTY_FIELDS: &[&str] = &[];

/// Deserializes [`Empty`] as a struct named `Empty` with no accepted fields,
/// driven by [`EmptyVisitor`].
impl<'de> Deserialize<'de> for Empty {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = EmptyVisitor;
        deserializer.deserialize_struct("Empty", EMPTY_FIELDS, visitor)
    }
}

/// Extension type metadata for [`Json`].
///
/// Wraps an optional `Empty` object; the derived default holds `None`.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct JsonMetadata(Option<Empty>);
Expand Down
Loading
Loading