Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion arrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ arrow-integration-test = { path = "../arrow-integration-test", default-features
clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] }
futures = { version = "0.3", default-features = false }
prost = { version = "0.14.1", default-features = false }
serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
tokio = { version = "1.0", default-features = false, features = [ "rt-multi-thread"] }
tonic = { version = "0.14.1", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ arrow-schema = { workspace = true }
half = { version = "2.1", default-features = false }
indexmap = { version = "2.0", default-features = false, features = ["std"] }
num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
serde = { version = "1.0", default-features = false }
serde_core = { version = "1.0", default-features = false }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
chrono = { workspace = true }
lexical-core = { version = "1.0", default-features = false}
Expand Down
4 changes: 3 additions & 1 deletion arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ use std::io::BufRead;
use std::sync::Arc;

use chrono::Utc;
use serde::Serialize;
use serde_core::Serialize;

use arrow_array::timezone::Tz;
use arrow_array::types::*;
Expand Down Expand Up @@ -613,6 +613,8 @@ impl Decoder {
/// ```
///
/// Note: this ignores any batch size setting, and always decodes all rows
///
/// [serde]: https://docs.rs/serde/latest/serde/
pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
// Delegate to the tape decoder's `serialize` and return its result unchanged.
self.tape_decoder.serialize(rows)
}
Expand Down
6 changes: 3 additions & 3 deletions arrow-json/src/reader/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

use crate::reader::tape::TapeElement;
use lexical_core::FormattedSize;
use serde::ser::{
use serde_core::ser::{
Impossible, SerializeMap, SerializeSeq, SerializeStruct, SerializeTuple, SerializeTupleStruct,
};
use serde::{Serialize, Serializer};
use serde_core::{Serialize, Serializer};

/// Error type of this module's serializer; a newtype around a `String`.
#[derive(Debug)]
pub struct SerializerError(String);
Expand All @@ -33,7 +33,7 @@ impl std::fmt::Display for SerializerError {
}
}

impl serde::ser::Error for SerializerError {
impl serde_core::ser::Error for SerializerError {
fn custom<T>(msg: T) -> Self
where
T: std::fmt::Display,
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/src/reader/tape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::reader::serializer::TapeSerializer;
use arrow_schema::ArrowError;
use memchr::memchr2;
use serde::Serialize;
use serde_core::Serialize;
use std::fmt::Write;

/// We decode JSON to a flattened tape representation,
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use arrow_cast::display::{ArrayFormatter, FormatOptions};
use arrow_schema::{ArrowError, DataType, FieldRef};
use half::f16;
use lexical_core::FormattedSize;
use serde::Serializer;
use serde_core::Serializer;

/// Configuration options for the JSON encoder.
#[derive(Debug, Clone, Default)]
Expand Down
15 changes: 10 additions & 5 deletions arrow-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,30 @@ name = "arrow_schema"
bench = false

[dependencies]
serde = { version = "1.0", default-features = false, features = [
"derive",
serde_core = { version = "1.0", default-features = false, features = [
"std",
"rc",
], optional = true }
serde = { version = "1.0", default-features = false, features = [
"derive",
], optional = true }
bitflags = { version = "2.0.0", default-features = false, optional = true }
serde_json = { version = "1.0", optional = true }

[features]
canonical_extension_types = ["dep:serde", "dep:serde_json"]
canonical_extension_types = ["dep:serde_core", "dep:serde_json"]
# Enable ffi support
ffi = ["bitflags"]
serde = ["dep:serde"]
serde = ["dep:serde_core", "dep:serde"]

[package.metadata.docs.rs]
all-features = true

[dev-dependencies]
bincode = { version = "2.0.1", default-features = false, features = ["std", "serde"] }
bincode = { version = "2.0.1", default-features = false, features = [
"std",
"serde",
] }
criterion = { version = "0.5", default-features = false }
insta = "1.43.1"

Expand Down
149 changes: 144 additions & 5 deletions arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
//!
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#fixed-shape-tensor>
use serde::{Deserialize, Serialize};
use serde_core::de::{self, MapAccess, Visitor};
use serde_core::ser::SerializeStruct;
use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
use std::fmt;

use crate::{ArrowError, DataType, extension::ExtensionType};

Expand Down Expand Up @@ -129,7 +132,7 @@ impl FixedShapeTensor {
}

/// Extension type metadata for [`FixedShapeTensor`].
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[derive(Debug, Clone, PartialEq)]
pub struct FixedShapeTensorMetadata {
/// The physical shape of the contained tensors.
shape: Vec<usize>,
Expand All @@ -141,6 +144,143 @@ pub struct FixedShapeTensorMetadata {
permutations: Option<Vec<usize>>,
}

impl Serialize for FixedShapeTensorMetadata {
    /// Writes the metadata as a struct named `FixedShapeTensorMetadata` with
    /// the fields `shape`, `dim_names` and `permutations`, in that order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        const FIELD_COUNT: usize = 3;

        // Destructuring keeps this impl in sync with the struct definition:
        // adding a field without serializing it becomes a compile error.
        let Self {
            shape,
            dim_names,
            permutations,
        } = self;

        let mut fields = serializer.serialize_struct("FixedShapeTensorMetadata", FIELD_COUNT)?;
        fields.serialize_field("shape", shape)?;
        fields.serialize_field("dim_names", dim_names)?;
        fields.serialize_field("permutations", permutations)?;
        fields.end()
    }
}

/// Identifies which field of the fixed shape tensor metadata a key names.
///
/// Produced by `MetadataFieldVisitor` from the key strings `shape`,
/// `dim_names` and `permutations`.
#[derive(Debug)]
enum MetadataField {
/// The `shape` key.
Shape,
/// The `dim_names` key.
DimNames,
/// The `permutations` key.
Permutations,
}

/// Visitor that resolves a key string to the matching [`MetadataField`].
struct MetadataFieldVisitor;

impl<'de> Visitor<'de> for MetadataFieldVisitor {
    type Value = MetadataField;

    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("`shape`, `dim_names`, or `permutations`")
    }

    /// Maps a key name to its field; any other name is an `unknown_field`
    /// error listing the accepted names.
    fn visit_str<E>(self, name: &str) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        const KNOWN_FIELDS: &[&str] = &["shape", "dim_names", "permutations"];

        let field = match name {
            "shape" => MetadataField::Shape,
            "dim_names" => MetadataField::DimNames,
            "permutations" => MetadataField::Permutations,
            other => return Err(E::unknown_field(other, KNOWN_FIELDS)),
        };
        Ok(field)
    }
}

impl<'de> Deserialize<'de> for MetadataField {
    /// Reads a field identifier and resolves it through
    /// [`MetadataFieldVisitor`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = MetadataFieldVisitor;
        deserializer.deserialize_identifier(visitor)
    }
}

/// Visitor that builds a [`FixedShapeTensorMetadata`] from either a sequence
/// of three elements or a map keyed by field name.
struct FixedShapeTensorMetadataVisitor;

impl<'de> Visitor<'de> for FixedShapeTensorMetadataVisitor {
    type Value = FixedShapeTensorMetadata;

    fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("struct FixedShapeTensorMetadata")
    }

    /// Reads `shape`, `dim_names` and `permutations` positionally.
    ///
    /// # Errors
    ///
    /// Returns `invalid_length` with the index of the first missing element
    /// when the sequence has fewer than three elements.
    fn visit_seq<V>(self, mut seq: V) -> Result<FixedShapeTensorMetadata, V::Error>
    where
        V: de::SeqAccess<'de>,
    {
        let shape = seq
            .next_element()?
            .ok_or_else(|| de::Error::invalid_length(0, &self))?;
        let dim_names = seq
            .next_element()?
            .ok_or_else(|| de::Error::invalid_length(1, &self))?;
        let permutations = seq
            .next_element()?
            .ok_or_else(|| de::Error::invalid_length(2, &self))?;
        Ok(FixedShapeTensorMetadata {
            shape,
            dim_names,
            permutations,
        })
    }

    /// Reads the fields by name, in any order.
    ///
    /// `shape` is required. `dim_names` and `permutations` are optional: a
    /// missing key and an explicit null both yield `None`.
    ///
    /// # Errors
    ///
    /// Returns `duplicate_field` when a key occurs more than once,
    /// `missing_field` when `shape` is absent, and whatever error key
    /// deserialization produces for an unrecognized key.
    fn visit_map<V>(self, mut map: V) -> Result<FixedShapeTensorMetadata, V::Error>
    where
        V: MapAccess<'de>,
    {
        let mut shape = None;
        // Outer `Option`: has the key been seen (for duplicate detection)?
        // Inner `Option`: the field value itself, so that an explicit null —
        // which is what serializing a `None` field produces in formats such
        // as JSON — is accepted instead of being rejected as "not a sequence".
        let mut dim_names: Option<Option<_>> = None;
        let mut permutations: Option<Option<_>> = None;

        while let Some(key) = map.next_key()? {
            match key {
                MetadataField::Shape => {
                    if shape.is_some() {
                        return Err(de::Error::duplicate_field("shape"));
                    }
                    shape = Some(map.next_value()?);
                }
                MetadataField::DimNames => {
                    if dim_names.is_some() {
                        return Err(de::Error::duplicate_field("dim_names"));
                    }
                    dim_names = Some(map.next_value::<Option<_>>()?);
                }
                MetadataField::Permutations => {
                    if permutations.is_some() {
                        return Err(de::Error::duplicate_field("permutations"));
                    }
                    permutations = Some(map.next_value::<Option<_>>()?);
                }
            }
        }

        let shape = shape.ok_or_else(|| de::Error::missing_field("shape"))?;

        Ok(FixedShapeTensorMetadata {
            shape,
            // Collapse "key absent" and "key present but null" into `None`.
            dim_names: dim_names.flatten(),
            permutations: permutations.flatten(),
        })
    }
}

impl<'de> Deserialize<'de> for FixedShapeTensorMetadata {
    /// Deserializes the metadata as a struct named `FixedShapeTensorMetadata`,
    /// driving [`FixedShapeTensorMetadataVisitor`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        const NAME: &str = "FixedShapeTensorMetadata";
        const FIELDS: &[&str] = &["shape", "dim_names", "permutations"];

        deserializer.deserialize_struct(NAME, FIELDS, FixedShapeTensorMetadataVisitor)
    }
}

impl FixedShapeTensorMetadata {
/// Returns metadata for a fixed shape tensor extension type.
///
Expand Down Expand Up @@ -377,9 +517,8 @@ mod tests {
}

#[test]
#[should_panic(
expected = "FixedShapeTensor metadata deserialization failed: missing field `shape`"
)]
#[should_panic(expected = "FixedShapeTensor metadata deserialization failed: \
unknown field `not-shape`, expected one of `shape`, `dim_names`, `permutations`")]
fn invalid_metadata() {
let fixed_shape_tensor =
FixedShapeTensor::try_new(DataType::Float32, [100, 200, 500], None, None).unwrap();
Expand Down
77 changes: 74 additions & 3 deletions arrow-schema/src/extension/canonical/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
//!
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
use serde::{Deserialize, Serialize};
use serde_core::de::{self, MapAccess, Visitor};
use serde_core::ser::SerializeStruct;
use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
use std::fmt;

use crate::{ArrowError, DataType, extension::ExtensionType};

Expand All @@ -42,10 +45,78 @@ use crate::{ArrowError, DataType, extension::ExtensionType};
pub struct Json(JsonMetadata);

/// Empty object
#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
#[derive(Debug, Clone, Copy, PartialEq)]
struct Empty {}

impl Serialize for Empty {
    /// Serializes as a struct named `Empty` that has no fields.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Open the struct and close it straight away: there is nothing to write.
        serializer.serialize_struct("Empty", 0)?.end()
    }
}

struct EmptyVisitor;

impl<'de> Visitor<'de> for EmptyVisitor {
type Value = Empty;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct Empty")
}

fn visit_seq<A>(self, mut _seq: A) -> Result<Self::Value, A::Error>
where
A: de::SeqAccess<'de>,
{
Ok(Empty {})
}

fn visit_map<V>(self, mut map: V) -> Result<Empty, V::Error>
where
V: MapAccess<'de>,
{
if let Some(key) = map.next_key::<String>()? {
return Err(de::Error::unknown_field(&key, EMPTY_FIELDS));
}
Ok(Empty {})
}

fn visit_u64<E>(self, _v: u64) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(de::Error::unknown_field("", EMPTY_FIELDS))
}

fn visit_str<E>(self, _v: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(de::Error::unknown_field("", EMPTY_FIELDS))
}

fn visit_bytes<E>(self, _v: &[u8]) -> Result<Self::Value, E>
where
E: de::Error,
{
Err(de::Error::unknown_field("", EMPTY_FIELDS))
}
}

/// Field names accepted when deserializing `Empty`: none, so every key that
/// is encountered gets reported as unknown.
static EMPTY_FIELDS: &[&str] = &[];

impl<'de> Deserialize<'de> for Empty {
    /// Deserializes a struct named `Empty` with no known fields, driving
    /// [`EmptyVisitor`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = EmptyVisitor;
        deserializer.deserialize_struct("Empty", EMPTY_FIELDS, visitor)
    }
}

/// Extension type metadata for [`Json`].
///
/// Wraps an optional `Empty` object; the derived `Default` yields `None`.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct JsonMetadata(Option<Empty>);
Expand Down
Loading
Loading