Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,6 @@ fn arrow_array_builder_type_and_declaration(
);
ident
}
Type::Binary => {
let ident = format_ident!("LargeBinaryBuilder");
declarations.insert("arrow", ForwardDecl::Class(ident.clone()));
ident
}
Type::String => {
let ident = format_ident!("StringBuilder");
declarations.insert("arrow", ForwardDecl::Class(ident.clone()));
Expand Down
25 changes: 1 addition & 24 deletions crates/build/re_types_builder/src/codegen/cpp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2003,7 +2003,6 @@ fn quote_fill_arrow_array_builder(
ElementType::Float16 => Some("HalfFloatBuilder"),
ElementType::Float32 => Some("FloatBuilder"),
ElementType::Float64 => Some("DoubleBuilder"),
ElementType::Binary => Some("BinaryBuilder"),
ElementType::String => Some("StringBuilder"),
ElementType::Object{..} => None,
};
Expand Down Expand Up @@ -2234,7 +2233,7 @@ fn quote_append_single_value_to_builder(
value_access: &TokenStream,
includes: &mut Includes,
) -> TokenStream {
match typ {
match &typ {
Type::Unit => {
quote!(ARROW_RETURN_NOT_OK(#value_builder->AppendNull());)
}
Expand All @@ -2253,11 +2252,6 @@ fn quote_append_single_value_to_builder(
| Type::String => {
quote!(ARROW_RETURN_NOT_OK(#value_builder->Append(#value_access));)
}
Type::Binary => {
quote!(
ARROW_RETURN_NOT_OK(#value_builder->Append(#value_access.data(), static_cast<int64_t>(#value_access.size())));
)
}
Type::Float16 => {
// Cast `rerun::half` to a `uint16_t`
quote! {
Expand Down Expand Up @@ -2296,14 +2290,6 @@ fn quote_append_single_value_to_builder(
);
}
}
ElementType::Binary => {
quote! {
for (size_t item_idx = 0; item_idx < #num_items_per_element; item_idx += 1) {
auto&& data = &#value_access[elem_idx].data;
ARROW_RETURN_NOT_OK(#value_builder->Append(data.data(), static_cast<int32_t>(data.size())));
}
}
}
ElementType::String => {
quote! {
for (size_t item_idx = 0; item_idx < #num_items_per_element; item_idx += 1) {
Expand Down Expand Up @@ -2461,10 +2447,6 @@ fn quote_field_type(includes: &mut Includes, obj_field: &ObjectField) -> TokenSt
}
Type::Float32 => quote! { float },
Type::Float64 => quote! { double },
Type::Binary => {
includes.insert_rerun("collection.hpp");
quote! { rerun::Collection<uint8_t> }
}
Type::String => {
includes.insert_system("string");
quote! { std::string }
Expand Down Expand Up @@ -2525,10 +2507,6 @@ fn quote_element_type(includes: &mut Includes, typ: &ElementType) -> TokenStream
}
ElementType::Float32 => quote! { float },
ElementType::Float64 => quote! { double },
ElementType::Binary => {
includes.insert_rerun("collection.hpp");
quote! { rerun::Collection<uint8_t> }
}
ElementType::String => {
includes.insert_system("string");
quote! { std::string }
Expand Down Expand Up @@ -2670,7 +2648,6 @@ fn quote_arrow_datatype(
Type::Float16 => quote!(arrow::float16()),
Type::Float32 => quote!(arrow::float32()),
Type::Float64 => quote!(arrow::float64()),
Type::Binary => quote!(arrow::large_binary()),
Type::String => quote!(arrow::utf8()),
Type::Bool => quote!(arrow::boolean()),

Expand Down
1 change: 0 additions & 1 deletion crates/build/re_types_builder/src/codegen/docs/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,6 @@ fn write_fields(reporter: &Reporter, objects: &Objects, o: &mut String, object:
Type::Float16 => atomic("float16"),
Type::Float32 => atomic("float32"),
Type::Float64 => atomic("float64"),
Type::Binary => atomic("binary"),
Type::String => atomic("utf8"),

Type::Array { elem_type, length } => {
Expand Down
20 changes: 2 additions & 18 deletions crates/build/re_types_builder/src/codegen/python/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,6 @@ fn quote_field_type_from_field(
| Type::Int64 => "int".to_owned(),
Type::Bool => "bool".to_owned(),
Type::Float16 | Type::Float32 | Type::Float64 => "float".to_owned(),
Type::Binary => "bytes".to_owned(),
Type::String => "str".to_owned(),
Type::Array {
elem_type,
Expand All @@ -1692,7 +1691,6 @@ fn quote_field_type_from_field(
ElementType::Float16 => "npt.NDArray[np.float16]".to_owned(),
ElementType::Float32 => "npt.NDArray[np.float32]".to_owned(),
ElementType::Float64 => "npt.NDArray[np.float64]".to_owned(),
ElementType::Binary => "list[bytes]".to_owned(),
ElementType::String => "list[str]".to_owned(),
ElementType::Object { .. } => {
let typ = quote_type_from_element_type(elem_type);
Expand Down Expand Up @@ -1754,13 +1752,6 @@ fn quote_field_converter_from_field(
"float".to_owned()
}
}
Type::Binary => {
if field.is_nullable {
"bytes_or_none".to_owned()
} else {
"bytes".to_owned()
}
}
Type::String => {
if field.is_nullable {
"str_or_none".to_owned()
Expand Down Expand Up @@ -1877,7 +1868,6 @@ fn quote_type_from_type(typ: &Type) -> String {
| Type::Int64 => "int".to_owned(),
Type::Bool => "bool".to_owned(),
Type::Float16 | Type::Float32 | Type::Float64 => "float".to_owned(),
Type::Binary => "bytes".to_owned(),
Type::String => "str".to_owned(),
Type::Object { fqname } => fqname_to_type(fqname),
Type::Array { elem_type, .. } | Type::Vector { elem_type } => {
Expand Down Expand Up @@ -2036,7 +2026,6 @@ fn np_dtype_from_type(t: &Type) -> Option<&'static str> {
Type::Float32 => Some("np.float32"),
Type::Float64 => Some("np.float64"),
Type::Unit
| Type::Binary
| Type::String
| Type::Array { .. }
| Type::Vector { .. }
Expand Down Expand Up @@ -2133,11 +2122,7 @@ fn quote_arrow_serialization(
code.push_indented(2, &field_fwd, 1);
}

Type::Unit
| Type::Binary
| Type::String
| Type::Array { .. }
| Type::Vector { .. } => {
Type::Unit | Type::String | Type::Array { .. } | Type::Vector { .. } => {
return Err(
"We lack codegen for arrow-serialization of general structs".to_owned()
);
Expand Down Expand Up @@ -2264,7 +2249,6 @@ return pa.array(pa_data, type=data_type)
| Type::Float16
| Type::Float32
| Type::Float64
| Type::Binary
| Type::String => {
let datatype = quote_arrow_datatype(&type_registry.get(&field.fqname));
format!("pa.array({variant_kind_list}, type={datatype})")
Expand Down Expand Up @@ -2822,7 +2806,7 @@ fn quote_arrow_datatype(datatype: &DataType) -> String {
DataType::Atomic(AtomicDataType::Float32) => "pa.float32()".to_owned(),
DataType::Atomic(AtomicDataType::Float64) => "pa.float64()".to_owned(),

DataType::Binary => "pa.large_binary()".to_owned(),
DataType::Binary => "pa.binary()".to_owned(),

DataType::Utf8 => "pa.utf8()".to_owned(),

Expand Down
2 changes: 0 additions & 2 deletions crates/build/re_types_builder/src/codegen/rust/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,6 @@ impl quote::ToTokens for TypeTokenizer<'_> {
Type::Float16 => quote!(half::f16),
Type::Float32 => quote!(f32),
Type::Float64 => quote!(f64),
Type::Binary => quote!(::arrow::buffer::Buffer),
Type::String => quote!(::re_types_core::ArrowString),
Type::Array { elem_type, length } => {
if *unwrap {
Expand Down Expand Up @@ -822,7 +821,6 @@ impl quote::ToTokens for &ElementType {
ElementType::Float16 => quote!(half::f16),
ElementType::Float32 => quote!(f32),
ElementType::Float64 => quote!(f64),
ElementType::Binary => quote!(::arrow::buffer::Buffer),
ElementType::String => quote!(::re_types_core::ArrowString),
ElementType::Object { fqname } => quote_fqname_as_type_path(fqname),
}
Expand Down
2 changes: 1 addition & 1 deletion crates/build/re_types_builder/src/codegen/rust/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {
DataType::Atomic(AtomicDataType::Float32) => quote!(DataType::Float32),
DataType::Atomic(AtomicDataType::Float64) => quote!(DataType::Float64),

DataType::Binary => quote!(DataType::LargeBinary),
DataType::Binary => quote!(DataType::Binary),

DataType::Utf8 => quote!(DataType::Utf8),

Expand Down
77 changes: 6 additions & 71 deletions crates/build/re_types_builder/src/codegen/rust/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,75 +537,11 @@ fn quote_arrow_field_deserializer(
}
}

DataType::Binary => {
// Special code to handle deserializing both 32-bit and 64-bit offsets (BinaryArray vs LargeBinaryArray)
quote! {{
fn extract_from_binary<O>(
arrow_data: &arrow::array::GenericByteArray<arrow::datatypes::GenericBinaryType<O>>,
) -> DeserializationResult<std::vec::Vec<Option<arrow::buffer::Buffer>>>
where
O: ::arrow::array::OffsetSizeTrait,
{
use ::arrow::array::Array as _;
use ::re_types_core::arrow_zip_validity::ZipValidity;

let arrow_data_buf = arrow_data.values();
let offsets = arrow_data.offsets();

ZipValidity::new_with_validity(offsets.windows(2), arrow_data.nulls())
.map(|elem| {
elem.map(|window| {
// NOTE: Do _not_ use `Buffer::sliced`, it panics on malformed inputs.

let start = window[0].as_usize();
let end = window[1].as_usize();
let len = end - start;

// NOTE: It is absolutely crucial we explicitly handle the
// boundchecks manually first, otherwise rustc completely chokes
// when slicing the data (as in: a 100x perf drop)!
if arrow_data_buf.len() < end {
// error context is appended below during final collection
return Err(DeserializationError::offset_slice_oob(
(start, end),
arrow_data_buf.len(),
));
}

#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
let data = arrow_data_buf.slice_with_length(start, len);
Ok(data)
})
.transpose()
})
.collect::<DeserializationResult<Vec<Option<_>>>>()
}

if let Some(arrow_data) = #data_src.as_any().downcast_ref::<BinaryArray>() {
extract_from_binary(arrow_data)
.with_context(#obj_field_fqname)?
.into_iter()
} else if let Some(arrow_data) = #data_src.as_any().downcast_ref::<LargeBinaryArray>()
{
extract_from_binary(arrow_data)
.with_context(#obj_field_fqname)?
.into_iter()
} else {
let expected = Self::arrow_datatype();
let actual = arrow_data.data_type().clone();
return Err(DeserializationError::datatype_mismatch(expected, actual))
.with_context(#obj_field_fqname);
}
}}
}

DataType::Utf8 => {
let quoted_downcast = quote_array_downcast(
obj_field_fqname,
data_src,
quote!(StringArray),
quoted_datatype,
);
let quoted_downcast = {
let cast_as = quote!(StringArray);
quote_array_downcast(obj_field_fqname, data_src, cast_as, quoted_datatype)
};

let quoted_iter_transparency = quote_iterator_transparency(
objects,
Expand Down Expand Up @@ -641,8 +577,7 @@ fn quote_arrow_field_deserializer(
(start, end), #data_src_buf.len(),
));
}

#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] // TODO(apache/arrow-rs#6900): slice_with_length_unchecked unsafe when https://github.com/apache/arrow-rs/pull/6901 is merged and released
let data = #data_src_buf.slice_with_length(start, len);

Ok(data)
Expand Down Expand Up @@ -889,7 +824,7 @@ fn quote_arrow_field_deserializer(
quote!(#fqname_use::from_arrow_opt(#data_src).with_context(#obj_field_fqname)?.into_iter())
}

DataType::Object { .. } => unimplemented!("{datatype:#?}"),
_ => unimplemented!("{datatype:#?}"),
}
}

Expand Down
46 changes: 16 additions & 30 deletions crates/build/re_types_builder/src/codegen/rust/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -583,14 +583,7 @@ fn quote_arrow_field_serializer(
}
}

DataType::Binary | DataType::Utf8 => {
let is_binary = datatype.to_logical_type() == &DataType::Binary;
let as_bytes = if is_binary {
quote!()
} else {
quote!(.as_bytes())
};

DataType::Utf8 => {
// NOTE: We need values for all slots, regardless of what the validity says,
// hence `unwrap_or_default`.
let (quoted_member_accessor, quoted_transparent_length) = if inner_is_arrow_transparent
Expand Down Expand Up @@ -630,7 +623,7 @@ fn quote_arrow_field_serializer(

let inner_data_and_offsets = if elements_are_nullable {
quote! {
let offsets = arrow::buffer::OffsetBuffer::from_lengths(
let offsets = arrow::buffer::OffsetBuffer::<i32>::from_lengths(
#data_src.iter().map(|opt| opt.as_ref() #quoted_transparent_length .unwrap_or_default())
);

Expand All @@ -643,13 +636,13 @@ fn quote_arrow_field_serializer(
// NOTE: Flattening to remove the guaranteed layer of nullability: we don't care
// about it while building the backing buffer since it's all offsets driven.
for data in #data_src.iter().flatten() {
buffer_builder.append_slice(data #quoted_member_accessor #as_bytes);
buffer_builder.append_slice(data #quoted_member_accessor.as_bytes());
}
let inner_data: arrow::buffer::Buffer = buffer_builder.finish();
}
} else {
quote! {
let offsets = arrow::buffer::OffsetBuffer::from_lengths(
let offsets = arrow::buffer::OffsetBuffer::<i32>::from_lengths(
#data_src.iter() #quoted_transparent_length
);

Expand All @@ -660,29 +653,22 @@ fn quote_arrow_field_serializer(

let mut buffer_builder = arrow::array::builder::BufferBuilder::<u8>::new(capacity);
for data in &#data_src {
buffer_builder.append_slice(data #quoted_member_accessor #as_bytes);
buffer_builder.append_slice(data #quoted_member_accessor.as_bytes());
}
let inner_data: arrow::buffer::Buffer = buffer_builder.finish();
}
};

if is_binary {
quote! {{
#inner_data_and_offsets
as_array_ref(LargeBinaryArray::new(offsets, inner_data, #validity_src))
}}
} else {
quote! {{
#inner_data_and_offsets

// Safety: we're building this from actual native strings, so no need to do the
// whole utf8 validation _again_.
// It would be nice to use quote_comment here and put this safety notice in the generated code,
// but that seems to push us over some complexity limit causing rustfmt to fail.
#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
as_array_ref(unsafe { StringArray::new_unchecked(offsets, inner_data, #validity_src) })
}}
}
quote! {{
#inner_data_and_offsets

// Safety: we're building this from actual native strings, so no need to do the
// whole utf8 validation _again_.
// It would be nice to use quote_comment here and put this safety notice in the generated code,
// but that seems to push us over some complexity limit causing rustfmt to fail.
#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
as_array_ref(unsafe { StringArray::new_unchecked(offsets, inner_data, #validity_src) })
}}
}

DataType::List(inner_field) | DataType::FixedSizeList(inner_field, _) => {
Expand Down Expand Up @@ -933,6 +919,6 @@ fn quote_arrow_field_serializer(
}}
}

DataType::Object { .. } => unimplemented!("{datatype:#?}"),
_ => unimplemented!("{datatype:#?}"),
}
}
Loading
Loading