Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 70 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ documentation = "https://docs.rs/crate/datafusion-postgres/"

[workspace.dependencies]
arrow = "56"
arrow-schema = "56"
bytes = "1.10.1"
chrono = { version = "0.4", features = ["std"] }
datafusion = { version = "50", default-features = false }
Expand Down
5 changes: 4 additions & 1 deletion arrow-pg/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@ readme = "../README.md"
rust-version.workspace = true

[features]
default = ["arrow"]
default = ["arrow", "geo"]
arrow = ["dep:arrow"]
datafusion = ["dep:datafusion"]
geo = ["postgres-types/with-geo-types-0_7", "dep:geoarrow-schema"]
# for testing
_duckdb = []
_bundled = ["duckdb/bundled"]


[dependencies]
arrow = { workspace = true, optional = true }
arrow-schema = { workspace = true }
geoarrow-schema = { version = "0.6", optional = true }
bytes.workspace = true
chrono.workspace = true
datafusion = { workspace = true, optional = true }
Expand Down
130 changes: 73 additions & 57 deletions arrow-pg/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::sync::Arc;

#[cfg(not(feature = "datafusion"))]
use arrow::{datatypes::*, record_batch::RecordBatch};
use arrow_schema::extension::ExtensionType;
#[cfg(feature = "datafusion")]
use datafusion::arrow::{datatypes::*, record_batch::RecordBatch};

Expand All @@ -17,34 +18,42 @@ use crate::row_encoder::RowEncoder;
#[cfg(feature = "datafusion")]
pub mod df;

pub fn into_pg_type(arrow_type: &DataType) -> PgWireResult<Type> {
Ok(match arrow_type {
DataType::Null => Type::UNKNOWN,
DataType::Boolean => Type::BOOL,
DataType::Int8 | DataType::UInt8 => Type::CHAR,
DataType::Int16 | DataType::UInt16 => Type::INT2,
DataType::Int32 | DataType::UInt32 => Type::INT4,
DataType::Int64 | DataType::UInt64 => Type::INT8,
DataType::Timestamp(_, tz) => {
if tz.is_some() {
Type::TIMESTAMPTZ
} else {
Type::TIMESTAMP
pub fn into_pg_type(field: &Arc<Field>) -> PgWireResult<Type> {
let arrow_type = field.data_type();

match field.extension_type_name() {
#[cfg(feature = "geo")]
Some(geoarrow_schema::PointType::NAME) => Ok(Type::POINT),
_ => Ok(match arrow_type {
DataType::Null => Type::UNKNOWN,
DataType::Boolean => Type::BOOL,
DataType::Int8 | DataType::UInt8 => Type::CHAR,
DataType::Int16 | DataType::UInt16 => Type::INT2,
DataType::Int32 | DataType::UInt32 => Type::INT4,
DataType::Int64 | DataType::UInt64 => Type::INT8,
DataType::Timestamp(_, tz) => {
if tz.is_some() {
Type::TIMESTAMPTZ
} else {
Type::TIMESTAMP
}
}
}
DataType::Time32(_) | DataType::Time64(_) => Type::TIME,
DataType::Date32 | DataType::Date64 => Type::DATE,
DataType::Interval(_) => Type::INTERVAL,
DataType::Binary
| DataType::FixedSizeBinary(_)
| DataType::LargeBinary
| DataType::BinaryView => Type::BYTEA,
DataType::Float16 | DataType::Float32 => Type::FLOAT4,
DataType::Float64 => Type::FLOAT8,
DataType::Decimal128(_, _) => Type::NUMERIC,
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Type::TEXT,
DataType::List(field) | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => {
match field.data_type() {
DataType::Time32(_) | DataType::Time64(_) => Type::TIME,
DataType::Date32 | DataType::Date64 => Type::DATE,
DataType::Interval(_) => Type::INTERVAL,
DataType::Binary
| DataType::FixedSizeBinary(_)
| DataType::LargeBinary
| DataType::BinaryView => Type::BYTEA,
DataType::Float16 | DataType::Float32 => Type::FLOAT4,
DataType::Float64 => Type::FLOAT8,
DataType::Decimal128(_, _) => Type::NUMERIC,
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Type::TEXT,
DataType::List(field)
| DataType::FixedSizeList(field, _)
| DataType::LargeList(field)
| DataType::ListView(field)
| DataType::LargeListView(field) => match field.data_type() {
DataType::Boolean => Type::BOOL_ARRAY,
DataType::Int8 | DataType::UInt8 => Type::CHAR_ARRAY,
DataType::Int16 | DataType::UInt16 => Type::INT2_ARRAY,
Expand All @@ -67,10 +76,10 @@ pub fn into_pg_type(arrow_type: &DataType) -> PgWireResult<Type> {
DataType::Float16 | DataType::Float32 => Type::FLOAT4_ARRAY,
DataType::Float64 => Type::FLOAT8_ARRAY,
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Type::TEXT_ARRAY,
struct_type @ DataType::Struct(_) => Type::new(
DataType::Struct(_) => Type::new(
Type::RECORD_ARRAY.name().into(),
Type::RECORD_ARRAY.oid(),
Kind::Array(into_pg_type(struct_type)?),
Kind::Array(into_pg_type(field)?),
Type::RECORD_ARRAY.schema().into(),
),
list_type => {
Expand All @@ -80,35 +89,42 @@ pub fn into_pg_type(arrow_type: &DataType) -> PgWireResult<Type> {
format!("Unsupported List Datatype {list_type}"),
))));
}
},
DataType::Dictionary(_, value_type) => {
let field = Arc::new(Field::new(
Field::LIST_FIELD_DEFAULT_NAME,
*value_type.clone(),
true,
));
into_pg_type(&field)?
}
}
DataType::Dictionary(_, value_type) => into_pg_type(value_type)?,
DataType::Struct(fields) => {
let name: String = fields
.iter()
.map(|x| x.name().clone())
.reduce(|a, b| a + ", " + &b)
.map(|x| format!("({x})"))
.unwrap_or("()".to_string());
let kind = Kind::Composite(
fields
DataType::Struct(fields) => {
let name: String = fields
.iter()
.map(|x| {
into_pg_type(x.data_type())
.map(|_type| postgres_types::Field::new(x.name().clone(), _type))
})
.collect::<Result<Vec<_>, PgWireError>>()?,
);
Type::new(name, Type::RECORD.oid(), kind, Type::RECORD.schema().into())
}
_ => {
return Err(PgWireError::UserError(Box::new(ErrorInfo::new(
"ERROR".to_owned(),
"XX000".to_owned(),
format!("Unsupported Datatype {arrow_type}"),
))));
}
})
.map(|x| x.name().clone())
.reduce(|a, b| a + ", " + &b)
.map(|x| format!("({x})"))
.unwrap_or("()".to_string());
let kind = Kind::Composite(
fields
.iter()
.map(|x| {
into_pg_type(x)
.map(|_type| postgres_types::Field::new(x.name().clone(), _type))
})
.collect::<Result<Vec<_>, PgWireError>>()?,
);
Type::new(name, Type::RECORD.oid(), kind, Type::RECORD.schema().into())
}
_ => {
return Err(PgWireError::UserError(Box::new(ErrorInfo::new(
"ERROR".to_owned(),
"XX000".to_owned(),
format!("Unsupported Datatype {arrow_type}"),
))));
}
}),
}
}

pub fn arrow_schema_to_pg_fields(schema: &Schema, format: &Format) -> PgWireResult<Vec<FieldInfo>> {
Expand All @@ -117,7 +133,7 @@ pub fn arrow_schema_to_pg_fields(schema: &Schema, format: &Format) -> PgWireResu
.iter()
.enumerate()
.map(|(idx, f)| {
let pg_type = into_pg_type(f.data_type())?;
let pg_type = into_pg_type(f)?;
Ok(FieldInfo::new(
f.name().into(),
None,
Expand Down
4 changes: 2 additions & 2 deletions arrow-pg/src/datatypes/df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::iter;
use std::sync::Arc;

use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
use datafusion::arrow::datatypes::{DataType, Date32Type};
use datafusion::arrow::datatypes::{DataType, Date32Type, Field};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::common::ParamValues;
use datafusion::prelude::*;
Expand Down Expand Up @@ -61,7 +61,7 @@ where
if let Some(ty) = pg_type_hint {
Ok(ty.clone())
} else if let Some(infer_type) = inferenced_type {
into_pg_type(infer_type)
into_pg_type(&Arc::new(Field::new("item", infer_type.clone(), true)))
} else {
Ok(Type::UNKNOWN)
}
Expand Down
Loading
Loading