Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 312 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

21 changes: 20 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ daft-sql = {path = "src/daft-sql", default-features = false}
daft-stats = {path = "src/daft-stats", default-features = false}
daft-warc = {path = "src/daft-warc", default-features = false}
daft-writers = {path = "src/daft-writers", default-features = false}
geoarrow-array = {path = "src/geoarrow/geoarrow-array", default-features = false}
geoarrow-cast = {path = "src/geoarrow/geoarrow-cast", default-features = false}
geoarrow-expr-geo = {path = "src/geoarrow/geoarrow-expr-geo", default-features = false}
geoarrow-schema = {path = "src/geoarrow/geoarrow-schema", default-features = false}
log = {workspace = true}
lzma-sys = {version = "*", features = ["static"]}
pyo3 = {workspace = true, optional = true}
Expand Down Expand Up @@ -222,7 +226,11 @@ members = [
"src/daft-writers",
"src/hyperloglog",
"src/daft-cli",
"src/daft-text"
"src/daft-text",
"src/geoarrow/geoarrow-array",
"src/geoarrow/geoarrow-cast",
"src/geoarrow/geoarrow-expr-geo",
"src/geoarrow/geoarrow-schema"
]
exclude = [
"examples/hello"
Expand All @@ -232,6 +240,7 @@ exclude = [
arrow = "57.1.0"
arrow-array = {version = "57.1.0", features = ["chrono-tz"]}
arrow-buffer = "57.1.0"
arrow-cast = "57.1.0"
arrow-csv = "57.1.0"
arrow-data = "57.1.0"
arrow-flight = "57.1.0"
Expand Down Expand Up @@ -299,6 +308,16 @@ dashmap = "6.1.0"
educe = "0.6.0"
futures = "0.3.30"
hashbrown = "0.16"
geo = "0.31.0"
geo-traits = "0.3.0"
geo-types = "0.7.16"
geozero = "0.14"
geoarrow-array = {path = "src/geoarrow/geoarrow-array", default-features = false}
geoarrow-cast = {path = "src/geoarrow/geoarrow-cast", default-features = false}
geoarrow-expr-geo = {path = "src/geoarrow/geoarrow-expr-geo", default-features = false}
geoarrow-schema = {path = "src/geoarrow/geoarrow-schema", default-features = false}
wkb = "0.9.1"
wkt = "0.14"
html-escape = "0.2.13"
image = {version = "0.25.10", default-features = false}
indexmap = "2.9.0"
Expand Down
3 changes: 2 additions & 1 deletion daft/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def refresh_logger() -> None:
from_pylist,
from_ray_dataset,
)
from daft.daft import ImageFormat, ImageMode, ImageProperty, ResourceRequest
from daft.daft import GeospatialMode, ImageFormat, ImageMode, ImageProperty, ResourceRequest
from daft.dataframe import DataFrame
from daft.schema import Schema
from daft.datatype import DataType, TimeUnit, MediaType
Expand Down Expand Up @@ -178,6 +178,7 @@ def __getattr__(name: str) -> object:
"DataType",
"Expression",
"File",
"GeospatialMode",
"IOConfig",
"Identifier",
"ImageFormat",
Expand Down
28 changes: 28 additions & 0 deletions daft/daft/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ class ImageProperty(Enum):
@staticmethod
def from_property_string(attr: str) -> ImageProperty: ...

class GeospatialMode:
@staticmethod
def from_user_defined_mode(dimension: str, coord_type: str) -> GeospatialMode: ...

class PyWindowBoundary:
"""Represents a window frame boundary in window functions."""

Expand Down Expand Up @@ -1370,6 +1374,30 @@ class PyDataType:
def python() -> PyDataType: ...
@staticmethod
def file(media_type: PyMediaType) -> PyDataType: ...
@staticmethod
def wkt(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def wkb(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def point(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def linestring(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def polygon(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def multipoint(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def multilinestring(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def multipolygon(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def geometry_collection(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def geometry(mode: GeospatialMode | None = None) -> PyDataType: ...
@staticmethod
def geography() -> PyDataType: ...
@staticmethod
def rect(mode: GeospatialMode | None = None) -> PyDataType: ...
def to_arrow(self, cast_tensor_type_for_ray: builtins.bool | None = None) -> pa.DataType: ...
def is_null(self) -> builtins.bool: ...
def is_boolean(self) -> builtins.bool: ...
Expand Down
107 changes: 106 additions & 1 deletion daft/datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from packaging.version import parse

from daft.daft import ImageMode, PyDataType, PyMediaType, PyTimeUnit, sql_datatype
from daft.daft import GeospatialMode, ImageMode, PyDataType, PyMediaType, PyTimeUnit, sql_datatype
from daft.dependencies import np, pa
from daft.runners import get_or_create_runner

Expand Down Expand Up @@ -727,6 +727,111 @@ def image(
return cls._from_pydatatype(PyDataType.image(mode, height, width))

@datatype_constructor
@classmethod
def wkt(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.wkt(mode))

@classmethod
def wkb(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.wkb(mode))

@classmethod
def point(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.point(mode))

@classmethod
def linestring(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.linestring(mode))

@classmethod
def polygon(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.polygon(mode))

@classmethod
def multipoint(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.multipoint(mode))

@classmethod
def multilinestring(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.multilinestring(mode))

@classmethod
def multipolygon(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.multipolygon(mode))

@classmethod
def geometry_collection(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.geometry_collection(mode))

@classmethod
def geometry(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.geometry(mode))

@classmethod
def geography(
cls,
) -> DataType:
return cls._from_pydatatype(PyDataType.geography())

@classmethod
def rect(
cls,
mode: GeospatialMode | None = None,
) -> DataType:
if not isinstance(mode, GeospatialMode) and mode is not None:
raise ValueError(f"geospatial mode must be none or GeospatialMode variant, but got: {mode}")
return cls._from_pydatatype(PyDataType.rect(mode))

@classmethod
def tensor(
cls,
Expand Down
4 changes: 4 additions & 0 deletions src/daft-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[dependencies]
arrow = {workspace = true}
arrow-row = {workspace = true}
arrow-schema = {workspace = true}
bincode = {workspace = true}
bytemuck = {version = "1", features = ["derive"]}
chrono = {workspace = true}
Expand All @@ -21,6 +22,9 @@ daft-sketch = {path = "../daft-sketch", default-features = false}
derive_more = {workspace = true}
fastrand = "2.1.0"
fnv = "1.0.7"
geoarrow-array = {workspace = true, default-features = false}
geoarrow-cast = {workspace = true, default-features = false}
geoarrow-schema = {workspace = true, default-features = false}
html-escape = {workspace = true}
hyperloglog = {path = "../hyperloglog"}
image = {workspace = true, features = [
Expand Down
8 changes: 8 additions & 0 deletions src/daft-core/src/array/growable/arrow_growable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ where
let field = Arc::new(Field::new(self.name.clone(), self.dtype.clone()));
Ok(DataArray::<T>::from_arrow(field, arrow_array)?.into_series())
}

fn len(&self) -> usize {
self.len
}
}

/// Simplified null growable — just tracks a length counter.
Expand Down Expand Up @@ -279,6 +283,10 @@ impl Growable for ArrowNullGrowable {
self.len = 0;
Ok(NullArray::full_null(&self.name, &self.dtype, len).into_series())
}

fn len(&self) -> usize {
self.len
}
}

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,8 @@ impl Growable for FixedSizeListGrowable<'_> {
)
.into_series())
}

fn len(&self) -> usize {
self.child_growable.len() / self.element_fixed_len
}
}
4 changes: 4 additions & 0 deletions src/daft-core/src/array/growable/list_growable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,8 @@ impl Growable for ListGrowable<'_> {
)
.into_series())
}

fn len(&self) -> usize {
self.growable_offsets.len() - 1
}
}
16 changes: 16 additions & 0 deletions src/daft-core/src/array/growable/logical_growable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ where
);
Ok(arr.into_series())
}

fn len(&self) -> usize {
self.physical_growable.len()
}
}

macro_rules! impl_logical_growable {
Expand Down Expand Up @@ -115,3 +119,15 @@ where
}
}
}
impl_logical_growable!(LogicalWKTGrowable, WKTType);
impl_logical_growable!(LogicalWKBGrowable, WKBType);
impl_logical_growable!(LogicalPointGrowable, PointType);
impl_logical_growable!(LogicalLineStringGrowable, LineStringType);
impl_logical_growable!(LogicalPolygonGrowable, PolygonType);
impl_logical_growable!(LogicalMultiPointGrowable, MultiPointType);
impl_logical_growable!(LogicalMultiLineStringGrowable, MultiLineStringType);
impl_logical_growable!(LogicalMultiPolygonGrowable, MultiPolygonType);
impl_logical_growable!(LogicalGeometryCollectionGrowable, GeometryCollectionType);
impl_logical_growable!(LogicalGeometryGrowable, GeometryType);
impl_logical_growable!(LogicalGeographyGrowable, GeographyType);
impl_logical_growable!(LogicalRectGrowable, RectType);
4 changes: 4 additions & 0 deletions src/daft-core/src/array/growable/map_growable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ impl Growable for MapGrowable<'_> {
);
Ok(map_array.into_series())
}

fn len(&self) -> usize {
self.list_growable.len()
}
}

#[cfg(test)]
Expand Down
Loading
Loading