Skip to content

Commit 105bfd6

Browse files
dask 2025.1.0 compatibility (#324)
1 parent f629462 commit 105bfd6

File tree

17 files changed

+125
-1356
lines changed

17 files changed

+125
-1356
lines changed

.github/workflows/tests.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,9 @@ jobs:
2929
matrix:
3030
os: [ubuntu-latest]
3131
env:
32+
- ci/envs/310-minimal.yaml
3233
- ci/envs/310-no-optional-deps.yaml
33-
- ci/envs/39-minimal.yaml
34-
- ci/envs/311-no-expr.yaml
3534
- ci/envs/311-latest.yaml
36-
- ci/envs/311-latest-no-expr.yaml
3735
- ci/envs/312-latest.yaml
3836

3937
include:
@@ -52,7 +50,7 @@ jobs:
5250
with:
5351
environment-file: ${{ matrix.env }}
5452
miniforge-version: latest
55-
miniforge-variant: Mambaforge
53+
miniforge-variant: Miniforge3
5654
use-mamba: true
5755

5856
- name: Check and Log Environment

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
Changelog
22
=========
33

4+
Version 0.4.3 (January 2025)
5+
----------------------------------
6+
7+
Packaging:
8+
9+
- `dask>=2025.1.0` is now required.
10+
- `python>=3.10` is now required.
11+
12+
413
Version 0.4.2 (September 24, 2024)
514
----------------------------------
615

ci/envs/39-minimal.yaml renamed to ci/envs/310-minimal.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ channels:
33
- conda-forge
44
dependencies:
55
# required dependencies
6-
- python=3.9
7-
- numpy=1.23
8-
- dask=2022.06.0
9-
- distributed=2022.06.0
10-
- geopandas=0.12
11-
- pandas=1.5.3
6+
- python=3.10
7+
- numpy=1.24
8+
- dask=2025.1.0
9+
- distributed=2025.1.0
10+
- geopandas=0.14.3
11+
- pandas=2.0.0
1212
- shapely=2.0
1313
- pyproj=3.4
1414
- packaging

ci/envs/311-no-expr.yaml

Lines changed: 0 additions & 26 deletions
This file was deleted.

ci/envs/312-dev.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,3 @@ dependencies:
2828
- git+https://github.com/shapely/shapely.git@main
2929
- git+https://github.com/geopandas/geopandas.git@main
3030
- git+https://github.com/dask/dask.git@main
31-
- git+https://github.com/dask-contrib/dask-expr.git@main

dask_geopandas/__init__.py

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,42 @@
11
from ._version import get_versions
22

3-
from . import backends
4-
5-
if backends.QUERY_PLANNING_ON:
6-
from .expr import (
7-
points_from_xy,
8-
from_wkt,
9-
from_wkb,
10-
GeoDataFrame,
11-
GeoSeries,
12-
from_geopandas,
13-
from_dask_dataframe,
14-
)
15-
else:
16-
from .core import (
17-
points_from_xy,
18-
from_wkt,
19-
from_wkb,
20-
GeoDataFrame,
21-
GeoSeries,
22-
from_geopandas,
23-
from_dask_dataframe,
24-
)
3+
from .expr import (
4+
points_from_xy,
5+
from_wkt,
6+
from_wkb,
7+
GeoDataFrame,
8+
GeoSeries,
9+
from_geopandas,
10+
from_dask_dataframe,
11+
)
2512
from .io.file import read_file
2613
from .io.parquet import read_parquet, to_parquet
2714
from .io.arrow import read_feather, to_feather
2815
from .clip import clip
2916
from .sjoin import sjoin
17+
from . import backends as _ # needed to register dispatch functions with dask
3018

3119

3220
__version__ = get_versions()["version"]
3321
del get_versions
3422

3523
__all__ = [
36-
"points_from_xy",
37-
"from_wkt",
38-
"from_wkb",
3924
"GeoDataFrame",
4025
"GeoSeries",
41-
"from_geopandas",
26+
"clip",
4227
"from_dask_dataframe",
43-
"read_file",
28+
"from_geopandas",
29+
"from_wkb",
30+
"from_wkt",
31+
"points_from_xy",
4432
"read_feather",
33+
"read_file",
4534
"read_parquet",
35+
"sjoin",
4636
"to_feather",
4737
"to_parquet",
48-
"clip",
49-
"sjoin",
5038
]
5139

5240
from . import _version
5341

5442
__version__ = _version.get_versions()["version"]
55-
56-
from . import _version
57-
58-
__version__ = _version.get_versions()["version"]

dask_geopandas/backends.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,6 @@
44
import pandas as pd
55

66
import dask
7-
from dask import config
8-
9-
# Check if dask-dataframe is using dask-expr (mimic the logic of dask.dataframe
10-
# _dask_expr_enabled() - default of None means True as well if dask-expr is available)
11-
QUERY_PLANNING_ON = config.get("dataframe.query-planning", False)
12-
if QUERY_PLANNING_ON is None:
13-
if Version(pd.__version__).major < 2:
14-
QUERY_PLANNING_ON = False
15-
else:
16-
try:
17-
import dask_expr # noqa: F401
18-
except ImportError:
19-
# dask will raise error or warning depending on the config
20-
QUERY_PLANNING_ON = False
21-
else:
22-
QUERY_PLANNING_ON = True
23-
24-
257
from dask.base import normalize_token
268
from dask.dataframe.backends import _nonempty_index, meta_nonempty_dataframe
279
from dask.dataframe.core import get_parallel_type
@@ -34,7 +16,7 @@
3416
from geopandas.array import GeometryArray, GeometryDtype, from_shapely
3517
from shapely.geometry.base import BaseGeometry
3618

37-
from .core import GeoDataFrame, GeoSeries
19+
from .expr import GeoDataFrame, GeoSeries
3820

3921
get_parallel_type.register(geopandas.GeoDataFrame, lambda _: GeoDataFrame)
4022
get_parallel_type.register(geopandas.GeoSeries, lambda _: GeoSeries)
@@ -85,7 +67,6 @@ def tokenize_geometryarray(x):
8567

8668
@pyarrow_schema_dispatch.register((geopandas.GeoDataFrame,))
8769
def get_pyarrow_schema_geopandas(obj):
88-
import pandas as pd
8970
import pyarrow as pa
9071

9172
df = pd.DataFrame(obj.copy())

dask_geopandas/clip.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
11
import numpy as np
22

33
from dask.base import tokenize
4+
from dask.dataframe import from_graph
45
from dask.highlevelgraph import HighLevelGraph
56
from dask.utils import derived_from
67

78
import geopandas
89

9-
from . import backends
10-
1110

1211
@derived_from(geopandas.tools)
1312
def clip(gdf, mask, keep_geom_type=False):
14-
15-
if backends.QUERY_PLANNING_ON:
16-
from .expr import GeoDataFrame, GeoSeries
17-
else:
18-
from .core import GeoDataFrame, GeoSeries
13+
from dask_geopandas import GeoDataFrame, GeoSeries
1914

2015
if isinstance(mask, (GeoDataFrame, GeoSeries)):
2116
raise NotImplementedError("Mask cannot be a Dask GeoDataFrame or GeoSeries.")
@@ -45,17 +40,8 @@ def clip(gdf, mask, keep_geom_type=False):
4540
}
4641
divisions = [None] * (len(dsk) + 1)
4742
graph = HighLevelGraph.from_collections(name, dsk, dependencies=[gdf])
48-
if backends.QUERY_PLANNING_ON:
49-
from dask_expr import from_graph
50-
51-
result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip")
52-
else:
53-
from .core import GeoDataFrame, GeoSeries
5443

55-
if isinstance(gdf, GeoDataFrame):
56-
result = GeoDataFrame(graph, name, gdf._meta, tuple(divisions))
57-
elif isinstance(gdf, GeoSeries):
58-
result = GeoSeries(graph, name, gdf._meta, tuple(divisions))
44+
result = from_graph(graph, gdf._meta, tuple(divisions), dsk.keys(), "clip")
5945

6046
result.spatial_partitions = new_spatial_partitions
6147
return result

0 commit comments

Comments
 (0)