Skip to content

Commit 069f0c1

Browse files
Handle dask-expr optimizations in sjoin (#307)
1 parent fe0bae8 commit 069f0c1

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

dask_geopandas/sjoin.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ def sjoin(left, right, how="inner", predicate="intersects", **kwargs):
6262
if isinstance(right, geopandas.GeoDataFrame):
6363
right = from_geopandas(right, npartitions=1)
6464

65+
if backends.QUERY_PLANNING_ON:
66+
# We call optimize on the inputs to ensure that any optimizations
67+
# done by dask-expr (which might change the expression, and thus the
68+
# name of the DataFrame) happen *before* we build the HighLevelGraph.
69+
# https://github.com/dask/dask-expr/issues/1129
70+
left = left.optimize()
71+
right = right.optimize()
72+
6573
name = "sjoin-" + tokenize(left, right, how, predicate)
6674
meta = geopandas.sjoin(left._meta, right._meta, how=how, predicate=predicate)
6775

@@ -74,8 +82,8 @@ def sjoin(left, right, how="inner", predicate="intersects", **kwargs):
7482
how="inner",
7583
predicate="intersects",
7684
)
77-
parts_left = np.asarray(parts.index)
78-
parts_right = np.asarray(parts["index_right"].values)
85+
parts_left = np.asarray(parts.index).tolist()
86+
parts_right = np.asarray(parts["index_right"].values).tolist()
7987
using_spatial_partitions = True
8088
else:
8189
# Unknown spatial partitions -> full cartesian (cross) product of all

dask_geopandas/tests/test_sjoin.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import geopandas
2+
import shapely
23

34
import dask_geopandas
45

@@ -46,3 +47,13 @@ def test_sjoin_dask_geopandas(naturalearth_lowres, naturalearth_cities):
4647
# check warning
4748
with pytest.warns(FutureWarning, match="The `op` parameter is deprecated"):
4849
dask_geopandas.sjoin(df_points, ddf_polygons, op="within", how="inner")
50+
51+
52+
def test_no_value_error():
53+
# https://github.com/geopandas/dask-geopandas/issues/303
54+
shape = shapely.geometry.box(-74.5, -74.0, 4.5, 5.0)
55+
df = dask_geopandas.from_geopandas(
56+
geopandas.GeoDataFrame(geometry=[shape]), npartitions=1
57+
).spatial_shuffle()
58+
# computing the self-join must not raise (previously a TypeError)
59+
df.sjoin(df).compute()

0 commit comments

Comments
 (0)