Skip to content

Commit bf1ae70

Browse files
authored
feat: add bigframes.bigquery.st_distance function (#1637)
* feat: add `bigframes.bigquery.st_distance` function * fix docstring * add tests * add tests * type checks * make sure shapely.Point is available * fix docstrings, add null row test * GeoSereies typo * Update bigframes/dtypes.py * Update bigframes/dtypes.py * Update third_party/bigframes_vendored/geopandas/geoseries.py
1 parent 8cc56d5 commit bf1ae70

File tree

13 files changed

+424
-139
lines changed

13 files changed

+424
-139
lines changed

bigframes/bigquery/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,12 @@
2727
unix_millis,
2828
unix_seconds,
2929
)
30-
from bigframes.bigquery._operations.geo import st_area, st_difference, st_intersection
30+
from bigframes.bigquery._operations.geo import (
31+
st_area,
32+
st_difference,
33+
st_distance,
34+
st_intersection,
35+
)
3136
from bigframes.bigquery._operations.json import (
3237
json_extract,
3338
json_extract_array,
@@ -49,6 +54,7 @@
4954
# geo ops
5055
"st_area",
5156
"st_difference",
57+
"st_distance",
5258
"st_intersection",
5359
# json ops
5460
"json_set",

bigframes/bigquery/_operations/geo.py

Lines changed: 116 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414

1515
from __future__ import annotations
1616

17+
from typing import Union
18+
19+
import shapely # type: ignore
20+
1721
from bigframes import operations as ops
18-
import bigframes.dtypes
1922
import bigframes.geopandas
2023
import bigframes.series
2124

@@ -25,7 +28,9 @@
2528
"""
2629

2730

28-
def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
31+
def st_area(
32+
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
33+
) -> bigframes.series.Series:
2934
"""
3035
Returns the area in square meters covered by the polygons in the input
3136
`GEOGRAPHY`.
@@ -85,6 +90,10 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
8590
4 0.0
8691
dtype: Float64
8792
93+
Args:
94+
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
95+
A series containing geography objects.
96+
8897
Returns:
8998
bigframes.pandas.Series:
9099
Series of float representing the areas.
@@ -95,7 +104,10 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
95104

96105

97106
def st_difference(
98-
series: bigframes.series.Series, other: bigframes.series.Series
107+
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
108+
other: Union[
109+
bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry
110+
],
99111
) -> bigframes.series.Series:
100112
"""
101113
Returns a `GEOGRAPHY` that represents the point set difference of
@@ -166,44 +178,23 @@ def st_difference(
166178
5 None
167179
dtype: geometry
168180
169-
We can also check difference of single shapely geometries:
170-
171-
>>> polygon_s1 = bigframes.geopandas.GeoSeries(
172-
... [
173-
... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
174-
... ]
175-
... )
176-
>>> polygon_s2 = bigframes.geopandas.GeoSeries(
177-
... [
178-
... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])
179-
... ]
180-
... )
181-
182-
>>> polygon_s1
183-
0 POLYGON ((0 0, 10 0, 10 10, 0 0))
184-
dtype: geometry
185-
186-
>>> polygon_s2
187-
0 POLYGON ((4 2, 6 2, 8 6, 4 2))
188-
dtype: geometry
189-
190-
>>> bbq.st_difference(polygon_s1, polygon_s2)
191-
0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4...
192-
dtype: geometry
193-
194181
Additionally, we can check difference of a GeoSeries against a single shapely geometry:
195182
196-
>>> bbq.st_difference(s1, polygon_s2)
197-
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
198-
1 None
199-
2 None
200-
3 None
201-
4 None
183+
>>> polygon = Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
184+
>>> bbq.st_difference(s1, polygon)
185+
0 POLYGON ((1.97082 2.00002, 0 2, 0 0, 1.97082 2...
186+
1 POLYGON ((1.97082 2.00002, 0 2, 0 0, 1.97082 2...
187+
2 GEOMETRYCOLLECTION EMPTY
188+
3 LINESTRING (0.99265 1.00781, 0 2)
189+
4 POINT (0 1)
202190
dtype: geometry
203191
204192
Args:
205-
other (bigframes.series.Series or geometric object):
206-
The GeoSeries (elementwise) or geometric object to find the difference to.
193+
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
194+
A series containing geography objects.
195+
other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
196+
The series or geometric object to subtract from the geography
197+
objects in ``series``.
207198
208199
Returns:
209200
bigframes.series.Series:
@@ -213,8 +204,86 @@ def st_difference(
213204
return series._apply_binary_op(other, ops.geo_st_difference_op)
214205

215206

207+
def st_distance(
208+
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
209+
other: Union[
210+
bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry
211+
],
212+
*,
213+
use_spheroid: bool = False,
214+
) -> bigframes.series.Series:
215+
"""
216+
Returns the shortest distance in meters between two non-empty
217+
``GEOGRAPHY`` objects.
218+
219+
**Examples:**
220+
221+
>>> import bigframes as bpd
222+
>>> import bigframes.bigquery as bbq
223+
>>> import bigframes.geopandas
224+
>>> from shapely.geometry import Polygon, LineString, Point
225+
>>> bpd.options.display.progress_bar = None
226+
227+
We can check two GeoSeries against each other, row by row.
228+
229+
>>> s1 = bigframes.geopandas.GeoSeries(
230+
... [
231+
... Point(0, 0),
232+
... Point(0.00001, 0),
233+
... Point(0.00002, 0),
234+
... ],
235+
... )
236+
>>> s2 = bigframes.geopandas.GeoSeries(
237+
... [
238+
... Point(0.00001, 0),
239+
... Point(0.00003, 0),
240+
... Point(0.00005, 0),
241+
... ],
242+
... )
243+
244+
>>> bbq.st_distance(s1, s2, use_spheroid=True)
245+
0 1.113195
246+
1 2.22639
247+
2 3.339585
248+
dtype: Float64
249+
250+
We can also calculate the distance between each geometry and a single shapely geometry:
251+
252+
>>> bbq.st_distance(s2, Point(0.00001, 0))
253+
0 0.0
254+
1 2.223902
255+
2 4.447804
256+
dtype: Float64
257+
258+
Args:
259+
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
260+
A series containing geography objects.
261+
other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
262+
The series or geometric object to which the distance in meters is
263+
measured from the geography objects in ``series``.
264+
use_spheroid (optional, default ``False``):
265+
Determines how this function measures distance. If ``use_spheroid``
266+
is False, the function measures distance on the surface of a perfect
267+
sphere. If ``use_spheroid`` is True, the function measures distance
268+
on the surface of the `WGS84 spheroid
269+
<https://cloud.google.com/bigquery/docs/geospatial-data>`_. The
270+
default value of ``use_spheroid`` is False.
271+
272+
Returns:
273+
bigframes.pandas.Series:
274+
The Series (elementwise) of the shortest distance between each
275+
geometry in ``series`` and the aligned geometry in ``other``.
276+
"""
277+
return series._apply_binary_op(
278+
other, ops.GeoStDistanceOp(use_spheroid=use_spheroid)
279+
)
280+
281+
216282
def st_intersection(
217-
series: bigframes.series.Series, other: bigframes.series.Series
283+
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
284+
other: Union[
285+
bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry
286+
],
218287
) -> bigframes.series.Series:
219288
"""
220289
Returns a `GEOGRAPHY` that represents the point set intersection of the two
@@ -284,18 +353,20 @@ def st_intersection(
284353
285354
We can also do intersection of each geometry and a single shapely geometry:
286355
287-
>>> bbq.st_intersection(s1, bigframes.geopandas.GeoSeries([Polygon([(0, 0), (1, 1), (0, 1)])]))
356+
>>> bbq.st_intersection(s1, Polygon([(0, 0), (1, 1), (0, 1)]))
288357
0 POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
289-
1 None
290-
2 None
291-
3 None
292-
4 None
358+
1 POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
359+
2 LINESTRING (0 0, 0.99954 1)
360+
3 GEOMETRYCOLLECTION EMPTY
361+
4 POINT (0 1)
293362
dtype: geometry
294363
295364
Args:
296-
other (GeoSeries or geometric object):
297-
The Geoseries (elementwise) or geometric object to find the
298-
intersection with.
365+
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
366+
A series containing geography objects.
367+
other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
368+
The series or geometric object to intersect with the geography
369+
objects in ``series``.
299370
300371
Returns:
301372
bigframes.geopandas.GeoSeries:

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,13 @@ def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value):
10231023
)
10241024

10251025

1026+
@scalar_op_compiler.register_binary_op(ops.GeoStDistanceOp, pass_op=True)
1027+
def geo_st_distance_op_impl(
1028+
x: ibis_types.Value, y: ibis_types.Value, op: ops.GeoStDistanceOp
1029+
):
1030+
return st_distance(x, y, op.use_spheroid)
1031+
1032+
10261033
@scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op)
10271034
def geo_st_geogfromtext_op_impl(x: ibis_types.Value):
10281035
# Ibis doesn't seem to provide a dedicated method to cast from string to geography,
@@ -1989,6 +1996,11 @@ def st_boundary(a: ibis_dtypes.geography) -> ibis_dtypes.geography: # type: ign
19891996
"""Find the boundary of a geography."""
19901997

19911998

1999+
@ibis_udf.scalar.builtin
2000+
def st_distance(a: ibis_dtypes.geography, b: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float: # type: ignore
2001+
"""Find the shortest distance in meters between two geographies."""
2002+
2003+
19922004
@ibis_udf.scalar.builtin
19932005
def unix_micros(a: ibis_dtypes.timestamp) -> int: # type: ignore
19942006
"""Convert a timestamp to microseconds"""

bigframes/dtypes.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -586,30 +586,32 @@ def _is_bigframes_dtype(dtype) -> bool:
586586
return False
587587

588588

589-
def _infer_dtype_from_python_type(type: type) -> Dtype:
590-
if type in (datetime.timedelta, pd.Timedelta, np.timedelta64):
589+
def _infer_dtype_from_python_type(type_: type) -> Dtype:
590+
if type_ in (datetime.timedelta, pd.Timedelta, np.timedelta64):
591591
# Must check timedelta type first. Otherwise other branches will be evaluated to true
592592
# E.g. np.timedelta64 is a subclass of np.integer
593593
return TIMEDELTA_DTYPE
594-
if issubclass(type, (bool, np.bool_)):
594+
if issubclass(type_, (bool, np.bool_)):
595595
return BOOL_DTYPE
596-
if issubclass(type, (int, np.integer)):
596+
if issubclass(type_, (int, np.integer)):
597597
return INT_DTYPE
598-
if issubclass(type, (float, np.floating)):
598+
if issubclass(type_, (float, np.floating)):
599599
return FLOAT_DTYPE
600-
if issubclass(type, decimal.Decimal):
600+
if issubclass(type_, decimal.Decimal):
601601
return NUMERIC_DTYPE
602-
if issubclass(type, (str, np.str_)):
602+
if issubclass(type_, (str, np.str_)):
603603
return STRING_DTYPE
604-
if issubclass(type, (bytes, np.bytes_)):
604+
if issubclass(type_, (bytes, np.bytes_)):
605605
return BYTES_DTYPE
606-
if issubclass(type, datetime.date):
606+
if issubclass(type_, datetime.date):
607607
return DATE_DTYPE
608-
if issubclass(type, datetime.time):
608+
if issubclass(type_, datetime.time):
609609
return TIME_DTYPE
610+
if issubclass(type_, shapely.Geometry):
611+
return GEO_DTYPE
610612
else:
611613
raise TypeError(
612-
f"No matching datatype for python type: {type}. {constants.FEEDBACK_LINK}"
614+
f"No matching datatype for python type: {type_}. {constants.FEEDBACK_LINK}"
613615
)
614616

615617

bigframes/geopandas/geoseries.py

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,6 @@ def y(self) -> bigframes.series.Series:
4747
# we can.
4848
@property
4949
def area(self, crs=None) -> bigframes.series.Series: # type: ignore
50-
"""Returns a Series containing the area of each geometry in the GeoSeries
51-
expressed in the units of the CRS.
52-
53-
Args:
54-
crs (optional):
55-
Coordinate Reference System of the geometry objects. Can be
56-
anything accepted by pyproj.CRS.from_user_input(), such as an
57-
authority string (eg “EPSG:4326”) or a WKT string.
58-
59-
Returns:
60-
bigframes.pandas.Series:
61-
Series of float representing the areas.
62-
63-
Raises:
64-
NotImplementedError:
65-
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.
66-
"""
6750
raise NotImplementedError(
6851
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
6952
)
@@ -97,5 +80,10 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series:
9780
def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
9881
return self._apply_binary_op(other, ops.geo_st_difference_op)
9982

83+
def distance(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
84+
raise NotImplementedError(
85+
f"GeoSeries.distance is not supported. Use bigframes.bigquery.st_distance(series, other), instead. {constants.FEEDBACK_LINK}"
86+
)
87+
10088
def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
10189
return self._apply_binary_op(other, ops.geo_st_intersection_op)

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
geo_st_intersection_op,
9797
geo_x_op,
9898
geo_y_op,
99+
GeoStDistanceOp,
99100
)
100101
from bigframes.operations.json_ops import (
101102
JSONExtract,
@@ -375,6 +376,7 @@
375376
"geo_st_intersection_op",
376377
"geo_x_op",
377378
"geo_y_op",
379+
"GeoStDistanceOp",
378380
# Numpy ops mapping
379381
"NUMPY_TO_BINOP",
380382
"NUMPY_TO_OP",

bigframes/operations/geo_ops.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import dataclasses
16+
1517
from bigframes import dtypes
1618
from bigframes.operations import base_ops
1719
import bigframes.operations.type as op_typing
@@ -69,3 +71,12 @@
6971
geo_st_intersection_op = base_ops.create_binary_op(
7072
name="geo_st_intersection", type_signature=op_typing.BinaryGeo()
7173
)
74+
75+
76+
@dataclasses.dataclass(frozen=True)
77+
class GeoStDistanceOp(base_ops.BinaryOp):
78+
name = "st_distance"
79+
use_spheroid: bool
80+
81+
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
82+
return dtypes.FLOAT_DTYPE

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
"pyarrow >=15.0.2",
5454
"pydata-google-auth >=1.8.2",
5555
"requests >=2.27.1",
56-
"shapely >=1.8.5",
56+
"shapely >=2.0.0",
5757
"sqlglot >=23.6.3",
5858
"tabulate >=0.9",
5959
"ipywidgets >=7.7.1",

0 commit comments

Comments
 (0)