Skip to content

Commit 4c3548f

Browse files
rey-esp, arwas11, tswast
authored
feat: add GeoSeries.x and GeoSeries.y (#1126)
* feat: add basic geopandas functionality * update examples for geoseries * feat: add Series.geo helper to convert Series to a GeoSeries * fix circular import * Added a constructor * add documentation for geoseries * add geo ops * create test file and implement .x and .y * add test_geo_y * edit training data * Update test_geoseries.py --------- Co-authored-by: Arwa <[email protected]> Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent e13eca2 commit 4c3548f

File tree

9 files changed

+265
-0
lines changed

9 files changed

+265
-0
lines changed

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,17 @@ def normalize_op_impl(x: ibis_types.Value):
967967
return result.cast(result_type)
968968

969969

970+
# Geo Ops
971+
@scalar_op_compiler.register_unary_op(ops.geo_x_op)
972+
def geo_x_op_impl(x: ibis_types.Value):
973+
return typing.cast(ibis_types.GeoSpatialValue, x).x()
974+
975+
976+
@scalar_op_compiler.register_unary_op(ops.geo_y_op)
977+
def geo_y_op_impl(x: ibis_types.Value):
978+
return typing.cast(ibis_types.GeoSpatialValue, x).y()
979+
980+
970981
# Parameterized ops
971982
@scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True)
972983
def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp):

bigframes/dtypes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ def is_time_like(type_: ExpressionType) -> bool:
253253
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
254254

255255

256+
def is_geo_like(type_: ExpressionType) -> bool:
257+
return type_ in (GEO_DTYPE,)
258+
259+
256260
def is_binary_like(type_: ExpressionType) -> bool:
257261
return type_ in (BOOL_DTYPE, BYTES_DTYPE, INT_DTYPE)
258262

bigframes/geopandas/geoseries.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import bigframes_vendored.geopandas.geoseries as vendored_geoseries
1717
import geopandas.array # type: ignore
1818

19+
import bigframes.operations as ops
1920
import bigframes.series
2021

2122

@@ -26,3 +27,15 @@ def __init__(self, data=None, index=None, **kwargs):
2627
super().__init__(
2728
data=data, index=index, dtype=geopandas.array.GeometryDtype(), **kwargs
2829
)
30+
31+
@property
32+
def x(self) -> bigframes.series.Series:
33+
series = self._apply_unary_op(ops.geo_x_op)
34+
series.name = None
35+
return series
36+
37+
@property
38+
def y(self) -> bigframes.series.Series:
39+
series = self._apply_unary_op(ops.geo_y_op)
40+
series.name = None
41+
return series

bigframes/operations/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,19 @@ def create_binary_op(
314314
arctanh_op = create_unary_op(
315315
name="arctanh", type_signature=op_typing.UNARY_REAL_NUMERIC
316316
)
317+
# Geo Ops
318+
geo_x_op = create_unary_op(
319+
name="geo_x",
320+
type_signature=op_typing.FixedOutputType(
321+
dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like"
322+
),
323+
)
324+
geo_y_op = create_unary_op(
325+
name="geo_y",
326+
type_signature=op_typing.FixedOutputType(
327+
dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like"
328+
),
329+
)
317330
## Numeric Ops
318331
floor_op = create_unary_op(name="floor", type_signature=op_typing.UNARY_REAL_NUMERIC)
319332
ceil_op = create_unary_op(name="ceil", type_signature=op_typing.UNARY_REAL_NUMERIC)

tests/data/urban_areas.jsonl

Lines changed: 22 additions & 0 deletions
Large diffs are not rendered by default.

tests/data/urban_areas_schema.json

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
[
2+
{
3+
"mode": "NULLABLE",
4+
"name": "geo_id",
5+
"type": "STRING"
6+
},
7+
{
8+
"mode": "NULLABLE",
9+
"name": "urban_area_code",
10+
"type": "STRING"
11+
},
12+
{
13+
"mode": "NULLABLE",
14+
"name": "name",
15+
"type": "STRING"
16+
},
17+
{
18+
"mode": "NULLABLE",
19+
"name": "lsad_name",
20+
"type": "STRING"
21+
},
22+
{
23+
"mode": "NULLABLE",
24+
"name": "area_lsad_code",
25+
"type": "STRING"
26+
},
27+
{
28+
"mode": "NULLABLE",
29+
"name": "mtfcc_feature_class_code",
30+
"type": "STRING"
31+
},
32+
{
33+
"mode": "NULLABLE",
34+
"name": "type",
35+
"type": "STRING"
36+
},
37+
{
38+
"mode": "NULLABLE",
39+
"name": "functional_status",
40+
"type": "STRING"
41+
},
42+
{
43+
"mode": "NULLABLE",
44+
"name": "area_land_meters",
45+
"type": "FLOAT"
46+
},
47+
{
48+
"mode": "NULLABLE",
49+
"name": "area_water_meters",
50+
"type": "FLOAT"
51+
},
52+
{
53+
"mode": "NULLABLE",
54+
"name": "internal_point_lon",
55+
"type": "FLOAT"
56+
},
57+
{
58+
"mode": "NULLABLE",
59+
"name": "internal_point_lat",
60+
"type": "FLOAT"
61+
},
62+
{
63+
"mode": "NULLABLE",
64+
"name": "internal_point_geom",
65+
"type": "GEOGRAPHY"
66+
},
67+
{
68+
"mode": "NULLABLE",
69+
"name": "urban_area_geom",
70+
"type": "GEOGRAPHY"
71+
}
72+
]

tests/system/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ def load_test_data_tables(
309309
("hockey_players", "hockey_players.json", "hockey_players.jsonl"),
310310
("matrix_2by3", "matrix_2by3.json", "matrix_2by3.jsonl"),
311311
("matrix_3by4", "matrix_3by4.json", "matrix_3by4.jsonl"),
312+
("urban_areas", "urban_areas_schema.json", "urban_areas.jsonl"),
312313
]:
313314
test_data_hash = hashlib.md5()
314315
_hash_digest_file(test_data_hash, DATA_DIR / schema_filename)
@@ -400,6 +401,11 @@ def penguins_table_id(test_data_tables) -> str:
400401
return test_data_tables["penguins"]
401402

402403

404+
@pytest.fixture(scope="session")
405+
def urban_areas_table_id(test_data_tables) -> str:
406+
return test_data_tables["urban_areas"]
407+
408+
403409
@pytest.fixture(scope="session")
404410
def time_series_table_id(test_data_tables) -> str:
405411
return test_data_tables["time_series"]

tests/system/small/geopandas/test_geoseries.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import geopandas # type: ignore
16+
import google.api_core.exceptions
17+
import pandas as pd
18+
import pytest
19+
20+
import bigframes.geopandas
21+
import bigframes.series
22+
from tests.system.utils import assert_series_equal
23+
24+
25+
@pytest.fixture(scope="session")
26+
def urban_areas_dfs(session, urban_areas_table_id):
27+
bf_ua = session.read_gbq(urban_areas_table_id, index_col="geo_id")
28+
pd_ua = bf_ua.to_pandas()
29+
return (bf_ua, pd_ua)
30+
31+
32+
def test_geo_x(urban_areas_dfs):
33+
bf_ua, pd_ua = urban_areas_dfs
34+
bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo
35+
pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"])
36+
bf_result = bf_series.x.to_pandas()
37+
pd_result = pd_series.x
38+
39+
assert_series_equal(
40+
pd_result.astype(pd.Float64Dtype()),
41+
bf_result,
42+
)
43+
44+
45+
def test_geo_x_non_point(urban_areas_dfs):
46+
bf_ua, _ = urban_areas_dfs
47+
bf_series: bigframes.geopandas.GeoSeries = bf_ua["urban_area_geom"].geo
48+
49+
with pytest.raises(google.api_core.exceptions.BadRequest, match="ST_X"):
50+
bf_series.x.to_pandas()
51+
52+
53+
def test_geo_y(urban_areas_dfs):
54+
bf_ua, pd_ua = urban_areas_dfs
55+
bf_series: bigframes.geopandas.GeoSeries = bf_ua["internal_point_geom"].geo
56+
pd_series: geopandas.GeoSeries = geopandas.GeoSeries(pd_ua["internal_point_geom"])
57+
bf_result = bf_series.y.to_pandas()
58+
pd_result = pd_series.y
59+
60+
assert_series_equal(
61+
pd_result.astype(pd.Float64Dtype()),
62+
bf_result,
63+
)

third_party/bigframes_vendored/geopandas/geoseries.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
# contains code from https://github.com/geopandas/geopandas/blob/main/geopandas/geoseries.py
22
from __future__ import annotations
33

4+
from typing import TYPE_CHECKING
5+
6+
from bigframes import constants
7+
8+
if TYPE_CHECKING:
9+
import bigframes.series
10+
411

512
class GeoSeries:
613
"""
@@ -28,3 +35,57 @@ class GeoSeries:
2835
Additional arguments passed to the Series constructor,
2936
e.g. ``name``.
3037
"""
38+
39+
@property
40+
def x(self) -> bigframes.series.Series:
41+
"""Return the x location of point geometries in a GeoSeries
42+
43+
**Examples:**
44+
45+
>>> import bigframes.pandas as bpd
46+
>>> bpd.options.display.progress_bar = None
47+
>>> import geopandas.array
48+
>>> import shapely
49+
50+
>>> series = bpd.Series(
51+
... [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)],
52+
... dtype=geopandas.array.GeometryDtype()
53+
... )
54+
>>> series.geo.x
55+
0 1.0
56+
1 2.0
57+
2 3.0
58+
dtype: Float64
59+
60+
Returns:
61+
bigframes.series.Series:
62+
Return the x location (longitude) of point geometries.
63+
"""
64+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
65+
66+
@property
67+
def y(self) -> bigframes.series.Series:
68+
"""Return the y location of point geometries in a GeoSeries
69+
70+
**Examples:**
71+
72+
>>> import bigframes.pandas as bpd
73+
>>> bpd.options.display.progress_bar = None
74+
>>> import geopandas.array
75+
>>> import shapely
76+
77+
>>> series = bpd.Series(
78+
... [shapely.Point(1, 2), shapely.Point(2, 3), shapely.Point(3, 4)],
79+
... dtype=geopandas.array.GeometryDtype()
80+
... )
81+
>>> series.geo.y
82+
0 2.0
83+
1 3.0
84+
2 4.0
85+
dtype: Float64
86+
87+
Returns:
88+
bigframes.series.Series:
89+
Return the y location (latitude) of point geometries.
90+
"""
91+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)