Skip to content

Commit e403528

Browse files
sycai, gcf-owl-bot[bot], and tswast
authored
chore: add snippet tests for type system doc (#1783)
* chore: add snippet tests for type system doc
* fix format
* fix more lint
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* add tests for snippets
* fix lint
* try to fix tests with typo
* restore project in set_options test
* use options.reset():
* put global options setting in a try-finally block
* warn about json type and remove json type output from the comment
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* polish comments
* Update samples/snippets/type_system_test.py
  Co-authored-by: Tim Sweña (Swast) <[email protected]>
* remove json samples
* remove json samples

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 63205f2 commit e403528

File tree

2 files changed

+259
-20
lines changed

2 files changed

+259
-20
lines changed

samples/snippets/set_options_test.py

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -19,23 +19,27 @@ def test_bigquery_dataframes_set_options() -> None:
19 19

20 20
bpd.close_session()
21 21

22-
# [START bigquery_dataframes_set_options]
23-
import bigframes.pandas as bpd
24-
25-
PROJECT_ID = "bigframes-dec" # @param {type:"string"}
26-
REGION = "US" # @param {type:"string"}
27-
28-
# Set BigQuery DataFrames options
29-
# Note: The project option is not required in all environments.
30-
# On BigQuery Studio, the project ID is automatically detected.
31-
bpd.options.bigquery.project = PROJECT_ID
32-
33-
# Note: The location option is not required.
34-
# It defaults to the location of the first table or query
35-
# passed to read_gbq(). For APIs where a location can't be
36-
# auto-detected, the location defaults to the "US" location.
37-
bpd.options.bigquery.location = REGION
38-
39-
# [END bigquery_dataframes_set_options]
40-
assert bpd.options.bigquery.project == PROJECT_ID
41-
assert bpd.options.bigquery.location == REGION
22+
try:
23+
# [START bigquery_dataframes_set_options]
24+
import bigframes.pandas as bpd
25+
26+
PROJECT_ID = "bigframes-dev" # @param {type:"string"}
27+
REGION = "US" # @param {type:"string"}
28+
29+
# Set BigQuery DataFrames options
30+
# Note: The project option is not required in all environments.
31+
# On BigQuery Studio, the project ID is automatically detected.
32+
bpd.options.bigquery.project = PROJECT_ID
33+
34+
# Note: The location option is not required.
35+
# It defaults to the location of the first table or query
36+
# passed to read_gbq(). For APIs where a location can't be
37+
# auto-detected, the location defaults to the "US" location.
38+
bpd.options.bigquery.location = REGION
39+
40+
# [END bigquery_dataframes_set_options]
41+
assert bpd.options.bigquery.project == PROJECT_ID
42+
assert bpd.options.bigquery.location == REGION
43+
finally:
44+
bpd.close_session()
45+
bpd.options.reset()

samples/snippets/type_system_test.py

Lines changed: 235 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,235 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas.testing
16+
17+
from bigframes import dtypes
18+
19+
20+
def test_type_system_examples() -> None:
21+
# [START bigquery_dataframes_type_sytem_timestamp_local_type_conversion]
22+
import pandas as pd
23+
24+
import bigframes.pandas as bpd
25+
26+
s = pd.Series([pd.Timestamp("20250101")])
27+
assert s.dtype == "datetime64[ns]"
28+
assert bpd.read_pandas(s).dtype == "timestamp[us][pyarrow]"
29+
# [END bigquery_dataframes_type_sytem_timestamp_local_type_conversion]
30+
31+
# [START bigquery_dataframes_type_system_pyarrow_preference]
32+
import datetime
33+
34+
import pandas as pd
35+
36+
import bigframes.pandas as bpd
37+
38+
s = pd.Series([datetime.date(2025, 1, 1)])
39+
s + pd.Timedelta(hours=12)
40+
# 0 2025-01-01
41+
# dtype: object
42+
43+
bpd.read_pandas(s) + pd.Timedelta(hours=12)
44+
# 0 2025-01-01 12:00:00
45+
# dtype: timestamp[us][pyarrow]
46+
# [END bigquery_dataframes_type_system_pyarrow_preference]
47+
pandas.testing.assert_series_equal(
48+
s + pd.Timedelta(hours=12), pd.Series([datetime.date(2025, 1, 1)])
49+
)
50+
pandas.testing.assert_series_equal(
51+
(bpd.read_pandas(s) + pd.Timedelta(hours=12)).to_pandas(),
52+
pd.Series([pd.Timestamp(2025, 1, 1, 12)], dtype=dtypes.DATETIME_DTYPE),
53+
check_index_type=False,
54+
)
55+
56+
# [START bigquery_dataframes_type_system_load_timedelta]
57+
import pandas as pd
58+
59+
import bigframes.pandas as bpd
60+
61+
s = pd.Series([pd.Timedelta("1s"), pd.Timedelta("2m")])
62+
bpd.read_pandas(s)
63+
# 0 0 days 00:00:01
64+
# 1 0 days 00:02:00
65+
# dtype: duration[us][pyarrow]
66+
# [END bigquery_dataframes_type_system_load_timedelta]
67+
pandas.testing.assert_series_equal(
68+
bpd.read_pandas(s).to_pandas(),
69+
s.astype(dtypes.TIMEDELTA_DTYPE),
70+
check_index_type=False,
71+
)
72+
73+
# [START bigquery_dataframes_type_system_timedelta_precision]
74+
import pandas as pd
75+
76+
s = pd.Series([pd.Timedelta("999ns")])
77+
bpd.read_pandas(s.dt.round("us"))
78+
# 0 0 days 00:00:00.000001
79+
# dtype: duration[us][pyarrow]
80+
# [END bigquery_dataframes_type_system_timedelta_precision]
81+
pandas.testing.assert_series_equal(
82+
bpd.read_pandas(s.dt.round("us")).to_pandas(),
83+
s.dt.round("us").astype(dtypes.TIMEDELTA_DTYPE),
84+
check_index_type=False,
85+
)
86+
87+
# [START bigquery_dataframes_type_system_cast_timedelta]
88+
import bigframes.pandas as bpd
89+
90+
bpd.to_timedelta([1, 2, 3], unit="s")
91+
# 0 0 days 00:00:01
92+
# 1 0 days 00:00:02
93+
# 2 0 days 00:00:03
94+
# dtype: duration[us][pyarrow]
95+
# [END bigquery_dataframes_type_system_cast_timedelta]
96+
pandas.testing.assert_series_equal(
97+
bpd.to_timedelta([1, 2, 3], unit="s").to_pandas(),
98+
pd.Series(pd.to_timedelta([1, 2, 3], unit="s"), dtype=dtypes.TIMEDELTA_DTYPE),
99+
check_index_type=False,
100+
)
101+
102+
# [START bigquery_dataframes_type_system_list_accessor]
103+
import bigframes.pandas as bpd
104+
105+
s = bpd.Series([[1, 2, 3], [4, 5], [6]]) # dtype: list<item: int64>[pyarrow]
106+
107+
# Access the first elements of each list
108+
s.list[0]
109+
# 0 1
110+
# 1 4
111+
# 2 6
112+
# dtype: Int64
113+
114+
# Get the lengths of each list
115+
s.list.len()
116+
# 0 3
117+
# 1 2
118+
# 2 1
119+
# dtype: Int64
120+
# [END bigquery_dataframes_type_system_list_accessor]
121+
pandas.testing.assert_series_equal(
122+
s.list[0].to_pandas(),
123+
pd.Series([1, 4, 6], dtype="Int64"),
124+
check_index_type=False,
125+
)
126+
pandas.testing.assert_series_equal(
127+
s.list.len().to_pandas(),
128+
pd.Series([3, 2, 1], dtype="Int64"),
129+
check_index_type=False,
130+
)
131+
132+
# [START bigquery_dataframes_type_system_struct_accessor]
133+
import bigframes.pandas as bpd
134+
135+
structs = [
136+
{"id": 101, "category": "A"},
137+
{"id": 102, "category": "B"},
138+
{"id": 103, "category": "C"},
139+
]
140+
s = bpd.Series(structs)
141+
# Get the 'id' field of each struct
142+
s.struct.field("id")
143+
# 0 101
144+
# 1 102
145+
# 2 103
146+
# Name: id, dtype: Int64
147+
# [END bigquery_dataframes_type_system_struct_accessor]
148+
149+
# [START bigquery_dataframes_type_system_struct_accessor_shortcut]
150+
import bigframes.pandas as bpd
151+
152+
structs = [
153+
{"id": 101, "category": "A"},
154+
{"id": 102, "category": "B"},
155+
{"id": 103, "category": "C"},
156+
]
157+
s = bpd.Series(structs)
158+
159+
# not explicitly using the "struct" property
160+
s.id
161+
# 0 101
162+
# 1 102
163+
# 2 103
164+
# Name: id, dtype: Int64
165+
# [END bigquery_dataframes_type_system_struct_accessor_shortcut]
166+
pandas.testing.assert_series_equal(
167+
s.struct.field("id").to_pandas(),
168+
pd.Series([101, 102, 103], dtype="Int64", name="id"),
169+
check_index_type=False,
170+
)
171+
pandas.testing.assert_series_equal(
172+
s.id.to_pandas(),
173+
pd.Series([101, 102, 103], dtype="Int64", name="id"),
174+
check_index_type=False,
175+
)
176+
177+
# [START bigquery_dataframes_type_system_string_accessor]
178+
import bigframes.pandas as bpd
179+
180+
s = bpd.Series(["abc", "de", "1"]) # dtype: string[pyarrow]
181+
182+
# Get the first character of each string
183+
s.str[0]
184+
# 0 a
185+
# 1 d
186+
# 2 1
187+
# dtype: string
188+
189+
# Check whether there are only alphabetic characters in each string
190+
s.str.isalpha()
191+
# 0 True
192+
# 1 True
193+
# 2 False
194+
# dtype: boolean
195+
196+
# Cast the alphabetic characters to their upper cases for each string
197+
s.str.upper()
198+
# 0 ABC
199+
# 1 DE
200+
# 2 1
201+
# dtype: string
202+
# [END bigquery_dataframes_type_system_string_accessor]
203+
pandas.testing.assert_series_equal(
204+
s.str[0].to_pandas(),
205+
pd.Series(["a", "d", "1"], dtype=dtypes.STRING_DTYPE),
206+
check_index_type=False,
207+
)
208+
pandas.testing.assert_series_equal(
209+
s.str.isalpha().to_pandas(),
210+
pd.Series([True, True, False], dtype=dtypes.BOOL_DTYPE),
211+
check_index_type=False,
212+
)
213+
pandas.testing.assert_series_equal(
214+
s.str.upper().to_pandas(),
215+
pd.Series(["ABC", "DE", "1"], dtype=dtypes.STRING_DTYPE),
216+
check_index_type=False,
217+
)
218+
219+
# [START bigquery_dataframes_type_system_geo_accessor]
220+
from shapely.geometry import Point
221+
222+
import bigframes.pandas as bpd
223+
224+
s = bpd.Series([Point(1, 0), Point(2, 1)]) # dtype: geometry
225+
226+
s.geo.y
227+
# 0 0.0
228+
# 1 1.0
229+
# dtype: Float64
230+
# [END bigquery_dataframes_type_system_geo_accessor]
231+
pandas.testing.assert_series_equal(
232+
s.geo.y.to_pandas(),
233+
pd.Series([0.0, 1.0], dtype=dtypes.FLOAT_DTYPE),
234+
check_index_type=False,
235+
)

0 commit comments

Comments
 (0)