Skip to content

Commit 6716283

Browse files
authored
chore: add experimental blob properties tests (#1449)
* chore: add experimental blob properties tests
* include files
* fix
* fix mypy
* debug
* fix
1 parent e92a196 commit 6716283

File tree

3 files changed

+190
-22
lines changed

3 files changed

+190
-22
lines changed

tests/system/small/blob/conftest.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
import bigframes
18+
import bigframes.pandas as bpd
19+
20+
21+
@pytest.fixture(scope="session")
def images_gcs_path() -> str:
    """Return the GCS glob pattern pointing at the blob test images."""
    gcs_glob = "gs://bigframes_blob_test/images/*"
    return gcs_glob
24+
25+
26+
@pytest.fixture(scope="session")
def images_uris() -> list[str]:
    """Return the two image URIs that the blob tests use as expected values."""
    img0_uri = "gs://bigframes_blob_test/images/img0.jpg"
    img1_uri = "gs://bigframes_blob_test/images/img1.jpg"
    return [img0_uri, img1_uri]
32+
33+
34+
@pytest.fixture(scope="session")
def images_mm_df(
    images_gcs_path, session: bigframes.Session, bq_connection: str
) -> bpd.DataFrame:
    """Build the shared DataFrame whose ``blob_col`` column is read from the images glob.

    The experimental blob option is switched on before the DataFrame is
    created through ``session.from_glob_path``.
    """
    bigframes.options.experiments.blob = True

    mm_df = session.from_glob_path(
        images_gcs_path, connection=bq_connection, name="blob_col"
    )
    return mm_df

tests/system/small/blob/test_io.py

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,18 @@
1818
import bigframes.pandas as bpd
1919

2020

21-
def test_blob_create_from_uri_str(bq_connection: str, session: bigframes.Session):
21+
def test_blob_create_from_uri_str(
22+
bq_connection: str, session: bigframes.Session, images_uris
23+
):
2224
bigframes.options.experiments.blob = True
2325

24-
uris = [
25-
"gs://bigframes_blob_test/images/img0.jpg",
26-
"gs://bigframes_blob_test/images/img1.jpg",
27-
]
28-
29-
uri_series = bpd.Series(uris, session=session)
26+
uri_series = bpd.Series(images_uris, session=session)
3027
blob_series = uri_series.str.to_blob(connection=bq_connection)
3128

3229
pd_blob_df = blob_series.struct.explode().to_pandas()
3330
expected_pd_df = pd.DataFrame(
3431
{
35-
"uri": uris,
32+
"uri": images_uris,
3633
"version": [None, None],
3734
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
3835
"details": [None, None],
@@ -44,19 +41,18 @@ def test_blob_create_from_uri_str(bq_connection: str, session: bigframes.Session
4441
)
4542

4643

47-
def test_blob_create_from_glob_path(bq_connection: str, session: bigframes.Session):
44+
def test_blob_create_from_glob_path(
45+
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
46+
):
4847
bigframes.options.experiments.blob = True
4948

5049
blob_df = session.from_glob_path(
51-
"gs://bigframes_blob_test/images/*", connection=bq_connection, name="blob_col"
50+
images_gcs_path, connection=bq_connection, name="blob_col"
5251
)
5352
pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas()
5453
expected_df = pd.DataFrame(
5554
{
56-
"uri": [
57-
"gs://bigframes_blob_test/images/img0.jpg",
58-
"gs://bigframes_blob_test/images/img1.jpg",
59-
],
55+
"uri": images_uris,
6056
"version": [None, None],
6157
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
6258
"details": [None, None],
@@ -69,22 +65,17 @@ def test_blob_create_from_glob_path(bq_connection: str, session: bigframes.Sessi
6965

7066

7167
def test_blob_create_read_gbq_object_table(
72-
bq_connection: str, session: bigframes.Session
68+
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
7369
):
7470
bigframes.options.experiments.blob = True
7571

76-
obj_table = session._create_object_table(
77-
"gs://bigframes_blob_test/images/*", bq_connection
78-
)
72+
obj_table = session._create_object_table(images_gcs_path, bq_connection)
7973

8074
blob_df = session.read_gbq_object_table(obj_table, name="blob_col")
8175
pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas()
8276
expected_df = pd.DataFrame(
8377
{
84-
"uri": [
85-
"gs://bigframes_blob_test/images/img0.jpg",
86-
"gs://bigframes_blob_test/images/img1.jpg",
87-
],
78+
"uri": images_uris,
8879
"version": [None, None],
8980
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
9081
"details": [None, None],
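tests/system/small/blob/test_properties.py (file path missing from the captured page; name inferred from the commit title — verify)

Lines changed: 135 additions & 0 deletions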
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import db_dtypes # type: ignore
16+
import pandas as pd
17+
18+
import bigframes
19+
import bigframes.pandas as bpd
20+
21+
22+
def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame):
    """``blob.uri()`` on the blob column should match the fixture URIs."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.uri().to_pandas()
    want = pd.Series(images_uris, name="uri")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
31+
32+
33+
def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str):
    """``blob.authorizer()`` should report the case-folded connection for each row."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.authorizer().to_pandas()

    # Both rows are expected to carry the same case-folded connection string.
    folded_connection = bq_connection.casefold()
    want = pd.Series([folded_connection, folded_connection], name="authorizer")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
44+
45+
46+
def test_blob_version(images_mm_df: bpd.DataFrame):
    """``blob.version()`` should return the hard-coded version strings below."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.version().to_pandas()

    expected_versions = ["1739574332294150", "1739574332271343"]
    want = pd.Series(expected_versions, name="version")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
55+
56+
57+
def test_blob_metadata(images_mm_df: bpd.DataFrame):
    """``blob.metadata()`` should return the per-image metadata records below."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.metadata().to_pandas()

    # One record per image, in the same order as the rows being compared.
    expected_records = [
        {
            "content_type": "image/jpeg",
            "md5_hash": "e130ad042261a1883cd2cc06831cf748",
            "size": 338390,
            "updated": 1739574332000000,
        },
        {
            "content_type": "image/jpeg",
            "md5_hash": "e2ae3191ff2b809fd0935f01a537c650",
            "size": 43333,
            "updated": 1739574332000000,
        },
    ]
    want = pd.Series(
        expected_records,
        name="metadata",
        dtype=db_dtypes.JSONDtype(),
    )

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
83+
84+
85+
def test_blob_content_type(images_mm_df: bpd.DataFrame):
    """``blob.content_type()`` should report ``image/jpeg`` for both rows."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.content_type().to_pandas()

    expected_types = ["image/jpeg", "image/jpeg"]
    want = pd.Series(expected_types, name="content_type")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
94+
95+
96+
def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
    """``blob.md5_hash()`` should return the hard-coded digests below."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.md5_hash().to_pandas()

    expected_hashes = [
        "e130ad042261a1883cd2cc06831cf748",
        "e2ae3191ff2b809fd0935f01a537c650",
    ]
    want = pd.Series(expected_hashes, name="md5_hash")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
108+
109+
110+
def test_blob_size(images_mm_df: bpd.DataFrame):
    """``blob.size()`` should return the hard-coded sizes below."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.size().to_pandas()

    expected_sizes = [338390, 43333]
    want = pd.Series(expected_sizes, name="size")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )
119+
120+
121+
def test_blob_updated(images_mm_df: bpd.DataFrame):
    """``blob.updated()`` should return the same UTC timestamp for both rows."""
    bigframes.options.experiments.blob = True

    blob_col = images_mm_df["blob_col"]
    got = blob_col.blob.updated().to_pandas()

    # The expected timestamp is identical for both rows.
    updated_at = pd.Timestamp("2025-02-14 23:05:32", tz="UTC")
    want = pd.Series([updated_at, updated_at], name="updated")

    pd.testing.assert_series_equal(
        got, want, check_dtype=False, check_index_type=False
    )

0 commit comments

Comments
 (0)