Skip to content

Commit 5dae01a

Browse files
authored
chore: kickstart tests for experimental blob creation (#1402)
1 parent 0145656 commit 5dae01a

File tree

4 files changed

+129
-0
lines changed

4 files changed

+129
-0
lines changed

scripts/create_gcs.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This script creates the GCS bucket (if it doesn't already exist) and uploads
16+
# the sample files required for bigframes blob testing
17+
18+
import os
19+
from pathlib import Path
20+
import sys
21+
22+
import google.cloud.exceptions as exceptions
23+
from google.cloud.storage import transfer_manager
24+
import google.cloud.storage as gcs
25+
26+
# Project handed to the storage client; read once from the environment.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# An unset or empty value is unusable: report on stderr and stop with status 1.
if PROJECT_ID is None or PROJECT_ID == "":
    _missing_project_message = (
        "Please set GOOGLE_CLOUD_PROJECT environment variable before running."
    )
    print(_missing_project_message, file=sys.stderr)
    raise SystemExit(1)
34+
35+
36+
def create_bucket(
    client: gcs.Client, bucket_name: str = "bigframes_blob_test"
) -> gcs.Bucket:
    """Return the bucket named ``bucket_name``, creating it if necessary.

    Args:
        client: Storage client used to create or look up the bucket.
        bucket_name: Name of the bucket to create. The default is the bucket
            referenced by the blob system test URIs.

    Returns:
        The bucket returned by ``client.create_bucket`` when creation
        succeeds, otherwise the object returned by ``client.bucket`` for the
        already-existing name.
    """
    print(f"Creating bucket: {bucket_name}")

    # Only the creation call is guarded, so a Conflict raised anywhere else
    # would not be mistaken for "bucket already exists".
    try:
        bucket = client.create_bucket(bucket_name)
    except exceptions.Conflict:
        # NOTE(review): Conflict is treated as "already exists"; presumably
        # the existing bucket belongs to this project -- confirm when using a
        # non-default name.
        print(f"Bucket {bucket_name} already exists.")
        return client.bucket(bucket_name)

    print(f"Bucket {bucket_name} created. ")
    return bucket
49+
50+
51+
def upload_data(
    bucket: gcs.Bucket,
    source_directory: str = "scripts/data/",
    workers: int = 8,
):
    """Upload every file found under ``source_directory`` to ``bucket``.

    Adapted from
    https://cloud.google.com/storage/docs/samples/storage-transfer-manager-upload-directory

    Args:
        bucket: Destination bucket.
        source_directory: Local directory that is walked recursively. A
            relative path is resolved against the current working directory.
        workers: Forwarded to the transfer manager as ``max_workers``.
    """
    root = Path(source_directory)

    # Collect regular files only (directories are skipped) and express each
    # one relative to `source_directory`. `as_posix()` keeps "/" as the
    # separator on every platform, so the names line up with gs:// URIs such
    # as "images/img0.jpg" even when the script runs on Windows.
    relative_names = [
        path.relative_to(root).as_posix()
        for path in root.rglob("*")
        if path.is_file()
    ]

    print("Found {} files.".format(len(relative_names)))

    if not relative_names:
        # Most likely the script was started from a different working
        # directory; say so instead of silently uploading nothing.
        print(
            "No files found under {}; nothing to upload.".format(source_directory),
            file=sys.stderr,
        )
        return

    # Start the upload.
    results = transfer_manager.upload_many_from_filenames(
        bucket, relative_names, source_directory=source_directory, max_workers=workers
    )

    # The results list is either `None` or an exception for each filename in
    # the input list, in order.
    for name, result in zip(relative_names, results):
        if isinstance(result, Exception):
            # Failures go to stderr, like the other error output in this file.
            print(
                "Failed to upload {} due to exception: {}".format(name, result),
                file=sys.stderr,
            )
        else:
            print("Uploaded {} to {}.".format(name, bucket.name))
85+
86+
87+
def main():
    """Create (or look up) the test bucket, then upload the sample data to it."""
    storage_client = gcs.Client(project=PROJECT_ID)
    upload_data(create_bucket(storage_client))


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

scripts/data/images/img0.jpg

330 KB
Loading

scripts/data/images/img1.jpg

42.3 KB
Loading

tests/system/small/blob/test_io.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes
16+
import bigframes.pandas as bpd
17+
18+
19+
def test_blob_create_from_uri_str():
    """Convert two GCS URI strings to a blob series and check its length."""
    # NOTE(review): this sets a module-level option and never resets it, so
    # the flag stays on for whatever runs afterwards -- confirm that is intended.
    bigframes.options.experiments.blob = True

    image_uris = [
        "gs://bigframes_blob_test/images/img0.jpg",
        "gs://bigframes_blob_test/images/img1.jpg",
    ]
    # TODO: use bq_connection fixture when MMD location capitalization fix is in prod
    blobs = bpd.Series(image_uris).str.to_blob(
        connection="us.bigframes-default-connection"
    )

    # Materialize locally; one row is expected per input URI.
    assert len(blobs.to_pandas()) == 2

0 commit comments

Comments
 (0)