Skip to content

Commit 5cac5c8

Browse files
authored
chore: add experimental blob.display to support audio and video (#1291)
* chore: add experimental blob.display to support audio and videos * fix * fix
1 parent 61b1932 commit 5cac5c8

File tree

1 file changed

+46
-8
lines changed

1 file changed

+46
-8
lines changed

bigframes/operations/blob.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,28 +49,66 @@ def metadata(self) -> bigframes.series.Series:
4949

5050
return bbq.json_extract(details_json, "$.gcs_metadata")
5151

52-
def display(self, n: int = 3):
52+
def content_type(self) -> bigframes.series.Series:
53+
"""Retrieve the content type of the Blob.
54+
55+
.. note::
56+
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
57+
58+
Returns:
59+
BigFrames Series: json-string of the content type."""
60+
import bigframes.bigquery as bbq
61+
62+
metadata = self.metadata()
63+
64+
return bbq.json_extract(metadata, "$.content_type")
65+
66+
def display(self, n: int = 3, *, content_type: str = ""):
5367
"""Display the blob content in the IPython Notebook environment. Supports image, audio and video content types; any other type is displayed as raw data.
5468
5569
.. note::
5670
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
5771
5872
Args:
5973
n (int, default 3): number of sample blob objects to display.
74+
content_type (str, default ""): content type of the blob. If unset, use the blob metadata of the storage. Possible values are "image", "audio" and "video".
6075
"""
6176
import bigframes.bigquery as bbq
6277

63-
s = bigframes.series.Series(self._block).head(n)
78+
# col name doesn't matter here. Rename to avoid column name conflicts
79+
df = bigframes.series.Series(self._block).rename("blob_col").head(n).to_frame()
6480

65-
obj_ref_runtime = s._apply_unary_op(ops.ObjGetAccessUrl(mode="R"))
66-
read_urls = bbq.json_extract(
81+
obj_ref_runtime = df["blob_col"]._apply_unary_op(ops.ObjGetAccessUrl(mode="R"))
82+
df["read_url"] = bbq.json_extract(
6783
obj_ref_runtime, json_path="$.access_urls.read_url"
6884
)
6985

70-
for read_url in read_urls:
71-
read_url = str(read_url).strip('"')
72-
response = requests.get(read_url)
73-
ipy_display.display(ipy_display.Image(response.content))
86+
if content_type:
87+
df["content_type"] = content_type
88+
else:
89+
df["content_type"] = df["blob_col"].blob.content_type()
90+
91+
def display_single_url(read_url: str, content_type: str):
92+
content_type = content_type.casefold()
93+
94+
if content_type.startswith("image"):
95+
ipy_display.display(ipy_display.Image(url=read_url))
96+
elif content_type.startswith("audio"):
97+
# passing the URL directly doesn't work for audio, so download the content first
98+
response = requests.get(read_url)
99+
ipy_display.display(ipy_display.Audio(response.content))
100+
elif content_type.startswith("video"):
101+
ipy_display.display(ipy_display.Video(url=read_url))
102+
else: # display as raw data
103+
response = requests.get(read_url)
104+
ipy_display.display(response.content, raw=True)
105+
106+
for _, row in df.iterrows():
107+
# both are JSON-formatted strings
108+
read_url = str(row["read_url"]).strip('"')
109+
content_type = str(row["content_type"]).strip('"')
110+
111+
display_single_url(read_url, content_type)
74112

75113
def image_blur(
76114
self,

0 commit comments

Comments
 (0)