Skip to content

Commit 0d7b2d6

Browse files
authored
chore: add experimental series.str.to_blob function (#1224)
* chore: add experimental series.str.to_blob function * extract dtype * update docs * fix connection
1 parent e0a8288 commit 0d7b2d6

File tree

5 files changed

+85
-0
lines changed

5 files changed

+85
-0
lines changed

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1735,6 +1735,12 @@ def binary_remote_function_op_impl(
17351735
return x_transformed
17361736

17371737

1738+
# Blob Ops
1739+
@scalar_op_compiler.register_binary_op(ops.obj_make_ref_op)
1740+
def obj_make_ref_op(x: ibis_types.Value, y: ibis_types.Value):
1741+
return obj_make_ref(uri=x, authorizer=y)
1742+
1743+
17381744
# Ternary Operations
17391745
@scalar_op_compiler.register_ternary_op(ops.where_op)
17401746
def where_op(
@@ -1906,3 +1912,8 @@ def vector_distance(vector1, vector2, type: str) -> ibis_dtypes.Float64: # type
19061912
@ibis_udf.scalar.builtin(name="OBJ.FETCH_METADATA")
19071913
def obj_fetch_metadata(obj_ref: _OBJ_REF_IBIS_DTYPE) -> _OBJ_REF_IBIS_DTYPE: # type: ignore
19081914
"""Fetch metadata from ObjectRef Struct."""
1915+
1916+
1917+
@ibis_udf.scalar.builtin(name="OBJ.MAKE_REF")
1918+
def obj_make_ref(uri: str, authorizer: str) -> _OBJ_REF_IBIS_DTYPE: # type: ignore
1919+
"""Make ObjectRef Struct from uri and connection."""

bigframes/dtypes.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,28 @@
5757
GEO_DTYPE = gpd.array.GeometryDtype()
5858
# JSON
5959
JSON_DTYPE = pd.ArrowDtype(pa.large_string())
60+
OBJ_REF_DTYPE = pd.ArrowDtype(
61+
pa.struct(
62+
(
63+
pa.field(
64+
"uri",
65+
pa.string(),
66+
),
67+
pa.field(
68+
"version",
69+
pa.string(),
70+
),
71+
pa.field(
72+
"authorizer",
73+
pa.string(),
74+
),
75+
pa.field(
76+
"details",
77+
pa.large_string(), # JSON
78+
),
79+
)
80+
)
81+
)
6082

6183
# Used when storing Null expressions
6284
DEFAULT_DTYPE = FLOAT_DTYPE

bigframes/operations/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,21 @@ def output_type(self, *input_types):
893893
return left_type
894894

895895

896+
## Blob Ops
897+
@dataclasses.dataclass(frozen=True)
898+
class ObjMakeRef(BinaryOp):
899+
name: typing.ClassVar[str] = "obj.make_ref"
900+
901+
def output_type(self, *input_types):
902+
if not all(map(dtypes.is_string_like, input_types)):
903+
raise TypeError("obj.make_ref requires string-like arguments")
904+
905+
return dtypes.OBJ_REF_DTYPE
906+
907+
908+
obj_make_ref_op = ObjMakeRef()
909+
910+
896911
# Ternary Ops
897912
@dataclasses.dataclass(frozen=True)
898913
class WhereOp(TernaryOp):

bigframes/operations/blob.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ def __init__(self, *args, **kwargs):
2727
super().__init__(*args, **kwargs)
2828

2929
def metadata(self):
30+
"""Retrieve the metadata of the Blob.
31+
32+
.. note::
33+
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
34+
35+
Returns:
36+
JSON: metadata of the Blob. Contains the fields content_type, md5_hash, size, and updated (time)."""
3037
details_json = self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field(
3138
"details"
3239
)

bigframes/operations/strings.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import bigframes_vendored.constants as constants
2121
import bigframes_vendored.pandas.core.strings.accessor as vendorstr
2222

23+
from bigframes import clients
2324
from bigframes.core import log_adapter
2425
import bigframes.dataframe as df
2526
import bigframes.operations as ops
@@ -284,6 +285,35 @@ def cat(
284285
) -> series.Series:
285286
return self._apply_binary_op(others, ops.strconcat_op, alignment=join)
286287

288+
def to_blob(self, connection: Optional[str] = None) -> series.Series:
289+
"""Create a BigFrames Blob series from a series of URIs.
290+
291+
.. note::
292+
BigFrames Blob is still experimental. It may not work and is subject to change in the future.
293+
294+
295+
Args:
296+
connection (str or None, default None):
297+
Connection used to reach the remote service, given as a str of the format <PROJECT_NUMBER/PROJECT_ID>.<LOCATION>.<CONNECTION_ID>.
298+
If None, the default connection in the session context is used. BigQuery DataFrames will try to create the connection and attach
299+
the required permissions if the connection isn't fully set up.
300+
301+
Returns:
302+
bigframes.series.Series: Blob Series.
303+
304+
"""
305+
if not bigframes.options.experiments.blob:
306+
raise NotImplementedError()
307+
308+
session = self._block.session
309+
connection = connection or session._bq_connection
310+
connection = clients.resolve_full_bq_connection_name(
311+
connection,
312+
default_project=session._project,
313+
default_location=session._location,
314+
)
315+
return self._apply_binary_op(connection, ops.obj_make_ref_op)
316+
287317

288318
def _parse_flags(flags: int) -> Optional[str]:
289319
re2flags = []

0 commit comments

Comments
 (0)