Skip to content

Commit 4c8e6c3

Browse files
authored
chore: add experimental blob.image_resize function (#1383)
* chore: add experimental blob.image_resize function * refactor
1 parent 1054405 commit 4c8e6c3

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

bigframes/blob/_functions.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,50 @@ def image_blur_func(
130130
# Bundles image_blur_func with a list of package names. Presumably FunctionDef
# treats the list as the packages to install where the function is deployed
# -- TODO confirm against the FunctionDef definition.
image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"])
131131

132132

133+
def image_resize_func(
    src_obj_ref_rt: str,
    dst_obj_ref_rt: str,
    dsize_x: int,
    dsize_y: int,
    fx: float,
    fy: float,
) -> str:
    """Resize one image and upload the result as a JPEG.

    The image is downloaded from the source read URL, resized with OpenCV,
    re-encoded as JPEG and uploaded with an HTTP PUT to the destination
    write URL.

    Args:
        src_obj_ref_rt: JSON string holding ``access_urls.read_url`` of the
            source image.
        dst_obj_ref_rt: JSON string holding ``access_urls.write_url`` of the
            destination object.
        dsize_x: First component of the OpenCV ``dsize`` argument.
        dsize_y: Second component of the OpenCV ``dsize`` argument.
        fx: Scale factor forwarded to ``cv.resize`` as ``fx``.
        fy: Scale factor forwarded to ``cv.resize`` as ``fy``.

    Returns:
        ``dst_obj_ref_rt``, unchanged, once the upload has succeeded.

    Raises:
        requests.HTTPError: If the download or the upload returns an HTTP
            error status.
        ValueError: If the downloaded bytes cannot be decoded as an image or
            the resized image cannot be encoded as JPEG.
    """
    # Imports are kept inside the function body as in the original;
    # presumably the function source is shipped and executed elsewhere
    # -- TODO confirm against FunctionDef.
    import json

    import cv2 as cv  # type: ignore
    import numpy as np
    import requests

    # Bound every HTTP call so a stalled connection cannot hang forever.
    timeout_sec = 30

    src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
    dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt)

    src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
    dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"]

    response = requests.get(src_url, timeout=timeout_sec)
    # Fail on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    bts = response.content

    nparr = np.frombuffer(bts, np.uint8)
    img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals undecodable input by returning None.
        raise ValueError("Source object could not be decoded as an image.")

    img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy)

    # NOTE(review): the output is always JPEG, whatever the source format was.
    encoded_ok, encoded = cv.imencode(".jpeg", img_resized)
    if not encoded_ok:
        raise ValueError("Resized image could not be encoded as JPEG.")
    bts = encoded.tobytes()

    put_response = requests.put(
        url=dst_url,
        data=bts,
        headers={
            "Content-Type": "image/jpeg",
        },
        timeout=timeout_sec,
    )
    # Surface a failed upload instead of reporting success.
    put_response.raise_for_status()

    return dst_obj_ref_rt
170+
171+
172+
# Bundles image_resize_func with a list of package names. Presumably FunctionDef
# treats the list as the packages to install where the function is deployed
# -- TODO confirm against the FunctionDef definition.
image_resize_def = FunctionDef(
    image_resize_func,
    ["opencv-python", "numpy", "requests"],
)
175+
176+
133177
# Extracts all text from a PDF url
134178
def pdf_extract_func(src_obj_ref_rt: str) -> str:
135179
import io

bigframes/operations/blob.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,68 @@ def image_blur(
322322

323323
return dst
324324

325+
def image_resize(
    self,
    dsize: tuple[int, int] = (0, 0),
    *,
    fx: float = 0.0,
    fy: float = 0.0,
    dst: Union[str, bigframes.series.Series],
    connection: Optional[str] = None,
) -> bigframes.series.Series:
    """Resize images.

    .. note::
        BigFrames Blob is still experimental. It may not work and is subject to change in the future.

    Args:
        dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size.
        fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size.
        fy (float, default 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
        dst (str or bigframes.series.Series): Destination GCS folder str or blob series.
        connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.

    Returns:
        BigFrames Blob Series

    Raises:
        ValueError: If both or neither of ``dsize`` and ``(fx, fy)`` are set
            to positive values.
    """
    dsize_set = dsize[0] > 0 and dsize[1] > 0
    fsize_set = fx > 0.0 and fy > 0.0
    # Exactly one sizing mode must be fully specified; equality of the two
    # flags means "both" or "neither".
    if dsize_set == fsize_set:
        raise ValueError(
            "Only one of dsize or (fx, fy) parameters must be set. And the set values must be positive. "
        )

    import bigframes.blob._functions as blob_func

    connection = self._resolve_connection(connection)

    if isinstance(dst, str):
        # GCS URIs always use "/", so add the trailing separator by hand
        # rather than with the OS-specific path separator.
        if dst and not dst.endswith("/"):
            dst += "/"
        src_uri = bigframes.series.Series(self._block).struct.explode()["uri"]
        # Replace src folder with dst folder, keep the file names.
        dst_uri = src_uri.str.replace(r"^.*\/(.*)$", rf"{dst}\1", regex=True)
        dst = cast(
            bigframes.series.Series, dst_uri.str.to_blob(connection=connection)
        )

    image_resize_udf = blob_func.TransformFunction(
        blob_func.image_resize_def,
        session=self._block.session,
        connection=connection,
    ).udf()

    src_rt = self._get_runtime_json_str(mode="R")
    dst_rt = dst.blob._get_runtime_json_str(mode="RW")

    # Column order mirrors the parameter order of image_resize_func:
    # src ref, dst ref, dsize_x, dsize_y, fx, fy.
    df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
    df["dsize_x"], df["dsize_y"] = dsize
    df["fx"], df["fy"] = fx, fy

    res = df.apply(image_resize_udf, axis=1)
    res.cache()  # to execute the udf

    return dst
386+
325387
def pdf_extract(
326388
self, *, connection: Optional[str] = None
327389
) -> bigframes.series.Series:

0 commit comments

Comments
 (0)