Skip to content

Commit 26f351a

Browse files
authored
chore: add experimental image transform functions to BQ bytes (#1397)
* chore: add experimental image transform functions to BQ bytes * revert unrelated files
1 parent 534f7b4 commit 26f351a

File tree

2 files changed

+146
-12
lines changed

2 files changed

+146
-12
lines changed

bigframes/blob/_functions.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,32 @@ def image_blur_func(
130130
image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"])
131131

132132

133+
def image_blur_to_bytes_func(src_obj_ref_rt: str, ksize_x: int, ksize_y: int) -> bytes:
    """Download an image, blur it with ``cv.blur`` and return it as JPEG bytes.

    Args:
        src_obj_ref_rt: JSON string; the image is fetched from its
            ``["access_urls"]["read_url"]`` entry.
        ksize_x: First component of the kernel size passed to ``cv.blur``.
        ksize_y: Second component of the kernel size passed to ``cv.blur``.

    Returns:
        The blurred image encoded with the ``.jpeg`` encoder.

    Raises:
        requests.HTTPError: If the read URL answers with an error status.
        ValueError: If the downloaded payload cannot be decoded as an image,
            or the blurred image cannot be encoded as JPEG.
    """
    # NOTE(review): imports are kept function-local as in the original;
    # presumably the function body is shipped standalone -- confirm before
    # hoisting them to module level.
    import json

    import cv2 as cv  # type: ignore
    import numpy as np
    import requests

    src_url = json.loads(src_obj_ref_rt)["access_urls"]["read_url"]

    # Without a timeout ``requests`` may block forever on a stalled server.
    response = requests.get(src_url, timeout=30)
    # Fail on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()

    pixels = np.frombuffer(response.content, np.uint8)
    img = cv.imdecode(pixels, cv.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals failure by returning None, not by raising.
        raise ValueError("Failed to decode the source image.")

    img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y))

    encoded_ok, encoded = cv.imencode(".jpeg", img_blurred)
    if not encoded_ok:
        raise ValueError("Failed to encode the blurred image as JPEG.")

    return encoded.tobytes()
152+
153+
154+
image_blur_to_bytes_def = FunctionDef(
155+
image_blur_to_bytes_func, ["opencv-python", "numpy", "requests"]
156+
)
157+
158+
133159
def image_resize_func(
134160
src_obj_ref_rt: str,
135161
dst_obj_ref_rt: str,
@@ -174,6 +200,38 @@ def image_resize_func(
174200
)
175201

176202

203+
def image_resize_to_bytes_func(
    src_obj_ref_rt: str,
    dsize_x: int,
    dsize_y: int,
    fx: float,
    fy: float,
) -> bytes:
    """Download an image, resize it with ``cv.resize`` and return JPEG bytes.

    Args:
        src_obj_ref_rt: JSON string; the image is fetched from its
            ``["access_urls"]["read_url"]`` entry.
        dsize_x: First component of the ``dsize`` passed to ``cv.resize``.
        dsize_y: Second component of the ``dsize`` passed to ``cv.resize``.
        fx: Value forwarded as ``fx`` to ``cv.resize``.
        fy: Value forwarded as ``fy`` to ``cv.resize``.

    Returns:
        The resized image encoded with the ``.jpeg`` encoder.

    Raises:
        requests.HTTPError: If the read URL answers with an error status.
        ValueError: If the downloaded payload cannot be decoded as an image,
            or the resized image cannot be encoded as JPEG.
    """
    # NOTE(review): imports are kept function-local as in the original;
    # presumably the function body is shipped standalone -- confirm before
    # hoisting them to module level.
    import json

    import cv2 as cv  # type: ignore
    import numpy as np
    import requests

    src_url = json.loads(src_obj_ref_rt)["access_urls"]["read_url"]

    # Without a timeout ``requests`` may block forever on a stalled server.
    response = requests.get(src_url, timeout=30)
    # Fail on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()

    pixels = np.frombuffer(response.content, np.uint8)
    img = cv.imdecode(pixels, cv.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals failure by returning None, not by raising.
        raise ValueError("Failed to decode the source image.")

    img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy)

    encoded_ok, encoded = cv.imencode(".jpeg", img_resized)
    if not encoded_ok:
        raise ValueError("Failed to encode the resized image as JPEG.")

    return encoded.tobytes()
228+
229+
230+
image_resize_to_bytes_def = FunctionDef(
231+
image_resize_to_bytes_func, ["opencv-python", "numpy", "requests"]
232+
)
233+
234+
177235
def image_normalize_func(
178236
src_obj_ref_rt: str, dst_obj_ref_rt: str, alpha: float, beta: float, norm_type: str
179237
) -> str:
@@ -222,6 +280,43 @@ def image_normalize_func(
222280
)
223281

224282

283+
def image_normalize_to_bytes_func(
    src_obj_ref_rt: str, alpha: float, beta: float, norm_type: str
) -> bytes:
    """Download an image, apply ``cv.normalize`` and return JPEG bytes.

    Args:
        src_obj_ref_rt: JSON string; the image is fetched from its
            ``["access_urls"]["read_url"]`` entry.
        alpha: Value forwarded as ``alpha`` to ``cv.normalize``.
        beta: Value forwarded as ``beta`` to ``cv.normalize``.
        norm_type: One of ``"inf"``, ``"l1"``, ``"l2"`` or ``"minmax"``;
            mapped to the matching ``cv.NORM_*`` constant.

    Returns:
        The normalized image encoded with the ``.jpeg`` encoder.

    Raises:
        KeyError: If ``norm_type`` is not one of the accepted names.
        requests.HTTPError: If the read URL answers with an error status.
        ValueError: If the downloaded payload cannot be decoded as an image,
            or the normalized image cannot be encoded as JPEG.
    """
    # NOTE(review): imports are kept function-local as in the original;
    # presumably the function body is shipped standalone -- confirm before
    # hoisting them to module level.
    import json

    import cv2 as cv  # type: ignore
    import numpy as np
    import requests

    norm_type_mapping = {
        "inf": cv.NORM_INF,
        "l1": cv.NORM_L1,
        "l2": cv.NORM_L2,
        "minmax": cv.NORM_MINMAX,
    }
    # Resolve the norm before any network I/O so that an unsupported name
    # fails immediately (still a KeyError, as before) without a download.
    cv_norm_type = norm_type_mapping[norm_type]

    src_url = json.loads(src_obj_ref_rt)["access_urls"]["read_url"]

    # Without a timeout ``requests`` may block forever on a stalled server.
    response = requests.get(src_url, timeout=30)
    # Fail on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()

    pixels = np.frombuffer(response.content, np.uint8)
    img = cv.imdecode(pixels, cv.IMREAD_UNCHANGED)
    if img is None:
        # imdecode signals failure by returning None, not by raising.
        raise ValueError("Failed to decode the source image.")

    img_normalized = cv.normalize(
        img, None, alpha=alpha, beta=beta, norm_type=cv_norm_type
    )

    encoded_ok, encoded = cv.imencode(".jpeg", img_normalized)
    if not encoded_ok:
        raise ValueError("Failed to encode the normalized image as JPEG.")

    return encoded.tobytes()
313+
314+
315+
image_normalize_to_bytes_def = FunctionDef(
316+
image_normalize_to_bytes_func, ["opencv-python", "numpy", "requests"]
317+
)
318+
319+
225320
# Extracts all text from a PDF url
226321
def pdf_extract_func(src_obj_ref_rt: str) -> str:
227322
import io

bigframes/operations/blob.py

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def image_blur(
276276
self,
277277
ksize: tuple[int, int],
278278
*,
279-
dst: Union[str, bigframes.series.Series],
279+
dst: Optional[Union[str, bigframes.series.Series]] = None,
280280
connection: Optional[str] = None,
281281
) -> bigframes.series.Series:
282282
"""Blurs images.
@@ -286,7 +286,7 @@ def image_blur(
286286
287287
Args:
288288
ksize (tuple(int, int)): Kernel size.
289-
dst (str or bigframes.series.Series): Destination GCS folder str or blob series.
289+
dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
290290
connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
291291
292292
Returns:
@@ -295,6 +295,19 @@ def image_blur(
295295
import bigframes.blob._functions as blob_func
296296

297297
connection = self._resolve_connection(connection)
298+
df = self._get_runtime_json_str(mode="R").to_frame()
299+
300+
if dst is None:
301+
image_blur_udf = blob_func.TransformFunction(
302+
blob_func.image_blur_to_bytes_def,
303+
session=self._block.session,
304+
connection=connection,
305+
).udf()
306+
307+
df["ksize_x"], df["ksize_y"] = ksize
308+
res = df.apply(image_blur_udf, axis=1)
309+
310+
return res
298311

299312
if isinstance(dst, str):
300313
dst = os.path.join(dst, "")
@@ -311,10 +324,9 @@ def image_blur(
311324
connection=connection,
312325
).udf()
313326

314-
src_rt = self._get_runtime_json_str(mode="R")
315327
dst_rt = dst.blob._get_runtime_json_str(mode="RW")
316328

317-
df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
329+
df = df.join(dst_rt, how="outer")
318330
df["ksize_x"], df["ksize_y"] = ksize
319331

320332
res = df.apply(image_blur_udf, axis=1)
@@ -328,7 +340,7 @@ def image_resize(
328340
*,
329341
fx: float = 0.0,
330342
fy: float = 0.0,
331-
dst: Union[str, bigframes.series.Series],
343+
dst: Optional[Union[str, bigframes.series.Series]] = None,
332344
connection: Optional[str] = None,
333345
):
334346
"""Resize images.
@@ -340,7 +352,7 @@ def image_resize(
340352
dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size.
341353
fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size.
342354
fy (float, default 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
343-
dst (str or bigframes.series.Series): Destination GCS folder str or blob series.
355+
dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
344356
connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
345357
346358
Returns:
@@ -356,6 +368,20 @@ def image_resize(
356368
import bigframes.blob._functions as blob_func
357369

358370
connection = self._resolve_connection(connection)
371+
df = self._get_runtime_json_str(mode="R").to_frame()
372+
373+
if dst is None:
374+
image_resize_udf = blob_func.TransformFunction(
375+
blob_func.image_resize_to_bytes_def,
376+
session=self._block.session,
377+
connection=connection,
378+
).udf()
379+
380+
df["dsize_x"], df["dsizye_y"] = dsize
381+
df["fx"], df["fy"] = fx, fy
382+
res = df.apply(image_resize_udf, axis=1)
383+
384+
return res
359385

360386
if isinstance(dst, str):
361387
dst = os.path.join(dst, "")
@@ -372,10 +398,9 @@ def image_resize(
372398
connection=connection,
373399
).udf()
374400

375-
src_rt = self._get_runtime_json_str(mode="R")
376401
dst_rt = dst.blob._get_runtime_json_str(mode="RW")
377402

378-
df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
403+
df = df.join(dst_rt, how="outer")
379404
df["dsize_x"], df["dsizye_y"] = dsize
380405
df["fx"], df["fy"] = fx, fy
381406

@@ -390,7 +415,7 @@ def image_normalize(
390415
alpha: float = 1.0,
391416
beta: float = 0.0,
392417
norm_type: str = "l2",
393-
dst: Union[str, bigframes.series.Series],
418+
dst: Optional[Union[str, bigframes.series.Series]] = None,
394419
connection: Optional[str] = None,
395420
) -> bigframes.series.Series:
396421
"""Normalize images.
@@ -402,7 +427,7 @@ def image_normalize(
402427
alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization.
403428
beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization.
404429
norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
405-
dst (str or bigframes.series.Series): Destination GCS folder str or blob series.
430+
dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
406431
connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
407432
408433
Returns:
@@ -411,6 +436,21 @@ def image_normalize(
411436
import bigframes.blob._functions as blob_func
412437

413438
connection = self._resolve_connection(connection)
439+
df = self._get_runtime_json_str(mode="R").to_frame()
440+
441+
if dst is None:
442+
image_normalize_udf = blob_func.TransformFunction(
443+
blob_func.image_normalize_to_bytes_def,
444+
session=self._block.session,
445+
connection=connection,
446+
).udf()
447+
448+
df["alpha"] = alpha
449+
df["beta"] = beta
450+
df["norm_type"] = norm_type
451+
res = df.apply(image_normalize_udf, axis=1)
452+
453+
return res
414454

415455
if isinstance(dst, str):
416456
dst = os.path.join(dst, "")
@@ -427,10 +467,9 @@ def image_normalize(
427467
connection=connection,
428468
).udf()
429469

430-
src_rt = self._get_runtime_json_str(mode="R")
431470
dst_rt = dst.blob._get_runtime_json_str(mode="RW")
432471

433-
df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
472+
df = df.join(dst_rt, how="outer")
434473
df["alpha"] = alpha
435474
df["beta"] = beta
436475
df["norm_type"] = norm_type

0 commit comments

Comments
 (0)