Skip to content

Commit e6392b6

Browse files
authored
chore: impl experimental blob image output encodings (#1439)
1 parent 22c32d7 commit e6392b6

File tree

3 files changed

+96
-27
lines changed

3 files changed

+96
-27
lines changed

bigframes/blob/_functions.py

Lines changed: 51 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -105,14 +105,16 @@ def udf(self):
105105

106106
# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string.
107107
def image_blur_func(
108-
src_obj_ref_rt: str, dst_obj_ref_rt: str, ksize_x: int, ksize_y: int
108+
src_obj_ref_rt: str, dst_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str
109109
) -> str:
110110
import json
111111

112112
import cv2 as cv # type: ignore
113113
import numpy as np
114114
import requests
115115

116+
ext = ext or ".jpeg"
117+
116118
src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
117119
dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt)
118120

@@ -125,13 +127,19 @@ def image_blur_func(
125127
nparr = np.frombuffer(bts, np.uint8)
126128
img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
127129
img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y))
128-
bts = cv.imencode(".jpeg", img_blurred)[1].tobytes()
130+
131+
bts = cv.imencode(ext, img_blurred)[1].tobytes()
132+
133+
ext = ext.replace(".", "")
134+
ext_mappings = {"jpg": "jpeg", "tif": "tiff"}
135+
ext = ext_mappings.get(ext, ext)
136+
content_type = "image/" + ext
129137

130138
requests.put(
131139
url=dst_url,
132140
data=bts,
133141
headers={
134-
"Content-Type": "image/jpeg",
142+
"Content-Type": content_type,
135143
},
136144
)
137145

@@ -141,13 +149,17 @@ def image_blur_func(
141149
image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"])
142150

143151

144-
def image_blur_to_bytes_func(src_obj_ref_rt: str, ksize_x: int, ksize_y: int) -> bytes:
152+
def image_blur_to_bytes_func(
153+
src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str
154+
) -> bytes:
145155
import json
146156

147157
import cv2 as cv # type: ignore
148158
import numpy as np
149159
import requests
150160

161+
ext = ext or ".jpeg"
162+
151163
src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
152164
src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
153165

@@ -157,7 +169,7 @@ def image_blur_to_bytes_func(src_obj_ref_rt: str, ksize_x: int, ksize_y: int) ->
157169
nparr = np.frombuffer(bts, np.uint8)
158170
img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
159171
img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y))
160-
bts = cv.imencode(".jpeg", img_blurred)[1].tobytes()
172+
bts = cv.imencode(ext, img_blurred)[1].tobytes()
161173

162174
return bts
163175

@@ -174,13 +186,16 @@ def image_resize_func(
174186
dsize_y: int,
175187
fx: float,
176188
fy: float,
189+
ext: str,
177190
) -> str:
178191
import json
179192

180193
import cv2 as cv # type: ignore
181194
import numpy as np
182195
import requests
183196

197+
ext = ext or ".jpeg"
198+
184199
src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
185200
dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt)
186201

@@ -193,13 +208,19 @@ def image_resize_func(
193208
nparr = np.frombuffer(bts, np.uint8)
194209
img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
195210
img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy)
196-
bts = cv.imencode(".jpeg", img_resized)[1].tobytes()
211+
212+
bts = cv.imencode(ext, img_resized)[1].tobytes()
213+
214+
ext = ext.replace(".", "")
215+
ext_mappings = {"jpg": "jpeg", "tif": "tiff"}
216+
ext = ext_mappings.get(ext, ext)
217+
content_type = "image/" + ext
197218

198219
requests.put(
199220
url=dst_url,
200221
data=bts,
201222
headers={
202-
"Content-Type": "image/jpeg",
223+
"Content-Type": content_type,
203224
},
204225
)
205226

@@ -217,13 +238,16 @@ def image_resize_to_bytes_func(
217238
dsize_y: int,
218239
fx: float,
219240
fy: float,
241+
ext: str,
220242
) -> bytes:
221243
import json
222244

223245
import cv2 as cv # type: ignore
224246
import numpy as np
225247
import requests
226248

249+
ext = ext or ".jpeg"
250+
227251
src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
228252
src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
229253

@@ -244,14 +268,21 @@ def image_resize_to_bytes_func(
244268

245269

246270
def image_normalize_func(
247-
src_obj_ref_rt: str, dst_obj_ref_rt: str, alpha: float, beta: float, norm_type: str
271+
src_obj_ref_rt: str,
272+
dst_obj_ref_rt: str,
273+
alpha: float,
274+
beta: float,
275+
norm_type: str,
276+
ext: str,
248277
) -> str:
249278
import json
250279

251280
import cv2 as cv # type: ignore
252281
import numpy as np
253282
import requests
254283

284+
ext = ext or ".jpeg"
285+
255286
norm_type_mapping = {
256287
"inf": cv.NORM_INF,
257288
"l1": cv.NORM_L1,
@@ -273,13 +304,19 @@ def image_normalize_func(
273304
img_normalized = cv.normalize(
274305
img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type]
275306
)
276-
bts = cv.imencode(".jpeg", img_normalized)[1].tobytes()
307+
308+
bts = cv.imencode(ext, img_normalized)[1].tobytes()
309+
310+
ext = ext.replace(".", "")
311+
ext_mappings = {"jpg": "jpeg", "tif": "tiff"}
312+
ext = ext_mappings.get(ext, ext)
313+
content_type = "image/" + ext
277314

278315
requests.put(
279316
url=dst_url,
280317
data=bts,
281318
headers={
282-
"Content-Type": "image/jpeg",
319+
"Content-Type": content_type,
283320
},
284321
)
285322

@@ -292,14 +329,16 @@ def image_normalize_func(
292329

293330

294331
def image_normalize_to_bytes_func(
295-
src_obj_ref_rt: str, alpha: float, beta: float, norm_type: str
332+
src_obj_ref_rt: str, alpha: float, beta: float, norm_type: str, ext: str
296333
) -> bytes:
297334
import json
298335

299336
import cv2 as cv # type: ignore
300337
import numpy as np
301338
import requests
302339

340+
ext = ext or ".jpeg"
341+
303342
norm_type_mapping = {
304343
"inf": cv.NORM_INF,
305344
"l1": cv.NORM_L1,
@@ -395,7 +434,7 @@ def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> s
395434
if curr_chunk:
396435
all_text_chunks.append(curr_chunk)
397436

398-
all_text_json_string = json.dumps(all_text_chunks)
437+
all_text_json_string = json.dumps(["123"])
399438
return all_text_json_string
400439

401440

bigframes/core/indexers.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -460,13 +460,12 @@ def _iloc_getitem_series_or_dataframe(
460460
return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0])
461461

462462
# len(key) == 2
463+
df = typing.cast(bigframes.dataframe.DataFrame, series_or_dataframe)
463464
if isinstance(key[1], int):
464-
return series_or_dataframe.iat[key]
465+
return df.iat[key]
465466
elif isinstance(key[1], list):
466-
columns = series_or_dataframe.columns[key[1]]
467-
return _iloc_getitem_series_or_dataframe(
468-
series_or_dataframe[columns], key[0]
469-
)
467+
columns = df.columns[key[1]]
468+
return _iloc_getitem_series_or_dataframe(df[columns], key[0])
470469
raise NotImplementedError(
471470
f"iloc does not yet support indexing with {key}. {constants.FEEDBACK_LINK}"
472471
)
@@ -476,13 +475,14 @@ def _iloc_getitem_series_or_dataframe(
476475
Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
477476
series_or_dataframe.iloc[0:0],
478477
)
479-
df = series_or_dataframe
480478
if isinstance(series_or_dataframe, bigframes.series.Series):
481479
original_series_name = series_or_dataframe.name
482480
series_name = (
483481
original_series_name if original_series_name is not None else 0
484482
)
485483
df = series_or_dataframe.to_frame()
484+
else:
485+
df = series_or_dataframe
486486
original_index_names = df.index.names
487487
temporary_index_names = [
488488
guid.generate_guid(prefix="temp_iloc_index_")

0 commit comments

Comments
 (0)