             dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
             connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
+            max_batching_rows (int, default 10,000): Max number of rows per batch sent to Cloud Run to execute the function.
             fy (float, default 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
             dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
             connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
+            max_batching_rows (int, default 10,000): Max number of rows per batch sent to Cloud Run to execute the function.
             norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
             dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
             connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
+            max_batching_rows (int, default 10,000): Max number of rows per batch sent to Cloud Run to execute the function.

         Returns:
             BigFrames Blob Series
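For context, a rough usage sketch of image_normalize with the new parameter. This is not part of the PR: the bucket paths and connection ID are placeholders, and from_glob_path is assumed as the way to build a blob column.

import bigframes.pandas as bpd

# Hedged sketch, not from this PR: placeholder bucket paths and connection ID.
df = bpd.from_glob_path("gs://my-bucket/images/*.png", name="image")
normalized = df["image"].blob.image_normalize(
    norm_type="l2",                    # one of "inf", "l1", "l2", "minmax"
    dst="gs://my-bucket/normalized/",
    connection="us.my-connection",
    max_batching_rows=5000,            # new: cap rows sent per Cloud Run batch
)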
@@ -443,6 +453,7 @@ def image_normalize(
             blob_func.image_normalize_to_bytes_def,
             session=self._block.session,
             connection=connection,
+            max_batching_rows=max_batching_rows,
         ).udf()

         df["alpha"] = alpha
@@ -465,6 +476,7 @@ def image_normalize(
             blob_func.image_normalize_def,
             session=self._block.session,
             connection=connection,
+            max_batching_rows=max_batching_rows,
         ).udf()

         dst_rt = dst.blob._get_runtime_json_str(mode="RW")
@@ -480,7 +492,10 @@ def image_normalize(
         return dst

     def pdf_extract(
-        self, *, connection: Optional[str] = None
+        self,
+        *,
+        connection: Optional[str] = None,
+        max_batching_rows: int = 10000,
     ) -> bigframes.series.Series:
         """Extracts and chunks text from PDF URLs and saves the text as
         arrays of strings.
@@ -493,6 +508,8 @@ def pdf_extract(
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
+            max_batching_rows (int, default 10,000): Max number of rows per batch
+                sent to Cloud Run to execute the function.

         Returns:
             bigframes.series.Series: contains all text from a pdf file
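A similar hedged usage sketch for pdf_extract with the new parameter; paths and the connection ID are placeholders, and from_glob_path is assumed for building the blob column.

import bigframes.pandas as bpd

# Hedged sketch, not from this PR: placeholder bucket path and connection ID.
pdfs = bpd.from_glob_path("gs://my-bucket/docs/*.pdf", name="pdf")
texts = pdfs["pdf"].blob.pdf_extract(
    connection="us.my-connection",
    max_batching_rows=2000,  # smaller batches mean more, lighter Cloud Run requests
)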
@@ -502,14 +519,15 @@ def pdf_extract(

         connection = self._resolve_connection(connection)

-        pdf_chunk_udf = blob_func.TransformFunction(
+        pdf_extract_udf = blob_func.TransformFunction(
             blob_func.pdf_extract_def,
             session=self._block.session,
             connection=connection,
+            max_batching_rows=max_batching_rows,
         ).udf()

         src_rt = self._get_runtime_json_str(mode="R")
-        res = src_rt.apply(pdf_chunk_udf)
+        res = src_rt.apply(pdf_extract_udf)
         return res

     def pdf_chunk(
@@ -518,6 +536,7 @@ def pdf_chunk(
         connection: Optional[str] = None,
         chunk_size: int = 1000,
         overlap_size: int = 200,
+        max_batching_rows: int = 10000,
     ) -> bigframes.series.Series:
         """Extracts and chunks text from PDF URLs and saves the text as
         arrays of strings.
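And a matching sketch for pdf_chunk, showing max_batching_rows next to the existing chunking parameters; all values and names below are illustrative, and the docstring hunk that follows describes the new parameter.

import bigframes.pandas as bpd

# Hedged sketch, not from this PR: placeholder path and connection ID.
pdfs = bpd.from_glob_path("gs://my-bucket/docs/*.pdf", name="pdf")
chunks = pdfs["pdf"].blob.pdf_chunk(
    connection="us.my-connection",
    chunk_size=1000,          # characters per chunk
    overlap_size=200,         # characters shared between consecutive chunks
    max_batching_rows=500,    # cap rows per Cloud Run batch
)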
@@ -535,6 +554,8 @@ def pdf_chunk(
             overlap_size (int, default 200): the number of overlapping characters
                 between consecutive chunks. This helps to ensure context is
                 preserved across chunk boundaries.
+            max_batching_rows (int, default 10,000): Max number of rows per batch
+                sent to Cloud Run to execute the function.

         Returns:
             bigframes.series.Series of array[str], where each string is a