You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
291
293
connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
292
-
max_batching_rows (int, default 10,000): Max number of rows per batch send to cloud run to execute the function.
294
+
max_batching_rows (int, default 8,096): Max number of rows per batch sent to Cloud Run to execute the function.
295
+
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
296
+
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
fy (float, default 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
360
370
dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
361
371
connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
362
-
max_batching_rows (int, default 10,000): Max number of rows per batch send to cloud run to execute the function.
372
+
max_batching_rows (int, default 8,096): Max number of rows per batch sent to Cloud Run to execute the function.
373
+
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
374
+
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
439
457
dst (str or bigframes.series.Series or None, default None): Destination GCS folder str or blob series. If None, output to BQ as bytes.
440
458
connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
441
-
max_batching_rows (int, default 10,000): Max number of rows per batch send to cloud run to execute the function.
459
+
max_batching_rows (int, default 8,096): Max number of rows per batch sent to Cloud Run to execute the function.
460
+
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
461
+
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
442
462
443
463
Returns:
444
464
BigFrames Blob Series
@@ -454,6 +474,8 @@ def image_normalize(
454
474
session=self._block.session,
455
475
connection=connection,
456
476
max_batching_rows=max_batching_rows,
477
+
container_cpu=container_cpu,
478
+
container_memory=container_memory,
457
479
).udf()
458
480
459
481
df["alpha"] =alpha
@@ -477,6 +499,8 @@ def image_normalize(
477
499
session=self._block.session,
478
500
connection=connection,
479
501
max_batching_rows=max_batching_rows,
502
+
container_cpu=container_cpu,
503
+
container_memory=container_memory,
480
504
).udf()
481
505
482
506
dst_rt=dst.blob._get_runtime_json_str(mode="RW")
@@ -495,7 +519,9 @@ def pdf_extract(
495
519
self,
496
520
*,
497
521
connection: Optional[str] =None,
498
-
max_batching_rows: int=10000,
522
+
max_batching_rows: int=8096,
523
+
container_cpu: Union[float, int] =0.33,
524
+
container_memory: str="512Mi",
499
525
) ->bigframes.series.Series:
500
526
"""Extracts and chunks text from PDF URLs and saves the text as
501
527
arrays of string.
@@ -508,8 +534,10 @@ def pdf_extract(
508
534
connection (str or None, default None): BQ connection used for
509
535
function internet transactions, and the output blob if "dst"
510
536
is str. If None, uses default connection of the session.
511
-
max_batching_rows (int, default 10,000): Max number of rows per batch
537
+
max_batching_rows (int, default 8,096): Max number of rows per batch
512
538
sent to Cloud Run to execute the function.
539
+
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
540
+
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
513
541
514
542
Returns:
515
543
bigframes.series.Series: contains all text from a PDF file
@@ -524,6 +552,8 @@ def pdf_extract(
524
552
session=self._block.session,
525
553
connection=connection,
526
554
max_batching_rows=max_batching_rows,
555
+
container_cpu=container_cpu,
556
+
container_memory=container_memory,
527
557
).udf()
528
558
529
559
src_rt=self._get_runtime_json_str(mode="R")
@@ -536,7 +566,9 @@ def pdf_chunk(
536
566
connection: Optional[str] =None,
537
567
chunk_size: int=1000,
538
568
overlap_size: int=200,
539
-
max_batching_rows: int=10000,
569
+
max_batching_rows: int=8096,
570
+
container_cpu: Union[float, int] =0.33,
571
+
container_memory: str="512Mi",
540
572
) ->bigframes.series.Series:
541
573
"""Extracts and chunks text from PDF URLs and saves the text as
542
574
arrays of strings.
@@ -554,8 +586,10 @@ def pdf_chunk(
554
586
overlap_size (int, default 200): the number of overlapping characters
555
587
between consecutive chunks. This helps to ensure context is
556
588
preserved across chunk boundaries.
557
-
max_batching_rows (int, default 10,000): Max number of rows per batch
589
+
max_batching_rows (int, default 8,096): Max number of rows per batch
558
590
sent to Cloud Run to execute the function.
591
+
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
592
+
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
559
593
560
594
Returns:
561
595
bigframes.series.Series of array[str], where each string is a
0 commit comments