Skip to content

Commit c34409d

Browse files
authored
Merge branch 'main' into output_schema
2 parents 4776872 + 209d0d4 commit c34409d

File tree

15 files changed

+203
-90
lines changed

15 files changed

+203
-90
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,34 @@ def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr:
301301
assert isinstance(op, string_ops.StrConcatOp)
302302
return pl.concat_str(l_input, r_input)
303303

304+
@compile_op.register(string_ops.StrContainsOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile a literal substring containment test to a polars expression.

    Args:
        op: The scalar op being compiled; must be a ``StrContainsOp``.
        input: The polars expression producing the strings to search.

    Returns:
        The expression built by ``input.str.contains`` for ``op.pat``.
    """
    assert isinstance(op, string_ops.StrContainsOp)
    substring = op.pat
    # literal=True: the pattern is matched verbatim rather than as a regex.
    return input.str.contains(pattern=substring, literal=True)
308+
309+
@compile_op.register(string_ops.StrContainsRegexOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile a regex containment test to a polars expression.

    Args:
        op: The scalar op being compiled; must be a ``StrContainsRegexOp``.
        input: The polars expression producing the strings to search.

    Returns:
        The expression built by ``input.str.contains`` for ``op.pat``.
    """
    assert isinstance(op, string_ops.StrContainsRegexOp)
    regex = op.pat
    # literal=False: the pattern is interpreted as a regular expression.
    return input.str.contains(pattern=regex, literal=False)
313+
314+
@compile_op.register(string_ops.StartsWithOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile a "starts with any of these prefixes" test to polars.

    Args:
        op: The scalar op being compiled; must be a ``StartsWithOp``.
            ``op.pat`` is read as an indexable collection of prefixes.
        input: The polars expression producing the strings to test.

    Returns:
        A single ``starts_with`` expression when exactly one prefix is
        given; otherwise the ``pl.any_horizontal`` combination of one
        ``starts_with`` expression per prefix.
    """
    assert isinstance(op, string_ops.StartsWithOp)
    prefixes = op.pat
    # Single prefix: no need to combine several checks.
    if len(prefixes) == 1:
        return input.str.starts_with(prefixes[0])
    per_prefix = [input.str.starts_with(prefix) for prefix in prefixes]
    return pl.any_horizontal(*per_prefix)
323+
324+
@compile_op.register(string_ops.EndsWithOp)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile an "ends with any of these suffixes" test to polars.

    Args:
        op: The scalar op being compiled; must be an ``EndsWithOp``.
            ``op.pat`` is read as an indexable collection of suffixes.
        input: The polars expression producing the strings to test.

    Returns:
        A single ``ends_with`` expression when exactly one suffix is
        given; otherwise the ``pl.any_horizontal`` combination of one
        ``ends_with`` expression per suffix.
    """
    assert isinstance(op, string_ops.EndsWithOp)
    suffixes = op.pat
    # Single suffix: no need to combine several checks.
    if len(suffixes) == 1:
        return input.str.ends_with(suffixes[0])
    per_suffix = [input.str.ends_with(suffix) for suffix in suffixes]
    return pl.any_horizontal(*per_suffix)
331+
304332
@compile_op.register(dt_ops.StrftimeOp)
305333
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
306334
assert isinstance(op, dt_ops.StrftimeOp)

bigframes/core/expression.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,11 @@ def is_identity(self) -> bool:
253253
def transform_children(self, t: Callable[[Expression], Expression]) -> Expression:
254254
...
255255

256+
def bottom_up(self, t: Callable[[Expression], Expression]) -> Expression:
    """Apply ``t`` to every node of this expression tree, children first.

    Each child is rewritten recursively through its own ``bottom_up``
    before ``t`` is applied to the node rebuilt from those results.

    Args:
        t: Transformation applied to each (already child-rewritten) node.

    Returns:
        The result of ``t`` on this node after its children were rewritten.
    """
    rebuilt = self.transform_children(lambda node: node.bottom_up(t))
    return t(rebuilt)
260+
256261
def walk(self) -> Generator[Expression, None, None]:
257262
yield self
258263
for child in self.children:

bigframes/core/rewrite/op_lowering.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def lower_expr_step(expr: expression.Expression) -> expression.Expression:
4444
return maybe_rule.lower(expr)
4545
return expr
4646

47-
return lower_expr_step(expr.transform_children(lower_expr_step))
47+
return expr.bottom_up(lower_expr_step)
4848

4949
def lower_node(node: bigframe_node.BigFrameNode) -> bigframe_node.BigFrameNode:
5050
if isinstance(

bigframes/dataframe.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -582,6 +582,7 @@ def __getitem__(
582582
# Index of column labels can be treated the same as a sequence of column labels.
583583
pandas.Index,
584584
bigframes.series.Series,
585+
slice,
585586
],
586587
): # No return type annotations (like pandas) as type cannot always be determined statically
587588
# NOTE: This implements the operations described in

bigframes/ml/cluster.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ def __init__(
5959
warm_start: bool = False,
6060
):
6161
self.n_clusters = n_clusters
62-
# allow the alias to be compatible with sklean
62+
# allow the alias to be compatible with sklearn
6363
self.init = "kmeans++" if init == "k-means++" else init
6464
self.init_col = init_col
6565
self.distance_type = distance_type

bigframes/ml/forecasting.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,7 @@ def _fit(
211211
Args:
212212
X (bigframes.dataframe.DataFrame or bigframes.series.Series,
213213
or pandas.core.frame.DataFrame or pandas.core.series.Series):
214-
A dataframe or series of trainging timestamp.
214+
A dataframe or series of training timestamp.
215215
y (bigframes.dataframe.DataFrame, or bigframes.series.Series,
216216
or pandas.core.frame.DataFrame, or pandas.core.series.Series):
217217
Target values for training.

bigframes/ml/llm.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -834,7 +834,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> GeminiTextGenerator:
834834
class Claude3TextGenerator(base.RetriableRemotePredictor):
835835
"""Claude3 text generator LLM model.
836836
837-
Go to Google Cloud Console -> Vertex AI -> Model Garden page to enabe the models before use. Must have the Consumer Procurement Entitlement Manager Identity and Access Management (IAM) role to enable the models.
837+
Go to Google Cloud Console -> Vertex AI -> Model Garden page to enable the models before use. Must have the Consumer Procurement Entitlement Manager Identity and Access Management (IAM) role to enable the models.
838838
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-partner-models#grant-permissions
839839
840840
.. note::

bigframes/ml/model_selection.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ def train_test_split(
8282
dfs = list(utils.batch_convert_to_dataframe(*arrays))
8383

8484
def _stratify_split(df: bpd.DataFrame, stratify: bpd.Series) -> List[bpd.DataFrame]:
85-
"""Split a single DF accoding to the stratify Series."""
85+
"""Split a single DF according to the stratify Series."""
8686
stratify = stratify.rename("bigframes_stratify_col") # avoid name conflicts
8787
merged_df = df.join(stratify.to_frame(), how="outer")
8888

bigframes/ml/preprocessing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,7 @@ def _compile_to_sql(
434434
if columns is None:
435435
columns = X.columns
436436
drop = self.drop if self.drop is not None else "none"
437-
# minus one here since BQML's inplimentation always includes index 0, and top_k is on top of that.
437+
# minus one here since BQML's implementation always includes index 0, and top_k is on top of that.
438438
top_k = (
439439
(self.max_categories - 1)
440440
if self.max_categories is not None

bigframes/session/metrics.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,12 +45,17 @@ def count_job_stats(
4545
bytes_processed = getattr(row_iterator, "total_bytes_processed", 0) or 0
4646
query_char_count = len(getattr(row_iterator, "query", "") or "")
4747
slot_millis = getattr(row_iterator, "slot_millis", 0) or 0
48-
exec_seconds = 0.0
48+
created = getattr(row_iterator, "created", None)
49+
ended = getattr(row_iterator, "ended", None)
50+
exec_seconds = (
51+
(ended - created).total_seconds() if created and ended else 0.0
52+
)
4953

5054
self.execution_count += 1
5155
self.query_char_count += query_char_count
5256
self.bytes_processed += bytes_processed
5357
self.slot_millis += slot_millis
58+
self.execution_secs += exec_seconds
5459

5560
elif query_job.configuration.dry_run:
5661
query_char_count = len(query_job.query)

0 commit comments

Comments
 (0)