Skip to content

Commit 4d51fd2

Browse files
Merge remote-tracking branch 'github/main' into session_simplify
2 parents 03249a1 + 9ed0078 commit 4d51fd2

File tree

38 files changed

+1222
-79
lines changed

38 files changed

+1222
-79
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,15 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.17.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.16.0...v2.17.0) (2025-08-22)
8+
9+
10+
### Features
11+
12+
* Add isin local execution impl ([#1993](https://github.com/googleapis/python-bigquery-dataframes/issues/1993)) ([26df6e6](https://github.com/googleapis/python-bigquery-dataframes/commit/26df6e691bb27ed09322a81214faedbf3639b32e))
13+
* Add reset_index names, col_level, col_fill, allow_duplicates args ([#2017](https://github.com/googleapis/python-bigquery-dataframes/issues/2017)) ([c02a1b6](https://github.com/googleapis/python-bigquery-dataframes/commit/c02a1b67d27758815430bb8006ac3a72cea55a89))
14+
* Support callable for series mask method ([#2014](https://github.com/googleapis/python-bigquery-dataframes/issues/2014)) ([5ac32eb](https://github.com/googleapis/python-bigquery-dataframes/commit/5ac32ebe17cfda447870859f5dd344b082b4d3d0))
15+
716
## [2.16.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.15.0...v2.16.0) (2025-08-20)
817

918

bigframes/core/blocks.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -390,12 +390,21 @@ def reversed(self) -> Block:
390390
index_labels=self.index.names,
391391
)
392392

393-
def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
393+
def reset_index(
394+
self,
395+
level: LevelsType = None,
396+
drop: bool = True,
397+
*,
398+
col_level: Union[str, int] = 0,
399+
col_fill: typing.Hashable = "",
400+
allow_duplicates: bool = False,
401+
) -> Block:
394402
"""Reset the index of the block, promoting the old index to a value column.
395403
396404
Arguments:
397405
level: the label or index level of the index levels to remove.
398406
name: this is the column id for the new value id derived from the old index
407+
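allow_duplicates: if False, raise ValueError when a promoted index label already exists among the column labels; if True, skip that check and allow duplicate labels.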
399408
400409
Returns:
401410
A new Block because dropping index columns can break references
@@ -441,6 +450,11 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
441450
)
442451
else:
443452
# Add index names to column index
453+
col_level_n = (
454+
col_level
455+
if isinstance(col_level, int)
456+
else self.column_labels.names.index(col_level)
457+
)
444458
column_labels_modified = self.column_labels
445459
for position, level_id in enumerate(level_ids):
446460
label = self.col_id_to_index_name[level_id]
@@ -450,11 +464,15 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
450464
else:
451465
label = f"level_{self.index_columns.index(level_id)}"
452466

453-
if label in self.column_labels:
467+
if (not allow_duplicates) and (label in self.column_labels):
454468
raise ValueError(f"cannot insert {label}, already exists")
469+
455470
if isinstance(self.column_labels, pd.MultiIndex):
456471
nlevels = self.column_labels.nlevels
457-
label = tuple(label if i == 0 else "" for i in range(nlevels))
472+
label = tuple(
473+
label if i == col_level_n else col_fill for i in range(nlevels)
474+
)
475+
458476
# Create index copy with label inserted
459477
# See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
460478
column_labels_modified = column_labels_modified.insert(position, label)

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 29 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -140,14 +140,37 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
140140

141141
@BINARY_OP_REGISTRATION.register(ops.ge_op)
142142
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
143-
return sge.GTE(this=left.expr, expression=right.expr)
143+
left_expr = _coerce_bool_to_int(left)
144+
right_expr = _coerce_bool_to_int(right)
145+
return sge.GTE(this=left_expr, expression=right_expr)
146+
147+
148+
@BINARY_OP_REGISTRATION.register(ops.gt_op)
149+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
150+
left_expr = _coerce_bool_to_int(left)
151+
right_expr = _coerce_bool_to_int(right)
152+
return sge.GT(this=left_expr, expression=right_expr)
144153

145154

146155
@BINARY_OP_REGISTRATION.register(ops.JSONSet)
147156
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
148157
return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr)
149158

150159

160+
@BINARY_OP_REGISTRATION.register(ops.lt_op)
161+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
162+
left_expr = _coerce_bool_to_int(left)
163+
right_expr = _coerce_bool_to_int(right)
164+
return sge.LT(this=left_expr, expression=right_expr)
165+
166+
167+
@BINARY_OP_REGISTRATION.register(ops.le_op)
168+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
169+
left_expr = _coerce_bool_to_int(left)
170+
right_expr = _coerce_bool_to_int(right)
171+
return sge.LTE(this=left_expr, expression=right_expr)
172+
173+
151174
@BINARY_OP_REGISTRATION.register(ops.mul_op)
152175
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
153176
left_expr = _coerce_bool_to_int(left)
@@ -170,6 +193,11 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
170193
return sge.NEQ(this=left_expr, expression=right_expr)
171194

172195

196+
@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
197+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
198+
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)
199+
200+
173201
@BINARY_OP_REGISTRATION.register(ops.sub_op)
174202
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
175203
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
@@ -202,11 +230,6 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
202230
)
203231

204232

205-
@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
206-
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
207-
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)
208-
209-
210233
def _coerce_bool_to_int(typed_expr: TypedExpr) -> sge.Expression:
211234
"""Coerce boolean expression to integer."""
212235
if typed_expr.dtype == dtypes.BOOL_DTYPE:

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 129 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -177,14 +177,96 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
177177
)
178178

179179

180+
@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
181+
def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
182+
return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%"))
183+
184+
180185
@UNARY_OP_REGISTRATION.register(ops.StrContainsRegexOp)
181186
def _(op: ops.StrContainsRegexOp, expr: TypedExpr) -> sge.Expression:
182187
return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat))
183188

184189

185-
@UNARY_OP_REGISTRATION.register(ops.StrContainsOp)
186-
def _(op: ops.StrContainsOp, expr: TypedExpr) -> sge.Expression:
187-
return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%"))
190+
@UNARY_OP_REGISTRATION.register(ops.StrExtractOp)
191+
def _(op: ops.StrExtractOp, expr: TypedExpr) -> sge.Expression:
192+
return sge.RegexpExtract(
193+
this=expr.expr, expression=sge.convert(op.pat), group=sge.convert(op.n)
194+
)
195+
196+
197+
@UNARY_OP_REGISTRATION.register(ops.StrFindOp)
198+
def _(op: ops.StrFindOp, expr: TypedExpr) -> sge.Expression:
199+
# INSTR is 1-based, so we need to adjust the start position.
200+
start = sge.convert(op.start + 1) if op.start is not None else sge.convert(1)
201+
if op.end is not None:
202+
# BigQuery's INSTR doesn't support `end`, so we need to use SUBSTR.
203+
return sge.func(
204+
"INSTR",
205+
sge.Substring(
206+
this=expr.expr,
207+
start=start,
208+
length=sge.convert(op.end - (op.start or 0)),
209+
),
210+
sge.convert(op.substr),
211+
) - sge.convert(1)
212+
else:
213+
return sge.func(
214+
"INSTR",
215+
expr.expr,
216+
sge.convert(op.substr),
217+
start,
218+
) - sge.convert(1)
219+
220+
221+
@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
222+
def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
223+
return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
224+
225+
226+
@UNARY_OP_REGISTRATION.register(ops.StrPadOp)
227+
def _(op: ops.StrPadOp, expr: TypedExpr) -> sge.Expression:
228+
pad_length = sge.func(
229+
"GREATEST", sge.Length(this=expr.expr), sge.convert(op.length)
230+
)
231+
if op.side == "left":
232+
return sge.func(
233+
"LPAD",
234+
expr.expr,
235+
pad_length,
236+
sge.convert(op.fillchar),
237+
)
238+
elif op.side == "right":
239+
return sge.func(
240+
"RPAD",
241+
expr.expr,
242+
pad_length,
243+
sge.convert(op.fillchar),
244+
)
245+
else: # side == both
246+
lpad_amount = sge.Cast(
247+
this=sge.func(
248+
"SAFE_DIVIDE",
249+
sge.Sub(this=pad_length, expression=sge.Length(this=expr.expr)),
250+
sge.convert(2),
251+
),
252+
to="INT64",
253+
) + sge.Length(this=expr.expr)
254+
return sge.func(
255+
"RPAD",
256+
sge.func(
257+
"LPAD",
258+
expr.expr,
259+
lpad_amount,
260+
sge.convert(op.fillchar),
261+
),
262+
pad_length,
263+
sge.convert(op.fillchar),
264+
)
265+
266+
267+
@UNARY_OP_REGISTRATION.register(ops.StrRepeatOp)
268+
def _(op: ops.StrRepeatOp, expr: TypedExpr) -> sge.Expression:
269+
return sge.Repeat(this=expr.expr, times=sge.convert(op.repeats))
188270

189271

190272
@UNARY_OP_REGISTRATION.register(ops.date_op)
@@ -262,6 +344,27 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
262344
return sge.func("ST_BOUNDARY", expr.expr)
263345

264346

347+
@UNARY_OP_REGISTRATION.register(ops.GeoStBufferOp)
348+
def _(op: ops.GeoStBufferOp, expr: TypedExpr) -> sge.Expression:
349+
return sge.func(
350+
"ST_BUFFER",
351+
expr.expr,
352+
sge.convert(op.buffer_radius),
353+
sge.convert(op.num_seg_quarter_circle),
354+
sge.convert(op.use_spheroid),
355+
)
356+
357+
358+
@UNARY_OP_REGISTRATION.register(ops.geo_st_centroid_op)
359+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
360+
return sge.func("ST_CENTROID", expr.expr)
361+
362+
363+
@UNARY_OP_REGISTRATION.register(ops.geo_st_convexhull_op)
364+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
365+
return sge.func("ST_CONVEXHULL", expr.expr)
366+
367+
265368
@UNARY_OP_REGISTRATION.register(ops.geo_st_geogfromtext_op)
266369
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
267370
return sge.func("SAFE.ST_GEOGFROMTEXT", expr.expr)
@@ -434,6 +537,17 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
434537
return sge.Lower(this=expr.expr)
435538

436539

540+
@UNARY_OP_REGISTRATION.register(ops.MapOp)
541+
def _(op: ops.MapOp, expr: TypedExpr) -> sge.Expression:
542+
return sge.Case(
543+
this=expr.expr,
544+
ifs=[
545+
sge.If(this=sge.convert(key), true=sge.convert(value))
546+
for key, value in op.mappings
547+
],
548+
)
549+
550+
437551
@UNARY_OP_REGISTRATION.register(ops.minute_op)
438552
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
439553
return sge.Extract(this=sge.Identifier(this="MINUTE"), expression=expr.expr)
@@ -444,11 +558,6 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
444558
return sge.Extract(this=sge.Identifier(this="MONTH"), expression=expr.expr)
445559

446560

447-
@UNARY_OP_REGISTRATION.register(ops.StrLstripOp)
448-
def _(op: ops.StrLstripOp, expr: TypedExpr) -> sge.Expression:
449-
return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT")
450-
451-
452561
@UNARY_OP_REGISTRATION.register(ops.neg_op)
453562
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
454563
return sge.Neg(this=expr.expr)
@@ -484,6 +593,18 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
484593
return sge.Extract(this=sge.Identifier(this="QUARTER"), expression=expr.expr)
485594

486595

596+
@UNARY_OP_REGISTRATION.register(ops.ReplaceStrOp)
597+
def _(op: ops.ReplaceStrOp, expr: TypedExpr) -> sge.Expression:
598+
return sge.func("REPLACE", expr.expr, sge.convert(op.pat), sge.convert(op.repl))
599+
600+
601+
@UNARY_OP_REGISTRATION.register(ops.RegexReplaceStrOp)
602+
def _(op: ops.RegexReplaceStrOp, expr: TypedExpr) -> sge.Expression:
603+
return sge.func(
604+
"REGEXP_REPLACE", expr.expr, sge.convert(op.pat), sge.convert(op.repl)
605+
)
606+
607+
487608
@UNARY_OP_REGISTRATION.register(ops.reverse_op)
488609
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
489610
return sge.func("REVERSE", expr.expr)

0 commit comments

Comments
 (0)