Skip to content

Commit 7763818

Browse files
committed
Merge main to shuowei-anywidget-nested-strcut-array
2 parents a34802e + 34b5975 commit 7763818

File tree

18 files changed

+472
-26
lines changed

18 files changed

+472
-26
lines changed

bigframes/core/compile/sqlglot/aggregations/windows.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def apply_window_if_present(
4444
order_by = None
4545
elif window.is_range_bounded:
4646
order_by = get_window_order_by((window.ordering[0],))
47+
order_by = remove_null_ordering_for_range_windows(order_by)
4748
else:
4849
order_by = get_window_order_by(window.ordering)
4950

@@ -150,6 +151,30 @@ def get_window_order_by(
150151
return tuple(order_by)
151152

152153

154+
def remove_null_ordering_for_range_windows(
    order_by: typing.Optional[tuple[sge.Ordered, ...]],
) -> typing.Optional[tuple[sge.Ordered, ...]]:
    """Normalize the NULLS FIRST/LAST setting of ORDER BY keys for RANGE windows.

    Only the following direction / null-placement combinations are treated
    as supported, so every key is rewritten to the supported placement for
    its direction:

        supported:      sum(x) over (order by y desc nulls last)
        not supported:  sum(x) over (order by y asc nulls last)
        supported:      sum(x) over (order by y asc nulls first)
        not supported:  sum(x) over (order by y desc nulls first)

    Args:
        order_by: The ordering keys of the window, or ``None`` when the
            window has no ordering.

    Returns:
        ``None`` when ``order_by`` is ``None``; otherwise a new tuple of
        ordering keys. The input expressions are never modified: each key is
        copied before its ``nulls_first`` argument is adjusted.
    """
    if order_by is None:
        return None

    normalized = []
    for key in order_by:
        # Copy first: ``key.args`` is the node's own argument dict, so editing
        # it (or re-wrapping its children in a new node) would silently change
        # the caller's expression tree.
        new_key = key.copy()
        desc = new_key.args.get("desc")
        if desc is True:
            # Descending keys must sort NULLs last.
            if new_key.args.get("nulls_first", False):
                new_key.set("nulls_first", False)
        elif desc is False:
            # Ascending keys must sort NULLs first (also when unspecified).
            new_key.set("nulls_first", True)
        normalized.append(new_key)
    return tuple(normalized)
176+
177+
153178
def _get_window_bounds(
154179
value, is_preceding: bool
155180
) -> tuple[typing.Union[str, sge.Expression], typing.Optional[str]]:

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,9 @@ def compile_window(node: nodes.WindowOpNode, child: ir.SQLGlotIR) -> ir.SQLGlotI
356356
observation_count = windows.apply_window_if_present(
357357
sge.func("SUM", is_observation), window_spec
358358
)
359+
observation_count = sge.func(
360+
"COALESCE", observation_count, sge.convert(0)
361+
)
359362
else:
360363
# Operations like count treat even NULLs as valid observations
361364
# for the sake of min_periods notnull is just used to convert

bigframes/core/compile/sqlglot/expressions/comparison_ops.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,27 +89,39 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
8989

9090
@register_binary_op(ops.ge_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``left >= right`` into a ``GTE`` expression.

    If either operand is a NULL literal the result is a NULL literal;
    otherwise both operands go through ``_coerce_bool_to_int`` first.
    """
    operands = (left, right)
    if any(operand.expr == sge.null() for operand in operands):
        return sge.null()

    lhs, rhs = (_coerce_bool_to_int(operand) for operand in operands)
    return sge.GTE(this=lhs, expression=rhs)
9598

9699

97100
@register_binary_op(ops.gt_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``left > right`` into a ``GT`` expression.

    If either operand is a NULL literal the result is a NULL literal;
    otherwise both operands go through ``_coerce_bool_to_int`` first.
    """
    operands = (left, right)
    if any(operand.expr == sge.null() for operand in operands):
        return sge.null()

    lhs, rhs = (_coerce_bool_to_int(operand) for operand in operands)
    return sge.GT(this=lhs, expression=rhs)
102108

103109

104110
@register_binary_op(ops.lt_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``left < right`` into an ``LT`` expression.

    If either operand is a NULL literal the result is a NULL literal;
    otherwise both operands go through ``_coerce_bool_to_int`` first.
    """
    operands = (left, right)
    if any(operand.expr == sge.null() for operand in operands):
        return sge.null()

    lhs, rhs = (_coerce_bool_to_int(operand) for operand in operands)
    return sge.LT(this=lhs, expression=rhs)
109118

110119

111120
@register_binary_op(ops.le_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``left <= right`` into an ``LTE`` expression.

    If either operand is a NULL literal the result is a NULL literal;
    otherwise both operands go through ``_coerce_bool_to_int`` first.
    """
    operands = (left, right)
    if any(operand.expr == sge.null() for operand in operands):
        return sge.null()

    lhs, rhs = (_coerce_bool_to_int(operand) for operand in operands)
    return sge.LTE(this=lhs, expression=rhs)

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,19 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
140140
return sge.Coalesce(this=left.expr, expressions=[right.expr])
141141

142142

143+
@register_binary_op(ops.BinaryRemoteFunctionOp, pass_op=True)
def _(
    left: TypedExpr, right: TypedExpr, op: ops.BinaryRemoteFunctionOp
) -> sge.Expression:
    """Compile a two-argument remote function call.

    The callee is addressed by the project, dataset and routine IDs taken
    from ``op.function_def.routine_ref``; ``left`` and ``right`` are passed
    as its arguments, in that order.
    """
    routine_ref = op.function_def.routine_ref
    call_args = (left.expr, right.expr)
    return sge.func(
        # Each ID is backtick-quoted so it cannot clash with a SQL keyword.
        f"`{routine_ref.project}`.`{routine_ref.dataset_id}`.`{routine_ref.routine_id}`",
        *call_args,
    )
154+
155+
143156
@register_nary_op(ops.case_when_op)
144157
def _(*cases_and_outputs: TypedExpr) -> sge.Expression:
145158
# Need to upcast BOOL to INT if any output is numeric

bigframes/core/compile/sqlglot/expressions/numeric_ops.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ def _(expr: TypedExpr) -> sge.Expression:
388388

389389
@register_binary_op(ops.add_op)
390390
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
391+
if left.expr == sge.null() or right.expr == sge.null():
392+
return sge.null()
393+
391394
if left.dtype == dtypes.STRING_DTYPE and right.dtype == dtypes.STRING_DTYPE:
392395
# String addition
393396
return sge.Concat(expressions=[left.expr, right.expr])
@@ -442,6 +445,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
442445

443446
@register_binary_op(ops.floordiv_op)
444447
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
448+
if left.expr == sge.null() or right.expr == sge.null():
449+
return sge.null()
450+
445451
left_expr = _coerce_bool_to_int(left)
446452
right_expr = _coerce_bool_to_int(right)
447453

@@ -525,6 +531,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
525531

526532
@register_binary_op(ops.mul_op)
527533
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
534+
if left.expr == sge.null() or right.expr == sge.null():
535+
return sge.null()
536+
528537
left_expr = _coerce_bool_to_int(left)
529538
right_expr = _coerce_bool_to_int(right)
530539

@@ -548,6 +557,9 @@ def _(expr: TypedExpr, n_digits: TypedExpr) -> sge.Expression:
548557

549558
@register_binary_op(ops.sub_op)
550559
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
560+
if left.expr == sge.null() or right.expr == sge.null():
561+
return sge.null()
562+
551563
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
552564
left_expr = _coerce_bool_to_int(left)
553565
right_expr = _coerce_bool_to_int(right)

bigframes/display/anywidget.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ class TableWidget(_WIDGET_BASE):
6666

6767
page = traitlets.Int(0).tag(sync=True)
6868
page_size = traitlets.Int(0).tag(sync=True)
69+
max_columns = traitlets.Int(allow_none=True, default_value=None).tag(sync=True)
6970
row_count = traitlets.Int(allow_none=True, default_value=None).tag(sync=True)
7071
table_html = traitlets.Unicode("").tag(sync=True)
7172
sort_context = traitlets.List(traitlets.Dict(), default_value=[]).tag(sync=True)
@@ -103,10 +104,13 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
103104

104105
# respect display options for initial page size
105106
initial_page_size = bigframes.options.display.max_rows
107+
initial_max_columns = bigframes.options.display.max_columns
106108

107109
# set traitlets properties that trigger observers
108110
# TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns.
109111
self.page_size = initial_page_size
112+
self.max_columns = initial_max_columns
113+
110114
# TODO(b/469861913): Nested columns from structs (e.g., 'struct_col.name') are not currently sortable.
111115
# TODO(b/463754889): Support non-string column labels for sorting.
112116
if all(isinstance(col, str) for col in dataframe.columns):
@@ -218,6 +222,14 @@ def _validate_page_size(self, proposal: dict[str, Any]) -> int:
218222
max_page_size = 1000
219223
return min(value, max_page_size)
220224

225+
@traitlets.validate("max_columns")
def _validate_max_columns(self, proposal: dict[str, Any]) -> int:
    """Coerce a proposed ``max_columns`` value to a non-negative integer.

    ``None`` is normalized to 0 and negative values are clamped to 0; any
    other value is returned unchanged.
    """
    proposed = proposal["value"]
    # None is normalized to 0 for the traitlet.
    return 0 if proposed is None else max(0, proposed)
232+
221233
def _get_next_batch(self) -> bool:
222234
"""
223235
Gets the next batch of data from the generator and appends to cache.
@@ -348,6 +360,7 @@ def _set_table_html(self) -> None:
348360
dataframe=page_data,
349361
table_id=f"table-{self._table_id}",
350362
orderable_columns=self.orderable_columns,
363+
max_columns=self.max_columns,
351364
)
352365

353366
if new_page is not None:
@@ -382,3 +395,10 @@ def _page_size_changed(self, _change: dict[str, Any]) -> None:
382395

383396
# Update the table display
384397
self._set_table_html()
398+
399+
@traitlets.observe("max_columns")
def _max_columns_changed(self, _change: dict[str, Any]) -> None:
    """Re-render the table HTML when ``max_columns`` changes.

    Nothing happens while ``_initial_load_complete`` is falsy.
    """
    if self._initial_load_complete:
        self._set_table_html()

bigframes/display/html.py

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,34 +46,64 @@ def render_html(
4646
dataframe: pd.DataFrame,
4747
table_id: str,
4848
orderable_columns: list[str] | None = None,
49+
max_columns: int | None = None,
4950
) -> str:
5051
"""Render a pandas DataFrame to HTML with specific styling and nested data support."""
5152
# Flatten nested data first
5253
flatten_result = _flatten.flatten_nested_data(dataframe)
54+
flat_df = flatten_result.dataframe
5355

5456
orderable_columns = orderable_columns or []
5557
classes = "dataframe table table-striped table-hover"
5658
table_html_parts = [f'<table border="1" class="{classes}" id="{table_id}">']
59+
60+
# Handle column truncation
61+
columns = list(flat_df.columns)
62+
if max_columns is not None and max_columns > 0 and len(columns) > max_columns:
63+
half = max_columns // 2
64+
left_columns = columns[:half]
65+
# Ensure we don't take more than available if half is 0 or calculation is weird,
66+
# but typical case is safe.
67+
right_count = max_columns - half
68+
right_columns = columns[-right_count:] if right_count > 0 else []
69+
show_ellipsis = True
70+
else:
71+
left_columns = columns
72+
right_columns = []
73+
show_ellipsis = False
74+
5775
table_html_parts.append(
58-
_render_table_header(flatten_result.dataframe, orderable_columns)
76+
_render_table_header(
77+
flat_df, orderable_columns, left_columns, right_columns, show_ellipsis
78+
)
5979
)
6080
table_html_parts.append(
6181
_render_table_body(
62-
flatten_result.dataframe,
82+
flat_df,
6383
flatten_result.row_labels,
6484
flatten_result.continuation_rows,
6585
flatten_result.cleared_on_continuation,
6686
flatten_result.nested_columns,
87+
left_columns,
88+
right_columns,
89+
show_ellipsis,
6790
)
6891
)
6992
table_html_parts.append("</table>")
7093
return "".join(table_html_parts)
7194

7295

73-
def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str]) -> str:
96+
def _render_table_header(
97+
dataframe: pd.DataFrame,
98+
orderable_columns: list[str],
99+
left_columns: list[Any],
100+
right_columns: list[Any],
101+
show_ellipsis: bool,
102+
) -> str:
74103
"""Render the header of the HTML table."""
75104
header_parts = [" <thead>", " <tr>"]
76-
for col in dataframe.columns:
105+
106+
def render_col_header(col):
77107
th_classes = []
78108
if col in orderable_columns:
79109
th_classes.append("sortable")
@@ -82,6 +112,18 @@ def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str])
82112
f' <th {class_str}><div class="bf-header-content">'
83113
f"{html.escape(str(col))}</div></th>"
84114
)
115+
116+
for col in left_columns:
117+
render_col_header(col)
118+
119+
if show_ellipsis:
120+
header_parts.append(
121+
' <th><div class="bf-header-content" style="cursor: default;">...</div></th>'
122+
)
123+
124+
for col in right_columns:
125+
render_col_header(col)
126+
85127
header_parts.extend([" </tr>", " </thead>"])
86128
return "\n".join(header_parts)
87129

@@ -92,6 +134,9 @@ def _render_table_body(
92134
continuation_rows: set[int] | None,
93135
clear_on_continuation: list[str],
94136
nested_originated_columns: set[str],
137+
left_columns: list[Any],
138+
right_columns: list[Any],
139+
show_ellipsis: bool,
95140
) -> str:
96141
"""Render the body of the HTML table."""
97142
body_parts = [" <tbody>"]
@@ -117,7 +162,9 @@ def _render_table_body(
117162
body_parts.append(" <tr>")
118163

119164
row = dataframe.iloc[i]
120-
for col_name, value in row.items():
165+
166+
def render_col_cell(col_name):
167+
value = row[col_name]
121168
dtype = dataframe.dtypes.loc[col_name] # type: ignore
122169
cell_html = _render_cell(
123170
value,
@@ -129,6 +176,17 @@ def _render_table_body(
129176
precision,
130177
)
131178
body_parts.append(cell_html)
179+
180+
for col in left_columns:
181+
render_col_cell(col)
182+
183+
if show_ellipsis:
184+
# Ellipsis cell
185+
body_parts.append(' <td class="cell-align-left">...</td>')
186+
187+
for col in right_columns:
188+
render_col_cell(col)
189+
132190
body_parts.append(" </tr>")
133191
body_parts.append(" </tbody>")
134192
return "\n".join(body_parts)

bigframes/display/table_widget.css

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,15 +120,24 @@ body[data-theme='dark'] .bigframes-widget.bigframes-widget {
120120
margin: 0 8px;
121121
}
122122

123-
.bigframes-widget .page-size {
123+
.bigframes-widget .settings {
124124
align-items: center;
125125
display: flex;
126126
flex-direction: row;
127-
gap: 4px;
127+
gap: 16px;
128128
justify-content: end;
129129
}
130130

131-
.bigframes-widget .page-size label {
131+
.bigframes-widget .page-size,
132+
.bigframes-widget .max-columns {
133+
align-items: center;
134+
display: flex;
135+
flex-direction: row;
136+
gap: 4px;
137+
}
138+
139+
.bigframes-widget .page-size label,
140+
.bigframes-widget .max-columns label {
132141
margin-right: 8px;
133142
}
134143

0 commit comments

Comments
 (0)