Skip to content

Commit d978c97

Browse files
committed
test: update testcase
1 parent 8f049b8 commit d978c97

File tree

4 files changed

+67
-24
lines changed

4 files changed

+67
-24
lines changed

bigframes/display/html.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,16 @@ def _try_parse_json_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
5050

5151
def _flatten_nested_data(
5252
dataframe: pd.DataFrame,
53-
) -> tuple[pd.DataFrame, dict[str, list[int]], list[str]]:
53+
) -> tuple[pd.DataFrame, dict[str, list[int]], list[str], set[str]]:
5454
"""Flatten nested STRUCT and ARRAY columns for display."""
5555
if dataframe.empty:
56-
return dataframe.copy(), {}, []
56+
return dataframe.copy(), {}, [], set()
5757

5858
result_df = _try_parse_json_strings(dataframe)
5959
initial_columns = list(result_df.columns)
6060

6161
array_row_groups: dict[str, list[int]] = {}
62+
nested_originated_columns: set[str] = set()
6263

6364
# First, identify all STRUCT and ARRAY columns
6465
struct_columns: list[str] = []
@@ -73,8 +74,10 @@ def _flatten_nested_data(
7374
pa_type = dtype.pyarrow_dtype
7475
if pa.types.is_struct(pa_type):
7576
struct_columns.append(col_name)
77+
nested_originated_columns.add(col_name)
7678
elif pa.types.is_list(pa_type):
7779
array_columns.append(col_name)
80+
nested_originated_columns.add(col_name)
7881
if hasattr(pa_type, "value_type") and (
7982
pa.types.is_struct(pa_type.value_type)
8083
):
@@ -85,25 +88,36 @@ def _flatten_nested_data(
8588
clear_on_continuation_cols.append(col_name)
8689

8790
result_df, array_columns = _flatten_array_of_struct_columns(
88-
result_df, array_of_struct_columns, array_columns
91+
result_df, array_of_struct_columns, array_columns, nested_originated_columns
8992
)
9093

9194
result_df, clear_on_continuation_cols = _flatten_struct_columns(
92-
result_df, struct_columns, clear_on_continuation_cols
95+
result_df, struct_columns, clear_on_continuation_cols, nested_originated_columns
9396
)
9497

9598
# Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
9699
if not array_columns:
97-
return result_df, array_row_groups, clear_on_continuation_cols
100+
return (
101+
result_df,
102+
array_row_groups,
103+
clear_on_continuation_cols,
104+
nested_originated_columns,
105+
)
98106

99107
result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
100-
return result_df, array_row_groups, clear_on_continuation_cols
108+
return (
109+
result_df,
110+
array_row_groups,
111+
clear_on_continuation_cols,
112+
nested_originated_columns,
113+
)
101114

102115

103116
def _flatten_array_of_struct_columns(
104117
dataframe: pd.DataFrame,
105118
array_of_struct_columns: list[str],
106119
array_columns: list[str],
120+
nested_originated_columns: set[str],
107121
) -> tuple[pd.DataFrame, list[str]]:
108122
"""Flatten ARRAY of STRUCT columns into separate array columns for each field."""
109123
result_df = dataframe.copy()
@@ -116,6 +130,7 @@ def _flatten_array_of_struct_columns(
116130
for field_idx in range(struct_type.num_fields):
117131
field = struct_type.field(field_idx)
118132
new_col_name = f"{col_name}.{field.name}"
133+
nested_originated_columns.add(new_col_name)
119134

120135
# Extract field values from each array element
121136
struct_field_values: list[list[Any]] = []
@@ -224,6 +239,7 @@ def _flatten_struct_columns(
224239
dataframe: pd.DataFrame,
225240
struct_columns: list[str],
226241
clear_on_continuation_cols: list[str],
242+
nested_originated_columns: set[str],
227243
) -> tuple[pd.DataFrame, list[str]]:
228244
"""Flatten regular STRUCT columns."""
229245
result_df = dataframe.copy()
@@ -235,6 +251,7 @@ def _flatten_struct_columns(
235251
for field_idx in range(pa_type.num_fields):
236252
field = pa_type.field(field_idx)
237253
new_col_name = f"{col_name}.{field.name}"
254+
nested_originated_columns.add(new_col_name)
238255
clear_on_continuation_cols.append(new_col_name)
239256

240257
regular_field_values: list[Any] = []
@@ -252,6 +269,11 @@ def _flatten_struct_columns(
252269

253270
def _is_dtype_numeric(dtype: Any) -> bool:
254271
"""Check if a dtype is numeric for alignment purposes."""
272+
# Arrays should always be left-aligned, even if they contain numeric elements
273+
if isinstance(dtype, pd.ArrowDtype) and isinstance(
274+
dtype.pyarrow_dtype, pa.ListType
275+
):
276+
return False
255277
return pandas.api.types.is_numeric_dtype(dtype)
256278

257279

@@ -266,6 +288,7 @@ def render_html(
266288
flattened_df,
267289
array_row_groups,
268290
clear_on_continuation,
291+
nested_originated_columns,
269292
) = _flatten_nested_data(dataframe)
270293

271294
classes = "dataframe table table-striped table-hover"
@@ -312,7 +335,12 @@ def render_html(
312335
table_html_parts.append(" <td></td>\n")
313336
continue
314337
dtype = flattened_df.dtypes.loc[col_name] # type: ignore
315-
align = "right" if _is_dtype_numeric(dtype) else "left"
338+
339+
# Columns that originated from nested data (STRUCT or ARRAY) are always left-aligned
340+
if col_name_str in nested_originated_columns:
341+
align = "left"
342+
else:
343+
align = "right" if _is_dtype_numeric(dtype) else "left"
316344

317345
cell_content = ""
318346
if pandas.api.types.is_scalar(value) and pd.isna(value):

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@
299299
"name": "python",
300300
"nbconvert_exporter": "python",
301301
"pygments_lexer": "ipython3",
302-
"version": "3.13.0"
302+
"version": "3.10.15"
303303
}
304304
},
305305
"nbformat": 4,

tests/system/small/test_anywidget.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,10 +1274,10 @@ def test_render_html_with_nested_data(nested_data_df: pd.DataFrame):
12741274
first_row = """ <tr>
12751275
<td class="cell-align-right">1</td>
12761276
<td class="cell-align-left">Alice</td>
1277-
<td class="cell-align-right">30</td>
1278-
<td class="cell-align-right">10</td>
1277+
<td class="cell-align-left">30</td>
1278+
<td class="cell-align-left">10</td>
12791279
<td class="cell-align-left">A</td>
1280-
<td class="cell-align-right">100</td>
1280+
<td class="cell-align-left">100</td>
12811281
</tr>"""
12821282
assert first_row in result_html
12831283

@@ -1288,17 +1288,17 @@ def test_render_html_with_nested_data(nested_data_df: pd.DataFrame):
12881288
<td></td>
12891289
<td></td>
12901290
<td></td>
1291-
<td class="cell-align-right">20</td>
1291+
<td class="cell-align-left">20</td>
12921292
<td class="cell-align-left">B</td>
1293-
<td class="cell-align-right">200</td>
1293+
<td class="cell-align-left">200</td>
12941294
</tr>"""
12951295
assert continuation_row_for_id1_line2 in result_html
12961296

12971297
continuation_row_for_id1_line3 = """ <tr class="array-continuation" data-orig-row="0">
12981298
<td></td>
12991299
<td></td>
13001300
<td></td>
1301-
<td class="cell-align-right">30</td>
1301+
<td class="cell-align-left">30</td>
13021302
<td class="cell-align-left"></td>
13031303
<td class="cell-align-left"></td>
13041304
</tr>"""
@@ -1309,12 +1309,23 @@ def test_render_html_with_nested_data(nested_data_df: pd.DataFrame):
13091309
<td></td>
13101310
<td></td>
13111311
<td></td>
1312-
<td class="cell-align-right">50</td>
1312+
<td class="cell-align-left">50</td>
13131313
<td class="cell-align-left"></td>
13141314
<td class="cell-align-left"></td>
13151315
</tr>"""
13161316
assert continuation_row_for_id2_line2 in result_html
13171317

1318+
# Check that there is NOT an extra padded row for id=2
1319+
extra_row_for_id2 = """ <tr class="array-continuation" data-orig-row="1">
1320+
<td></td>
1321+
<td></td>
1322+
<td></td>
1323+
<td class="cell-align-left"></td>
1324+
<td class="cell-align-left"></td>
1325+
<td class="cell-align-left"></td>
1326+
</tr>"""
1327+
assert extra_row_for_id2 not in result_html
1328+
13181329

13191330
def test_render_html_with_arrays_of_different_lengths(
13201331
different_lengths_arrays_df: pd.DataFrame,
@@ -1333,23 +1344,23 @@ def test_render_html_with_arrays_of_different_lengths(
13331344
# The first row should contain the first element of both arrays
13341345
first_row = """ <tr>
13351346
<td class="cell-align-right">1</td>
1336-
<td class="cell-align-right">10</td>
1337-
<td class="cell-align-right">100</td>
1347+
<td class="cell-align-left">10</td>
1348+
<td class="cell-align-left">100</td>
13381349
</tr>"""
13391350
assert first_row in result_html
13401351

13411352
# The second row should contain the second element of both arrays
13421353
second_row = """ <tr class="array-continuation" data-orig-row="0">
13431354
<td></td>
1344-
<td class="cell-align-right">20</td>
1345-
<td class="cell-align-right">200</td>
1355+
<td class="cell-align-left">20</td>
1356+
<td class="cell-align-left">200</td>
13461357
</tr>"""
13471358
assert second_row in result_html
13481359

13491360
# The third row should contain the third element of the first array, and an empty cell for the second
13501361
third_row = """ <tr class="array-continuation" data-orig-row="0">
13511362
<td></td>
1352-
<td class="cell-align-right">30</td>
1363+
<td class="cell-align-left">30</td>
13531364
<td class="cell-align-left"></td>
13541365
</tr>"""
13551366
assert third_row in result_html

tests/unit/display/test_html.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,13 @@ def test_flatten_nested_data():
165165
}
166166
)
167167

168-
flattened, _, _ = _flatten_nested_data(struct_data)
168+
flattened, _, _, nested_originated_columns = _flatten_nested_data(struct_data)
169169
assert "struct_col.name" in flattened.columns
170170
assert "struct_col.age" in flattened.columns
171171
assert flattened["struct_col.name"].tolist() == ["Alice", "Bob"]
172+
assert "struct_col" in nested_originated_columns
173+
assert "struct_col.name" in nested_originated_columns
174+
assert "struct_col.age" in nested_originated_columns
172175

173176

174177
def test_array_explode():
@@ -182,9 +185,10 @@ def test_array_explode():
182185
}
183186
)
184187

185-
flattened, groups, _ = _flatten_nested_data(array_data)
186-
assert len(flattened) == 6 # 3 + 2 array elements, padded to 3*2
188+
flattened, groups, _, nested_originated_columns = _flatten_nested_data(array_data)
189+
assert len(flattened) == 5 # 3 + 2 array elements
187190
assert "0" in groups # First original row
188191
assert len(groups["0"]) == 3 # Three array elements
189192
assert "1" in groups
190-
assert len(groups["1"]) == 3
193+
assert len(groups["1"]) == 2
194+
assert "array_col" in nested_originated_columns

0 commit comments

Comments
 (0)