test: update testcase

shuoweil · shuoweil · commit d978c9730399 · 2025-12-17T23:47:56.000Z
diff --git a/bigframes/display/html.py b/bigframes/display/html.py
@@ -50,15 +50,16 @@ def _try_parse_json_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
 
 def _flatten_nested_data(
     dataframe: pd.DataFrame,
-) -> tuple[pd.DataFrame, dict[str, list[int]], list[str]]:
+) -> tuple[pd.DataFrame, dict[str, list[int]], list[str], set[str]]:
     """Flatten nested STRUCT and ARRAY columns for display."""
     if dataframe.empty:
-        return dataframe.copy(), {}, []
+        return dataframe.copy(), {}, [], set()
 
     result_df = _try_parse_json_strings(dataframe)
     initial_columns = list(result_df.columns)
 
     array_row_groups: dict[str, list[int]] = {}
+    nested_originated_columns: set[str] = set()
 
     # First, identify all STRUCT and ARRAY columns
     struct_columns: list[str] = []
@@ -73,8 +74,10 @@ def _flatten_nested_data(
             pa_type = dtype.pyarrow_dtype
             if pa.types.is_struct(pa_type):
                 struct_columns.append(col_name)
+                nested_originated_columns.add(col_name)
             elif pa.types.is_list(pa_type):
                 array_columns.append(col_name)
+                nested_originated_columns.add(col_name)
                 if hasattr(pa_type, "value_type") and (
                     pa.types.is_struct(pa_type.value_type)
                 ):
@@ -85,25 +88,36 @@ def _flatten_nested_data(
             clear_on_continuation_cols.append(col_name)
 
     result_df, array_columns = _flatten_array_of_struct_columns(
-        result_df, array_of_struct_columns, array_columns
+        result_df, array_of_struct_columns, array_columns, nested_originated_columns
     )
 
     result_df, clear_on_continuation_cols = _flatten_struct_columns(
-        result_df, struct_columns, clear_on_continuation_cols
+        result_df, struct_columns, clear_on_continuation_cols, nested_originated_columns
     )
 
     # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
     if not array_columns:
-        return result_df, array_row_groups, clear_on_continuation_cols
+        return (
+            result_df,
+            array_row_groups,
+            clear_on_continuation_cols,
+            nested_originated_columns,
+        )
 
     result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
-    return result_df, array_row_groups, clear_on_continuation_cols
+    return (
+        result_df,
+        array_row_groups,
+        clear_on_continuation_cols,
+        nested_originated_columns,
+    )
 
 
 def _flatten_array_of_struct_columns(
     dataframe: pd.DataFrame,
     array_of_struct_columns: list[str],
     array_columns: list[str],
+    nested_originated_columns: set[str],
 ) -> tuple[pd.DataFrame, list[str]]:
     """Flatten ARRAY of STRUCT columns into separate array columns for each field."""
     result_df = dataframe.copy()
@@ -116,6 +130,7 @@ def _flatten_array_of_struct_columns(
         for field_idx in range(struct_type.num_fields):
             field = struct_type.field(field_idx)
             new_col_name = f"{col_name}.{field.name}"
+            nested_originated_columns.add(new_col_name)
 
             # Extract field values from each array element
             struct_field_values: list[list[Any]] = []
@@ -224,6 +239,7 @@ def _flatten_struct_columns(
     dataframe: pd.DataFrame,
     struct_columns: list[str],
     clear_on_continuation_cols: list[str],
+    nested_originated_columns: set[str],
 ) -> tuple[pd.DataFrame, list[str]]:
     """Flatten regular STRUCT columns."""
     result_df = dataframe.copy()
@@ -235,6 +251,7 @@ def _flatten_struct_columns(
         for field_idx in range(pa_type.num_fields):
             field = pa_type.field(field_idx)
             new_col_name = f"{col_name}.{field.name}"
+            nested_originated_columns.add(new_col_name)
             clear_on_continuation_cols.append(new_col_name)
 
             regular_field_values: list[Any] = []
@@ -252,6 +269,11 @@ def _flatten_struct_columns(
 
 def _is_dtype_numeric(dtype: Any) -> bool:
     """Check if a dtype is numeric for alignment purposes."""
+    # Arrays should always be left-aligned, even if they contain numeric elements
+    if isinstance(dtype, pd.ArrowDtype) and isinstance(
+        dtype.pyarrow_dtype, pa.ListType
+    ):
+        return False
     return pandas.api.types.is_numeric_dtype(dtype)
 
 
@@ -266,6 +288,7 @@ def render_html(
         flattened_df,
         array_row_groups,
         clear_on_continuation,
+        nested_originated_columns,
     ) = _flatten_nested_data(dataframe)
 
     classes = "dataframe table table-striped table-hover"
@@ -312,7 +335,12 @@ def render_html(
                 table_html_parts.append("      <td></td>\n")
                 continue
             dtype = flattened_df.dtypes.loc[col_name]  # type: ignore
-            align = "right" if _is_dtype_numeric(dtype) else "left"
+
+            # Check if column originated from nested (STRUCT or ARRAY) data
+            if col_name_str in nested_originated_columns:
+                align = "left"
+            else:
+                align = "right" if _is_dtype_numeric(dtype) else "left"
 
             cell_content = ""
             if pandas.api.types.is_scalar(value) and pd.isna(value):
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
@@ -299,7 +299,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.0"
+   "version": "3.10.15"
   }
  },
  "nbformat": 4,
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
@@ -1274,10 +1274,10 @@ def test_render_html_with_nested_data(nested_data_df: pd.DataFrame):
     first_row = """    <tr>
       <td class="cell-align-right">1</td>
       <td class="cell-align-left">Alice</td>
-      <td class="cell-align-right">30</td>
-      <td class="cell-align-right">10</td>
+      <td class="cell-align-left">30</td>
+      <td class="cell-align-left">10</td>
       <td class="cell-align-left">A</td>
-      <td class="cell-align-right">100</td>
+      <td class="cell-align-left">100</td>
     </tr>"""
     assert first_row in result_html
 
@@ -1288,17 +1288,17 @@ def test_render_html_with_nested_data(nested_data_df: pd.DataFrame):
       <td></td>
       <td></td>
       <td></td>
-      <td class="cell-align-right">20</td>
+      <td class="cell-align-left">20</td>
       <td class="cell-align-left">B</td>
-      <td class="cell-align-right">200</td>
+      <td class="cell-align-left">200</td>
     </tr>"""
     assert continuation_row_for_id1_line2 in result_html
 
     continuation_row_for_id1_line3 = """    <tr class="array-continuation" data-orig-row="0">
       <td></td>
       <td></td>
       <td></td>
-      <td class="cell-align-right">30</td>
+      <td class="cell-align-left">30</td>
       <td class="cell-align-left"></td>
       <td class="cell-align-left"></td>
     </tr>"""
@@ -1309,12 +1309,23 @@ def test_render_html_with_nested_data(nested_data_df: pd.DataFrame):
       <td></td>
       <td></td>
       <td></td>
-      <td class="cell-align-right">50</td>
+      <td class="cell-align-left">50</td>
       <td class="cell-align-left"></td>
       <td class="cell-align-left"></td>
     </tr>"""
     assert continuation_row_for_id2_line2 in result_html
 
+    # Check that there is NOT an extra padded row for id=2
+    extra_row_for_id2 = """    <tr class="array-continuation" data-orig-row="1">
+      <td></td>
+      <td></td>
+      <td></td>
+      <td class="cell-align-left"></td>
+      <td class="cell-align-left"></td>
+      <td class="cell-align-left"></td>
+    </tr>"""
+    assert extra_row_for_id2 not in result_html
+
 
 def test_render_html_with_arrays_of_different_lengths(
     different_lengths_arrays_df: pd.DataFrame,
@@ -1333,23 +1344,23 @@ def test_render_html_with_arrays_of_different_lengths(
     # The first row should contain the first element of both arrays
     first_row = """    <tr>
       <td class="cell-align-right">1</td>
-      <td class="cell-align-right">10</td>
-      <td class="cell-align-right">100</td>
+      <td class="cell-align-left">10</td>
+      <td class="cell-align-left">100</td>
     </tr>"""
     assert first_row in result_html
 
     # The second row should contain the second element of both arrays
     second_row = """    <tr class="array-continuation" data-orig-row="0">
       <td></td>
-      <td class="cell-align-right">20</td>
-      <td class="cell-align-right">200</td>
+      <td class="cell-align-left">20</td>
+      <td class="cell-align-left">200</td>
     </tr>"""
     assert second_row in result_html
 
     # The third row should contain the third element of the first array, and an empty cell for the second
     third_row = """    <tr class="array-continuation" data-orig-row="0">
       <td></td>
-      <td class="cell-align-right">30</td>
+      <td class="cell-align-left">30</td>
       <td class="cell-align-left"></td>
     </tr>"""
     assert third_row in result_html
diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py
@@ -165,10 +165,13 @@ def test_flatten_nested_data():
         }
     )
 
-    flattened, _, _ = _flatten_nested_data(struct_data)
+    flattened, _, _, nested_originated_columns = _flatten_nested_data(struct_data)
     assert "struct_col.name" in flattened.columns
     assert "struct_col.age" in flattened.columns
     assert flattened["struct_col.name"].tolist() == ["Alice", "Bob"]
+    assert "struct_col" in nested_originated_columns
+    assert "struct_col.name" in nested_originated_columns
+    assert "struct_col.age" in nested_originated_columns
 
 
 def test_array_explode():
@@ -182,9 +185,10 @@ def test_array_explode():
         }
     )
 
-    flattened, groups, _ = _flatten_nested_data(array_data)
-    assert len(flattened) == 6  # 3 + 2 array elements, padded to 3*2
+    flattened, groups, _, nested_originated_columns = _flatten_nested_data(array_data)
+    assert len(flattened) == 5  # 3 + 2 array elements
     assert "0" in groups  # First original row
     assert len(groups["0"]) == 3  # Three array elements
     assert "1" in groups
-    assert len(groups["1"]) == 3
+    assert len(groups["1"]) == 2
+    assert "array_col" in nested_originated_columns

Original file line number	Diff line number	Diff line change
`@@ -299,7 +299,7 @@`
`299`	`299`	`"name": "python",`
`300`	`300`	`"nbconvert_exporter": "python",`
`301`	`301`	`"pygments_lexer": "ipython3",`
`302`		`- "version": "3.13.0"`
	`302`	`+ "version": "3.10.15"`
`303`	`303`	`}`
`304`	`304`	`},`
`305`	`305`	`"nbformat": 4,`
Original file line number	Diff line number	Diff line change
`@@ -165,10 +165,13 @@ def test_flatten_nested_data():`
`165`	`165`	`}`
`166`	`166`	`)`
`167`	`167`
`168`		`- flattened, _, _ = _flatten_nested_data(struct_data)`
	`168`	`+ flattened, _, _, nested_originated_columns = _flatten_nested_data(struct_data)`
`169`	`169`	`assert "struct_col.name" in flattened.columns`
`170`	`170`	`assert "struct_col.age" in flattened.columns`
`171`	`171`	`assert flattened["struct_col.name"].tolist() == ["Alice", "Bob"]`
	`172`	`+ assert "struct_col" in nested_originated_columns`
	`173`	`+ assert "struct_col.name" in nested_originated_columns`
	`174`	`+ assert "struct_col.age" in nested_originated_columns`
`172`	`175`
`173`	`176`
`174`	`177`	`def test_array_explode():`
`@@ -182,9 +185,10 @@ def test_array_explode():`
`182`	`185`	`}`
`183`	`186`	`)`
`184`	`187`
`185`		`- flattened, groups, _ = _flatten_nested_data(array_data)`
`186`		`- assert len(flattened) == 6 # 3 + 2 array elements, padded to 3*2`
	`188`	`+ flattened, groups, _, nested_originated_columns = _flatten_nested_data(array_data)`
	`189`	`+ assert len(flattened) == 5 # 3 + 2 array elements`
`187`	`190`	`assert "0" in groups # First original row`
`188`	`191`	`assert len(groups["0"]) == 3 # Three array elements`
`189`	`192`	`assert "1" in groups`
`190`		`- assert len(groups["1"]) == 3`
	`193`	`+ assert len(groups["1"]) == 2`
	`194`	`+ assert "array_col" in nested_originated_columns`