lincc-frameworks · delucchi-cmu · Jan 8, 2026
diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py
@@ -378,7 +378,7 @@ def get_subcolumns(self, nested_columns="all") -> list[str]:
         return subcols
 
     @deprecated(
-        version="0.6.0", reason="`add_nested` will be removed in version 0.7.0, " "use `join_nested` instead."
+        version="0.6.0", reason="`add_nested` will be removed in version 0.7.0, use `join_nested` instead."
     )
     def add_nested(
         self,
@@ -1828,9 +1828,7 @@ def sort_values(
                 return None
             return new_df
 
-    @deprecated(
-        version="0.6.0", reason="`reduce` will be removed in version 0.7.0, " "use `map_rows` instead."
-    )
+    @deprecated(version="0.6.0", reason="`reduce` will be removed in version 0.7.0, use `map_rows` instead.")
     def reduce(self, func, *args, infer_nesting=True, append_columns=False, **kwargs) -> NestedFrame:  # type: ignore[override]
         """
         Takes a function and applies it to each top-level row of the NestedFrame.

diff --git a/src/nested_pandas/nestedframe/io.py b/src/nested_pandas/nestedframe/io.py
@@ -271,29 +271,58 @@ def _read_table_with_partial_load_check(data, columns=None, filesystem=None, **k
 
 
 def _validate_structs_from_schema(data, columns=None, filesystem=None):
-    """Validate that nested columns are structs"""
-    if columns is not None:
-        schema = pq.read_schema(data, filesystem=filesystem)
-        for col in columns:
-            # check if column is a partial load of a nested structure
-            if "." in col:
-                # first check if column exists as a top-level column
-                if col in schema.names:
-                    continue
-                # if not, inspect the base column name type
-                else:
-                    if col.split(".")[0] in schema.names:
-                        # check if the column is a list-struct
-                        col_type = schema.field(col.split(".")[0]).type
-                        if not pa.types.is_struct(col_type):
-                            base_col = col.split(".")[0]
-                            raise ValueError(
-                                f"The provided column '{col}' signals to partially load a nested structure, "
-                                f"but the nested structure '{base_col}' is not a struct. "
-                                "Partial loading of nested structures is only supported for struct of list "
-                                f"columns. To resolve this, fully load the column '{base_col}' "
-                                f"instead of partially loading it and perform column selection afterwards."
-                            )
+    """Validate that columns specified for partial loading are valid struct types.
+
+    This function validates that when attempting to partially load nested columns
+    from a Parquet file (e.g., loading "nested.a" instead of the full "nested"
+    column), the base column being partially loaded is actually a struct type.
+    Partial loading of nested structures is only supported for struct of list
+    columns, not for list of struct columns.
+
+    Parameters
+    ----------
+    data : str, Path, UPath, or file-like object
+        Path to the parquet file or file-like object to inspect.
+    columns : list of str, optional
+        List of column names to validate. If None, no validation is performed.
+        Only names containing "." that are not themselves top-level columns are
+        checked; if the part before the first "." is in the schema it must be a struct.
+    filesystem : pyarrow.fs.FileSystem, optional
+        PyArrow filesystem object to use when reading the schema. If None,
+        the default filesystem for the given path is used.
+
+    Raises
+    ------
+    ValueError
+        If a column in the partial load format (e.g., "nested.a") is specified
+        but the base column ("nested") is not a struct type. This indicates
+        the data structure doesn't support partial loading as nested-pandas
+        requires struct of list columns, not list of struct columns.
+    """
+    if columns is None:
+        return
+    schema = pq.read_schema(data, filesystem=filesystem)
+    for col in columns:
+        # names without a "." cannot request a partial load; nothing to check
+        if "." not in col:
+            continue
+        # a dotted name that exists as a top-level column is accepted as-is
+        if col in schema.names:
+            continue
+        # otherwise the text before the first "." names the base column
+        base_col = col.split(".")[0]
+        if base_col not in schema.names:
+            continue  # base column not in the schema: nothing to validate here
+        # partial loading requires the base column to be a struct; reject other types
+        col_type = schema.field(base_col).type
+        if not pa.types.is_struct(col_type):
+            raise ValueError(
+                f"The provided column '{col}' signals to partially load a nested structure, "
+                f"but the nested structure '{base_col}' is not a struct. "
+                "Partial loading of nested structures is only supported for struct of list "
+                f"columns. To resolve this, fully load the column '{base_col}' "
+                f"instead of partially loading it and perform column selection afterwards."
+            )
 
 
 def _is_local_dir(upath: UPath, is_dir: bool | None) -> bool:

diff --git a/src/nested_pandas/series/accessor.py b/src/nested_pandas/series/accessor.py
@@ -150,9 +150,7 @@ def flat_length(self) -> int:
         return self._series.array.flat_length
 
     @property
-    @deprecated(
-        version="0.6.0", reason="`fields` will be removed in version 0.7.0, " "use `columns` instead."
-    )
+    @deprecated(version="0.6.0", reason="`fields` will be removed in version 0.7.0, use `columns` instead.")
     def fields(self) -> list[str]:
         """Names of the nested columns"""
         return self.columns
@@ -171,7 +169,7 @@ def flat_index(self) -> pd.Index:
         return flat_index
 
     @deprecated(
-        version="0.6.0", reason="`with_field` will be removed in version 0.7.0, " "use `set_column` instead."
+        version="0.6.0", reason="`with_field` will be removed in version 0.7.0, use `set_column` instead."
     )
     def with_field(self, field: str, value: ArrayLike) -> NestedSeries:
         """Set the field from flat-array of values and return a new series
@@ -241,7 +239,7 @@ def set_column(self, column: str, value: ArrayLike) -> NestedSeries:
 
     @deprecated(
         version="0.6.0",
-        reason="`with_flat_field` will be removed in version 0.7.0, " "use `set_flat_column` instead.",
+        reason="`with_flat_field` will be removed in version 0.7.0, use `set_flat_column` instead.",
     )
     def with_flat_field(self, field: str, value: ArrayLike) -> NestedSeries:
         """Set the field from flat-array of values and return a new series
@@ -311,7 +309,7 @@ def set_flat_column(self, column: str, value: ArrayLike) -> NestedSeries:
 
     @deprecated(
         version="0.6.0",
-        reason="`with_list_field` will be removed in version 0.7.0, " "use `set_list_column` instead.",
+        reason="`with_list_field` will be removed in version 0.7.0, use `set_list_column` instead.",
     )
     def with_list_field(self, field: str, value: ArrayLike) -> NestedSeries:
         """Set the field from list-array of values and return a new series
@@ -385,7 +383,7 @@ def set_list_column(self, column: str, value: ArrayLike) -> NestedSeries:
 
     @deprecated(
         version="0.6.0",
-        reason="`with_filled_field` will be removed in version 0.7.0, " "use `set_filled_column` instead.",
+        reason="`with_filled_field` will be removed in version 0.7.0, use `set_filled_column` instead.",
     )
     def with_filled_field(self, field: str, value: ArrayLike) -> NestedSeries:
         """Set the field by repeating values and return a new series
@@ -464,7 +462,7 @@ def set_filled_column(self, column: str, value: ArrayLike) -> NestedSeries:
         return NestedSeries(new_array, copy=False, index=self._series.index, name=self._series.name)
 
     @deprecated(
-        version="0.6.0", reason="`without_field` will be removed in version 0.7.0, " "use `drop` instead."
+        version="0.6.0", reason="`without_field` will be removed in version 0.7.0, use `drop` instead."
     )
     def without_field(self, field: str | list[str]) -> NestedSeries:
         """Remove the field(s) from the series and return a new series
@@ -533,9 +531,7 @@ def drop(self, column: str | list[str]) -> NestedSeries:
         new_array.pop_fields(column)
         return NestedSeries(new_array, copy=False, index=self._series.index, name=self._series.name)
 
-    @deprecated(
-        version="0.6.0", reason="`query_flat` will be removed in version 0.7.0, " "use `query` instead."
-    )
+    @deprecated(version="0.6.0", reason="`query_flat` will be removed in version 0.7.0, use `query` instead.")
     def query_flat(self, query: str) -> NestedSeries:
         """Query the flat arrays with a boolean expression
 
@@ -610,7 +606,7 @@ def query(self, query: str) -> NestedSeries:
 
     @deprecated(
         version="0.6.0",
-        reason="`get_flat_index` will be removed in version 0.7.0, " "use the `flat_index` property instead.",
+        reason="`get_flat_index` will be removed in version 0.7.0, use the `flat_index` property instead.",
     )
     def get_flat_index(self) -> pd.Index:
         """Index of the flat arrays
@@ -635,7 +631,7 @@ def get_flat_index(self) -> pd.Index:
 
     @deprecated(
         version="0.6.0",
-        reason="`get_flat_series` will be removed in version 0.7.0, " "use `to_flat()[column]` instead.",
+        reason="`get_flat_series` will be removed in version 0.7.0, use `to_flat()[column]` instead.",
     )
     def get_flat_series(self, field: str) -> pd.Series:
         """Get the flat-array field as a pd.Series
@@ -692,7 +688,7 @@ def get_flat_series(self, field: str) -> pd.Series:
 
     @deprecated(
         version="0.6.0",
-        reason="`get_list_series` will be removed in version 0.7.0, " "use `to_lists()[column]` instead.",
+        reason="`get_list_series` will be removed in version 0.7.0, use `to_lists()[column]` instead.",
     )
     def get_list_series(self, field: str) -> pd.Series:
         """Get the list-array field as a Series

diff --git a/src/nested_pandas/series/dtype.py b/src/nested_pandas/series/dtype.py
@@ -216,7 +216,7 @@ def _struct_list_pa_dtype(self) -> pa.StructType:
     @classmethod
     @deprecated(
         version="0.6.0",
-        reason="`from_fields` will be removed in version 0.7.0, " "use `from_columns` instead.",
+        reason="`from_fields` will be removed in version 0.7.0, use `from_columns` instead.",
     )
     def from_fields(cls, fields: Mapping[str, pa.DataType]) -> Self:  # type: ignore[name-defined] # noqa: F821
         """Make NestedDtype from a mapping of field names and list item types.
@@ -304,7 +304,7 @@ def _validate_dtype(pyarrow_dtype: pa.DataType) -> tuple[pa.StructType, pa.ListT
 
     @property
     @deprecated(
-        version="0.6.0", reason="`fields` will be removed in version 0.7.0, " "use `column_dtypes` instead."
+        version="0.6.0", reason="`fields` will be removed in version 0.7.0, use `column_dtypes` instead."
     )
     def fields(self) -> dict[str, pa.DataType]:
         """The mapping of field names and their item types."""
@@ -363,7 +363,7 @@ def to_pandas_arrow_dtype(self, list_struct: bool = False) -> ArrowDtype:
 
     @deprecated(
         version="0.6.0",
-        reason="`field_dtype` will be removed in version 0.7.0, " "use `_struct_list_pa_dtype` instead.",
+        reason="`field_dtype` will be removed in version 0.7.0, use `_struct_list_pa_dtype` instead.",
     )
     def field_dtype(self, field: str) -> pd.ArrowDtype | Self:  # type: ignore[name-defined] # noqa: F821
         """Pandas dtype of a field, pd.ArrowDType or NestedDtype.
@@ -404,7 +404,7 @@ def column_dtype(self, column: str) -> pd.ArrowDtype | Self:  # type: ignore[nam
     @property
     @deprecated(
         version="0.6.0",
-        reason="`field_dtypes` will be removed in version 0.7.0, " "use `_struct_list_pa_dtype` instead.",
+        reason="`field_dtypes` will be removed in version 0.7.0, use `_struct_list_pa_dtype` instead.",
     )
     def field_dtypes(self) -> dict[str, pd.ArrowDtype | Self]:  # type: ignore[name-defined] # noqa: F821
         """Pandas dtypes of this dtype's fields."""

diff --git a/src/nested_pandas/series/nestedseries.py b/src/nested_pandas/series/nestedseries.py
@@ -96,9 +96,7 @@ def __setitem__(self, key, value):
         return super().__setitem__(key, value)
 
     @nested_only
-    @deprecated(
-        version="0.6.0", reason="`to_flat` will be removed in version 0.7.0, " "use `explode` instead."
-    )
+    @deprecated(version="0.6.0", reason="`to_flat` will be removed in version 0.7.0, use `explode` instead.")
     def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame:
         """Convert nested series into dataframe of flat arrays.