Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9dd8beb
Support adaptor in prepare_pin_version
nathanjmcdougall Aug 13, 2024
040da5e
Use adaptor in save_data
nathanjmcdougall Aug 13, 2024
4ba393d
Use adaptor for default_title
nathanjmcdougall Aug 13, 2024
7898ce7
underscore prefix for _adaptors.py; abstracting df_type in default_title
nathanjmcdougall Aug 13, 2024
4a3ea01
Removing duplication in _obj_name definition
nathanjmcdougall Aug 13, 2024
007ad3a
Use adaptor in _create_meta
nathanjmcdougall Aug 13, 2024
d577b02
Pass pyright
nathanjmcdougall Aug 13, 2024
3aaabbb
Fix broken import
nathanjmcdougall Aug 13, 2024
56c3285
Refactoring type hints to avoid use of Self
nathanjmcdougall Aug 20, 2024
0171d72
Remove singleton Union
nathanjmcdougall Aug 20, 2024
fe6092f
Add databackend as a dependency
nathanjmcdougall Aug 27, 2024
1289134
Merge branch 'main' into feature/move-to-adaptor-backend
nathanjmcdougall Dec 17, 2024
1d5c47f
dev: add ruff to pyproject.toml
machow Apr 1, 2025
d0fa9c9
feat: allow save_data to accept an Adaptor
machow Apr 1, 2025
81f6779
Remove unnecessary underscores
nathanjmcdougall Apr 1, 2025
1540500
Remove misleading/unnecessary ClassVar declaration
nathanjmcdougall Apr 1, 2025
dd49569
Merge branch 'feature/move-to-adaptor-backend' of https://github.com/…
nathanjmcdougall Apr 1, 2025
daa4239
Separate write_json from to_json (CQS)
nathanjmcdougall Apr 1, 2025
f11141a
Move calls to create_adaptor to hide them at a lower level
nathanjmcdougall Apr 1, 2025
13d356e
Add some tests
nathanjmcdougall Apr 1, 2025
82ba58a
Merge branch 'rstudio:main' into feature/move-to-adaptor-backend
nathanjmcdougall Apr 30, 2025
18818f6
Use backported typing_extensions.TypeAlias for Python 3.9
nathanjmcdougall Jun 3, 2025
dc683dd
add typing_extensions
isabelizimm Jun 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions pins/_adaptors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
from __future__ import annotations

import json
from abc import abstractmethod
from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, overload

from databackend import AbstractBackend

if TYPE_CHECKING:
import pandas as pd

_PandasDataFrame: TypeAlias = pd.DataFrame
_DataFrame: TypeAlias = _PandasDataFrame


class _AbstractPandasFrame(AbstractBackend):
    """Stand-in for ``pandas.DataFrame`` usable in ``isinstance`` checks.

    ``databackend.AbstractBackend`` matches instances of the registered
    backend classes lazily, so pandas is never imported just to test whether
    an object is a DataFrame.
    """

    _backends = [("pandas", "DataFrame")]


_AbstractDF: TypeAlias = _AbstractPandasFrame


class _Adaptor:
_d: ClassVar[Any]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the use of ClassVar right here? It seems like _d is not a class variable (it's set on the instance).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah also agreed. I can't recall why I did that, it might have been some misguided thoughts about pyright behaviour.


def __init__(self, data: Any) -> None:
self._d = data

@overload
def write_json(self, file: str) -> None: ...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This behavior was modified to sometimes return a string, which seems to violate command-query separation (maybe to reflect the implementation in the pandas adaptor?). Can we revert so that the adaptor refactors, but does not extend the original behavior?

edit: see comment in _DFAdaptor.write_json()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally, good idea.

@overload
def write_json(self, file: None = ...) -> str: ...
def write_json(self, file: str | None = None) -> str | None:
if file is None:
msg = (
f"Writing to JSON string rather than file is not supported for "
f"{type(self._d)}"
)
raise NotImplementedError(msg)

import json

json.dump(self._d, open(file, mode="w"))

def write_joblib(self, file: str) -> None:
import joblib

joblib.dump(self._d, file)

def write_csv(self, file: str) -> None:
msg = f"Writing to CSV is not supported for {type(self._d)}"
raise NotImplementedError(msg)

def write_parquet(self, file: str) -> None:
msg = f"Writing to Parquet is not supported for {type(self._d)}"
raise NotImplementedError(msg)

def write_feather(self, file: str) -> None:
msg = f"Writing to Feather is not supported for {type(self._d)}"
raise NotImplementedError(msg)

@property
def data_preview(self) -> str:
# note that the R library uses jsonlite::toJSON
import json

# TODO(compat): set display none in index.html
return json.dumps({})

def default_title(self, name: str) -> str:
# TODO(compat): title says CSV rather than data.frame
# see https://github.com/machow/pins-python/issues/5
return f"{name}: a pinned {self._obj_name}"

@property
def _obj_name(self) -> str:
return f"{type(self._d).__qualname__} object"


class _DFAdaptor(_Adaptor):
    """Base adaptor for dataframe-like objects.

    Subclasses implement ``columns``, ``shape``, and ``head`` for a
    specific dataframe library; this class derives the preview and title
    text from those primitives.
    """

    # The wrapped dataframe. NOTE: not a ClassVar — it is assigned
    # per-instance in __init__ (the previous ClassVar annotation was
    # misleading).
    _d: _DataFrame

    def __init__(self, data: _DataFrame) -> None:
        super().__init__(data)

    @property
    def df_type(self) -> str:
        """Human-readable dataframe type name, used in default titles."""
        # Consider over-riding this for specialized dataframes
        return "DataFrame"

    @property
    @abstractmethod
    def columns(self) -> list[Any]:
        """Column labels of the wrapped dataframe, as a plain list."""
        ...

    @property
    @abstractmethod
    def shape(self) -> tuple[int, int]:
        """(n_rows, n_columns) of the wrapped dataframe."""
        ...

    @abstractmethod
    def head(self, n: int) -> _DFAdaptor:
        """Return a new adaptor wrapping the first ``n`` rows."""
        ...

    @property
    def data_preview(self) -> str:
        """JSON preview of the first rows, in the format index.html expects."""
        # TODO(compat) is 100 hard-coded?
        # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library
        data: list[dict[Any, Any]] = json.loads(self.head(100).write_json())
        columns = [
            {"name": [col], "label": [col], "align": ["left"], "type": [""]}
            for col in self.columns
        ]

        # this reproduces R pins behavior, by omitting entries that would be null
        data_no_nulls = [{k: v for k, v in row.items() if v is not None} for row in data]

        return json.dumps({"data": data_no_nulls, "columns": columns})

    @property
    def _obj_name(self) -> str:
        """E.g. ``"10 x 3 DataFrame"`` — used in default pin titles."""
        row, col = self.shape
        return f"{row} x {col} {self.df_type}"


class _PandasAdaptor(_DFAdaptor):
    """Adaptor for ``pandas.DataFrame`` objects."""

    # The wrapped dataframe. NOTE: not a ClassVar — it is assigned
    # per-instance in __init__ (the previous ClassVar annotation was
    # misleading).
    _d: _PandasDataFrame

    def __init__(self, data: _AbstractPandasFrame) -> None:
        super().__init__(data)

    @property
    def columns(self) -> list[Any]:
        """Column labels of the wrapped dataframe, as a plain list."""
        return self._d.columns.tolist()

    @property
    def shape(self) -> tuple[int, int]:
        """(n_rows, n_columns) of the wrapped dataframe."""
        return self._d.shape

    def head(self, n: int) -> _PandasAdaptor:
        """Return a new adaptor wrapping the first ``n`` rows."""
        return _PandasAdaptor(self._d.head(n))

    @overload
    def write_json(self, file: str) -> None: ...
    @overload
    def write_json(self, file: None = ...) -> str: ...
    def write_json(self, file: str | None = None) -> str | None:
        """Serialize the dataframe to a JSON string (``orient="records"``).

        Unlike the base class, only the string form (``file=None``) is
        supported here; writing directly to a file raises.

        The ``file: None`` overload carries ``= ...`` so the zero-argument
        call made by ``_DFAdaptor.data_preview`` type-checks.
        """
        if file is not None:
            msg = (
                f"Writing to file rather than JSON string is not supported for "
                f"{type(self._d)}"
            )
            raise NotImplementedError(msg)

        return self._d.to_json(orient="records")

    def write_csv(self, file: str) -> None:
        """Write the dataframe to ``file`` as CSV, without the index."""
        self._d.to_csv(file, index=False)

    def write_parquet(self, file: str) -> None:
        """Write the dataframe to ``file`` as Parquet."""
        self._d.to_parquet(file)

    def write_feather(self, file: str) -> None:
        """Write the dataframe to ``file`` as Feather."""
        self._d.to_feather(file)


@overload
def _create_adaptor(obj: _DataFrame) -> _DFAdaptor: ...
@overload
def _create_adaptor(obj: Any) -> _Adaptor: ...
def _create_adaptor(obj: Any | _DataFrame) -> _Adaptor | _DFAdaptor:
    """Wrap ``obj`` in the most specific adaptor available for its type."""
    if isinstance(obj, _AbstractPandasFrame):
        return _PandasAdaptor(obj)
    return _Adaptor(obj)
48 changes: 13 additions & 35 deletions pins/boards.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from importlib_resources import files
from importlib_resources.abc import Traversable

from ._adaptors import _Adaptor, _create_adaptor
from .cache import PinsCache
from .config import get_allow_rsc_short_name
from .drivers import REQUIRES_SINGLE_FILE, default_title, load_data, load_file, save_data
Expand All @@ -25,6 +26,8 @@

_log = logging.getLogger(__name__)

_ = default_title # Keep this import for backward compatibility


class IFileSystem(Protocol):
protocol: str | list
Expand Down Expand Up @@ -695,6 +698,8 @@ def prepare_pin_version(
created: datetime | None = None,
object_name: str | list[str] | None = None,
):
x = _create_adaptor(x)

meta = self._create_meta(
pin_dir_path,
x,
Expand All @@ -716,7 +721,7 @@ def prepare_pin_version(
def _create_meta(
self,
pin_dir_path,
x,
x: _Adaptor,
name: str | None = None,
type: str | None = None,
title: str | None = None,
Expand All @@ -733,7 +738,7 @@ def _create_meta(
raise NotImplementedError("Type argument is required.")

if title is None:
title = default_title(x, name)
title = x.default_title(name)

# create metadata from object on disk ---------------------------------
# save all pin data to a temporary folder (including data.txt), so we
Expand All @@ -749,7 +754,7 @@ def _create_meta(
else:
p_obj = str(Path(pin_dir_path) / object_name)
# file is saved locally in order to hash, calc size
file_names = save_data(x, p_obj, type, apply_suffix)
file_names = save_data(x._d, p_obj, type, apply_suffix)

meta = self.meta_factory.create(
pin_dir_path,
Expand Down Expand Up @@ -1199,14 +1204,16 @@ def user_name(self):
return self.fs.api.get_user()["username"]

def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs):
adaptor = _create_adaptor(x)

# RSC pin names can have form <user_name>/<name>, but this will try to
# create the object in a directory named <user_name>. So we grab just
# the <name> part.
short_name = name.split("/")[-1]

# TODO(compat): py pins always uses the short name, R pins uses w/e the
# user passed, but guessing people want the long name?
meta = super()._create_meta(pin_dir_path, x, short_name, *args, **kwargs)
meta = super()._create_meta(pin_dir_path, adaptor, short_name, *args, **kwargs)
meta.name = name

# copy in files needed by index.html ----------------------------------
Expand All @@ -1224,46 +1231,17 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs
# render index.html ------------------------------------------------

all_files = [meta.file] if isinstance(meta.file, str) else meta.file
pin_files = ", ".join(f"""<a href="{x}">{x}</a>""" for x in all_files)
pin_files = ", ".join(f"""<a href="{file}">{file}</a>""" for file in all_files)

context = {
"date": meta.version.created.replace(microsecond=0),
"pin_name": self.path_to_pin(name),
"pin_files": pin_files,
"pin_metadata": meta,
"board_deparse": board_deparse(self),
"data_preview": adaptor.data_preview,
}

# data preview ----

# TODO: move out data_preview logic? Can we draw some limits here?
# note that the R library uses jsonlite::toJSON

import json

import pandas as pd

if isinstance(x, pd.DataFrame):
# TODO(compat) is 100 hard-coded?
# Note that we go df -> json -> dict, to take advantage of pandas type conversions
data = json.loads(x.head(100).to_json(orient="records"))
columns = [
{"name": [col], "label": [col], "align": ["left"], "type": [""]}
for col in x
]

# this reproduces R pins behavior, by omitting entries that would be null
data_no_nulls = [
{k: v for k, v in row.items() if v is not None} for row in data
]

context["data_preview"] = json.dumps(
{"data": data_no_nulls, "columns": columns}
)
else:
# TODO(compat): set display none in index.html
context["data_preview"] = json.dumps({})

# do not show r code if not round-trip friendly
if meta.type in ["joblib"]:
context["show_r_style"] = "display:none"
Expand Down
59 changes: 15 additions & 44 deletions pins/drivers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from collections.abc import Sequence
from pathlib import Path
from typing import Any

from pins._adaptors import _create_adaptor

from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read
from .errors import PinsInsecureReadError
Expand All @@ -13,15 +16,6 @@
REQUIRES_SINGLE_FILE = frozenset(["csv", "joblib"])


def _assert_is_pandas_df(x, file_type: str) -> None:
import pandas as pd

if not isinstance(x, pd.DataFrame):
raise NotImplementedError(
f"Currently only pandas.DataFrame can be saved as type {file_type!r}."
)


def load_path(filename: str, path_to_version, pin_type=None):
# file path creation ------------------------------------------------------
if pin_type == "table":
Expand Down Expand Up @@ -135,6 +129,8 @@ def save_data(
# as argument to board, and then type dispatchers for explicit cases
# of saving / loading objects different ways.

adaptor = _create_adaptor(obj)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we allow obj to be either _Adaptor | Any (the typing is redundant, but maybe a helpful signal)? Then, if obj is not an adaptor, this line could create one.

I'm guessing keeping the original save_data behavior is useful for testing, but it'd be nice not to have to roundtrip by calling it on save_data(_Adaptor._d, ...) in this board method:

https://github.com/rstudio/pins-python/pull/298/files#diff-36792b1eedbe5453d2c6b58286ab65eed0c9286e94e2695a736edff37164e885R757

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see you've done that here:

d0fa9c9

I will:

  • Write a test case for this
  • Refactor to take advantage of this


if apply_suffix:
if pin_type == "file":
suffix = "".join(Path(obj).suffixes)
Expand All @@ -149,39 +145,22 @@ def save_data(
final_name = f"{fname}{suffix}"

if pin_type == "csv":
_assert_is_pandas_df(obj, file_type=type)

obj.to_csv(final_name, index=False)

adaptor.write_csv(final_name)
elif pin_type == "arrow":
# NOTE: R pins accepts the type arrow, and saves it as feather.
# we allow reading this type, but raise an error for writing.
_assert_is_pandas_df(obj, file_type=type)

obj.to_feather(final_name)

adaptor.write_feather(final_name)
elif pin_type == "feather":
_assert_is_pandas_df(obj, file_type=type)

raise NotImplementedError(
msg = (
'Saving data as type "feather" no longer supported. Use type "arrow" instead.'
)

raise NotImplementedError(msg)
elif pin_type == "parquet":
_assert_is_pandas_df(obj, file_type=type)

obj.to_parquet(final_name)

adaptor.write_parquet(final_name)
elif pin_type == "joblib":
import joblib

joblib.dump(obj, final_name)

adaptor.write_joblib(final_name)
elif pin_type == "json":
import json

json.dump(obj, open(final_name, "w"))

adaptor.write_json(final_name)
elif pin_type == "file":
import contextlib
import shutil
Expand All @@ -202,14 +181,6 @@ def save_data(
return final_name


def default_title(obj, name):
import pandas as pd

if isinstance(obj, pd.DataFrame):
# TODO(compat): title says CSV rather than data.frame
# see https://github.com/machow/pins-python/issues/5
shape_str = " x ".join(map(str, obj.shape))
return f"{name}: a pinned {shape_str} DataFrame"
else:
obj_name = type(obj).__qualname__
return f"{name}: a pinned {obj_name} object"
def default_title(obj: Any, name: str) -> str:
    """Return the default pin title for ``obj`` named ``name``.

    Kept for backward compatibility only; delegates to the object's adaptor.
    """
    adaptor = _create_adaptor(obj)
    return adaptor.default_title(name)
Loading
Loading