From 9dd8beb3c5328728165c6fabe1a9c5519473eb63 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 20:31:19 +1200 Subject: [PATCH 01/20] Support adaptor in prepare_pin_version --- pins/adaptors.py | 74 +++++++++++++++++++++++++++++++++++++ pins/boards.py | 26 ++++++++----- pins/tests/test_adaptors.py | 46 +++++++++++++++++++++++ 3 files changed, 137 insertions(+), 9 deletions(-) create mode 100644 pins/adaptors.py create mode 100644 pins/tests/test_adaptors.py diff --git a/pins/adaptors.py b/pins/adaptors.py new file mode 100644 index 00000000..59927137 --- /dev/null +++ b/pins/adaptors.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from abc import abstractmethod +from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, overload + +from ._databackend import AbstractBackend + +if TYPE_CHECKING: + import pandas as pd + + _PandasDataFrame: TypeAlias = pd.DataFrame + _DataFrame: TypeAlias = pd.DataFrame + + +class _AbstractPandasFrame(AbstractBackend): + _backends = [("pandas", "DataFrame")] + + +_AbstractDF: TypeAlias = _AbstractPandasFrame + + +class _Adaptor: + _d: ClassVar[Any] + + def __init__(self, data: Any) -> None: + self._d = data + + +class _DFAdaptor(_Adaptor): + _d: ClassVar[_DataFrame] + + def __init__(self, data: _DataFrame) -> None: + super().__init__(data) + + @property + @abstractmethod + def columns(self) -> list[Any]: ... + + @abstractmethod + def head(self, n: int) -> Self: ... + + @abstractmethod + def write_json(self) -> str: + """Write the dataframe to a JSON string. + + In the format: list like [{column -> value}, ... , {column -> value}] + """ + + +class _PandasAdaptor(_DFAdaptor): + def __init__(self, data: _AbstractPandasFrame) -> None: + super().__init__(data) + + @property + def columns(self) -> list[Any]: + return self._d.columns + + def head(self, n: int) -> Self: + return _PandasAdaptor(self._d.head(n)) + + def write_json(self) -> str: + return self._d.to_json(orient="records") + + +@overload +def _create_df_adaptor(df: _DataFrame) -> _DFAdaptor: ... +@overload +def _create_df_adaptor(df: _PandasDataFrame) -> _PandasAdaptor: ... +def _create_df_adaptor(df): + if isinstance(df, _AbstractPandasFrame): + return _PandasAdaptor(df) + + msg = f"Could not determine dataframe adaptor for {df}" + raise NotImplementedError(msg) diff --git a/pins/boards.py b/pins/boards.py index 44e64edd..73ca50b8 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -9,10 +9,11 @@ from datetime import datetime, timedelta from io import IOBase from pathlib import Path -from typing import Mapping, Protocol, Sequence +from typing import Any, Mapping, Protocol, Sequence from importlib_resources import files +from .adaptors import _create_df_adaptor, _DFAdaptor from .cache import PinsCache from .config import get_allow_rsc_short_name from .drivers import default_title, load_data, load_file, save_data @@ -1121,7 +1122,14 @@ def path_to_deploy_version(self, name: str, version: str): def user_name(self): return self.fs.api.get_user()["username"] + # TODO(NAMC) what about the functions that call this one? def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs): + try: + x = _create_df_adaptor(x) + except NotImplementedError: + # Not a dataframe. + pass + # RSC pin names can have form /, but this will try to # create the object in a directory named . So we grab just # the part. 
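The dispatch in _create_df_adaptor hinges on _AbstractPandasFrame, whose isinstance check can succeed without pins ever importing pandas itself. Below is a minimal sketch of that trick; the real logic lives in the vendored pins._databackend module, so the names and details here are illustrative only. The key idea is that the check consults sys.modules, so it can only return True once the caller has already imported pandas.

import importlib
import sys


class _SketchBackendMeta(type):
    """Metaclass that makes isinstance() consult registered backend classes."""

    def __instancecheck__(cls, obj):
        for mod_name, cls_name in cls._backends:
            # Only resolve the backend if the user already imported it;
            # this check never triggers an import of pandas on its own.
            if mod_name in sys.modules:
                backend_cls = getattr(importlib.import_module(mod_name), cls_name)
                if isinstance(obj, backend_cls):
                    return True
        return False


class _SketchPandasFrame(metaclass=_SketchBackendMeta):
    _backends = [("pandas", "DataFrame")]

With that in place, isinstance(df, _AbstractPandasFrame) behaves as an ordinary type test for pandas users and is simply False for everything else, which is what lets _create_df_adaptor fall through to its NotImplementedError branch.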
@@ -1129,7 +1137,9 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs # TODO(compat): py pins always uses the short name, R pins uses w/e the # user passed, but guessing people want the long name? - meta = super()._create_meta(pin_dir_path, x, short_name, *args, **kwargs) + meta = super()._create_meta( + pin_dir_path, x, short_name, *args, **kwargs + ) # TODO(NAMC) ensure .create_meta can accept adaptor meta.name = name # copy in files needed by index.html ---------------------------------- @@ -1147,7 +1157,7 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs # render index.html ------------------------------------------------ all_files = [meta.file] if isinstance(meta.file, str) else meta.file - pin_files = ", ".join(f"""{x}""" for x in all_files) + pin_files = ", ".join(f"""{file}""" for file in all_files) context = { "date": meta.version.created.replace(microsecond=0), @@ -1164,15 +1174,13 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs import json - import pandas as pd - - if isinstance(x, pd.DataFrame): + if isinstance(x, _DFAdaptor): # TODO(compat) is 100 hard-coded? - # Note that we go df -> json -> dict, to take advantage of pandas type conversions - data = json.loads(x.head(100).to_json(orient="records")) + # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library + data: list[dict[Any, Any]] = json.loads(x.head(100).write_json()) columns = [ {"name": [col], "label": [col], "align": ["left"], "type": [""]} - for col in x + for col in x.columns ] # this reproduces R pins behavior, by omitting entries that would be null diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py new file mode 100644 index 00000000..700cb436 --- /dev/null +++ b/pins/tests/test_adaptors.py @@ -0,0 +1,46 @@ +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal, assert_index_equal + +from pins.adaptors import _AbstractPandasFrame, _create_df_adaptor, _PandasAdaptor + + +class TestCreateDFAdaptor: + def test_pandas(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _create_df_adaptor(df) + assert isinstance(adaptor, _PandasAdaptor) + + def test_non_df(self): + with pytest.raises(NotImplementedError): + _create_df_adaptor(42) + + +class TestPandasAdaptor: + def test_columns(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _create_df_adaptor(df) + assert_index_equal(adaptor.columns, pd.Index(["a", "b"])) + + def test_head(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _create_df_adaptor(df) + head1_df = pd.DataFrame({"a": [1], "b": [4]}) + expected = _create_df_adaptor(head1_df) + assert isinstance(adaptor.head(1), _PandasAdaptor) + assert_frame_equal(adaptor.head(1)._d, expected._d) + + def test_write_json(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _create_df_adaptor(df) + assert adaptor.write_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" + + +class TestAbstractBackends: + class TestAbstractPandasFrame: + def test_isinstance(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + assert isinstance(df, _AbstractPandasFrame) + + def test_not_isinstance(self): + assert not isinstance(42, _AbstractPandasFrame) From 040da5ee8daa471285bdff163416d543908910b2 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 21:54:30 +1200 Subject: [PATCH 02/20] Use adaptor in save_data --- pins/adaptors.py 
| 101 +++++++++++++++++++++++++++----- pins/boards.py | 41 ++----------- pins/drivers.py | 45 ++++---------- pins/tests/test_adaptors.py | 113 ++++++++++++++++++++++++++++++++---- 4 files changed, 205 insertions(+), 95 deletions(-) diff --git a/pins/adaptors.py b/pins/adaptors.py index 59927137..50a20129 100644 --- a/pins/adaptors.py +++ b/pins/adaptors.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from abc import abstractmethod from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, overload @@ -25,6 +26,47 @@ class _Adaptor: def __init__(self, data: Any) -> None: self._d = data + @overload + def write_json(self, file: str) -> None: ... + @overload + def write_json(self, file: None) -> str: ... + def write_json(self, file=None): + if file is None: + msg = ( + f"Writing to JSON string rather than file is not supported for " + f"{type(self._d)}" + ) + raise NotImplementedError(msg) + + import json + + json.dump(self._d, open(file, mode="w")) + + def write_joblib(self, file: str) -> None: + import joblib + + joblib.dump(self._d, file) + + def write_csv(self, file: str) -> None: + msg = f"Writing to CSV is not supported for {type(self._d)}" + raise NotImplementedError(msg) + + def write_parquet(self, file: str) -> None: + msg = f"Writing to Parquet is not supported for {type(self._d)}" + raise NotImplementedError(msg) + + def write_feather(self, file: str) -> None: + msg = f"Writing to Feather is not supported for {type(self._d)}" + raise NotImplementedError(msg) + + @property + def data_preview(self) -> str: + # note that the R library uses jsonlite::toJSON + import json + + # TODO(compat): set display none in index.html + return json.dumps({}) + class _DFAdaptor(_Adaptor): _d: ClassVar[_DataFrame] @@ -39,12 +81,20 @@ def columns(self) -> list[Any]: ... @abstractmethod def head(self, n: int) -> Self: ... - @abstractmethod - def write_json(self) -> str: - """Write the dataframe to a JSON string. + @property + def data_preview(self) -> str: + # TODO(compat) is 100 hard-coded? + # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library + data: list[dict[Any, Any]] = json.loads(self.head(100).write_json()) + columns = [ + {"name": [col], "label": [col], "align": ["left"], "type": [""]} + for col in self.columns + ] - In the format: list like [{column -> value}, ... , {column -> value}] - """ + # this reproduces R pins behavior, by omitting entries that would be null + data_no_nulls = [{k: v for k, v in row.items() if v is not None} for row in data] + + return json.dumps({"data": data_no_nulls, "columns": columns}) class _PandasAdaptor(_DFAdaptor): @@ -53,22 +103,43 @@ def __init__(self, data: _AbstractPandasFrame) -> None: @property def columns(self) -> list[Any]: - return self._d.columns + return self._d.columns.tolist() def head(self, n: int) -> Self: return _PandasAdaptor(self._d.head(n)) - def write_json(self) -> str: + @overload + def write_json(self, file: str) -> None: ... + @overload + def write_json(self, file: None) -> str: ... 
+ def write_json(self, file=None): + if file is not None: + msg = ( + f"Writing to file rather than JSON string is not supported for " + f"{type(self._d)}" + ) + raise NotImplementedError(msg) + return self._d.to_json(orient="records") + def write_csv(self, file: str) -> None: + self._d.to_csv(file, index=False) + + def write_parquet(self, file: str) -> None: + self._d.to_parquet(file) + + def write_feather(self, file: str) -> None: + self._d.to_feather(file) + @overload -def _create_df_adaptor(df: _DataFrame) -> _DFAdaptor: ... +def _create_adaptor(obj: Any) -> _Adaptor: ... @overload -def _create_df_adaptor(df: _PandasDataFrame) -> _PandasAdaptor: ... -def _create_df_adaptor(df): - if isinstance(df, _AbstractPandasFrame): - return _PandasAdaptor(df) - - msg = f"Could not determine dataframe adaptor for {df}" - raise NotImplementedError(msg) +def _create_adaptor(obj: _DataFrame) -> _DFAdaptor: ... +@overload +def _create_adaptor(obj: _PandasDataFrame) -> _PandasAdaptor: ... +def _create_adaptor(obj): + if isinstance(obj, _AbstractPandasFrame): + return _PandasAdaptor(obj) + else: + return _Adaptor(obj) diff --git a/pins/boards.py b/pins/boards.py index 73ca50b8..6c17db14 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -9,11 +9,11 @@ from datetime import datetime, timedelta from io import IOBase from pathlib import Path -from typing import Any, Mapping, Protocol, Sequence +from typing import Mapping, Protocol, Sequence from importlib_resources import files -from .adaptors import _create_df_adaptor, _DFAdaptor +from .adaptors import _create_adaptor from .cache import PinsCache from .config import get_allow_rsc_short_name from .drivers import default_title, load_data, load_file, save_data @@ -1124,11 +1124,7 @@ def user_name(self): # TODO(NAMC) what about the functions that call this one? def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs): - try: - x = _create_df_adaptor(x) - except NotImplementedError: - # Not a dataframe. - pass + adaptor = _create_adaptor(x) # RSC pin names can have form /, but this will try to # create the object in a directory named . So we grab just @@ -1138,7 +1134,7 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs # TODO(compat): py pins always uses the short name, R pins uses w/e the # user passed, but guessing people want the long name? meta = super()._create_meta( - pin_dir_path, x, short_name, *args, **kwargs + pin_dir_path, adaptor, short_name, *args, **kwargs ) # TODO(NAMC) ensure .create_meta can accept adaptor meta.name = name @@ -1165,36 +1161,9 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs "pin_files": pin_files, "pin_metadata": meta, "board_deparse": board_deparse(self), + "data_preview": adaptor.data_preview, } - # data preview ---- - - # TODO: move out data_preview logic? Can we draw some limits here? - # note that the R library uses jsonlite::toJSON - - import json - - if isinstance(x, _DFAdaptor): - # TODO(compat) is 100 hard-coded? 
- # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library - data: list[dict[Any, Any]] = json.loads(x.head(100).write_json()) - columns = [ - {"name": [col], "label": [col], "align": ["left"], "type": [""]} - for col in x.columns - ] - - # this reproduces R pins behavior, by omitting entries that would be null - data_no_nulls = [ - {k: v for k, v in row.items() if v is not None} for row in data - ] - - context["data_preview"] = json.dumps( - {"data": data_no_nulls, "columns": columns} - ) - else: - # TODO(compat): set display none in index.html - context["data_preview"] = json.dumps({}) - # do not show r code if not round-trip friendly if meta.type in ["joblib"]: context["show_r_style"] = "display:none" diff --git a/pins/drivers.py b/pins/drivers.py index 5aa3e186..63e80d75 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,6 +1,8 @@ from pathlib import Path from typing import Sequence +from pins.adaptors import _create_adaptor + from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError from .meta import Meta @@ -13,15 +15,6 @@ REQUIRES_SINGLE_FILE = frozenset(["csv", "joblib", "file"]) -def _assert_is_pandas_df(x, file_type: str) -> None: - import pandas as pd - - if not isinstance(x, pd.DataFrame): - raise NotImplementedError( - f"Currently only pandas.DataFrame can be saved as type {file_type!r}." - ) - - def load_path(meta, path_to_version): # Check that only a single file name was given fnames = [meta.file] if isinstance(meta.file, str) else meta.file @@ -141,6 +134,8 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen # as argument to board, and then type dispatchers for explicit cases # of saving / loading objects different ways. + adaptor = _create_adaptor(obj) + if apply_suffix: if type == "file": suffix = "".join(Path(obj).suffixes) @@ -152,39 +147,22 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen final_name = f"{fname}{suffix}" if type == "csv": - _assert_is_pandas_df(obj, file_type=type) - - obj.to_csv(final_name, index=False) - + adaptor.write_csv(final_name) elif type == "arrow": # NOTE: R pins accepts the type arrow, and saves it as feather. # we allow reading this type, but raise an error for writing. - _assert_is_pandas_df(obj, file_type=type) - - obj.to_feather(final_name) - + adaptor.write_feather(final_name) elif type == "feather": - _assert_is_pandas_df(obj, file_type=type) - - raise NotImplementedError( + msg = ( 'Saving data as type "feather" no longer supported. Use type "arrow" instead.' 
) - + raise NotImplementedError(msg) elif type == "parquet": - _assert_is_pandas_df(obj, file_type=type) - - obj.to_parquet(final_name) - + adaptor.write_parquet(final_name) elif type == "joblib": - import joblib - - joblib.dump(obj, final_name) - + adaptor.write_joblib(final_name) elif type == "json": - import json - - json.dump(obj, open(final_name, "w")) - + adaptor.write_json(final_name) elif type == "file": import contextlib import shutil @@ -192,7 +170,6 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen # ignore the case where the source is the same as the target with contextlib.suppress(shutil.SameFileError): shutil.copyfile(str(obj), final_name) - else: raise NotImplementedError(f"Cannot save type: {type}") diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py index 700cb436..154d6854 100644 --- a/pins/tests/test_adaptors.py +++ b/pins/tests/test_adaptors.py @@ -1,40 +1,133 @@ +from pathlib import Path + +import joblib import pandas as pd import pytest -from pandas.testing import assert_frame_equal, assert_index_equal +from pandas.testing import assert_frame_equal -from pins.adaptors import _AbstractPandasFrame, _create_df_adaptor, _PandasAdaptor +from pins.adaptors import ( + _AbstractPandasFrame, + _Adaptor, + _create_adaptor, + _DFAdaptor, + _PandasAdaptor, +) -class TestCreateDFAdaptor: +class TestCreateAdaptor: def test_pandas(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _create_df_adaptor(df) + adaptor = _create_adaptor(df) + assert isinstance(adaptor, _Adaptor) assert isinstance(adaptor, _PandasAdaptor) def test_non_df(self): + adaptor = _create_adaptor(42) + assert isinstance(adaptor, _Adaptor) + assert not isinstance(adaptor, _PandasAdaptor) + assert not isinstance(adaptor, _DFAdaptor) + + +class TestAdaptor: + def test_write_json(self, tmp_path: Path): + data = {"a": 1, "b": 2} + adaptor = _Adaptor(data) + file = tmp_path / "file.json" + adaptor.write_json(file) + assert file.read_text() == '{"a": 1, "b": 2}' + + def test_write_joblib(self, tmp_path: Path): + data = {"a": 1, "b": 2} + adaptor = _Adaptor(data) + file = tmp_path / "file.joblib" + adaptor.write_joblib(file) + + # Dump independently and check contents + expected_file = tmp_path / "expected.joblib" + joblib.dump(data, expected_file) + assert expected_file.read_bytes() == file.read_bytes() + + def test_write_csv(self): with pytest.raises(NotImplementedError): - _create_df_adaptor(42) + adaptor = _Adaptor(42) + adaptor.write_csv("file.csv") + + def test_write_parquet(self): + with pytest.raises(NotImplementedError): + adaptor = _Adaptor(42) + adaptor.write_parquet("file.parquet") + + def test_write_feather(self): + with pytest.raises(NotImplementedError): + adaptor = _Adaptor(42) + adaptor.write_feather("file.feather") + + class TestDataPreview: + def test_int(self): + adaptor = _Adaptor(42) + assert adaptor.data_preview == "{}" + + def test_dict(self): + data = {"a": 1, "b": 2} + adaptor = _Adaptor(data) + assert adaptor.data_preview == "{}" class TestPandasAdaptor: def test_columns(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _create_df_adaptor(df) - assert_index_equal(adaptor.columns, pd.Index(["a", "b"])) + adaptor = _PandasAdaptor(df) + assert isinstance(adaptor, _DFAdaptor) + assert isinstance(adaptor, _PandasAdaptor) + assert adaptor.columns == ["a", "b"] def test_head(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _create_df_adaptor(df) + adaptor = _PandasAdaptor(df) head1_df = 
pd.DataFrame({"a": [1], "b": [4]}) - expected = _create_df_adaptor(head1_df) + expected = _create_adaptor(head1_df) + assert isinstance(adaptor, _DFAdaptor) + assert isinstance(adaptor.head(1), _DFAdaptor) assert isinstance(adaptor.head(1), _PandasAdaptor) assert_frame_equal(adaptor.head(1)._d, expected._d) def test_write_json(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _create_df_adaptor(df) + adaptor = _PandasAdaptor(df) + assert isinstance(adaptor, _DFAdaptor) assert adaptor.write_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" + def test_write_csv(self, tmp_path: Path): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + file = tmp_path / "file.csv" + adaptor.write_csv(file) + assert file.read_text() == "a,b\n1,4\n2,5\n3,6\n" + + def test_write_parquet(self, tmp_path: Path): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + file = tmp_path / "file.parquet" + adaptor.write_parquet(file) + assert_frame_equal(pd.read_parquet(file), df) + + def test_write_feather(self, tmp_path: Path): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + file = tmp_path / "file.feather" + adaptor.write_feather(file) + assert_frame_equal(pd.read_feather(file), df) + + def test_data_preview(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + expected = ( + '{"data": [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}], ' + '"columns": [{"name": ["a"], "label": ["a"], "align": ["left"], "type": [""]}, ' + '{"name": ["b"], "label": ["b"], "align": ["left"], "type": [""]}]}' + ) + assert adaptor.data_preview == expected + class TestAbstractBackends: class TestAbstractPandasFrame: From 4ba393d1624d0d39486fb7abb740f6a41f2399db Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 22:10:56 +1200 Subject: [PATCH 03/20] Use adaptor for default_title --- pins/adaptors.py | 21 +++++++++++++++++++++ pins/drivers.py | 16 ++++------------ pins/tests/test_adaptors.py | 16 ++++++++++++++++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/pins/adaptors.py b/pins/adaptors.py index 50a20129..4abffb05 100644 --- a/pins/adaptors.py +++ b/pins/adaptors.py @@ -67,6 +67,13 @@ def data_preview(self) -> str: # TODO(compat): set display none in index.html return json.dumps({}) + def default_title(self, name: str) -> str: + return f"{name}: a pinned {self._obj_name} object" + + @property + def _obj_name(self) -> str: + return type(self._d).__qualname__ + class _DFAdaptor(_Adaptor): _d: ClassVar[_DataFrame] @@ -78,6 +85,10 @@ def __init__(self, data: _DataFrame) -> None: @abstractmethod def columns(self) -> list[Any]: ... + @property + @abstractmethod + def shape(self) -> tuple[int, int]: ... + @abstractmethod def head(self, n: int) -> Self: ... 
@@ -96,6 +107,12 @@ def data_preview(self) -> str: return json.dumps({"data": data_no_nulls, "columns": columns}) + def default_title(self, name: str) -> str: + # TODO(compat): title says CSV rather than data.frame + # see https://github.com/machow/pins-python/issues/5 + shape_str = " x ".join(map(str, self.shape)) + return f"{name}: a pinned {shape_str} DataFrame" + class _PandasAdaptor(_DFAdaptor): def __init__(self, data: _AbstractPandasFrame) -> None: @@ -105,6 +122,10 @@ def __init__(self, data: _AbstractPandasFrame) -> None: def columns(self) -> list[Any]: return self._d.columns.tolist() + @property + def shape(self) -> tuple[int, int]: + return self._d.shape + def head(self, n: int) -> Self: return _PandasAdaptor(self._d.head(n)) diff --git a/pins/drivers.py b/pins/drivers.py index 63e80d75..2dc301c7 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Sequence +from typing import Any, Sequence from pins.adaptors import _create_adaptor @@ -176,14 +176,6 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen return final_name -def default_title(obj, name): - import pandas as pd - - if isinstance(obj, pd.DataFrame): - # TODO(compat): title says CSV rather than data.frame - # see https://github.com/machow/pins-python/issues/5 - shape_str = " x ".join(map(str, obj.shape)) - return f"{name}: a pinned {shape_str} DataFrame" - else: - obj_name = type(obj).__qualname__ - return f"{name}: a pinned {obj_name} object" +def default_title(obj: Any, name: str) -> str: + # Kept for backward compatibility only. + return _create_adaptor(obj).default_title(name) diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py index 154d6854..5b3f6afc 100644 --- a/pins/tests/test_adaptors.py +++ b/pins/tests/test_adaptors.py @@ -72,6 +72,10 @@ def test_dict(self): adaptor = _Adaptor(data) assert adaptor.data_preview == "{}" + def test_default_title(self): + adaptor = _Adaptor(42) + assert adaptor.default_title("my_data") == "my_data: a pinned int object" + class TestPandasAdaptor: def test_columns(self): @@ -81,6 +85,13 @@ def test_columns(self): assert isinstance(adaptor, _PandasAdaptor) assert adaptor.columns == ["a", "b"] + def test_shape(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + assert isinstance(adaptor, _DFAdaptor) + assert isinstance(adaptor, _PandasAdaptor) + assert adaptor.shape == (3, 2) + def test_head(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) adaptor = _PandasAdaptor(df) @@ -128,6 +139,11 @@ def test_data_preview(self): ) assert adaptor.data_preview == expected + def test_default_title(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + assert adaptor.default_title("my_df") == "my_df: a pinned 3 x 2 DataFrame" + class TestAbstractBackends: class TestAbstractPandasFrame: From 7898ce7f910a4eb61f221f22e9802a8f644d167a Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 22:21:24 +1200 Subject: [PATCH 04/20] underscore prefix for _adaptors.py; abstracting df_type in default_title --- pins/{adaptors.py => _adaptors.py} | 21 +++++++++++++++------ pins/boards.py | 2 +- pins/drivers.py | 2 +- pins/tests/test_adaptors.py | 7 ++++++- 4 files changed, 23 insertions(+), 9 deletions(-) rename pins/{adaptors.py => _adaptors.py} (91%) diff --git a/pins/adaptors.py b/pins/_adaptors.py similarity index 91% rename from pins/adaptors.py rename to pins/_adaptors.py index 4abffb05..7cfa8bad 
100644 --- a/pins/adaptors.py +++ b/pins/_adaptors.py @@ -68,11 +68,13 @@ def data_preview(self) -> str: return json.dumps({}) def default_title(self, name: str) -> str: - return f"{name}: a pinned {self._obj_name} object" + # TODO(compat): title says CSV rather than data.frame + # see https://github.com/machow/pins-python/issues/5 + return f"{name}: a pinned {self._obj_name}" @property def _obj_name(self) -> str: - return type(self._d).__qualname__ + return f"{type(self._d).__qualname__} object" class _DFAdaptor(_Adaptor): @@ -81,6 +83,11 @@ class _DFAdaptor(_Adaptor): def __init__(self, data: _DataFrame) -> None: super().__init__(data) + @property + def df_type(self) -> str: + # Consider over-riding this for specialized dataframes + return "DataFrame" + @property @abstractmethod def columns(self) -> list[Any]: ... @@ -107,11 +114,13 @@ def data_preview(self) -> str: return json.dumps({"data": data_no_nulls, "columns": columns}) + @property + def _obj_name(self) -> str: + return f"{type(self._d).__qualname__} object" + def default_title(self, name: str) -> str: - # TODO(compat): title says CSV rather than data.frame - # see https://github.com/machow/pins-python/issues/5 - shape_str = " x ".join(map(str, self.shape)) - return f"{name}: a pinned {shape_str} DataFrame" + row, col = self.shape + return f"{name}: a pinned {row} x {col} {self.df_type}" class _PandasAdaptor(_DFAdaptor): diff --git a/pins/boards.py b/pins/boards.py index 6c17db14..a20c9a53 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -13,7 +13,7 @@ from importlib_resources import files -from .adaptors import _create_adaptor +from ._adaptors import _create_adaptor from .cache import PinsCache from .config import get_allow_rsc_short_name from .drivers import default_title, load_data, load_file, save_data diff --git a/pins/drivers.py b/pins/drivers.py index 2dc301c7..49fb4388 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Any, Sequence -from pins.adaptors import _create_adaptor +from pins._adaptors import _create_adaptor from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py index 5b3f6afc..925ade42 100644 --- a/pins/tests/test_adaptors.py +++ b/pins/tests/test_adaptors.py @@ -5,7 +5,7 @@ import pytest from pandas.testing import assert_frame_equal -from pins.adaptors import ( +from pins._adaptors import ( _AbstractPandasFrame, _Adaptor, _create_adaptor, @@ -78,6 +78,11 @@ def test_default_title(self): class TestPandasAdaptor: + def test_df_type(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = _PandasAdaptor(df) + assert adaptor.df_type == "DataFrame" + def test_columns(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) adaptor = _PandasAdaptor(df) From 4a3ea017e37aa60b752d93615f945ae0dc510529 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 22:28:54 +1200 Subject: [PATCH 05/20] Removing duplication in _obj_name definition --- pins/_adaptors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index 7cfa8bad..e14f9aab 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -116,11 +116,8 @@ def data_preview(self) -> str: @property def _obj_name(self) -> str: - return f"{type(self._d).__qualname__} object" - - def default_title(self, name: str) -> str: row, col = self.shape - return f"{name}: a pinned {row} x {col} {self.df_type}" 
+ return f"{row} x {col} {self.df_type}" class _PandasAdaptor(_DFAdaptor): From 007ad3ab065dd0678581fe3d764d4c49215943a8 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 22:46:16 +1200 Subject: [PATCH 06/20] Use adaptor in _create_meta --- pins/boards.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pins/boards.py b/pins/boards.py index a20c9a53..503e8fbd 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -13,7 +13,7 @@ from importlib_resources import files -from ._adaptors import _create_adaptor +from ._adaptors import _Adaptor, _create_adaptor from .cache import PinsCache from .config import get_allow_rsc_short_name from .drivers import default_title, load_data, load_file, save_data @@ -24,6 +24,8 @@ _log = logging.getLogger(__name__) +_ = default_title # Keep this import for backward compatibility + class IFileSystem(Protocol): protocol: str | list @@ -623,6 +625,8 @@ def prepare_pin_version( created: datetime | None = None, object_name: str | None = None, ): + x = _create_adaptor(x) + meta = self._create_meta( pin_dir_path, x, @@ -644,7 +648,7 @@ def prepare_pin_version( def _create_meta( self, pin_dir_path, - x, + x: _Adaptor, name: str | None = None, type: str | None = None, title: str | None = None, @@ -661,7 +665,7 @@ def _create_meta( raise NotImplementedError("Type argument is required.") if title is None: - title = default_title(x, name) + title = x.default_title(name) # create metadata from object on disk --------------------------------- # save all pin data to a temporary folder (including data.txt), so we @@ -673,7 +677,7 @@ def _create_meta( p_obj = Path(pin_dir_path) / object_name # file is saved locally in order to hash, calc size - file_names = save_data(x, str(p_obj), type) + file_names = save_data(x._d, str(p_obj), type) meta = self.meta_factory.create( pin_dir_path, @@ -1122,7 +1126,6 @@ def path_to_deploy_version(self, name: str, version: str): def user_name(self): return self.fs.api.get_user()["username"] - # TODO(NAMC) what about the functions that call this one? def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs): adaptor = _create_adaptor(x) @@ -1133,9 +1136,7 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs # TODO(compat): py pins always uses the short name, R pins uses w/e the # user passed, but guessing people want the long name? - meta = super()._create_meta( - pin_dir_path, adaptor, short_name, *args, **kwargs - ) # TODO(NAMC) ensure .create_meta can accept adaptor + meta = super()._create_meta(pin_dir_path, adaptor, short_name, *args, **kwargs) meta.name = name # copy in files needed by index.html ---------------------------------- From d577b0294ba85f4c78f943a252af5f9c542859dc Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 22:54:28 +1200 Subject: [PATCH 07/20] Pass pyright --- pins/_adaptors.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index e14f9aab..2fe22356 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -29,8 +29,8 @@ def __init__(self, data: Any) -> None: @overload def write_json(self, file: str) -> None: ... @overload - def write_json(self, file: None) -> str: ... - def write_json(self, file=None): + def write_json(self, file: None = ...) -> str: ... 
+ def write_json(self, file: str | None = None) -> str | None: if file is None: msg = ( f"Writing to JSON string rather than file is not supported for " @@ -139,7 +139,7 @@ def head(self, n: int) -> Self: def write_json(self, file: str) -> None: ... @overload def write_json(self, file: None) -> str: ... - def write_json(self, file=None): + def write_json(self, file: str | None = None) -> str | None: if file is not None: msg = ( f"Writing to file rather than JSON string is not supported for " @@ -159,13 +159,11 @@ def write_feather(self, file: str) -> None: self._d.to_feather(file) -@overload -def _create_adaptor(obj: Any) -> _Adaptor: ... @overload def _create_adaptor(obj: _DataFrame) -> _DFAdaptor: ... @overload -def _create_adaptor(obj: _PandasDataFrame) -> _PandasAdaptor: ... -def _create_adaptor(obj): +def _create_adaptor(obj: Any) -> _Adaptor: ... +def _create_adaptor(obj: Any | _DataFrame) -> _Adaptor | _DFAdaptor: if isinstance(obj, _AbstractPandasFrame): return _PandasAdaptor(obj) else: From 3aaabbb893d3593ace072569a17cad795d264cf4 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 13 Aug 2024 23:01:05 +1200 Subject: [PATCH 08/20] Fix broken import --- pins/_adaptors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index 2fe22356..185e5f66 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -2,7 +2,9 @@ import json from abc import abstractmethod -from typing import TYPE_CHECKING, Any, ClassVar, Self, TypeAlias, overload +from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, overload + +from typing_extensions import Self from ._databackend import AbstractBackend From 56c3285eabc402a4c7398444173b2d3233415794 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 21 Aug 2024 10:16:09 +1200 Subject: [PATCH 09/20] Refactoring type hints to avoid use of Self Various other type improvements --- pins/_adaptors.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index 185e5f66..f7f6d768 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -2,9 +2,7 @@ import json from abc import abstractmethod -from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, overload - -from typing_extensions import Self +from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, Union, overload from ._databackend import AbstractBackend @@ -12,7 +10,7 @@ import pandas as pd _PandasDataFrame: TypeAlias = pd.DataFrame - _DataFrame: TypeAlias = pd.DataFrame + _DataFrame: TypeAlias = Union[_PandasDataFrame,] class _AbstractPandasFrame(AbstractBackend): @@ -99,7 +97,7 @@ def columns(self) -> list[Any]: ... def shape(self) -> tuple[int, int]: ... @abstractmethod - def head(self, n: int) -> Self: ... + def head(self, n: int) -> _DFAdaptor: ... 
@property def data_preview(self) -> str: @@ -123,6 +121,8 @@ def _obj_name(self) -> str: class _PandasAdaptor(_DFAdaptor): + _d: ClassVar[_PandasDataFrame] + def __init__(self, data: _AbstractPandasFrame) -> None: super().__init__(data) @@ -134,7 +134,7 @@ def columns(self) -> list[Any]: def shape(self) -> tuple[int, int]: return self._d.shape - def head(self, n: int) -> Self: + def head(self, n: int) -> _PandasAdaptor: return _PandasAdaptor(self._d.head(n)) @overload From 0171d727528e6457280b8751fd8eb86b8d70d4da Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 21 Aug 2024 10:25:13 +1200 Subject: [PATCH 10/20] Remove singleton Union --- pins/_adaptors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index f7f6d768..728bc93b 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -2,7 +2,7 @@ import json from abc import abstractmethod -from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, Union, overload +from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, overload from ._databackend import AbstractBackend @@ -10,7 +10,7 @@ import pandas as pd _PandasDataFrame: TypeAlias = pd.DataFrame - _DataFrame: TypeAlias = Union[_PandasDataFrame,] + _DataFrame: TypeAlias = _PandasDataFrame class _AbstractPandasFrame(AbstractBackend): From fe6092f3b4f031be9b1e9bc51f03d2a53bfa4258 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 28 Aug 2024 10:38:37 +1200 Subject: [PATCH 11/20] Add databackend as a dependency --- pins/_adaptors.py | 2 +- pyproject.toml | 1 + requirements/dev.txt | 69 ++++++++++++++++++++-------------------- requirements/minimum.txt | 1 + 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index 728bc93b..e8ea4f23 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -4,7 +4,7 @@ from abc import abstractmethod from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, overload -from ._databackend import AbstractBackend +from databackend import AbstractBackend if TYPE_CHECKING: import pandas as pd diff --git a/pyproject.toml b/pyproject.toml index f1f7af39..79b25f14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "pyyaml>=3.13", "requests", "xxhash>=1", + "databackend>=0.0.3", ] [project.optional-dependencies] diff --git a/requirements/dev.txt b/requirements/dev.txt index 2b2a43a0..54a9bcb9 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -2,13 +2,12 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --extra=doc --extra=test --extra=check --output-file=- --strip-extras setup.cfg +# pip-compile --extra=doc --extra=test --extra=check --output-file=- --strip-extras pyproject.toml # --index-url https://pypi.python.org/simple/ --trusted-host pypi.org - -adlfs==2022.2.0 - # via pins (setup.cfg) +adlfs==2024.7.0 + # via pins (pyproject.toml) aiobotocore==2.13.1 # via s3fs aiohttp==3.9.5 @@ -24,7 +23,7 @@ aiosignal==1.3.1 annotated-types==0.7.0 # via pydantic appdirs==1.4.4 - # via pins (setup.cfg) + # via pins (pyproject.toml) appnope==0.1.4 # via # ipykernel @@ -87,6 +86,8 @@ cryptography==42.0.8 # azure-storage-blob # msal # pyjwt +databackend==0.0.3 + # via pins (pyproject.toml) debugpy==1.8.2 # via ipykernel decopatch==1.4.10 @@ -102,7 +103,7 @@ executing==2.0.1 fastjsonschema==2.20.0 # via nbformat fastparquet==2024.5.0 - # via pins (setup.cfg) + # via pins (pyproject.toml) filelock==3.15.4 # via virtualenv frozenlist==1.4.1 @@ 
-111,13 +112,13 @@ frozenlist==1.4.1 # aiosignal fsspec==2024.6.1 # via + # pins (pyproject.toml) # adlfs # fastparquet # gcsfs - # pins (setup.cfg) # s3fs gcsfs==2024.6.1 - # via pins (setup.cfg) + # via pins (pyproject.toml) google-api-core==2.19.1 # via # google-cloud-core @@ -146,7 +147,7 @@ googleapis-common-protos==1.63.2 griffe==0.48.0 # via quartodoc humanize==4.10.0 - # via pins (setup.cfg) + # via pins (pyproject.toml) identify==2.6.0 # via pre-commit idna==3.7 @@ -155,30 +156,30 @@ idna==3.7 # yarl importlib-metadata==8.0.0 # via - # pins (setup.cfg) + # pins (pyproject.toml) # quartodoc importlib-resources==6.4.0 # via - # pins (setup.cfg) + # pins (pyproject.toml) # quartodoc iniconfig==2.0.0 # via pytest ipykernel==6.29.5 - # via pins (setup.cfg) + # via pins (pyproject.toml) ipython==8.12.0 # via + # pins (pyproject.toml) # ipykernel - # pins (setup.cfg) isodate==0.6.1 # via azure-storage-blob jedi==0.19.1 # via ipython jinja2==3.1.4 - # via pins (setup.cfg) + # via pins (pyproject.toml) jmespath==1.0.1 # via botocore joblib==1.4.2 - # via pins (setup.cfg) + # via pins (pyproject.toml) jsonschema==4.23.0 # via # nbformat @@ -221,11 +222,11 @@ multidict==6.0.5 # aiohttp # yarl nbclient==0.10.0 - # via pins (setup.cfg) + # via pins (pyproject.toml) nbformat==5.10.4 # via + # pins (pyproject.toml) # nbclient - # pins (setup.cfg) nest-asyncio==1.6.0 # via ipykernel nodeenv==1.9.1 @@ -248,8 +249,8 @@ packaging==24.1 # pytest-cases pandas==2.2.2 # via + # pins (pyproject.toml) # fastparquet - # pins (setup.cfg) parso==0.8.4 # via jedi pexpect==4.9.0 @@ -257,7 +258,7 @@ pexpect==4.9.0 pickleshare==0.7.5 # via ipython pip-tools==7.4.1 - # via pins (setup.cfg) + # via pins (pyproject.toml) platformdirs==4.2.2 # via # jupyter-core @@ -269,7 +270,7 @@ plum-dispatch==2.5.1.post1 portalocker==2.10.1 # via msal-extensions pre-commit==3.7.1 - # via pins (setup.cfg) + # via pins (pyproject.toml) prompt-toolkit==3.0.47 # via ipython proto-plus==1.24.0 @@ -288,7 +289,7 @@ pure-eval==0.2.2 py==1.11.0 # via pytest pyarrow==16.1.0 - # via pins (setup.cfg) + # via pins (pyproject.toml) pyasn1==0.6.0 # via # pyasn1-modules @@ -306,26 +307,24 @@ pygments==2.18.0 # ipython # rich pyjwt==2.8.0 - # via - # msal - # pyjwt + # via msal pyproject-hooks==1.1.0 # via # build # pip-tools pyright==1.1.372 - # via pins (setup.cfg) + # via pins (pyproject.toml) pytest==7.1.3 # via - # pins (setup.cfg) + # pins (pyproject.toml) # pytest-dotenv # pytest-parallel pytest-cases==3.8.5 - # via pins (setup.cfg) + # via pins (pyproject.toml) pytest-dotenv==0.5.2 - # via pins (setup.cfg) + # via pins (pyproject.toml) pytest-parallel==0.1.1 - # via pins (setup.cfg) + # via pins (pyproject.toml) python-dateutil==2.9.0.post0 # via # botocore @@ -337,7 +336,7 @@ pytz==2024.1 # via pandas pyyaml==6.0.1 # via - # pins (setup.cfg) + # pins (pyproject.toml) # pre-commit # quartodoc pyzmq==26.0.3 @@ -345,20 +344,20 @@ pyzmq==26.0.3 # ipykernel # jupyter-client quartodoc==0.7.5 - # via pins (setup.cfg) + # via pins (pyproject.toml) referencing==0.35.1 # via # jsonschema # jsonschema-specifications requests==2.32.3 # via + # pins (pyproject.toml) # azure-core # azure-datalake-store # gcsfs # google-api-core # google-cloud-storage # msal - # pins (setup.cfg) # quartodoc # requests-oauthlib requests-oauthlib==2.0.0 @@ -372,7 +371,7 @@ rpds-py==0.19.0 rsa==4.9 # via google-auth s3fs==2024.6.1 - # via pins (setup.cfg) + # via pins (pyproject.toml) six==1.16.0 # via # asttokens @@ -404,7 +403,7 @@ traitlets==5.14.3 # nbclient # 
nbformat types-appdirs==1.4.3.5 - # via pins (setup.cfg) + # via pins (pyproject.toml) typing-extensions==4.12.2 # via # azure-core @@ -430,7 +429,7 @@ wheel==0.43.0 wrapt==1.16.0 # via aiobotocore xxhash==3.4.1 - # via pins (setup.cfg) + # via pins (pyproject.toml) yarl==1.9.4 # via aiohttp zipp==3.19.2 diff --git a/requirements/minimum.txt b/requirements/minimum.txt index 325990dc..73c3f3f3 100644 --- a/requirements/minimum.txt +++ b/requirements/minimum.txt @@ -7,3 +7,4 @@ importlib-metadata==4.4 importlib-resources==1.3 appdirs<2.0.0 humanize==1.0.0 +databackend==0.0.3 From 1d5c47ff2486b5e53d3085d7f583ae1a92451a80 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Tue, 1 Apr 2025 16:05:33 -0400 Subject: [PATCH 12/20] dev: add ruff to pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 4f1fabcd..a91ca77a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ azure = ["adlfs"] check = [ "pre-commit", "pyright==1.1.372", # Pinned; manually sync with .github/workflows/code-checks.yml + "ruff==0.5.4", # Pinned; manually sync with pre-commit-config.yaml "types-appdirs", ] doc = [ From d0fa9c96d9ad762ba4fd6bc28d68ee75802922f6 Mon Sep 17 00:00:00 2001 From: Michael Chow Date: Tue, 1 Apr 2025 17:20:46 -0400 Subject: [PATCH 13/20] feat: allow save_data to accept an Adaptor --- pins/boards.py | 2 +- pins/drivers.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pins/boards.py b/pins/boards.py index 2bc62803..165a4692 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -754,7 +754,7 @@ def _create_meta( else: p_obj = str(Path(pin_dir_path) / object_name) # file is saved locally in order to hash, calc size - file_names = save_data(x._d, p_obj, type, apply_suffix) + file_names = save_data(x, p_obj, type, apply_suffix) meta = self.meta_factory.create( pin_dir_path, diff --git a/pins/drivers.py b/pins/drivers.py index aa961f53..d5d9f95b 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import Any -from pins._adaptors import _create_adaptor +from pins._adaptors import _Adaptor, _create_adaptor from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError @@ -120,7 +120,7 @@ def load_data( def save_data( - obj, fname, pin_type=None, apply_suffix: bool = True + obj: "_Adaptor | Any", fname, pin_type=None, apply_suffix: bool = True ) -> "str | Sequence[str]": # TODO: extensible saving with deferred importing # TODO: how to encode arguments to saving / loading drivers? @@ -129,7 +129,10 @@ def save_data( # as argument to board, and then type dispatchers for explicit cases # of saving / loading objects different ways. 
- adaptor = _create_adaptor(obj) + if isinstance(obj, _Adaptor): + adaptor, obj = obj, obj._d + else: + adaptor = _create_adaptor(obj) if apply_suffix: if pin_type == "file": From 81f6779493e6673a6ea7d89b9823d9c3a611e692 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 2 Apr 2025 11:39:53 +1300 Subject: [PATCH 14/20] Remove unnecessary underscores --- pins/_adaptors.py | 40 +++++++++--------- pins/boards.py | 11 +++-- pins/drivers.py | 6 +-- pins/tests/test_adaptors.py | 82 ++++++++++++++++++------------------- 4 files changed, 69 insertions(+), 70 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index e8ea4f23..fb6f7bad 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -9,18 +9,18 @@ if TYPE_CHECKING: import pandas as pd - _PandasDataFrame: TypeAlias = pd.DataFrame - _DataFrame: TypeAlias = _PandasDataFrame + PandasDataFrame: TypeAlias = pd.DataFrame + DataFrame: TypeAlias = PandasDataFrame -class _AbstractPandasFrame(AbstractBackend): +class AbstractPandasFrame(AbstractBackend): _backends = [("pandas", "DataFrame")] -_AbstractDF: TypeAlias = _AbstractPandasFrame +AbstractDF: TypeAlias = AbstractPandasFrame -class _Adaptor: +class Adaptor: _d: ClassVar[Any] def __init__(self, data: Any) -> None: @@ -77,10 +77,10 @@ def _obj_name(self) -> str: return f"{type(self._d).__qualname__} object" -class _DFAdaptor(_Adaptor): - _d: ClassVar[_DataFrame] +class DFAdaptor(Adaptor): + _d: ClassVar[DataFrame] - def __init__(self, data: _DataFrame) -> None: + def __init__(self, data: DataFrame) -> None: super().__init__(data) @property @@ -97,7 +97,7 @@ def columns(self) -> list[Any]: ... def shape(self) -> tuple[int, int]: ... @abstractmethod - def head(self, n: int) -> _DFAdaptor: ... + def head(self, n: int) -> DFAdaptor: ... @property def data_preview(self) -> str: @@ -120,10 +120,10 @@ def _obj_name(self) -> str: return f"{row} x {col} {self.df_type}" -class _PandasAdaptor(_DFAdaptor): - _d: ClassVar[_PandasDataFrame] +class PandasAdaptor(DFAdaptor): + _d: ClassVar[PandasDataFrame] - def __init__(self, data: _AbstractPandasFrame) -> None: + def __init__(self, data: AbstractPandasFrame) -> None: super().__init__(data) @property @@ -134,8 +134,8 @@ def columns(self) -> list[Any]: def shape(self) -> tuple[int, int]: return self._d.shape - def head(self, n: int) -> _PandasAdaptor: - return _PandasAdaptor(self._d.head(n)) + def head(self, n: int) -> PandasAdaptor: + return PandasAdaptor(self._d.head(n)) @overload def write_json(self, file: str) -> None: ... @@ -162,11 +162,11 @@ def write_feather(self, file: str) -> None: @overload -def _create_adaptor(obj: _DataFrame) -> _DFAdaptor: ... +def create_adaptor(obj: DataFrame) -> DFAdaptor: ... @overload -def _create_adaptor(obj: Any) -> _Adaptor: ... -def _create_adaptor(obj: Any | _DataFrame) -> _Adaptor | _DFAdaptor: - if isinstance(obj, _AbstractPandasFrame): - return _PandasAdaptor(obj) +def create_adaptor(obj: Any) -> Adaptor: ... 
+def create_adaptor(obj: Any | DataFrame) -> Adaptor | DFAdaptor: + if isinstance(obj, AbstractPandasFrame): + return PandasAdaptor(obj) else: - return _Adaptor(obj) + return Adaptor(obj) diff --git a/pins/boards.py b/pins/boards.py index 2bc62803..4ac0fe12 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -15,7 +15,7 @@ from importlib_resources import files from importlib_resources.abc import Traversable -from ._adaptors import _Adaptor, _create_adaptor +from ._adaptors import Adaptor, create_adaptor from .cache import PinsCache from .config import get_allow_rsc_short_name from .drivers import REQUIRES_SINGLE_FILE, default_title, load_data, load_file, save_data @@ -136,8 +136,7 @@ def pin_meta(self, name, version: str = None) -> Meta: # ensure pin and version exist if not self.fs.exists(self.construct_path([pin_name, version])): raise PinsError( - f"Pin {name} either does not exist, " - f"or is missing version: {version}." + f"Pin {name} either does not exist, or is missing version: {version}." ) selected_version = guess_version(version) @@ -698,7 +697,7 @@ def prepare_pin_version( created: datetime | None = None, object_name: str | list[str] | None = None, ): - x = _create_adaptor(x) + x = create_adaptor(x) meta = self._create_meta( pin_dir_path, @@ -721,7 +720,7 @@ def prepare_pin_version( def _create_meta( self, pin_dir_path, - x: _Adaptor, + x: Adaptor, name: str | None = None, type: str | None = None, title: str | None = None, @@ -1204,7 +1203,7 @@ def user_name(self): return self.fs.api.get_user()["username"] def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs): - adaptor = _create_adaptor(x) + adaptor = create_adaptor(x) # RSC pin names can have form /, but this will try to # create the object in a directory named . So we grab just diff --git a/pins/drivers.py b/pins/drivers.py index aa961f53..3906f0c1 100644 --- a/pins/drivers.py +++ b/pins/drivers.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import Any -from pins._adaptors import _create_adaptor +from pins._adaptors import create_adaptor from .config import PINS_ENV_INSECURE_READ, get_allow_pickle_read from .errors import PinsInsecureReadError @@ -129,7 +129,7 @@ def save_data( # as argument to board, and then type dispatchers for explicit cases # of saving / loading objects different ways. - adaptor = _create_adaptor(obj) + adaptor = create_adaptor(obj) if apply_suffix: if pin_type == "file": @@ -183,4 +183,4 @@ def save_data( def default_title(obj: Any, name: str) -> str: # Kept for backward compatibility only. 
- return _create_adaptor(obj).default_title(name) + return create_adaptor(obj).default_title(name) diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py index 925ade42..8c79052a 100644 --- a/pins/tests/test_adaptors.py +++ b/pins/tests/test_adaptors.py @@ -6,39 +6,39 @@ from pandas.testing import assert_frame_equal from pins._adaptors import ( - _AbstractPandasFrame, - _Adaptor, - _create_adaptor, - _DFAdaptor, - _PandasAdaptor, + AbstractPandasFrame, + Adaptor, + DFAdaptor, + PandasAdaptor, + create_adaptor, ) class TestCreateAdaptor: def test_pandas(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _create_adaptor(df) - assert isinstance(adaptor, _Adaptor) - assert isinstance(adaptor, _PandasAdaptor) + adaptor = create_adaptor(df) + assert isinstance(adaptor, Adaptor) + assert isinstance(adaptor, PandasAdaptor) def test_non_df(self): - adaptor = _create_adaptor(42) - assert isinstance(adaptor, _Adaptor) - assert not isinstance(adaptor, _PandasAdaptor) - assert not isinstance(adaptor, _DFAdaptor) + adaptor = create_adaptor(42) + assert isinstance(adaptor, Adaptor) + assert not isinstance(adaptor, PandasAdaptor) + assert not isinstance(adaptor, DFAdaptor) class TestAdaptor: def test_write_json(self, tmp_path: Path): data = {"a": 1, "b": 2} - adaptor = _Adaptor(data) + adaptor = Adaptor(data) file = tmp_path / "file.json" adaptor.write_json(file) assert file.read_text() == '{"a": 1, "b": 2}' def test_write_joblib(self, tmp_path: Path): data = {"a": 1, "b": 2} - adaptor = _Adaptor(data) + adaptor = Adaptor(data) file = tmp_path / "file.joblib" adaptor.write_joblib(file) @@ -49,94 +49,94 @@ def test_write_joblib(self, tmp_path: Path): def test_write_csv(self): with pytest.raises(NotImplementedError): - adaptor = _Adaptor(42) + adaptor = Adaptor(42) adaptor.write_csv("file.csv") def test_write_parquet(self): with pytest.raises(NotImplementedError): - adaptor = _Adaptor(42) + adaptor = Adaptor(42) adaptor.write_parquet("file.parquet") def test_write_feather(self): with pytest.raises(NotImplementedError): - adaptor = _Adaptor(42) + adaptor = Adaptor(42) adaptor.write_feather("file.feather") class TestDataPreview: def test_int(self): - adaptor = _Adaptor(42) + adaptor = Adaptor(42) assert adaptor.data_preview == "{}" def test_dict(self): data = {"a": 1, "b": 2} - adaptor = _Adaptor(data) + adaptor = Adaptor(data) assert adaptor.data_preview == "{}" def test_default_title(self): - adaptor = _Adaptor(42) + adaptor = Adaptor(42) assert adaptor.default_title("my_data") == "my_data: a pinned int object" class TestPandasAdaptor: def test_df_type(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) + adaptor = PandasAdaptor(df) assert adaptor.df_type == "DataFrame" def test_columns(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) - assert isinstance(adaptor, _DFAdaptor) - assert isinstance(adaptor, _PandasAdaptor) + adaptor = PandasAdaptor(df) + assert isinstance(adaptor, DFAdaptor) + assert isinstance(adaptor, PandasAdaptor) assert adaptor.columns == ["a", "b"] def test_shape(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) - assert isinstance(adaptor, _DFAdaptor) - assert isinstance(adaptor, _PandasAdaptor) + adaptor = PandasAdaptor(df) + assert isinstance(adaptor, DFAdaptor) + assert isinstance(adaptor, PandasAdaptor) assert adaptor.shape == (3, 2) def test_head(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = 
_PandasAdaptor(df) + adaptor = PandasAdaptor(df) head1_df = pd.DataFrame({"a": [1], "b": [4]}) - expected = _create_adaptor(head1_df) - assert isinstance(adaptor, _DFAdaptor) - assert isinstance(adaptor.head(1), _DFAdaptor) - assert isinstance(adaptor.head(1), _PandasAdaptor) + expected = create_adaptor(head1_df) + assert isinstance(adaptor, DFAdaptor) + assert isinstance(adaptor.head(1), DFAdaptor) + assert isinstance(adaptor.head(1), PandasAdaptor) assert_frame_equal(adaptor.head(1)._d, expected._d) def test_write_json(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) - assert isinstance(adaptor, _DFAdaptor) + adaptor = PandasAdaptor(df) + assert isinstance(adaptor, DFAdaptor) assert adaptor.write_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" def test_write_csv(self, tmp_path: Path): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) + adaptor = PandasAdaptor(df) file = tmp_path / "file.csv" adaptor.write_csv(file) assert file.read_text() == "a,b\n1,4\n2,5\n3,6\n" def test_write_parquet(self, tmp_path: Path): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) + adaptor = PandasAdaptor(df) file = tmp_path / "file.parquet" adaptor.write_parquet(file) assert_frame_equal(pd.read_parquet(file), df) def test_write_feather(self, tmp_path: Path): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) + adaptor = PandasAdaptor(df) file = tmp_path / "file.feather" adaptor.write_feather(file) assert_frame_equal(pd.read_feather(file), df) def test_data_preview(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) + adaptor = PandasAdaptor(df) expected = ( '{"data": [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}], ' '"columns": [{"name": ["a"], "label": ["a"], "align": ["left"], "type": [""]}, ' @@ -146,7 +146,7 @@ def test_data_preview(self): def test_default_title(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - adaptor = _PandasAdaptor(df) + adaptor = PandasAdaptor(df) assert adaptor.default_title("my_df") == "my_df: a pinned 3 x 2 DataFrame" @@ -154,7 +154,7 @@ class TestAbstractBackends: class TestAbstractPandasFrame: def test_isinstance(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - assert isinstance(df, _AbstractPandasFrame) + assert isinstance(df, AbstractPandasFrame) def test_not_isinstance(self): - assert not isinstance(42, _AbstractPandasFrame) + assert not isinstance(42, AbstractPandasFrame) From 15405006cf250abd3fea096c5ca28c6933fb291b Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 2 Apr 2025 11:44:25 +1300 Subject: [PATCH 15/20] Remove misleading/unnecessary ClassVar declaration --- pins/_adaptors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index fb6f7bad..f759b94b 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -21,8 +21,6 @@ class AbstractPandasFrame(AbstractBackend): class Adaptor: - _d: ClassVar[Any] - def __init__(self, data: Any) -> None: self._d = data From daa4239ed4f7b32e3fc93caf8817f2f3f65ab221 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 2 Apr 2025 11:59:34 +1300 Subject: [PATCH 16/20] Separate write_json from to_json (CQS) --- pins/_adaptors.py | 32 +++++++------------------------- pins/tests/test_adaptors.py | 4 ++-- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index f759b94b..1152357e 100644 --- a/pins/_adaptors.py +++ 
b/pins/_adaptors.py @@ -24,21 +24,14 @@ class Adaptor: def __init__(self, data: Any) -> None: self._d = data - @overload - def write_json(self, file: str) -> None: ... - @overload - def write_json(self, file: None = ...) -> str: ... - def write_json(self, file: str | None = None) -> str | None: - if file is None: - msg = ( - f"Writing to JSON string rather than file is not supported for " - f"{type(self._d)}" - ) - raise NotImplementedError(msg) + def write_json(self, file: str) -> None: + with open(file, "w") as f: + f.write(self.to_json()) + def to_json(self) -> str: import json - json.dump(self._d, open(file, mode="w")) + return json.dumps(self._d) def write_joblib(self, file: str) -> None: import joblib @@ -101,7 +94,7 @@ def head(self, n: int) -> DFAdaptor: ... def data_preview(self) -> str: # TODO(compat) is 100 hard-coded? # Note that we go df -> json -> dict, to take advantage of type conversions in the dataframe library - data: list[dict[Any, Any]] = json.loads(self.head(100).write_json()) + data: list[dict[Any, Any]] = json.loads(self.head(100).to_json()) columns = [ {"name": [col], "label": [col], "align": ["left"], "type": [""]} for col in self.columns @@ -135,18 +128,7 @@ def shape(self) -> tuple[int, int]: def head(self, n: int) -> PandasAdaptor: return PandasAdaptor(self._d.head(n)) - @overload - def write_json(self, file: str) -> None: ... - @overload - def write_json(self, file: None) -> str: ... - def write_json(self, file: str | None = None) -> str | None: - if file is not None: - msg = ( - f"Writing to file rather than JSON string is not supported for " - f"{type(self._d)}" - ) - raise NotImplementedError(msg) + + def to_json(self) -> str: return self._d.to_json(orient="records") def write_csv(self, file: str) -> None: diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py index 8c79052a..7c3fe9c6 100644 --- a/pins/tests/test_adaptors.py +++ b/pins/tests/test_adaptors.py @@ -107,11 +107,11 @@ def test_head(self): assert isinstance(adaptor.head(1), PandasAdaptor) assert_frame_equal(adaptor.head(1)._d, expected._d) - def test_write_json(self): + def test_to_json(self): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) adaptor = PandasAdaptor(df) assert isinstance(adaptor, DFAdaptor) - assert adaptor.write_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" + assert adaptor.to_json() == """[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]""" def test_write_csv(self, tmp_path: Path): df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) From f11141a28a32ed36643ac4cb69ea09b12e8ef1ef Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 2 Apr 2025 12:25:39 +1300 Subject: [PATCH 17/20] Move calls to create_adaptor to hide them at a lower level --- pins/_adaptors.py | 2 ++ pins/boards.py | 14 +++++--------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index 1152357e..5df3b982 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -148,5 +148,7 @@ def create_adaptor(obj: Any) -> Adaptor: ... 
def create_adaptor(obj: Any | DataFrame) -> Adaptor | DFAdaptor: if isinstance(obj, AbstractPandasFrame): return PandasAdaptor(obj) + elif isinstance(obj, Adaptor): + return obj else: return Adaptor(obj) diff --git a/pins/boards.py b/pins/boards.py index c16ee2bb..79a90251 100644 --- a/pins/boards.py +++ b/pins/boards.py @@ -10,7 +10,7 @@ from datetime import datetime, timedelta from io import IOBase from pathlib import Path -from typing import Protocol +from typing import Any, Protocol from importlib_resources import files from importlib_resources.abc import Traversable @@ -697,8 +697,6 @@ def prepare_pin_version( created: datetime | None = None, object_name: str | list[str] | None = None, ): - x = create_adaptor(x) - meta = self._create_meta( pin_dir_path, x, @@ -720,7 +718,7 @@ def _create_meta( def _create_meta( self, pin_dir_path, - x: Adaptor, + x: Adaptor | Any, name: str | None = None, type: str | None = None, title: str | None = None, @@ -737,7 +735,7 @@ def _create_meta( raise NotImplementedError("Type argument is required.") if title is None: - title = x.default_title(name) + title = create_adaptor(x).default_title(name) # create metadata from object on disk --------------------------------- # save all pin data to a temporary folder (including data.txt), so we @@ -1203,8 +1201,6 @@ def user_name(self): return self.fs.api.get_user()["username"] def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs): - adaptor = create_adaptor(x) - # RSC pin names can have form <user_name>/<content_name>, but this will try to # create the object in a directory named <user_name>. So we grab just # the <content_name> part. @@ -1212,7 +1208,7 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs # TODO(compat): py pins always uses the short name, R pins uses w/e the # user passed, but guessing people want the long name? 
- meta = super()._create_meta(pin_dir_path, adaptor, short_name, *args, **kwargs) + meta = super()._create_meta(pin_dir_path, x, short_name, *args, **kwargs) meta.name = name # copy in files needed by index.html ---------------------------------- @@ -1238,7 +1234,7 @@ def prepare_pin_version(self, pin_dir_path, x, name: str | None, *args, **kwargs "pin_files": pin_files, "pin_metadata": meta, "board_deparse": board_deparse(self), - "data_preview": adaptor.data_preview, + "data_preview": create_adaptor(x).data_preview, } # do not show r code if not round-trip friendly From 13d356ea358c81b9cb29f6979a66215b1042d44b Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 2 Apr 2025 12:42:00 +1300 Subject: [PATCH 18/20] Add some tests --- pins/tests/test_adaptors.py | 6 ++++++ pins/tests/test_drivers.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/pins/tests/test_adaptors.py b/pins/tests/test_adaptors.py index 7c3fe9c6..d771c062 100644 --- a/pins/tests/test_adaptors.py +++ b/pins/tests/test_adaptors.py @@ -27,6 +27,12 @@ def test_non_df(self): assert not isinstance(adaptor, PandasAdaptor) assert not isinstance(adaptor, DFAdaptor) + def test_already_adaptor(self): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + adaptor = create_adaptor(df) + assert isinstance(adaptor, PandasAdaptor) + assert create_adaptor(adaptor) is adaptor + class TestAdaptor: def test_write_json(self, tmp_path: Path): diff --git a/pins/tests/test_drivers.py b/pins/tests/test_drivers.py index 351550af..5959e028 100644 --- a/pins/tests/test_drivers.py +++ b/pins/tests/test_drivers.py @@ -6,6 +6,7 @@ import pandas as pd import pytest +from pins._adaptors import create_adaptor from pins.config import PINS_ENV_INSECURE_READ from pins.drivers import default_title, load_data, load_path, save_data from pins.errors import PinsInsecureReadError @@ -163,6 +164,23 @@ def test_driver_apply_suffix_false(tmp_path: Path): assert Path(res_fname).name == "some_df" +class TestSaveData: + def test_accepts_pandas_df(self, tmp_path: Path): + import pandas as pd + + df = pd.DataFrame({"x": [1, 2, 3]}) + result = save_data(df, tmp_path / "some_df", "csv") + assert Path(result) == tmp_path / "some_df.csv" + + def test_accepts_adaptor(self, tmp_path: Path): + import pandas as pd + + df = pd.DataFrame({"x": [1, 2, 3]}) + adaptor = create_adaptor(df) + result = save_data(adaptor, tmp_path / "some_df", "csv") + assert Path(result) == tmp_path / "some_df.csv" + + class TestLoadFile: def test_str_file(self): class _MockMetaStrFile: From 18818f6f6f72333b205a5916e1b08482ace634f3 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 4 Jun 2025 11:55:06 +1200 Subject: [PATCH 19/20] Use backported typing_extensions.TypeAlias for Python 3.9 --- pins/_adaptors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pins/_adaptors.py b/pins/_adaptors.py index 5df3b982..80fb9f68 100644 --- a/pins/_adaptors.py +++ b/pins/_adaptors.py @@ -2,9 +2,10 @@ import json from abc import abstractmethod -from typing import TYPE_CHECKING, Any, ClassVar, TypeAlias, overload +from typing import TYPE_CHECKING, Any, ClassVar, overload from databackend import AbstractBackend +from typing_extensions import TypeAlias if TYPE_CHECKING: import pandas as pd From dc683dd00cfe0d880d944c651d43918ace90a08f Mon Sep 17 00:00:00 2001 From: isabel zimmerman Date: Tue, 3 Jun 2025 21:06:26 -0500 Subject: [PATCH 20/20] add typing_extensions --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml 
index a91ca77a..d4d1e357 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "requests", "xxhash>=1", "databackend>=0.0.3", + "typing_extensions" ] [project.optional-dependencies]
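
Taken together, the series leaves pins._adaptors with a small public surface. The sketch below is a rough usage summary, not part of any patch in this series: every name and expected value is taken from the diffs and tests above, and it assumes pandas is installed.

    # Usage sketch only -- not committed anywhere in this series.
    import pandas as pd
    from pins._adaptors import Adaptor, PandasAdaptor, create_adaptor

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    adaptor = create_adaptor(df)               # dispatches on backend type
    assert isinstance(adaptor, PandasAdaptor)
    assert create_adaptor(adaptor) is adaptor  # idempotent as of patch 17

    assert adaptor.default_title("my_df") == "my_df: a pinned 3 x 2 DataFrame"
    assert adaptor.to_json() == '[{"a":1,"b":4},{"a":2,"b":5},{"a":3,"b":6}]'
    adaptor.write_csv("my_df.csv")             # command half of the CQS split

    fallback = create_adaptor(42)              # non-DataFrame objects
    assert isinstance(fallback, Adaptor)
    assert not isinstance(fallback, PandasAdaptor)
    assert fallback.data_preview == "{}"       # base Adaptor has no preview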