Skip to content

Commit a4aecfc

Browse files
authored
Merge pull request #201 from cpcloud/support-pin-write-with-files
feat: add support for `pin_write` with `type="file"`
2 parents d072260 + 47bedd5 commit a4aecfc

File tree

4 files changed

+336
-88
lines changed

4 files changed

+336
-88
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,5 @@ _site/
144144
objects.json
145145
reference/
146146
src/
147+
148+
/.luarc.json

pins/boards.py

Lines changed: 135 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from .versions import VersionRaw, guess_version
1515
from .meta import Meta, MetaRaw, MetaFactory
1616
from .errors import PinsError
17-
from .drivers import load_data, save_data, default_title
17+
from .drivers import load_data, save_data, load_file, default_title
1818
from .utils import inform, warn_deprecated, ExtendMethodDoc
1919
from .config import get_allow_rsc_short_name
2020

@@ -225,7 +225,7 @@ def pin_read(self, name, version: Optional[str] = None, hash: Optional[str] = No
225225
meta, self.construct_path([pin_name, meta.version.version])
226226
)
227227

228-
def pin_write(
228+
def _pin_store(
229229
self,
230230
x,
231231
name: Optional[str] = None,
@@ -236,32 +236,6 @@ def pin_write(
236236
versioned: Optional[bool] = None,
237237
created: Optional[datetime] = None,
238238
) -> Meta:
239-
"""Write a pin object to the board.
240-
241-
Parameters
242-
----------
243-
x:
244-
An object (e.g. a pandas DataFrame) to pin.
245-
name:
246-
Pin name.
247-
type:
248-
File type used to save `x` to disk. May be "csv", "arrow", "parquet",
249-
"joblib", "json", or "file".
250-
title:
251-
A title for the pin; most important for shared boards so that others
252-
can understand what the pin contains. If omitted, a brief description
253-
of the contents will be automatically generated.
254-
description:
255-
A detailed description of the pin contents.
256-
metadata:
257-
A dictionary containing additional metadata to store with the pin.
258-
This gets stored on the Meta.user field.
259-
versioned:
260-
Whether the pin should be versioned. Defaults to versioning.
261-
created:
262-
A date to store in the Meta.created field. This field may be used as
263-
part of the pin version name.
264-
"""
265239

266240
if type == "feather":
267241
warn_deprecated(
@@ -271,6 +245,18 @@ def pin_write(
271245
)
272246
type = "arrow"
273247

248+
if type == "file":
249+
# the file type makes the name of the data the exact filename, rather
250+
# than the pin name + a suffix (e.g. my_pin.csv).
251+
if isinstance(x, (tuple, list)) and len(x) == 1:
252+
x = x[0]
253+
254+
_p = Path(x)
255+
_base_len = len(_p.name) - len("".join(_p.suffixes))
256+
object_name = _p.name[:_base_len]
257+
else:
258+
object_name = None
259+
274260
pin_name = self.path_to_pin(name)
275261

276262
with tempfile.TemporaryDirectory() as tmp_dir:
@@ -285,6 +271,7 @@ def pin_write(
285271
metadata,
286272
versioned,
287273
created,
274+
object_name=object_name,
288275
)
289276

290277
# move pin to destination ----
@@ -326,7 +313,55 @@ def pin_write(
326313

327314
return meta
328315

329-
def pin_download(self, name, version=None, hash=None):
316+
def pin_write(
317+
self,
318+
x,
319+
name: Optional[str] = None,
320+
type: Optional[str] = None,
321+
title: Optional[str] = None,
322+
description: Optional[str] = None,
323+
metadata: Optional[Mapping] = None,
324+
versioned: Optional[bool] = None,
325+
created: Optional[datetime] = None,
326+
) -> Meta:
327+
"""Write a pin object to the board.
328+
329+
Parameters
330+
----------
331+
x:
332+
An object (e.g. a pandas DataFrame) to pin.
333+
name:
334+
Pin name.
335+
type:
336+
File type used to save `x` to disk. May be "csv", "arrow", "parquet",
337+
"joblib", or "json".
338+
title:
339+
A title for the pin; most important for shared boards so that others
340+
can understand what the pin contains. If omitted, a brief description
341+
of the contents will be automatically generated.
342+
description:
343+
A detailed description of the pin contents.
344+
metadata:
345+
A dictionary containing additional metadata to store with the pin.
346+
This gets stored on the Meta.user field.
347+
versioned:
348+
Whether the pin should be versioned. Defaults to versioning.
349+
created:
350+
A date to store in the Meta.created field. This field may be used as
351+
part of the pin version name.
352+
"""
353+
354+
if type == "file":
355+
raise NotImplementedError(
356+
".pin_write() does not support type='file'. "
357+
"Use .pin_upload() to save a file as a pin."
358+
)
359+
360+
return self._pin_store(
361+
x, name, type, title, description, metadata, versioned, created
362+
)
363+
364+
def pin_download(self, name, version=None, hash=None) -> Sequence[str]:
330365
"""Download the files contained in a pin.
331366
332367
This method only downloads the files in a pin. In order to read and load
@@ -342,20 +377,68 @@ def pin_download(self, name, version=None, hash=None):
342377
A hash used to validate the retrieved pin data. If specified, it is
343378
compared against the `pin_hash` field retrieved by [](`~pins.boards.BaseBoard.pin_meta`).
344379
345-
346380
"""
347-
raise NotImplementedError()
348381

349-
def pin_upload(self, paths, name=None, title=None, description=None, metadata=None):
382+
meta = self.pin_fetch(name, version)
383+
384+
if hash is not None:
385+
raise NotImplementedError("TODO: validate hash")
386+
387+
pin_name = self.path_to_pin(name)
388+
389+
# TODO: raise for multiple files
390+
# fetch file
391+
f = load_file(
392+
meta, self.fs, self.construct_path([pin_name, meta.version.version])
393+
)
394+
395+
# could also check whether f isinstance of PinCache
396+
fname = getattr(f, "name", None)
397+
398+
if fname is None:
399+
raise PinsError("pin_download requires a cache.")
400+
401+
return [str(Path(fname).absolute())]
402+
403+
def pin_upload(
404+
self,
405+
paths: "str | list[str]",
406+
name=None,
407+
title=None,
408+
description=None,
409+
metadata=None,
410+
):
350411
"""Write a pin based on paths to one or more files.
351412
352413
This method simply uploads the files given, so they can be downloaded later
353414
using [](`~pins.boards.BaseBoard.pin_download`).
415+
416+
Parameters
417+
----------
418+
paths:
419+
Paths of files to upload. Currently, only uploading a single file
420+
is supported.
421+
name:
422+
Pin name.
423+
title:
424+
A title for the pin; most important for shared boards so that others
425+
can understand what the pin contains. If omitted, a brief description
426+
of the contents will be automatically generated.
427+
description:
428+
A detailed description of the pin contents.
429+
metadata:
430+
A dictionary containing additional metadata to store with the pin.
431+
This gets stored on the Meta.user field.
354432
"""
355-
# TODO(question): why does this method exist? Isn't it equiv to a user
356-
# doing this?: pin_write(board, c("filea.txt", "fileb.txt"), type="file")
357-
# pin_download makes sense, because it will download *regardless of type*
358-
raise NotImplementedError()
433+
434+
return self._pin_store(
435+
paths,
436+
name,
437+
type="file",
438+
title=title,
439+
description=description,
440+
metadata=metadata,
441+
)
359442

360443
def pin_version_delete(self, name: str, version: str):
361444
"""Delete a single version of a pin.
@@ -553,6 +636,7 @@ def prepare_pin_version(
553636
metadata: Optional[Mapping] = None,
554637
versioned: Optional[bool] = None,
555638
created: Optional[datetime] = None,
639+
object_name: Optional[str] = None,
556640
):
557641
if name is None:
558642
raise NotImplementedError("Name must be specified.")
@@ -570,7 +654,10 @@ def prepare_pin_version(
570654
# save all pin data to a temporary folder (including data.txt), so we
571655
# can fs.put it all straight onto the backend filesystem
572656

573-
p_obj = Path(pin_dir_path) / name
657+
if object_name is None:
658+
p_obj = Path(pin_dir_path) / name
659+
else:
660+
p_obj = Path(pin_dir_path) / object_name
574661

575662
# file is saved locally in order to hash, calc size
576663
file_names = save_data(x, str(p_obj), type)
@@ -716,12 +803,19 @@ def pin_download(self, name, version=None, hash=None) -> Sequence[str]:
716803
meta = self.pin_meta(name, version)
717804

718805
if isinstance(meta, MetaRaw):
806+
f = load_file(meta, self.fs, None)
807+
else:
808+
raise NotImplementedError(
809+
"TODO: pin_download currently can only read a url to a single file."
810+
)
719811

720-
return self._load_data(meta, None)
812+
# could also check whether f isinstance of PinCache
813+
fname = getattr(f, "name", None)
721814

722-
raise NotImplementedError(
723-
"TODO: pin_download currently can only read a url to a single file."
724-
)
815+
if fname is None:
816+
raise PinsError("pin_download requires a cache.")
817+
818+
return [str(Path(fname).absolute())]
725819

726820
def construct_path(self, elements):
727821
# TODO: in practice every call to construct_path has the first element of

0 commit comments

Comments
 (0)