Skip to content

Commit 36fc873

Browse files
authored
add missing stat() methods to DBFSPath and WorkspacePath (#144)
Fixes #142 and #143 --------- Co-authored-by: Eric Vergnaud <[email protected]>
1 parent c531c3f commit 36fc873

File tree

3 files changed

+59
-2
lines changed

3 files changed

+59
-2
lines changed

src/databricks/labs/blueprint/paths.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import posixpath
1212
import re
1313
import shutil
14+
import stat
1415
from abc import abstractmethod
1516
from collections.abc import Generator, Iterable, Sequence
1617
from io import BytesIO, StringIO
@@ -121,7 +122,6 @@ class _DatabricksPath(Path, abc.ABC): # pylint: disable=too-many-public-methods
121122
# Public APIs that we don't support.
122123
as_uri = _na("as_uri")
123124
cwd = _na("cwd")
124-
stat = _na("stat")
125125
chmod = _na("chmod")
126126
lchmod = _na("lchmod")
127127
lstat = _na("lstat")
@@ -138,6 +138,7 @@ def __new__(cls, *args, **kwargs):
138138
# Force all initialisation to go via __init__() irrespective of the (Python-specific) base version.
139139
return object.__new__(cls)
140140

141+
# pylint: disable=super-init-not-called
141142
def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None:
142143
# We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the
143144
# implementation of the public API.
@@ -385,6 +386,7 @@ def with_suffix(self: P, suffix: str) -> P:
385386
raise ValueError(msg)
386387
return self.with_name(stem + suffix)
387388

389+
# pylint: disable=arguments-differ
388390
def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P:
389391
normalized = self.with_segments(*other)
390392
if self.anchor != normalized.anchor:
@@ -691,6 +693,14 @@ def _file_info(self) -> FileInfo:
691693
self._cached_file_info = self._ws.dbfs.get_status(self.as_posix())
692694
return self._cached_file_info
693695

696+
def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return an ``os.stat_result`` built from this DBFS path's file info.

    Only the size and modification-time slots are populated; every other slot holds -1.0.

    Args:
        follow_symlinks: accepted as a keyword argument but not used by this implementation.

    Returns:
        A 10-slot ``os.stat_result`` whose ``st_size`` is the reported file size (-1 when no size is
        reported) and whose ``st_mtime`` is the reported modification time divided by 1000 (-1.0 when
        no modification time is reported).
    """
    # Imported by name so the slot indexes resolve even where ``stat`` is bound to this function.
    from stat import ST_MTIME, ST_SIZE  # pylint: disable=import-outside-toplevel

    info = self._file_info
    seq: list[float] = [-1.0] * 10
    # Test against None instead of truthiness: an empty file has size 0, and that must not be
    # reported as "unknown" (-1).
    seq[ST_SIZE] = info.file_size if info.file_size is not None else -1  # slot 6
    # NOTE(review): the division by 1000 suggests the API reports epoch milliseconds - confirm.
    # A missing or zero modification time is reported as -1.0.
    seq[ST_MTIME] = float(info.modification_time) / 1000.0 if info.modification_time else -1.0  # slot 8
    return os.stat_result(seq)
703+
694704
def is_dir(self) -> bool:
695705
"""Return True if the path points to a DBFS directory."""
696706
try:
@@ -841,6 +851,15 @@ def _object_info(self) -> ObjectInfo:
841851
self._cached_object_info = self._ws.workspace.get_status(self.as_posix())
842852
return self._object_info
843853

854+
def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return an ``os.stat_result`` built from this workspace path's object info.

    Only the size, modification-time and creation-time slots are populated; every other slot holds -1.0.

    Args:
        follow_symlinks: accepted as a keyword argument but not used by this implementation.

    Returns:
        A 10-slot ``os.stat_result`` whose ``st_size`` is the reported object size (-1 when no size is
        reported), and whose ``st_mtime`` / ``st_ctime`` are the reported modification / creation times
        divided by 1000 (-1.0 when the respective value is not reported).
    """
    # Imported by name so the slot indexes resolve even where ``stat`` is bound to this function.
    from stat import ST_CTIME, ST_MTIME, ST_SIZE  # pylint: disable=import-outside-toplevel

    info = self._object_info
    seq: list[float] = [-1.0] * 10
    # Test against None instead of truthiness: an empty object has size 0, and that must not be
    # reported as "unknown" (-1).
    seq[ST_SIZE] = info.size if info.size is not None else -1  # slot 6
    # NOTE(review): the division by 1000 suggests the API reports epoch milliseconds - confirm.
    # A missing or zero timestamp is reported as -1.0.
    seq[ST_MTIME] = float(info.modified_at) / 1000.0 if info.modified_at else -1.0  # slot 8
    seq[ST_CTIME] = float(info.created_at) / 1000.0 if info.created_at else -1.0  # slot 9
    return os.stat_result(seq)
862+
844863
def is_dir(self) -> bool:
845864
"""Return True if the path points to a directory in Databricks Workspace."""
846865
try:

tests/integration/test_paths.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import codecs
2+
from datetime import datetime
23
from pathlib import Path
34

45
import pytest
@@ -67,6 +68,21 @@ def test_open_text_io(ws, make_random, cls):
6768
assert not hello_txt.exists()
6869

6970

71+
@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
def test_stat(ws, make_random, cls):
    """A freshly written file must report timestamps no earlier than the moment this test started."""
    started_at = datetime.now().timestamp()
    name = make_random()
    # Create the nested parent folders under the user's home before writing the file.
    parent_dir = cls(ws, f"~/{name}/a/b/c").expanduser()
    parent_dir.mkdir(parents=True)

    hello_txt = parent_dir / "hello.txt"
    hello_txt.write_text("Hello, World!")
    if cls is WorkspacePath:  # DBFSPath has no st_ctime
        assert hello_txt.stat().st_ctime >= started_at
    assert hello_txt.stat().st_mtime >= started_at
84+
85+
7086
@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
7187
def test_unlink(ws, make_random, cls):
7288
name = make_random()

tests/unit/test_paths.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
from databricks.sdk import WorkspaceClient
88
from databricks.sdk.errors import NotFound, ResourceDoesNotExist
99
from databricks.sdk.mixins.workspace import WorkspaceExt
10+
from databricks.sdk.service.files import FileInfo
1011
from databricks.sdk.service.workspace import (
1112
ImportFormat,
1213
Language,
1314
ObjectInfo,
1415
ObjectType,
1516
)
1617

17-
from databricks.labs.blueprint.paths import WorkspacePath
18+
from databricks.labs.blueprint.paths import DBFSPath, WorkspacePath
1819

1920

2021
def test_empty_init() -> None:
@@ -1007,3 +1008,24 @@ def test_rglob() -> None:
10071008
WorkspacePath(ws, "/test/path/dir1/file1.json"),
10081009
WorkspacePath(ws, "/test/path/dir2/file2.json"),
10091010
}
1011+
1012+
1013+
def test_workspace_path_stat_has_fields() -> None:
    """WorkspacePath.stat() exposes the creation time, modification time and size from the object info."""
    info = ObjectInfo(created_at=1234, modified_at=2345, size=3456)
    ws = create_autospec(WorkspaceClient)
    ws.workspace.get_status.return_value = info
    workspace_path = WorkspacePath(ws, "/test/path")
    stats = workspace_path.stat()
    # Timestamps are expected to be the object-info values divided by 1000.
    assert stats.st_ctime == info.created_at / 1000.0
    assert stats.st_mtime == info.modified_at / 1000.0
    assert stats.st_size == info.size
1022+
1023+
1024+
def test_dbfs_path_stat_has_fields() -> None:
    """DBFSPath.stat() exposes the modification time and size from the file info."""
    info = FileInfo(modification_time=2345, file_size=3456)
    ws = create_autospec(WorkspaceClient)
    ws.dbfs.get_status.return_value = info
    dbfs_path = DBFSPath(ws, "/test/path")
    stats = dbfs_path.stat()
    # The timestamp is expected to be the file-info value divided by 1000.
    assert stats.st_mtime == info.modification_time / 1000.0
    assert stats.st_size == info.file_size

0 commit comments

Comments
 (0)