Skip to content

Commit 8ce70ee

Browse files
authored
Implement created and modified methods from abstract filesystem class… (#1971)
1 parent dd6945a commit 8ce70ee

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed

fsspec/implementations/tests/test_webhdfs.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import shlex
33
import subprocess
44
import time
5+
from datetime import datetime
56

67
import pytest
78

@@ -208,3 +209,87 @@ def test_protocol_prefixed_path(hdfs_cluster):
208209

209210
file_info = fs.ls(protocol_prefixed_path, detail=True)
210211
assert len(file_info) == 0
212+
213+
214+
def test_modified_nonexistent_path(hdfs_cluster):
    """``modified`` raises ``FileNotFoundError`` for a path that is never created."""
    missing = "/user/testuser/nonexistent_file.txt"
    webhdfs = WebHDFS(
        hdfs_cluster,
        user="testuser",
        data_proxy={"worker.example.com": "localhost"},
    )

    # The lookup itself is the assertion: it has to fail with FileNotFoundError.
    with pytest.raises(FileNotFoundError):
        webhdfs.modified(missing)
224+
225+
226+
def test_modified_time(hdfs_cluster):
    """Check ``modified`` for a directory and for a file written inside it.

    The directory timestamp must not be later than the local clock, and the
    file timestamp must lie between the directory timestamp and the local
    clock (bounds inclusive).
    """
    fs = WebHDFS(
        hdfs_cluster,
        user="testuser",
        data_proxy={"worker.example.com": "localhost"},
    )
    dir_path = "/user/testuser/"
    # dir_path already ends with "/", so append the file name directly;
    # inserting another "/" would yield "/user/testuser//testfile.txt".
    file_path = f"{dir_path}testfile.txt"

    fs.mkdir(dir_path)

    # Check first modified time for directories
    modified_dir_date: datetime = fs.modified(dir_path)

    # An upper bound against the local clock is the only thing asserted here.
    # NOTE(review): this assumes the server clock is not ahead of the local
    # one -- confirm for the test cluster.
    assert modified_dir_date <= datetime.now()

    # Create a file and check modified time again
    with fs.open(file_path, "wb") as f:
        f.write(b"test content")

    modified_file_date: datetime = fs.modified(file_path)
    assert modified_file_date >= modified_dir_date
    assert modified_file_date <= datetime.now()
250+
251+
252+
# NOTE: The following two tests are copies of the modified-time tests, as
253+
# WebHDFS does not have a created time API, we are using modified as a proxy.
254+
255+
256+
def test_created_nonexistent_path(hdfs_cluster):
    """``created`` raises ``FileNotFoundError`` for a path that is never created."""
    missing = "/user/testuser/nonexistent_file.txt"
    webhdfs = WebHDFS(
        hdfs_cluster,
        user="testuser",
        data_proxy={"worker.example.com": "localhost"},
    )

    # The lookup itself is the assertion: it has to fail with FileNotFoundError.
    with pytest.raises(FileNotFoundError):
        webhdfs.created(missing)
266+
267+
268+
def test_created_time(hdfs_cluster):
    """Check ``created`` for a directory and for a file written inside it.

    The directory timestamp must be strictly earlier than the local clock,
    and the file timestamp must lie strictly between the directory timestamp
    and the local clock.
    """
    fs = WebHDFS(
        hdfs_cluster,
        user="testuser",
        data_proxy={"worker.example.com": "localhost"},
    )
    dir_path = "/user/testuser/"
    # dir_path already ends with "/", so append the file name directly;
    # inserting another "/" would yield "/user/testuser//testfile.txt".
    file_path = f"{dir_path}testfile.txt"

    fs.mkdir(dir_path)

    # Presumably here so the strict "<" comparison against the local clock
    # below cannot tie with the directory timestamp.
    time.sleep(1)

    # Check first created time for directories
    created_dir_date: datetime = fs.created(dir_path)

    # A strict upper bound against the local clock is the only thing asserted.
    # NOTE(review): this assumes the server clock is not ahead of the local
    # one -- confirm for the test cluster.
    assert created_dir_date < datetime.now()

    # Create a file and check created time again
    with fs.open(file_path, "wb") as f:
        f.write(b"test content")

    # Same reasoning as the first delay: keeps the file timestamp strictly
    # below the "now" it is compared with.
    time.sleep(1)

    created_file_date: datetime = fs.created(file_path)
    assert created_file_date > created_dir_date
    assert created_file_date < datetime.now()

fsspec/implementations/webhdfs.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import tempfile
88
import uuid
99
from contextlib import suppress
10+
from datetime import datetime
1011
from urllib.parse import quote
1112

1213
import requests
@@ -268,6 +269,23 @@ def info(self, path):
268269
info["name"] = path
269270
return self._process_info(info)
270271

272+
def created(self, path):
    """Return the created timestamp of a file as a datetime.datetime

    The API does not provide a creation time, so the ``modificationTime``
    entry of ``self.info(path)`` is used in its place. The value is divided
    by 1000 and passed to ``datetime.fromtimestamp`` without a timezone, so
    the result is a naive datetime in local time.
    NOTE(review): the division implies milliseconds since the epoch --
    confirm against the WebHDFS FileStatus documentation.

    Raises
    ------
    RuntimeError
        If the info record has no ``modificationTime`` entry, or it is None.
    """
    millis = self.info(path).get("modificationTime")
    # Compare against None explicitly so that a timestamp of 0 is still valid.
    if millis is None:
        raise RuntimeError("Could not retrieve creation time (modification time).")
    return datetime.fromtimestamp(millis / 1000)
280+
281+
def modified(self, path):
    """Return the modified timestamp of a file as a datetime.datetime

    Reads the ``modificationTime`` entry of ``self.info(path)``, divides it
    by 1000 and passes it to ``datetime.fromtimestamp`` without a timezone,
    so the result is a naive datetime in local time.
    NOTE(review): the division implies milliseconds since the epoch --
    confirm against the WebHDFS FileStatus documentation.

    Raises
    ------
    RuntimeError
        If the info record has no ``modificationTime`` entry, or it is None.
    """
    millis = self.info(path).get("modificationTime")
    # Compare against None explicitly so that a timestamp of 0 is still valid.
    if millis is None:
        raise RuntimeError("Could not retrieve modification time.")
    return datetime.fromtimestamp(millis / 1000)
288+
271289
def ls(self, path, detail=False, **kwargs):
272290
out = self._call("LISTSTATUS", path=path)
273291
infos = out.json()["FileStatuses"]["FileStatus"]

0 commit comments

Comments
 (0)