Skip to content

Commit 8d79664

Browse files
authored
Support viewfs scheme alongside hdfs (#777)
1 parent 84a2c04 commit 8d79664

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

pyiceberg/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
284284
"gs": [ARROW_FILE_IO],
285285
"file": [ARROW_FILE_IO, FSSPEC_FILE_IO],
286286
"hdfs": [ARROW_FILE_IO],
287+
"viewfs": [ARROW_FILE_IO],
287288
"abfs": [FSSPEC_FILE_IO],
288289
"abfss": [FSSPEC_FILE_IO],
289290
}

pyiceberg/io/pyarrow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def parse_location(location: str) -> Tuple[str, str, str]:
332332
uri = urlparse(location)
333333
if not uri.scheme:
334334
return "file", uri.netloc, os.path.abspath(location)
335-
elif uri.scheme == "hdfs":
335+
elif uri.scheme in ("hdfs", "viewfs"):
336336
return uri.scheme, uri.netloc, uri.path
337337
else:
338338
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
@@ -356,12 +356,12 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste
356356
client_kwargs["connect_timeout"] = float(connect_timeout)
357357

358358
return S3FileSystem(**client_kwargs)
359-
elif scheme == "hdfs":
359+
elif scheme in ("hdfs", "viewfs"):
360360
from pyarrow.fs import HadoopFileSystem
361361

362362
hdfs_kwargs: Dict[str, Any] = {}
363363
if netloc:
364-
return HadoopFileSystem.from_uri(f"hdfs://{netloc}")
364+
return HadoopFileSystem.from_uri(f"{scheme}://{netloc}")
365365
if host := self.properties.get(HDFS_HOST):
366366
hdfs_kwargs["host"] = host
367367
if port := self.properties.get(HDFS_PORT):

0 commit comments

Comments
 (0)